69 files changed, 2651 insertions, 465 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index ca74617cc3..06635ba81a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -281,6 +281,7 @@ F: target/ppc/ F: hw/ppc/ F: include/hw/ppc/ F: disas/ppc.c +F: tests/acceptance/machine_ppc.py RISC-V TCG CPUs M: Palmer Dabbelt <palmer@dabbelt.com> @@ -1118,6 +1119,7 @@ M: Edgar E. Iglesias <edgar.iglesias@gmail.com> S: Maintained F: hw/microblaze/petalogix_s3adsp1800_mmu.c F: include/hw/char/xilinx_uartlite.h +F: tests/acceptance/machine_microblaze.py petalogix_ml605 M: Edgar E. Iglesias <edgar.iglesias@gmail.com> @@ -305,7 +305,7 @@ endif @echo 'Test targets:' $(call print-help,check,Run all tests (check-help for details)) $(call print-help,bench,Run all benchmarks) - $(call print-help,docker,Help about targets running tests inside containers) + $(call print-help,docker-help,Help about targets running tests inside containers) $(call print-help,vm-help,Help about targets running tests inside VM) @echo '' @echo 'Documentation targets:' diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index afd75ab628..75d7fa9510 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -900,10 +900,11 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) ImageEntry *image_entry, *next_ie; SnapshotEntry *snapshot_entry; + Error *err = NULL; - bs = bdrv_all_find_vmstate_bs(); + bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, &err); if (!bs) { - monitor_printf(mon, "No available block device supports snapshots\n"); + error_report_err(err); return; } aio_context = bdrv_get_aio_context(bs); @@ -953,7 +954,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) total = 0; for (i = 0; i < nb_sns; i++) { SnapshotEntry *next_sn; - if (bdrv_all_find_snapshot(sn_tab[i].name, &bs1) == 0) { + if (bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL) == 1) { global_snapshots[total] = i; total++; QTAILQ_FOREACH(image_entry, &image_list, next) { diff --git a/block/snapshot.c b/block/snapshot.c index a2bf3a54eb..e8ae9a28c1 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -447,6 +447,41 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, return ret; } + +static int bdrv_all_get_snapshot_devices(bool has_devices, strList *devices, + GList **all_bdrvs, + Error **errp) +{ + g_autoptr(GList) bdrvs = NULL; + + if (has_devices) { + if (!devices) { + error_setg(errp, "At least one device is required for snapshot"); + return -1; + } + + while (devices) { + BlockDriverState *bs = bdrv_find_node(devices->value); + if (!bs) { + error_setg(errp, "No block device node '%s'", devices->value); + return -1; + } + bdrvs = g_list_append(bdrvs, bs); + devices = devices->next; + } + } else { + BlockDriverState *bs; + BdrvNextIterator it; + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + bdrvs = g_list_append(bdrvs, bs); + } + } + + *all_bdrvs = g_steal_pointer(&bdrvs); + return 0; +} + + static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) { if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { @@ -462,44 +497,59 @@ static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) * These functions will properly handle dataplane (take aio_context_acquire * when appropriate for appropriate block drivers) */ -bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) +bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + Error **errp) { - bool ok = true; - BlockDriverState *bs; - BdrvNextIterator it; + g_autoptr(GList) bdrvs = NULL; + GList *iterbdrvs; + + if 
(bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { + return false; + } - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); + bool ok = true; aio_context_acquire(ctx); - if (bdrv_all_snapshots_includes_bs(bs)) { + if (devices || bdrv_all_snapshots_includes_bs(bs)) { ok = bdrv_can_snapshot(bs); } aio_context_release(ctx); if (!ok) { - bdrv_next_cleanup(&it); - goto fail; + error_setg(errp, "Device '%s' is writable but does not support " + "snapshots", bdrv_get_device_or_node_name(bs)); + return false; } + + iterbdrvs = iterbdrvs->next; } -fail: - *first_bad_bs = bs; - return ok; + return true; } -int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, +int bdrv_all_delete_snapshot(const char *name, + bool has_devices, strList *devices, Error **errp) { - int ret = 0; - BlockDriverState *bs; - BdrvNextIterator it; - QEMUSnapshotInfo sn1, *snapshot = &sn1; + g_autoptr(GList) bdrvs = NULL; + GList *iterbdrvs; + + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { + return -1; + } - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); + QEMUSnapshotInfo sn1, *snapshot = &sn1; + int ret = 0; aio_context_acquire(ctx); - if (bdrv_all_snapshots_includes_bs(bs) && + if ((devices || bdrv_all_snapshots_includes_bs(bs)) && bdrv_snapshot_find(bs, snapshot, name) >= 0) { ret = bdrv_snapshot_delete(bs, snapshot->id_str, @@ -507,118 +557,180 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, } aio_context_release(ctx); if (ret < 0) { - bdrv_next_cleanup(&it); - goto fail; + error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", + name, bdrv_get_device_or_node_name(bs)); + return -1; } + + iterbdrvs = iterbdrvs->next; } -fail: - *first_bad_bs = bs; - return ret; + return 0; } -int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, +int bdrv_all_goto_snapshot(const char *name, + bool has_devices, strList *devices, Error **errp) { - int ret = 0; - BlockDriverState *bs; - BdrvNextIterator it; + g_autoptr(GList) bdrvs = NULL; + GList *iterbdrvs; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { + return -1; + } + + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); + int ret = 0; aio_context_acquire(ctx); - if (bdrv_all_snapshots_includes_bs(bs)) { + if (devices || bdrv_all_snapshots_includes_bs(bs)) { ret = bdrv_snapshot_goto(bs, name, errp); } aio_context_release(ctx); if (ret < 0) { - bdrv_next_cleanup(&it); - goto fail; + error_prepend(errp, "Could not load snapshot '%s' on '%s': ", + name, bdrv_get_device_or_node_name(bs)); + return -1; } + + iterbdrvs = iterbdrvs->next; } -fail: - *first_bad_bs = bs; - return ret; + return 0; } -int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) +int bdrv_all_has_snapshot(const char *name, + bool has_devices, strList *devices, + Error **errp) { - QEMUSnapshotInfo sn; - int err = 0; - BlockDriverState *bs; - BdrvNextIterator it; + g_autoptr(GList) bdrvs = NULL; + GList *iterbdrvs; - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + if (bdrv_all_get_snapshot_devices(has_devices, devices, 
&bdrvs, errp) < 0) { + return -1; + } + + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); + QEMUSnapshotInfo sn; + int ret = 0; aio_context_acquire(ctx); - if (bdrv_all_snapshots_includes_bs(bs)) { - err = bdrv_snapshot_find(bs, &sn, name); + if (devices || bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_find(bs, &sn, name); } aio_context_release(ctx); - if (err < 0) { - bdrv_next_cleanup(&it); - goto fail; + if (ret < 0) { + if (ret == -ENOENT) { + return 0; + } else { + error_setg_errno(errp, errno, + "Could not check snapshot '%s' on '%s'", + name, bdrv_get_device_or_node_name(bs)); + return -1; + } } + + iterbdrvs = iterbdrvs->next; } -fail: - *first_bad_bs = bs; - return err; + return 1; } int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, BlockDriverState *vm_state_bs, uint64_t vm_state_size, - BlockDriverState **first_bad_bs) + bool has_devices, strList *devices, + Error **errp) { - int err = 0; - BlockDriverState *bs; - BdrvNextIterator it; + g_autoptr(GList) bdrvs = NULL; + GList *iterbdrvs; + + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { + return -1; + } - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); + int ret = 0; aio_context_acquire(ctx); if (bs == vm_state_bs) { sn->vm_state_size = vm_state_size; - err = bdrv_snapshot_create(bs, sn); - } else if (bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_create(bs, sn); + } else if (devices || bdrv_all_snapshots_includes_bs(bs)) { sn->vm_state_size = 0; - err = bdrv_snapshot_create(bs, sn); + ret = bdrv_snapshot_create(bs, sn); } aio_context_release(ctx); - if (err < 0) { - bdrv_next_cleanup(&it); - goto fail; + if (ret < 0) { + error_setg(errp, "Could not create snapshot '%s' on '%s'", + sn->name, bdrv_get_device_or_node_name(bs)); + return -1; } + + iterbdrvs = iterbdrvs->next; } -fail: - *first_bad_bs = bs; - return err; + return 0; } -BlockDriverState *bdrv_all_find_vmstate_bs(void) + +BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, + bool has_devices, strList *devices, + Error **errp) { - BlockDriverState *bs; - BdrvNextIterator it; + g_autoptr(GList) bdrvs = NULL; + GList *iterbdrvs; + + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { + return NULL; + } - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); - bool found; + bool found = false; aio_context_acquire(ctx); - found = bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs); + found = (devices || bdrv_all_snapshots_includes_bs(bs)) && + bdrv_can_snapshot(bs); aio_context_release(ctx); - if (found) { - bdrv_next_cleanup(&it); - break; + if (vmstate_bs) { + if (g_str_equal(vmstate_bs, + bdrv_get_node_name(bs))) { + if (found) { + return bs; + } else { + error_setg(errp, + "vmstate block device '%s' does not support snapshots", + vmstate_bs); + return NULL; + } + } + } else if (found) { + return bs; } + + iterbdrvs = iterbdrvs->next; + } + + if (vmstate_bs) { + error_setg(errp, + "vmstate block device '%s' does not exist", vmstate_bs); + } else { + error_setg(errp, + "no block device can store vmstate for snapshot"); } - return bs; + return NULL; } @@ -198,8 +198,8 @@ has() { } version_ge () { - local_ver1=`echo $1 | tr . 
' '` - local_ver2=`echo $2 | tr . ' '` + local_ver1=$(expr "$1" : '\([0-9.]*\)' | tr . ' ') + local_ver2=$(echo "$2" | tr . ' ') while true; do set x $local_ver1 local_first=${2-0} @@ -6115,7 +6115,7 @@ fi if test -n "$gdb_bin"; then gdb_version=$($gdb_bin --version | head -n 1) - if version_ge ${gdb_version##* } 8.3.1; then + if version_ge ${gdb_version##* } 9.1; then echo "HAVE_GDB_BIN=$gdb_bin" >> $config_host_mak fi fi diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst index 809af69725..209f9d8172 100644 --- a/docs/devel/testing.rst +++ b/docs/devel/testing.rst @@ -765,9 +765,6 @@ and hypothetical example follows: class MultipleMachines(Test): - """ - :avocado: enable - """ def test_multiple_machines(self): first_machine = self.get_vm() second_machine = self.get_vm() diff --git a/docs/system/arm/versatile.rst b/docs/system/arm/versatile.rst index 51221c30a4..2ae792bac3 100644 --- a/docs/system/arm/versatile.rst +++ b/docs/system/arm/versatile.rst @@ -27,3 +27,37 @@ The Arm Versatile baseboard is emulated with the following devices: devices. - PL181 MultiMedia Card Interface with SD card. + +Booting a Linux kernel +---------------------- + +Building a current Linux kernel with ``versatile_defconfig`` should be +enough to get something running. Nowadays an out-of-tree build is +recommended (and also useful if you build a lot of different targets). +In the following example $BLD points to the build directory and $SRC +points to the root of the Linux source tree. You can drop $SRC if you +are running from there. + +.. code-block:: bash + + $ make O=$BLD -C $SRC ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- versatile_defconfig + $ make O=$BLD -C $SRC ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- + +You may want to enable some additional modules if you want to boot +something from the SCSI interface:: + + CONFIG_PCI=y + CONFIG_PCI_VERSATILE=y + CONFIG_SCSI=y + CONFIG_SCSI_SYM53C8XX_2=y + +You can then boot with a command line like: + +.. code-block:: bash + + $ qemu-system-arm -machine type=versatilepb \ + -serial mon:stdio \ + -drive if=scsi,driver=file,filename=debian-buster-armel-rootfs.ext4 \ + -kernel zImage \ + -dtb versatile-pb.dtb \ + -append "console=ttyAMA0 ro root=/dev/sda" diff --git a/docs/system/arm/vexpress.rst b/docs/system/arm/vexpress.rst index 7f1bcbef07..3e3839e923 100644 --- a/docs/system/arm/vexpress.rst +++ b/docs/system/arm/vexpress.rst @@ -58,3 +58,31 @@ Other differences between the hardware and the QEMU model: ``vexpress-a15``, and have IRQs from 40 upwards. If a dtb is provided on the command line then QEMU will edit it to include suitable entries describing these transports for the guest. + +Booting a Linux kernel +---------------------- + +Building a current Linux kernel with ``multi_v7_defconfig`` should be +enough to get something running. Nowadays an out-of-tree build is +recommended (and also useful if you build a lot of different targets). +In the following example $BLD points to the build directory and $SRC +points to the root of the Linux source tree. You can drop $SRC if you +are running from there. + +.. code-block:: bash + + $ make O=$BLD -C $SRC ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- multi_v7_defconfig + $ make O=$BLD -C $SRC ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- + +By default you will want to boot your rootfs off the sdcard interface. +Your rootfs will need to be padded to the right size. With a suitable +DTB you could also add devices to the virtio-mmio bus. + +.. 
code-block:: bash + + $ qemu-system-arm -cpu cortex-a15 -smp 4 -m 4096 \ + -machine type=vexpress-a15 -serial mon:stdio \ + -drive if=sd,driver=file,filename=armel-rootfs.ext4 \ + -kernel zImage \ + -dtb vexpress-v2p-ca15-tc1.dtb \ + -append "console=ttyAMA0 root=/dev/mmcblk0 ro" @@ -2245,7 +2245,6 @@ static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) { TaskState *ts; unsigned long offset, len, saved_auxv, auxv_len; - const char *mem; if (gdb_ctx->num_params < 2) { put_packet("E22"); @@ -2257,8 +2256,8 @@ static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) ts = gdbserver_state.c_cpu->opaque; saved_auxv = ts->info->saved_auxv; auxv_len = ts->info->auxv_len; - mem = (const char *)(saved_auxv + offset); - if (offset > auxv_len) { + + if (offset >= auxv_len) { put_packet("E00"); return; } @@ -2269,12 +2268,20 @@ static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) if (len < auxv_len - offset) { g_string_assign(gdbserver_state.str_buf, "m"); - memtox(gdbserver_state.str_buf, mem, len); } else { g_string_assign(gdbserver_state.str_buf, "l"); - memtox(gdbserver_state.str_buf, mem, auxv_len - offset); + len = auxv_len - offset; + } + + g_byte_array_set_size(gdbserver_state.mem_buf, len); + if (target_memory_rw_debug(gdbserver_state.g_cpu, saved_auxv + offset, + gdbserver_state.mem_buf->data, len, false)) { + put_packet("E14"); + return; } + memtox(gdbserver_state.str_buf, + (const char *)gdbserver_state.mem_buf->data, len); put_packet_binary(gdbserver_state.str_buf->str, gdbserver_state.str_buf->len, true); } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index b89f810034..24b4972300 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -2183,6 +2183,16 @@ static int spapr_pci_pre_save(void *opaque) return 0; } +static int spapr_pci_post_save(void *opaque) +{ + SpaprPhbState *sphb = opaque; + + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = 0; + return 0; +} + static int spapr_pci_post_load(void *opaque, int version_id) { SpaprPhbState *sphb = opaque; @@ -2215,6 +2225,7 @@ static const VMStateDescription vmstate_spapr_pci = { .version_id = 2, .minimum_version_id = 2, .pre_save = spapr_pci_pre_save, + .post_save = spapr_pci_post_save, .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL), diff --git a/include/block/snapshot.h b/include/block/snapshot.h index b0fe42993d..940345692f 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -25,7 +25,7 @@ #ifndef SNAPSHOT_H #define SNAPSHOT_H - +#include "qapi/qapi-builtin-types.h" #define SNAPSHOT_OPT_BASE "snapshot." 
#define SNAPSHOT_OPT_ID "snapshot.id" @@ -77,17 +77,26 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, * These functions will properly handle dataplane (take aio_context_acquire * when appropriate for appropriate block drivers */ -bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); -int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, +bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + Error **errp); +int bdrv_all_delete_snapshot(const char *name, + bool has_devices, strList *devices, Error **errp); -int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, +int bdrv_all_goto_snapshot(const char *name, + bool has_devices, strList *devices, Error **errp); -int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_has_snapshot(const char *name, + bool has_devices, strList *devices, + Error **errp); int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, BlockDriverState *vm_state_bs, uint64_t vm_state_size, - BlockDriverState **first_bad_bs); + bool has_devices, + strList *devices, + Error **errp); -BlockDriverState *bdrv_all_find_vmstate_bs(void); +BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, + bool has_devices, strList *devices, + Error **errp); #endif diff --git a/include/exec/memory.h b/include/exec/memory.h index ecba90bfd8..ed292767cd 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -147,6 +147,14 @@ typedef struct IOMMUTLBEvent { /* RAM is a persistent kind memory */ #define RAM_PMEM (1 << 5) + +/* + * UFFDIO_WRITEPROTECT is used on this RAMBlock to + * support 'write-tracking' migration type. + * Implies ram_state->ram_wt_enabled. + */ +#define RAM_UF_WRITEPROTECT (1 << 6) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h index c85b6ec75b..e72083b117 100644 --- a/include/migration/snapshot.h +++ b/include/migration/snapshot.h @@ -15,7 +15,50 @@ #ifndef QEMU_MIGRATION_SNAPSHOT_H #define QEMU_MIGRATION_SNAPSHOT_H -int save_snapshot(const char *name, Error **errp); -int load_snapshot(const char *name, Error **errp); +#include "qapi/qapi-builtin-types.h" + +/** + * save_snapshot: Save an internal snapshot. + * @name: name of internal snapshot + * @overwrite: replace existing snapshot with @name + * @vmstate: blockdev node name to store VM state in + * @has_devices: whether to use explicit device list + * @devices: explicit device list to snapshot + * @errp: pointer to error object + * On success, return %true. + * On failure, store an error through @errp and return %false. + */ +bool save_snapshot(const char *name, bool overwrite, + const char *vmstate, + bool has_devices, strList *devices, + Error **errp); + +/** + * load_snapshot: Load an internal snapshot. + * @name: name of internal snapshot + * @vmstate: blockdev node name to load VM state from + * @has_devices: whether to use explicit device list + * @devices: explicit device list to snapshot + * @errp: pointer to error object + * On success, return %true. + * On failure, store an error through @errp and return %false. + */ +bool load_snapshot(const char *name, + const char *vmstate, + bool has_devices, strList *devices, + Error **errp); + +/** + * delete_snapshot: Delete a snapshot. 
+ * @name: path to snapshot + * @has_devices: whether to use explicit device list + * @devices: explicit device list to snapshot + * @errp: pointer to error object + * On success, return %true. + * On failure, store an error through @errp and return %false. + */ +bool delete_snapshot(const char *name, + bool has_devices, strList *devices, + Error **errp); #endif diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h new file mode 100644 index 0000000000..6b74f92792 --- /dev/null +++ b/include/qemu/userfaultfd.h @@ -0,0 +1,35 @@ +/* + * Linux UFFD-WP support + * + * Copyright Virtuozzo GmbH, 2020 + * + * Authors: + * Andrey Gruzdev <andrey.gruzdev@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef USERFAULTFD_H +#define USERFAULTFD_H + +#include "qemu/osdep.h" +#include "exec/hwaddr.h" +#include <linux/userfaultfd.h> + +int uffd_query_features(uint64_t *features); +int uffd_create_fd(uint64_t features, bool non_blocking); +void uffd_close_fd(int uffd_fd); +int uffd_register_memory(int uffd_fd, void *addr, uint64_t length, + uint64_t mode, uint64_t *ioctls); +int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length); +int uffd_change_protection(int uffd_fd, void *addr, uint64_t length, + bool wp, bool dont_wake); +int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, + uint64_t length, bool dont_wake); +int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake); +int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); +int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); +bool uffd_poll_events(int uffd_fd, int tmo); + +#endif /* USERFAULTFD_H */ diff --git a/migration/migration.c b/migration/migration.c index 1986cb8573..a5ddf43559 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -58,6 +58,7 @@ #include "qemu/queue.h" #include "multifd.h" #include "qemu/yank.h" +#include "sysemu/cpus.h" #ifdef CONFIG_VFIO #include "hw/vfio/vfio-common.h" @@ -134,6 +135,38 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; +/* Migration capabilities set */ +struct MigrateCapsSet { + int size; /* Capability set size */ + MigrationCapability caps[]; /* Variadic array of capabilities */ +}; +typedef struct MigrateCapsSet MigrateCapsSet; + +/* Define and initialize MigrateCapsSet */ +#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ + MigrateCapsSet _name = { \ + .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ + .caps = { __VA_ARGS__ } \ + } + +/* Background-snapshot compatibility check list */ +static const +INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_POSTCOPY_RAM, + MIGRATION_CAPABILITY_DIRTY_BITMAPS, + MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, + MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, + MIGRATION_CAPABILITY_RETURN_PATH, + MIGRATION_CAPABILITY_MULTIFD, + MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, + MIGRATION_CAPABILITY_AUTO_CONVERGE, + MIGRATION_CAPABILITY_RELEASE_RAM, + MIGRATION_CAPABILITY_RDMA_PIN_ALL, + MIGRATION_CAPABILITY_COMPRESS, + MIGRATION_CAPABILITY_XBZRLE, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); + /* When we add fault tolerance, we could have several migrations at once. 
For now we don't need to add dynamic creation of migration */ @@ -141,6 +174,8 @@ enum mig_rp_message_type { static MigrationState *current_migration; static MigrationIncomingState *current_incoming; +static GSList *migration_blockers; + static bool migration_object_check(MigrationState *ms, Error **errp); static int migration_maybe_pause(MigrationState *s, int *current_active_state, @@ -1041,6 +1076,27 @@ static void fill_source_migration_info(MigrationInfo *info) { MigrationState *s = migrate_get_current(); + info->blocked = migration_is_blocked(NULL); + info->has_blocked_reasons = info->blocked; + info->blocked_reasons = NULL; + if (info->blocked) { + GSList *cur_blocker = migration_blockers; + + /* + * There are two types of reasons a migration might be blocked; + * a) devices marked in VMState as non-migratable, and + * b) Explicit migration blockers + * We need to add both of them here. + */ + qemu_savevm_non_migratable_list(&info->blocked_reasons); + + while (cur_blocker) { + QAPI_LIST_PREPEND(info->blocked_reasons, + g_strdup(error_get_pretty(cur_blocker->data))); + cur_blocker = g_slist_next(cur_blocker); + } + } + switch (s->state) { case MIGRATION_STATUS_NONE: /* no migration has happened ever */ @@ -1089,6 +1145,31 @@ static void fill_source_migration_info(MigrationInfo *info) info->status = s->state; } +typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, + WT_SUPPORT_AVAILABLE, + WT_SUPPORT_COMPATIBLE +} WriteTrackingSupport; + +static +WriteTrackingSupport migrate_query_write_tracking(void) +{ + /* Check if kernel supports required UFFD features */ + if (!ram_write_tracking_available()) { + return WT_SUPPORT_ABSENT; + } + /* + * Check if current memory configuration is + * compatible with required UFFD features. + */ + if (!ram_write_tracking_compatible()) { + return WT_SUPPORT_AVAILABLE; + } + + return WT_SUPPORT_COMPATIBLE; +} + /** * @migration_caps_check - check capability validity * @@ -1150,6 +1231,39 @@ static bool migrate_caps_check(bool *cap_list, } } + if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { + WriteTrackingSupport wt_support; + int idx; + /* + * Check if 'background-snapshot' capability is supported by + * host kernel and compatible with guest memory configuration. + */ + wt_support = migrate_query_write_tracking(); + if (wt_support < WT_SUPPORT_AVAILABLE) { + error_setg(errp, "Background-snapshot is not supported by host kernel"); + return false; + } + if (wt_support < WT_SUPPORT_COMPATIBLE) { + error_setg(errp, "Background-snapshot is not compatible " + "with guest memory configuration"); + return false; + } + + /* + * Check if there are any migration capabilities + * incompatible with 'background-snapshot'. 
+ */ + for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { + int incomp_cap = check_caps_background_snapshot.caps[idx]; + if (cap_list[incomp_cap]) { + error_setg(errp, + "Background-snapshot is not compatible with %s", + MigrationCapability_str(incomp_cap)); + return false; + } + } + } + return true; } @@ -1226,21 +1340,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) if (params->has_compress_level && (params->compress_level > 9)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", - "is invalid, it should be in the range of 0 to 9"); + "a value between 0 and 9"); return false; } if (params->has_compress_threads && (params->compress_threads < 1)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_threads", - "is invalid, it should be in the range of 1 to 255"); + "a value between 1 and 255"); return false; } if (params->has_decompress_threads && (params->decompress_threads < 1)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "decompress_threads", - "is invalid, it should be in the range of 1 to 255"); + "a value between 1 and 255"); return false; } @@ -1293,21 +1407,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) if (params->has_multifd_channels && (params->multifd_channels < 1)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_channels", - "is invalid, it should be in the range of 1 to 255"); + "a value between 1 and 255"); return false; } if (params->has_multifd_zlib_level && (params->multifd_zlib_level > 9)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", - "is invalid, it should be in the range of 0 to 9"); + "a value between 0 and 9"); return false; } if (params->has_multifd_zstd_level && (params->multifd_zstd_level > 20)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", - "is invalid, it should be in the range of 0 to 20"); + "a value between 0 and 20"); return false; } @@ -1316,8 +1430,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) !is_power_of_2(params->xbzrle_cache_size))) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "xbzrle_cache_size", - "is invalid, it should be bigger than target page size" - " and a power of 2"); + "a power of two no less than the target page size"); return false; } @@ -1334,21 +1447,21 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) params->announce_initial > 100000) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "announce_initial", - "is invalid, it must be less than 100000 ms"); + "a value between 0 and 100000"); return false; } if (params->has_announce_max && params->announce_max > 100000) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "announce_max", - "is invalid, it must be less than 100000 ms"); + "a value between 0 and 100000"); return false; } if (params->has_announce_rounds && params->announce_rounds > 1000) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "announce_rounds", - "is invalid, it must be in the range of 0 to 1000"); + "a value between 0 and 1000"); return false; } if (params->has_announce_step && @@ -1356,7 +1469,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) params->announce_step > 10000)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "announce_step", - "is invalid, it must be in the range of 1 to 10000 ms"); + "a value between 0 and 10000"); return false; } @@ -1909,6 +2022,7 @@ void migrate_init(MigrationState *s) * locks. 
*/ s->cleanup_bh = 0; + s->vm_start_bh = 0; s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; s->rp_state.from_dst_file = NULL; @@ -1934,8 +2048,6 @@ void migrate_init(MigrationState *s) s->threshold_size = 0; } -static GSList *migration_blockers; - int migrate_add_blocker(Error *reason, Error **errp) { if (only_migratable) { @@ -2216,7 +2328,7 @@ void qmp_migrate_set_cache_size(int64_t value, Error **errp) qmp_migrate_set_parameters(&p, errp); } -int64_t qmp_query_migrate_cache_size(Error **errp) +uint64_t qmp_query_migrate_cache_size(Error **errp) { return migrate_xbzrle_cache_size(); } @@ -2446,7 +2558,7 @@ int migrate_use_xbzrle(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; } -int64_t migrate_xbzrle_cache_size(void) +uint64_t migrate_xbzrle_cache_size(void) { MigrationState *s; @@ -2491,6 +2603,15 @@ bool migrate_use_block_incremental(void) return s->parameters.block_incremental; } +bool migrate_background_snapshot(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; +} + /* migration thread support */ /* * Something bad happened to the RP stream, mark an error @@ -3117,6 +3238,50 @@ fail: MIGRATION_STATUS_FAILED); } +/** + * bg_migration_completion: Used by bg_migration_thread when after all the + * RAM has been saved. The caller 'breaks' the loop when this returns. + * + * @s: Current migration state + */ +static void bg_migration_completion(MigrationState *s) +{ + int current_active_state = s->state; + + /* + * Stop tracking RAM writes - un-protect memory, un-register UFFD + * memory ranges, flush kernel wait queues and wake up threads + * waiting for write fault to be resolved. + */ + ram_write_tracking_stop(); + + if (s->state == MIGRATION_STATUS_ACTIVE) { + /* + * By this moment we have RAM content saved into the migration stream. + * The next step is to flush the non-RAM content (device state) + * right after the ram content. The device state has been stored into + * the temporary buffer before RAM saving started. + */ + qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); + qemu_fflush(s->to_dst_file); + } else if (s->state == MIGRATION_STATUS_CANCELLING) { + goto fail; + } + + if (qemu_file_get_error(s->to_dst_file)) { + trace_migration_completion_file_err(); + goto fail; + } + + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_COMPLETED); + return; + +fail: + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_FAILED); +} + bool migrate_colo_enabled(void) { MigrationState *s = migrate_get_current(); @@ -3457,6 +3622,47 @@ static void migration_iteration_finish(MigrationState *s) qemu_mutex_unlock_iothread(); } +static void bg_migration_iteration_finish(MigrationState *s) +{ + qemu_mutex_lock_iothread(); + switch (s->state) { + case MIGRATION_STATUS_COMPLETED: + migration_calculate_complete(s); + break; + + case MIGRATION_STATUS_ACTIVE: + case MIGRATION_STATUS_FAILED: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_CANCELLING: + break; + + default: + /* Should not reach here, but if so, forgive the VM. */ + error_report("%s: Unknown ending state %d", __func__, s->state); + break; + } + + migrate_fd_cleanup_schedule(s); + qemu_mutex_unlock_iothread(); +} + +/* + * Return true if continue to the next iteration directly, false + * otherwise. 
+ */ +static MigIterateState bg_migration_iteration_run(MigrationState *s) +{ + int res; + + res = qemu_savevm_state_iterate(s->to_dst_file, false); + if (res > 0) { + bg_migration_completion(s); + return MIG_ITERATE_BREAK; + } + + return MIG_ITERATE_RESUME; +} + void migration_make_urgent_request(void) { qemu_sem_post(&migrate_get_current()->rate_limit_sem); @@ -3604,6 +3810,165 @@ static void *migration_thread(void *opaque) return NULL; } +static void bg_migration_vm_start_bh(void *opaque) +{ + MigrationState *s = opaque; + + qemu_bh_delete(s->vm_start_bh); + s->vm_start_bh = NULL; + + vm_start(); + s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start; +} + +/** + * Background snapshot thread, based on live migration code. + * This is an alternative implementation of live migration mechanism + * introduced specifically to support background snapshots. + * + * It takes advantage of userfault_fd write protection mechanism introduced + * in v5.7 kernel. Compared to existing dirty page logging migration much + * lesser stream traffic is produced resulting in smaller snapshot images, + * simply cause of no page duplicates can get into the stream. + * + * Another key point is that generated vmstate stream reflects machine state + * 'frozen' at the beginning of snapshot creation compared to dirty page logging + * mechanism, which effectively results in that saved snapshot is the state of VM + * at the end of the process. + */ +static void *bg_migration_thread(void *opaque) +{ + MigrationState *s = opaque; + int64_t setup_start; + MigThrError thr_error; + QEMUFile *fb; + bool early_fail = true; + + rcu_register_thread(); + object_ref(OBJECT(s)); + + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + + setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); + /* + * We want to save vmstate for the moment when migration has been + * initiated but also we want to save RAM content while VM is running. + * The RAM content should appear first in the vmstate. So, we first + * stash the non-RAM part of the vmstate to the temporary buffer, + * then write RAM part of the vmstate to the migration stream + * with vCPUs running and, finally, write stashed non-RAM part of + * the vmstate from the buffer to the migration stream. + */ + s->bioc = qio_channel_buffer_new(128 * 1024); + qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer"); + fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc)); + object_unref(OBJECT(s->bioc)); + + update_iteration_initial_status(s); + + qemu_savevm_state_header(s->to_dst_file); + qemu_savevm_state_setup(s->to_dst_file); + + if (qemu_savevm_state_guest_unplug_pending()) { + migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, + MIGRATION_STATUS_WAIT_UNPLUG); + + while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && + qemu_savevm_state_guest_unplug_pending()) { + qemu_sem_timedwait(&s->wait_unplug_sem, 250); + } + + migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, + MIGRATION_STATUS_ACTIVE); + } else { + migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, + MIGRATION_STATUS_ACTIVE); + } + s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; + + trace_migration_thread_setup_complete(); + s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + qemu_mutex_lock_iothread(); + + /* + * If VM is currently in suspended state, then, to make a valid runstate + * transition in vm_stop_force_state() we need to wakeup it up. 
+ */ + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); + s->vm_was_running = runstate_is_running(); + + if (global_state_store()) { + goto fail; + } + /* Forcibly stop VM before saving state of vCPUs and devices */ + if (vm_stop_force_state(RUN_STATE_PAUSED)) { + goto fail; + } + /* + * Put vCPUs in sync with shadow context structures, then + * save their state to channel-buffer along with devices. + */ + cpu_synchronize_all_states(); + if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) { + goto fail; + } + /* Now initialize UFFD context and start tracking RAM writes */ + if (ram_write_tracking_start()) { + goto fail; + } + early_fail = false; + + /* + * Start VM from BH handler to avoid write-fault lock here. + * UFFD-WP protection for the whole RAM is already enabled so + * calling VM state change notifiers from vm_start() would initiate + * writes to virtio VQs memory which is in write-protected region. + */ + s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); + qemu_bh_schedule(s->vm_start_bh); + + qemu_mutex_unlock_iothread(); + + while (migration_is_active(s)) { + MigIterateState iter_state = bg_migration_iteration_run(s); + if (iter_state == MIG_ITERATE_SKIP) { + continue; + } else if (iter_state == MIG_ITERATE_BREAK) { + break; + } + + /* + * Try to detect any kind of failures, and see whether we + * should stop the migration now. + */ + thr_error = migration_detect_error(s); + if (thr_error == MIG_THR_ERR_FATAL) { + /* Stop migration */ + break; + } + + migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); + } + + trace_migration_thread_after_loop(); + +fail: + if (early_fail) { + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_FAILED); + qemu_mutex_unlock_iothread(); + } + + bg_migration_iteration_finish(s); + + qemu_fclose(fb); + object_unref(OBJECT(s)); + rcu_unregister_thread(); + + return NULL; +} + void migrate_fd_connect(MigrationState *s, Error *error_in) { Error *local_err = NULL; @@ -3667,8 +4032,14 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) migrate_fd_cleanup(s); return; } - qemu_thread_create(&s->thread, "live_migration", migration_thread, s, - QEMU_THREAD_JOINABLE); + + if (migrate_background_snapshot()) { + qemu_thread_create(&s->thread, "bg_snapshot", + bg_migration_thread, s, QEMU_THREAD_JOINABLE); + } else { + qemu_thread_create(&s->thread, "live_migration", + migration_thread, s, QEMU_THREAD_JOINABLE); + } s->migration_thread_running = true; } @@ -3784,6 +4155,8 @@ static Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), DEFINE_PROP_END_OF_LIST(), }; diff --git a/migration/migration.h b/migration/migration.h index d096b77f74..db6708326b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -20,6 +20,7 @@ #include "qemu/thread.h" #include "qemu/coroutine_int.h" #include "io/channel.h" +#include "io/channel-buffer.h" #include "net/announce.h" #include "qom/object.h" @@ -147,8 +148,10 @@ struct MigrationState { /*< public >*/ QemuThread thread; + QEMUBH *vm_start_bh; QEMUBH *cleanup_bh; QEMUFile *to_dst_file; + QIOChannelBuffer *bioc; /* * Protects to_dst_file pointer. 
We need to make sure we won't * yield or hang during the critical section, since this lock will @@ -324,7 +327,7 @@ int migrate_multifd_zlib_level(void); int migrate_multifd_zstd_level(void); int migrate_use_xbzrle(void); -int64_t migrate_xbzrle_cache_size(void); +uint64_t migrate_xbzrle_cache_size(void); bool migrate_colo_enabled(void); bool migrate_use_block(void); @@ -341,6 +344,7 @@ int migrate_compress_wait_thread(void); int migrate_decompress_threads(void); bool migrate_use_events(void); bool migrate_postcopy_blocktime(void); +bool migrate_background_snapshot(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/migration/page_cache.c b/migration/page_cache.c index 098b436223..6d4f7a9bbc 100644 --- a/migration/page_cache.c +++ b/migration/page_cache.c @@ -38,7 +38,7 @@ struct PageCache { size_t num_items; }; -PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp) +PageCache *cache_init(uint64_t new_size, size_t page_size, Error **errp) { int64_t i; size_t num_pages = new_size / page_size; @@ -60,8 +60,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp) /* We prefer not to abort if there is no memory */ cache = g_try_malloc(sizeof(*cache)); if (!cache) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "Failed to allocate cache"); + error_setg(errp, "Failed to allocate cache"); return NULL; } cache->page_size = page_size; @@ -74,8 +73,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp) cache->page_cache = g_try_malloc((cache->max_num_items) * sizeof(*cache->page_cache)); if (!cache->page_cache) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size", - "Failed to allocate page cache"); + error_setg(errp, "Failed to allocate page cache"); g_free(cache); return NULL; } diff --git a/migration/page_cache.h b/migration/page_cache.h index 0cb94498a0..8733b4df6e 100644 --- a/migration/page_cache.h +++ b/migration/page_cache.h @@ -28,7 +28,7 @@ typedef struct PageCache PageCache; * @page_size: cache page size * @errp: set *errp if the check failed, with reason */ -PageCache *cache_init(int64_t cache_size, size_t page_size, Error **errp); +PageCache *cache_init(uint64_t cache_size, size_t page_size, Error **errp); /** * cache_fini: free all cache resources * @cache pointer to the PageCache struct diff --git a/migration/qemu-file.c b/migration/qemu-file.c index be21518c57..d6e03dbc0e 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -595,7 +595,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) { if (size < IO_BUF_SIZE) { size_t res; - uint8_t *src; + uint8_t *src = NULL; res = qemu_peek_buffer(f, &src, size, 0); diff --git a/migration/ram.c b/migration/ram.c index 7811cde643..72143da0ac 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -56,6 +56,11 @@ #include "savevm.h" #include "qemu/iov.h" #include "multifd.h" +#include "sysemu/runstate.h" + +#if defined(__linux__) +#include "qemu/userfaultfd.h" +#endif /* defined(__linux__) */ /***********************************************************/ /* ram save/restore */ @@ -126,7 +131,7 @@ static void XBZRLE_cache_unlock(void) * @new_size: new cache size * @errp: set *errp if the check failed, with reason */ -int xbzrle_cache_resize(int64_t new_size, Error **errp) +int xbzrle_cache_resize(uint64_t new_size, Error **errp) { PageCache *new_cache; int64_t ret = 0; @@ -298,6 +303,8 @@ struct RAMSrcPageRequest { struct 
RAMState { /* QEMUFile used for this migration */ QEMUFile *f; + /* UFFD file descriptor, used in 'write-tracking' migration */ + int uffdio_fd; /* Last block that we have visited searching for dirty pages */ RAMBlock *last_seen_block; /* Last block from where we have sent data */ @@ -1434,6 +1441,269 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) return block; } +#if defined(__linux__) +/** + * poll_fault_page: try to get next UFFD write fault page and, if pending fault + * is found, return RAM block pointer and page offset + * + * Returns pointer to the RAMBlock containing faulting page, + * NULL if no write faults are pending + * + * @rs: current RAM state + * @offset: page offset from the beginning of the block + */ +static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset) +{ + struct uffd_msg uffd_msg; + void *page_address; + RAMBlock *bs; + int res; + + if (!migrate_background_snapshot()) { + return NULL; + } + + res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1); + if (res <= 0) { + return NULL; + } + + page_address = (void *)(uintptr_t) uffd_msg.arg.pagefault.address; + bs = qemu_ram_block_from_host(page_address, false, offset); + assert(bs && (bs->flags & RAM_UF_WRITEPROTECT) != 0); + return bs; +} + +/** + * ram_save_release_protection: release UFFD write protection after + * a range of pages has been saved + * + * @rs: current RAM state + * @pss: page-search-status structure + * @start_page: index of the first page in the range relative to pss->block + * + * Returns 0 on success, negative value in case of an error +*/ +static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss, + unsigned long start_page) +{ + int res = 0; + + /* Check if page is from UFFD-managed region. */ + if (pss->block->flags & RAM_UF_WRITEPROTECT) { + void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS); + uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS; + + /* Flush async buffers before un-protect. */ + qemu_fflush(rs->f); + /* Un-protect memory range. 
*/ + res = uffd_change_protection(rs->uffdio_fd, page_address, run_length, + false, false); + } + + return res; +} + +/* ram_write_tracking_available: check if kernel supports required UFFD features + * + * Returns true if supports, false otherwise + */ +bool ram_write_tracking_available(void) +{ + uint64_t uffd_features; + int res; + + res = uffd_query_features(&uffd_features); + return (res == 0 && + (uffd_features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) != 0); +} + +/* ram_write_tracking_compatible: check if guest configuration is + * compatible with 'write-tracking' + * + * Returns true if compatible, false otherwise + */ +bool ram_write_tracking_compatible(void) +{ + const uint64_t uffd_ioctls_mask = BIT(_UFFDIO_WRITEPROTECT); + int uffd_fd; + RAMBlock *bs; + bool ret = false; + + /* Open UFFD file descriptor */ + uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, false); + if (uffd_fd < 0) { + return false; + } + + RCU_READ_LOCK_GUARD(); + + RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + uint64_t uffd_ioctls; + + /* Nothing to do with read-only and MMIO-writable regions */ + if (bs->mr->readonly || bs->mr->rom_device) { + continue; + } + /* Try to register block memory via UFFD-IO to track writes */ + if (uffd_register_memory(uffd_fd, bs->host, bs->max_length, + UFFDIO_REGISTER_MODE_WP, &uffd_ioctls)) { + goto out; + } + if ((uffd_ioctls & uffd_ioctls_mask) != uffd_ioctls_mask) { + goto out; + } + } + ret = true; + +out: + uffd_close_fd(uffd_fd); + return ret; +} + +/* + * ram_write_tracking_start: start UFFD-WP memory tracking + * + * Returns 0 for success or negative value in case of error + */ +int ram_write_tracking_start(void) +{ + int uffd_fd; + RAMState *rs = ram_state; + RAMBlock *bs; + + /* Open UFFD file descriptor */ + uffd_fd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true); + if (uffd_fd < 0) { + return uffd_fd; + } + rs->uffdio_fd = uffd_fd; + + RCU_READ_LOCK_GUARD(); + + RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + /* Nothing to do with read-only and MMIO-writable regions */ + if (bs->mr->readonly || bs->mr->rom_device) { + continue; + } + + /* Register block memory with UFFD to track writes */ + if (uffd_register_memory(rs->uffdio_fd, bs->host, + bs->max_length, UFFDIO_REGISTER_MODE_WP, NULL)) { + goto fail; + } + /* Apply UFFD write protection to the block memory range */ + if (uffd_change_protection(rs->uffdio_fd, bs->host, + bs->max_length, true, false)) { + goto fail; + } + bs->flags |= RAM_UF_WRITEPROTECT; + memory_region_ref(bs->mr); + + trace_ram_write_tracking_ramblock_start(bs->idstr, bs->page_size, + bs->host, bs->max_length); + } + + return 0; + +fail: + error_report("ram_write_tracking_start() failed: restoring initial memory state"); + + RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + if ((bs->flags & RAM_UF_WRITEPROTECT) == 0) { + continue; + } + /* + * In case some memory block failed to be write-protected + * remove protection and unregister all succeeded RAM blocks + */ + uffd_change_protection(rs->uffdio_fd, bs->host, bs->max_length, false, false); + uffd_unregister_memory(rs->uffdio_fd, bs->host, bs->max_length); + /* Cleanup flags and remove reference */ + bs->flags &= ~RAM_UF_WRITEPROTECT; + memory_region_unref(bs->mr); + } + + uffd_close_fd(uffd_fd); + rs->uffdio_fd = -1; + return -1; +} + +/** + * ram_write_tracking_stop: stop UFFD-WP memory tracking and remove protection + */ +void ram_write_tracking_stop(void) +{ + RAMState *rs = ram_state; + RAMBlock *bs; + + RCU_READ_LOCK_GUARD(); + + RAMBLOCK_FOREACH_NOT_IGNORED(bs) { + if ((bs->flags & RAM_UF_WRITEPROTECT) == 0) 
{ + continue; + } + /* Remove protection and unregister all affected RAM blocks */ + uffd_change_protection(rs->uffdio_fd, bs->host, bs->max_length, false, false); + uffd_unregister_memory(rs->uffdio_fd, bs->host, bs->max_length); + + trace_ram_write_tracking_ramblock_stop(bs->idstr, bs->page_size, + bs->host, bs->max_length); + + /* Cleanup flags and remove reference */ + bs->flags &= ~RAM_UF_WRITEPROTECT; + memory_region_unref(bs->mr); + } + + /* Finally close UFFD file descriptor */ + uffd_close_fd(rs->uffdio_fd); + rs->uffdio_fd = -1; +} + +#else +/* No target OS support, stubs just fail or ignore */ + +static RAMBlock *poll_fault_page(RAMState *rs, ram_addr_t *offset) +{ + (void) rs; + (void) offset; + + return NULL; +} + +static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss, + unsigned long start_page) +{ + (void) rs; + (void) pss; + (void) start_page; + + return 0; +} + +bool ram_write_tracking_available(void) +{ + return false; +} + +bool ram_write_tracking_compatible(void) +{ + assert(0); + return false; +} + +int ram_write_tracking_start(void) +{ + assert(0); + return -1; +} + +void ram_write_tracking_stop(void) +{ + assert(0); +} +#endif /* defined(__linux__) */ + /** * get_queued_page: unqueue a page from the postcopy requests * @@ -1473,6 +1743,14 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) } while (block && !dirty); + if (!block) { + /* + * Poll write faults too if background snapshot is enabled; that's + * when we have vcpus got blocked by the write protected pages. + */ + block = poll_fault_page(rs, &offset); + } + if (block) { /* * As soon as we start servicing pages out of order, then we have @@ -1715,6 +1993,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, int tmppages, pages = 0; size_t pagesize_bits = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; + unsigned long start_page = pss->page; + int res; if (ramblock_is_ignored(pss->block)) { error_report("block %s should not be migrated !", pss->block->idstr); @@ -1740,10 +2020,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, } while ((pss->page & (pagesize_bits - 1)) && offset_in_ramblock(pss->block, ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); - /* The offset we leave with is the last one we looked at */ pss->page--; - return pages; + + res = ram_save_release_protection(rs, pss, start_page); + return (res < 0 ? 
res : pages); } /** @@ -1880,10 +2161,13 @@ static void ram_save_cleanup(void *opaque) RAMState **rsp = opaque; RAMBlock *block; - /* caller have hold iothread lock or is in a bh, so there is - * no writing race against the migration bitmap - */ - memory_global_dirty_log_stop(); + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { + /* caller have hold iothread lock or is in a bh, so there is + * no writing race against the migration bitmap + */ + memory_global_dirty_log_stop(); + } RAMBLOCK_FOREACH_NOT_IGNORED(block) { g_free(block->clear_bmap); @@ -2343,8 +2627,11 @@ static void ram_init_bitmaps(RAMState *rs) WITH_RCU_READ_LOCK_GUARD() { ram_list_init_bitmaps(); - memory_global_dirty_log_start(); - migration_bitmap_sync_precopy(rs); + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { + memory_global_dirty_log_start(); + migration_bitmap_sync_precopy(rs); + } } qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); @@ -3521,7 +3808,7 @@ static int ram_load_precopy(QEMUFile *f) } } /* For postcopy we need to check hugepage sizes match */ - if (postcopy_advised && + if (postcopy_advised && migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { uint64_t remote_page_size = qemu_get_be64(f); if (remote_page_size != block->page_size) { diff --git a/migration/ram.h b/migration/ram.h index 011e85414e..6378bb3ebc 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -47,7 +47,7 @@ bool ramblock_is_ignored(RAMBlock *block); INTERNAL_RAMBLOCK_FOREACH(block) \ if (!qemu_ram_is_migratable(block)) {} else -int xbzrle_cache_resize(int64_t new_size, Error **errp); +int xbzrle_cache_resize(uint64_t new_size, Error **errp); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); @@ -79,4 +79,10 @@ void colo_flush_ram_cache(void); void colo_release_ram_cache(void); void colo_incoming_start_dirty_log(void); +/* Background snapshot */ +bool ram_write_tracking_available(void); +bool ram_write_tracking_compatible(void); +int ram_write_tracking_start(void); +void ram_write_tracking_stop(void); + #endif diff --git a/migration/savevm.c b/migration/savevm.c index 4f3b69ecfc..52e2d72e4b 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -43,6 +43,8 @@ #include "qapi/error.h" #include "qapi/qapi-commands-migration.h" #include "qapi/qmp/json-writer.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-builtin-visit.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "sysemu/cpus.h" @@ -315,6 +317,16 @@ static int configuration_pre_save(void *opaque) return 0; } +static int configuration_post_save(void *opaque) +{ + SaveState *state = opaque; + + g_free(state->capabilities); + state->capabilities = NULL; + state->caps_count = 0; + return 0; +} + static int configuration_pre_load(void *opaque) { SaveState *state = opaque; @@ -365,24 +377,36 @@ static int configuration_post_load(void *opaque, int version_id) { SaveState *state = opaque; const char *current_name = MACHINE_GET_CLASS(current_machine)->name; + int ret = 0; if (strncmp(state->name, current_name, state->len) != 0) { error_report("Machine type received is '%.*s' and local is '%s'", (int) state->len, state->name, current_name); - return -EINVAL; + ret = -EINVAL; + goto out; } if (state->target_page_bits != qemu_target_page_bits()) { error_report("Received TARGET_PAGE_BITS is %d but local is %d", state->target_page_bits, qemu_target_page_bits()); - return -EINVAL; + ret = -EINVAL; + goto out; } if 
(!configuration_validate_capabilities(state)) { - return -EINVAL; + ret = -EINVAL; + goto out; } - return 0; +out: + g_free((void *)state->name); + state->name = NULL; + state->len = 0; + g_free(state->capabilities); + state->capabilities = NULL; + state->caps_count = 0; + + return ret; } static int get_capability(QEMUFile *f, void *pv, size_t size, @@ -516,6 +540,7 @@ static const VMStateDescription vmstate_configuration = { .pre_load = configuration_pre_load, .post_load = configuration_post_load, .pre_save = configuration_pre_save, + .post_save = configuration_post_save, .fields = (VMStateField[]) { VMSTATE_UINT32(len, SaveState), VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len), @@ -1131,6 +1156,19 @@ bool qemu_savevm_state_blocked(Error **errp) return false; } +void qemu_savevm_non_migratable_list(strList **reasons) +{ + SaveStateEntry *se; + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (se->vmsd && se->vmsd->unmigratable) { + QAPI_LIST_PREPEND(*reasons, + g_strdup_printf("non-migratable device: %s", + se->idstr)); + } + } +} + void qemu_savevm_state_header(QEMUFile *f) { trace_savevm_state_header(); @@ -1355,7 +1393,6 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) return 0; } -static int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, bool in_postcopy, bool inactivate_disks) @@ -2729,9 +2766,10 @@ int qemu_load_device_state(QEMUFile *f) return 0; } -int save_snapshot(const char *name, Error **errp) +bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + bool has_devices, strList *devices, Error **errp) { - BlockDriverState *bs, *bs1; + BlockDriverState *bs; QEMUSnapshotInfo sn1, *sn = &sn1; int ret = -1, ret2; QEMUFile *f; @@ -2742,35 +2780,43 @@ int save_snapshot(const char *name, Error **errp) AioContext *aio_context; if (migration_is_blocked(errp)) { - return ret; + return false; } if (!replay_can_snapshot()) { error_setg(errp, "Record/replay does not allow making snapshot " "right now. 
Try once more later."); - return ret; + return false; } - if (!bdrv_all_can_snapshot(&bs)) { - error_setg(errp, "Device '%s' is writable but does not support " - "snapshots", bdrv_get_device_or_node_name(bs)); - return ret; + if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { + return false; } /* Delete old snapshots of the same name */ if (name) { - ret = bdrv_all_delete_snapshot(name, &bs1, errp); - if (ret < 0) { - error_prepend(errp, "Error while deleting snapshot on device " - "'%s': ", bdrv_get_device_or_node_name(bs1)); - return ret; + if (overwrite) { + if (bdrv_all_delete_snapshot(name, has_devices, + devices, errp) < 0) { + return false; + } + } else { + ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp); + if (ret2 < 0) { + return false; + } + if (ret2 == 1) { + error_setg(errp, + "Snapshot '%s' already exists in one or more devices", + name); + return false; + } } } - bs = bdrv_all_find_vmstate_bs(); + bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp); if (bs == NULL) { - error_setg(errp, "No block device can accept snapshots"); - return ret; + return false; } aio_context = bdrv_get_aio_context(bs); @@ -2779,7 +2825,7 @@ int save_snapshot(const char *name, Error **errp) ret = global_state_store(); if (ret) { error_setg(errp, "Error saving global state"); - return ret; + return false; } vm_stop(RUN_STATE_SAVE_VM); @@ -2833,11 +2879,10 @@ int save_snapshot(const char *name, Error **errp) aio_context_release(aio_context); aio_context = NULL; - ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs); + ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, + has_devices, devices, errp); if (ret < 0) { - error_setg(errp, "Error while creating snapshot on '%s'", - bdrv_get_device_or_node_name(bs)); - bdrv_all_delete_snapshot(sn->name, &bs, NULL); + bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL); goto the_end; } @@ -2853,7 +2898,7 @@ int save_snapshot(const char *name, Error **errp) if (saved_vm_running) { vm_start(); } - return ret; + return ret == 0; } void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live, @@ -2938,33 +2983,32 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp) migration_incoming_state_destroy(); } -int load_snapshot(const char *name, Error **errp) +bool load_snapshot(const char *name, const char *vmstate, + bool has_devices, strList *devices, Error **errp) { - BlockDriverState *bs, *bs_vm_state; + BlockDriverState *bs_vm_state; QEMUSnapshotInfo sn; QEMUFile *f; int ret; AioContext *aio_context; MigrationIncomingState *mis = migration_incoming_get_current(); - if (!bdrv_all_can_snapshot(&bs)) { - error_setg(errp, - "Device '%s' is writable but does not support snapshots", - bdrv_get_device_or_node_name(bs)); - return -ENOTSUP; + if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { + return false; } - ret = bdrv_all_find_snapshot(name, &bs); + ret = bdrv_all_has_snapshot(name, has_devices, devices, errp); if (ret < 0) { - error_setg(errp, - "Device '%s' does not have the requested snapshot '%s'", - bdrv_get_device_or_node_name(bs), name); - return ret; + return false; + } + if (ret == 0) { + error_setg(errp, "Snapshot '%s' does not exist in one or more devices", + name); + return false; } - bs_vm_state = bdrv_all_find_vmstate_bs(); + bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp); if (!bs_vm_state) { - error_setg(errp, "No block device supports snapshots"); - return -ENOTSUP; + return false; } aio_context = 
bdrv_get_aio_context(bs_vm_state); @@ -2973,11 +3017,11 @@ int load_snapshot(const char *name, Error **errp) ret = bdrv_snapshot_find(bs_vm_state, &sn, name); aio_context_release(aio_context); if (ret < 0) { - return ret; + return false; } else if (sn.vm_state_size == 0) { error_setg(errp, "This is a disk-only snapshot. Revert to it " " offline using qemu-img"); - return -EINVAL; + return false; } /* @@ -2989,10 +3033,8 @@ int load_snapshot(const char *name, Error **errp) /* Flush all IO requests so they don't interfere with the new state. */ bdrv_drain_all_begin(); - ret = bdrv_all_goto_snapshot(name, &bs, errp); + ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp); if (ret < 0) { - error_prepend(errp, "Could not load snapshot '%s' on '%s': ", - name, bdrv_get_device_or_node_name(bs)); goto err_drain; } @@ -3000,7 +3042,6 @@ int load_snapshot(const char *name, Error **errp) f = qemu_fopen_bdrv(bs_vm_state, 0); if (!f) { error_setg(errp, "Could not open VM state file"); - ret = -EINVAL; goto err_drain; } @@ -3020,14 +3061,28 @@ int load_snapshot(const char *name, Error **errp) if (ret < 0) { error_setg(errp, "Error %d while loading VM state", ret); - return ret; + return false; } - return 0; + return true; err_drain: bdrv_drain_all_end(); - return ret; + return false; +} + +bool delete_snapshot(const char *name, bool has_devices, + strList *devices, Error **errp) +{ + if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { + return false; + } + + if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) { + return false; + } + + return true; } void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) @@ -3057,3 +3112,187 @@ bool vmstate_check_only_migratable(const VMStateDescription *vmsd) return !(vmsd && vmsd->unmigratable); } + +typedef struct SnapshotJob { + Job common; + char *tag; + char *vmstate; + strList *devices; + Coroutine *co; + Error **errp; + bool ret; +} SnapshotJob; + +static void qmp_snapshot_job_free(SnapshotJob *s) +{ + g_free(s->tag); + g_free(s->vmstate); + qapi_free_strList(s->devices); +} + + +static void snapshot_load_job_bh(void *opaque) +{ + Job *job = opaque; + SnapshotJob *s = container_of(job, SnapshotJob, common); + int orig_vm_running; + + job_progress_set_remaining(&s->common, 1); + + orig_vm_running = runstate_is_running(); + vm_stop(RUN_STATE_RESTORE_VM); + + s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp); + if (s->ret && orig_vm_running) { + vm_start(); + } + + job_progress_update(&s->common, 1); + + qmp_snapshot_job_free(s); + aio_co_wake(s->co); +} + +static void snapshot_save_job_bh(void *opaque) +{ + Job *job = opaque; + SnapshotJob *s = container_of(job, SnapshotJob, common); + + job_progress_set_remaining(&s->common, 1); + s->ret = save_snapshot(s->tag, false, s->vmstate, + true, s->devices, s->errp); + job_progress_update(&s->common, 1); + + qmp_snapshot_job_free(s); + aio_co_wake(s->co); +} + +static void snapshot_delete_job_bh(void *opaque) +{ + Job *job = opaque; + SnapshotJob *s = container_of(job, SnapshotJob, common); + + job_progress_set_remaining(&s->common, 1); + s->ret = delete_snapshot(s->tag, true, s->devices, s->errp); + job_progress_update(&s->common, 1); + + qmp_snapshot_job_free(s); + aio_co_wake(s->co); +} + +static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp) +{ + SnapshotJob *s = container_of(job, SnapshotJob, common); + s->errp = errp; + s->co = qemu_coroutine_self(); + aio_bh_schedule_oneshot(qemu_get_aio_context(), + snapshot_save_job_bh, job); + 
qemu_coroutine_yield(); + return s->ret ? 0 : -1; +} + +static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp) +{ + SnapshotJob *s = container_of(job, SnapshotJob, common); + s->errp = errp; + s->co = qemu_coroutine_self(); + aio_bh_schedule_oneshot(qemu_get_aio_context(), + snapshot_load_job_bh, job); + qemu_coroutine_yield(); + return s->ret ? 0 : -1; +} + +static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp) +{ + SnapshotJob *s = container_of(job, SnapshotJob, common); + s->errp = errp; + s->co = qemu_coroutine_self(); + aio_bh_schedule_oneshot(qemu_get_aio_context(), + snapshot_delete_job_bh, job); + qemu_coroutine_yield(); + return s->ret ? 0 : -1; +} + + +static const JobDriver snapshot_load_job_driver = { + .instance_size = sizeof(SnapshotJob), + .job_type = JOB_TYPE_SNAPSHOT_LOAD, + .run = snapshot_load_job_run, +}; + +static const JobDriver snapshot_save_job_driver = { + .instance_size = sizeof(SnapshotJob), + .job_type = JOB_TYPE_SNAPSHOT_SAVE, + .run = snapshot_save_job_run, +}; + +static const JobDriver snapshot_delete_job_driver = { + .instance_size = sizeof(SnapshotJob), + .job_type = JOB_TYPE_SNAPSHOT_DELETE, + .run = snapshot_delete_job_run, +}; + + +void qmp_snapshot_save(const char *job_id, + const char *tag, + const char *vmstate, + strList *devices, + Error **errp) +{ + SnapshotJob *s; + + s = job_create(job_id, &snapshot_save_job_driver, NULL, + qemu_get_aio_context(), JOB_MANUAL_DISMISS, + NULL, NULL, errp); + if (!s) { + return; + } + + s->tag = g_strdup(tag); + s->vmstate = g_strdup(vmstate); + s->devices = QAPI_CLONE(strList, devices); + + job_start(&s->common); +} + +void qmp_snapshot_load(const char *job_id, + const char *tag, + const char *vmstate, + strList *devices, + Error **errp) +{ + SnapshotJob *s; + + s = job_create(job_id, &snapshot_load_job_driver, NULL, + qemu_get_aio_context(), JOB_MANUAL_DISMISS, + NULL, NULL, errp); + if (!s) { + return; + } + + s->tag = g_strdup(tag); + s->vmstate = g_strdup(vmstate); + s->devices = QAPI_CLONE(strList, devices); + + job_start(&s->common); +} + +void qmp_snapshot_delete(const char *job_id, + const char *tag, + strList *devices, + Error **errp) +{ + SnapshotJob *s; + + s = job_create(job_id, &snapshot_delete_job_driver, NULL, + qemu_get_aio_context(), JOB_MANUAL_DISMISS, + NULL, NULL, errp); + if (!s) { + return; + } + + s->tag = g_strdup(tag); + s->devices = QAPI_CLONE(strList, devices); + + job_start(&s->common); +} diff --git a/migration/savevm.h b/migration/savevm.h index ba64a7e271..6461342cb4 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -30,6 +30,7 @@ #define QEMU_VM_SECTION_FOOTER 0x7e bool qemu_savevm_state_blocked(Error **errp); +void qemu_savevm_non_migratable_list(strList **reasons); void qemu_savevm_state_setup(QEMUFile *f); bool qemu_savevm_state_guest_unplug_pending(void); int qemu_savevm_state_resume_prepare(MigrationState *s); @@ -64,5 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); void qemu_loadvm_state_cleanup(void); int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); int qemu_load_device_state(QEMUFile *f); +int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + bool in_postcopy, bool inactivate_disks); #endif diff --git a/migration/trace-events b/migration/trace-events index 75de5004ac..668c562fed 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -111,6 +111,8 @@ save_xbzrle_page_skipping(void) "" save_xbzrle_page_overflow(void) "" ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: 
%" PRIu64 " milliseconds, %d iterations" ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 +ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" +ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" # multifd.c multifd_new_send_channel_async(uint8_t id) "channel %d" diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index a48bc1e904..3c88a4faef 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -224,6 +224,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) migration_global_dump(mon); + if (info->blocked) { + strList *reasons = info->blocked_reasons; + monitor_printf(mon, "Outgoing migration blocked:\n"); + while (reasons) { + monitor_printf(mon, " %s\n", reasons->value); + reasons = reasons->next; + } + } + if (info->has_status) { monitor_printf(mon, "Migration status: %s", MigrationStatus_str(info->status)); @@ -1130,7 +1139,7 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict) vm_stop(RUN_STATE_RESTORE_VM); - if (load_snapshot(name, &err) == 0 && saved_vm_running) { + if (!load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { vm_start(); } hmp_handle_error(mon, err); @@ -1140,21 +1149,17 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) { Error *err = NULL; - save_snapshot(qdict_get_try_str(qdict, "name"), &err); + save_snapshot(qdict_get_try_str(qdict, "name"), + true, NULL, false, NULL, &err); hmp_handle_error(mon, err); } void hmp_delvm(Monitor *mon, const QDict *qdict) { - BlockDriverState *bs; Error *err = NULL; const char *name = qdict_get_str(qdict, "name"); - if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) { - error_prepend(&err, - "deleting snapshot on device '%s': ", - bdrv_get_device_name(bs)); - } + delete_snapshot(name, false, NULL, &err); hmp_handle_error(mon, err); } @@ -1294,11 +1299,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) switch (val) { case MIGRATION_PARAMETER_COMPRESS_LEVEL: p->has_compress_level = true; - visit_type_int(v, param, &p->compress_level, &err); + visit_type_uint8(v, param, &p->compress_level, &err); break; case MIGRATION_PARAMETER_COMPRESS_THREADS: p->has_compress_threads = true; - visit_type_int(v, param, &p->compress_threads, &err); + visit_type_uint8(v, param, &p->compress_threads, &err); break; case MIGRATION_PARAMETER_COMPRESS_WAIT_THREAD: p->has_compress_wait_thread = true; @@ -1306,19 +1311,19 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) break; case MIGRATION_PARAMETER_DECOMPRESS_THREADS: p->has_decompress_threads = true; - visit_type_int(v, param, &p->decompress_threads, &err); + visit_type_uint8(v, param, &p->decompress_threads, &err); break; case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD: p->has_throttle_trigger_threshold = true; - visit_type_int(v, param, &p->throttle_trigger_threshold, &err); + visit_type_uint8(v, param, &p->throttle_trigger_threshold, &err); break; case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL: p->has_cpu_throttle_initial = true; - visit_type_int(v, param, &p->cpu_throttle_initial, &err); + visit_type_uint8(v, param, &p->cpu_throttle_initial, &err); break; case MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT: p->has_cpu_throttle_increment = true; - visit_type_int(v, param, &p->cpu_throttle_increment, &err); + visit_type_uint8(v, param, &p->cpu_throttle_increment, &err); break; case 
MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW: p->has_cpu_throttle_tailslow = true; @@ -1326,7 +1331,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) break; case MIGRATION_PARAMETER_MAX_CPU_THROTTLE: p->has_max_cpu_throttle = true; - visit_type_int(v, param, &p->max_cpu_throttle, &err); + visit_type_uint8(v, param, &p->max_cpu_throttle, &err); break; case MIGRATION_PARAMETER_TLS_CREDS: p->has_tls_creds = true; @@ -1362,11 +1367,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) break; case MIGRATION_PARAMETER_DOWNTIME_LIMIT: p->has_downtime_limit = true; - visit_type_int(v, param, &p->downtime_limit, &err); + visit_type_size(v, param, &p->downtime_limit, &err); break; case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY: p->has_x_checkpoint_delay = true; - visit_type_int(v, param, &p->x_checkpoint_delay, &err); + visit_type_uint32(v, param, &p->x_checkpoint_delay, &err); break; case MIGRATION_PARAMETER_BLOCK_INCREMENTAL: p->has_block_incremental = true; @@ -1374,7 +1379,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) break; case MIGRATION_PARAMETER_MULTIFD_CHANNELS: p->has_multifd_channels = true; - visit_type_int(v, param, &p->multifd_channels, &err); + visit_type_uint8(v, param, &p->multifd_channels, &err); break; case MIGRATION_PARAMETER_MULTIFD_COMPRESSION: p->has_multifd_compression = true; @@ -1383,11 +1388,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) break; case MIGRATION_PARAMETER_MULTIFD_ZLIB_LEVEL: p->has_multifd_zlib_level = true; - visit_type_int(v, param, &p->multifd_zlib_level, &err); + visit_type_uint8(v, param, &p->multifd_zlib_level, &err); break; case MIGRATION_PARAMETER_MULTIFD_ZSTD_LEVEL: p->has_multifd_zstd_level = true; - visit_type_int(v, param, &p->multifd_zstd_level, &err); + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); break; case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: p->has_xbzrle_cache_size = true; diff --git a/qapi/job.json b/qapi/job.json index 280c2f76f1..1a6ef03451 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -22,10 +22,17 @@ # # @amend: image options amend job type, see "x-blockdev-amend" (since 5.1) # +# @snapshot-load: snapshot load job type, see "snapshot-load" (since 6.0) +# +# @snapshot-save: snapshot save job type, see "snapshot-save" (since 6.0) +# +# @snapshot-delete: snapshot delete job type, see "snapshot-delete" (since 6.0) +# # Since: 1.7 ## { 'enum': 'JobType', - 'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend'] } + 'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend', + 'snapshot-load', 'snapshot-save', 'snapshot-delete'] } ## # @JobStatus: diff --git a/qapi/migration.json b/qapi/migration.json index d1d9632c2a..ce14d78071 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -78,7 +78,7 @@ # Since: 1.2 ## { 'struct': 'XBZRLECacheStats', - 'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int', + 'data': {'cache-size': 'size', 'bytes': 'int', 'pages': 'int', 'cache-miss': 'int', 'cache-miss-rate': 'number', 'encoding-rate': 'number', 'overflow': 'int' } } @@ -224,6 +224,10 @@ # only returned if VFIO device is present, migration is supported by all # VFIO devices and status is 'active' or 'completed' (since 5.2) # +# @blocked: True if outgoing migration is blocked (since 6.0) +# +# @blocked-reasons: A list of reasons an outgoing migration is blocked (since 6.0) +# # Since: 0.14 ## { 'struct': 'MigrationInfo', @@ -237,6 +241,8 @@ '*setup-time': 'int', '*cpu-throttle-percentage': 'int', '*error-desc': 'str', 
+ 'blocked': 'bool', + '*blocked-reasons': ['str'], '*postcopy-blocktime' : 'uint32', '*postcopy-vcpu-blocktime': ['uint32'], '*compression': 'CompressionStats', @@ -442,6 +448,11 @@ # @validate-uuid: Send the UUID of the source to allow the destination # to ensure it is the same. (since 4.2) # +# @background-snapshot: If enabled, the migration stream will be a snapshot +# of the VM exactly at the point when the migration +# procedure starts. The VM RAM is saved with running VM. +# (since 6.0) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', @@ -449,7 +460,7 @@ 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', 'block', 'return-path', 'pause-before-switchover', 'multifd', 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - 'x-ignore-shared', 'validate-uuid' ] } + 'x-ignore-shared', 'validate-uuid', 'background-snapshot'] } ## # @MigrationCapabilityStatus: @@ -885,28 +896,28 @@ '*announce-max': 'size', '*announce-rounds': 'size', '*announce-step': 'size', - '*compress-level': 'int', - '*compress-threads': 'int', + '*compress-level': 'uint8', + '*compress-threads': 'uint8', '*compress-wait-thread': 'bool', - '*decompress-threads': 'int', - '*throttle-trigger-threshold': 'int', - '*cpu-throttle-initial': 'int', - '*cpu-throttle-increment': 'int', + '*decompress-threads': 'uint8', + '*throttle-trigger-threshold': 'uint8', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', '*cpu-throttle-tailslow': 'bool', '*tls-creds': 'StrOrNull', '*tls-hostname': 'StrOrNull', '*tls-authz': 'StrOrNull', - '*max-bandwidth': 'int', - '*downtime-limit': 'int', - '*x-checkpoint-delay': 'int', + '*max-bandwidth': 'size', + '*downtime-limit': 'uint64', + '*x-checkpoint-delay': 'uint32', '*block-incremental': 'bool', - '*multifd-channels': 'int', + '*multifd-channels': 'uint8', '*xbzrle-cache-size': 'size', '*max-postcopy-bandwidth': 'size', - '*max-cpu-throttle': 'int', + '*max-cpu-throttle': 'uint8', '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'int', - '*multifd-zstd-level': 'int', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } ## @@ -1093,7 +1104,7 @@ '*max-bandwidth': 'size', '*downtime-limit': 'uint64', '*x-checkpoint-delay': 'uint32', - '*block-incremental': 'bool' , + '*block-incremental': 'bool', '*multifd-channels': 'uint8', '*xbzrle-cache-size': 'size', '*max-postcopy-bandwidth': 'size', @@ -1465,7 +1476,7 @@ # <- { "return": 67108864 } # ## -{ 'command': 'query-migrate-cache-size', 'returns': 'int', +{ 'command': 'query-migrate-cache-size', 'returns': 'size', 'features': [ 'deprecated' ] } ## @@ -1843,3 +1854,176 @@ # Since: 5.2 ## { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } + +## +# @snapshot-save: +# +# Save a VM snapshot +# +# @job-id: identifier for the newly created job +# @tag: name of the snapshot to create +# @vmstate: block device node name to save vmstate to +# @devices: list of block device node names to save a snapshot to +# +# Applications should not assume that the snapshot save is complete +# when this command returns. The job commands / events must be used +# to determine completion and to fetch details of any errors that arise. +# +# Note that execution of the guest CPUs may be stopped during the +# time it takes to save the snapshot. A future version of QEMU +# may ensure CPUs are executing continuously. 
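+# When the CPUs are paused in this way, clients will observe the STOP and
+# RESUME events shown in the example below.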
+# +# It is strongly recommended that @devices contain all writable +# block device nodes if a consistent snapshot is required. +# +# If @tag already exists, an error will be reported +# +# Returns: nothing +# +# Example: +# +# -> { "execute": "snapshot-save", +# "data": { +# "job-id": "snapsave0", +# "tag": "my-snap", +# "vmstate": "disk0", +# "devices": ["disk0", "disk1"] +# } +# } +# <- { "return": { } } +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "created", "id": "snapsave0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "running", "id": "snapsave0"}} +# <- {"event": "STOP"} +# <- {"event": "RESUME"} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "waiting", "id": "snapsave0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "pending", "id": "snapsave0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "concluded", "id": "snapsave0"}} +# -> {"execute": "query-jobs"} +# <- {"return": [{"current-progress": 1, +# "status": "concluded", +# "total-progress": 1, +# "type": "snapshot-save", +# "id": "snapsave0"}]} +# +# Since: 6.0 +## +{ 'command': 'snapshot-save', + 'data': { 'job-id': 'str', + 'tag': 'str', + 'vmstate': 'str', + 'devices': ['str'] } } + +## +# @snapshot-load: +# +# Load a VM snapshot +# +# @job-id: identifier for the newly created job +# @tag: name of the snapshot to load. +# @vmstate: block device node name to load vmstate from +# @devices: list of block device node names to load a snapshot from +# +# Applications should not assume that the snapshot load is complete +# when this command returns. The job commands / events must be used +# to determine completion and to fetch details of any errors that arise. +# +# Note that execution of the guest CPUs will be stopped during the +# time it takes to load the snapshot. +# +# It is strongly recommended that @devices contain all writable +# block device nodes that can have changed since the original +# @snapshot-save command execution. +# +# Returns: nothing +# +# Example: +# +# -> { "execute": "snapshot-load", +# "data": { +# "job-id": "snapload0", +# "tag": "my-snap", +# "vmstate": "disk0", +# "devices": ["disk0", "disk1"] +# } +# } +# <- { "return": { } } +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "created", "id": "snapload0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "running", "id": "snapload0"}} +# <- {"event": "STOP"} +# <- {"event": "RESUME"} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "waiting", "id": "snapload0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "pending", "id": "snapload0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "concluded", "id": "snapload0"}} +# -> {"execute": "query-jobs"} +# <- {"return": [{"current-progress": 1, +# "status": "concluded", +# "total-progress": 1, +# "type": "snapshot-load", +# "id": "snapload0"}]} +# +# Since: 6.0 +## +{ 'command': 'snapshot-load', + 'data': { 'job-id': 'str', + 'tag': 'str', + 'vmstate': 'str', + 'devices': ['str'] } } + +## +# @snapshot-delete: +# +# Delete a VM snapshot +# +# @job-id: identifier for the newly created job +# @tag: name of the snapshot to delete. +# @devices: list of block device node names to delete a snapshot from +# +# Applications should not assume that the snapshot delete is complete +# when this command returns. The job commands / events must be used +# to determine completion and to fetch details of any errors that arise. 
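+# Unlike snapshot-save and snapshot-load, snapshot-delete does not pause the
+# guest CPUs, so no STOP or RESUME events appear in the example below.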
+# +# Returns: nothing +# +# Example: +# +# -> { "execute": "snapshot-delete", +# "data": { +# "job-id": "snapdelete0", +# "tag": "my-snap", +# "devices": ["disk0", "disk1"] +# } +# } +# <- { "return": { } } +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "created", "id": "snapdelete0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "running", "id": "snapdelete0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "waiting", "id": "snapdelete0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "pending", "id": "snapdelete0"}} +# <- {"event": "JOB_STATUS_CHANGE", +# "data": {"status": "concluded", "id": "snapdelete0"}} +# -> {"execute": "query-jobs"} +# <- {"return": [{"current-progress": 1, +# "status": "concluded", +# "total-progress": 1, +# "type": "snapshot-delete", +# "id": "snapdelete0"}]} +# +# Since: 6.0 +## +{ 'command': 'snapshot-delete', + 'data': { 'job-id': 'str', + 'tag': 'str', + 'devices': ['str'] } } diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c index 5ec574724a..1cde50e9f3 100644 --- a/replay/replay-debugging.c +++ b/replay/replay-debugging.c @@ -143,12 +143,13 @@ static char *replay_find_nearest_snapshot(int64_t icount, QEMUSnapshotInfo *sn_tab; QEMUSnapshotInfo *nearest = NULL; char *ret = NULL; + int rv; int nb_sns, i; AioContext *aio_context; *snapshot_icount = -1; - bs = bdrv_all_find_vmstate_bs(); + bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, NULL); if (!bs) { goto fail; } @@ -159,7 +160,10 @@ static char *replay_find_nearest_snapshot(int64_t icount, aio_context_release(aio_context); for (i = 0; i < nb_sns; i++) { - if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) { + rv = bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL); + if (rv < 0) + goto fail; + if (rv == 1) { if (sn_tab[i].icount != -1ULL && sn_tab[i].icount <= icount && (!nearest || nearest->icount < sn_tab[i].icount)) { @@ -192,7 +196,7 @@ static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp) if (icount < replay_get_current_icount() || replay_get_current_icount() < snapshot_icount) { vm_stop(RUN_STATE_RESTORE_VM); - load_snapshot(snapshot, errp); + load_snapshot(snapshot, NULL, false, NULL, errp); } g_free(snapshot); } @@ -323,7 +327,7 @@ void replay_gdb_attached(void) */ if (replay_mode == REPLAY_MODE_PLAY && !replay_snapshot) { - if (save_snapshot("start_debugging", NULL) != 0) { + if (!save_snapshot("start_debugging", true, NULL, false, NULL, NULL)) { /* Can't create the snapshot. Continue conventional debugging. 
*/ } } diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c index e26fa4c892..e8767a1937 100644 --- a/replay/replay-snapshot.c +++ b/replay/replay-snapshot.c @@ -77,13 +77,14 @@ void replay_vmstate_init(void) if (replay_snapshot) { if (replay_mode == REPLAY_MODE_RECORD) { - if (save_snapshot(replay_snapshot, &err) != 0) { + if (!save_snapshot(replay_snapshot, + true, NULL, false, NULL, &err)) { error_report_err(err); error_report("Could not create snapshot for icount record"); exit(1); } } else if (replay_mode == REPLAY_MODE_PLAY) { - if (load_snapshot(replay_snapshot, &err) != 0) { + if (!load_snapshot(replay_snapshot, NULL, false, NULL, &err)) { error_report_err(err); error_report("Could not load snapshot for icount replay"); exit(1); diff --git a/scripts/mtest2make.py b/scripts/mtest2make.py index 25ee6887cf..cbbcba100d 100644 --- a/scripts/mtest2make.py +++ b/scripts/mtest2make.py @@ -110,6 +110,7 @@ def emit_suite(name, suite, prefix): print('ifneq ($(filter %s %s, $(MAKECMDGOALS)),)' % (target, prefix)) print('.tests += $(.test.$(SPEED).%s)' % (target, )) print('endif') + print('all-%s-targets += %s' % (prefix, target)) targets = {t['id']: [os.path.relpath(f) for f in t['filename']] for t in introspect['targets']} diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py index 50978090b4..54af519f44 100644 --- a/scripts/qapi/commands.py +++ b/scripts/qapi/commands.py @@ -23,7 +23,6 @@ from typing import ( from .common import c_name, mcgen from .gen import ( QAPIGenC, - QAPIGenCCode, QAPISchemaModularCVisitor, build_params, ifcontext, @@ -126,6 +125,9 @@ def gen_marshal(name: str, boxed: bool, ret_type: Optional[QAPISchemaType]) -> str: have_args = boxed or (arg_type and not arg_type.is_empty()) + if have_args: + assert arg_type is not None + arg_type_c_name = arg_type.c_name() ret = mcgen(''' @@ -147,7 +149,7 @@ def gen_marshal(name: str, ret += mcgen(''' %(c_name)s arg = {0}; ''', - c_name=arg_type.c_name()) + c_name=arg_type_c_name) ret += mcgen(''' @@ -163,7 +165,7 @@ def gen_marshal(name: str, ok = visit_check_struct(v, errp); } ''', - c_arg_type=arg_type.c_name()) + c_arg_type=arg_type_c_name) else: ret += mcgen(''' ok = visit_check_struct(v, errp); @@ -193,7 +195,7 @@ out: ret += mcgen(''' visit_type_%(c_arg_type)s_members(v, &arg, NULL); ''', - c_arg_type=arg_type.c_name()) + c_arg_type=arg_type_c_name) ret += mcgen(''' visit_end_struct(v, NULL); @@ -234,28 +236,11 @@ def gen_register_command(name: str, return ret -def gen_registry(registry: str, prefix: str) -> str: - ret = mcgen(''' - -void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds) -{ - QTAILQ_INIT(cmds); - -''', - c_prefix=c_name(prefix, protect=False)) - ret += registry - ret += mcgen(''' -} -''') - return ret - - class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor): def __init__(self, prefix: str): super().__init__( prefix, 'qapi-commands', ' * Schema-defined QAPI/QMP commands', None, __doc__) - self._regy = QAPIGenCCode(None) self._visited_ret_types: Dict[QAPIGenC, Set[QAPISchemaType]] = {} def _begin_user_module(self, name: str) -> None: @@ -282,25 +267,36 @@ class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor): ''', types=types)) - def visit_end(self) -> None: - self._add_system_module('init', ' * QAPI Commands initialization') + def visit_begin(self, schema: QAPISchema) -> None: + self._add_module('./init', ' * QAPI Commands initialization') self._genh.add(mcgen(''' #include "qapi/qmp/dispatch.h" void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds); ''', 
c_prefix=c_name(self._prefix, protect=False))) - self._genc.preamble_add(mcgen(''' + self._genc.add(mcgen(''' #include "qemu/osdep.h" #include "%(prefix)sqapi-commands.h" #include "%(prefix)sqapi-init-commands.h" + +void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds) +{ + QTAILQ_INIT(cmds); + ''', - prefix=self._prefix)) - self._genc.add(gen_registry(self._regy.get_content(), self._prefix)) + prefix=self._prefix, + c_prefix=c_name(self._prefix, protect=False))) + + def visit_end(self) -> None: + with self._temp_module('./init'): + self._genc.add(mcgen(''' +} +''')) def visit_command(self, name: str, - info: QAPISourceInfo, + info: Optional[QAPISourceInfo], ifcond: List[str], features: List[QAPISchemaFeature], arg_type: Optional[QAPISchemaObjectType], @@ -321,15 +317,17 @@ void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds); if ret_type and ret_type not in self._visited_ret_types[self._genc]: self._visited_ret_types[self._genc].add(ret_type) with ifcontext(ret_type.ifcond, - self._genh, self._genc, self._regy): + self._genh, self._genc): self._genc.add(gen_marshal_output(ret_type)) - with ifcontext(ifcond, self._genh, self._genc, self._regy): + with ifcontext(ifcond, self._genh, self._genc): self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type)) self._genh.add(gen_marshal_decl(name)) self._genc.add(gen_marshal(name, arg_type, boxed, ret_type)) - self._regy.add(gen_register_command(name, success_response, - allow_oob, allow_preconfig, - coroutine)) + with self._temp_module('./init'): + with ifcontext(ifcond, self._genh, self._genc): + self._genc.add(gen_register_command(name, success_response, + allow_oob, allow_preconfig, + coroutine)) def gen_commands(schema: QAPISchema, diff --git a/scripts/qapi/events.py b/scripts/qapi/events.py index 599f3d1f56..8c57deb2b8 100644 --- a/scripts/qapi/events.py +++ b/scripts/qapi/events.py @@ -12,7 +12,7 @@ This work is licensed under the terms of the GNU GPL, version 2. See the COPYING file in the top-level directory. 
""" -from typing import List +from typing import List, Optional from .common import c_enum_const, c_name, mcgen from .gen import QAPISchemaModularCVisitor, build_params, ifcontext @@ -27,7 +27,7 @@ from .types import gen_enum, gen_enum_lookup def build_event_send_proto(name: str, - arg_type: QAPISchemaObjectType, + arg_type: Optional[QAPISchemaObjectType], boxed: bool) -> str: return 'void qapi_event_send_%(c_name)s(%(param)s)' % { 'c_name': c_name(name.lower()), @@ -35,7 +35,7 @@ def build_event_send_proto(name: str, def gen_event_send_decl(name: str, - arg_type: QAPISchemaObjectType, + arg_type: Optional[QAPISchemaObjectType], boxed: bool) -> str: return mcgen(''' @@ -78,7 +78,7 @@ def gen_param_var(typ: QAPISchemaObjectType) -> str: def gen_event_send(name: str, - arg_type: QAPISchemaObjectType, + arg_type: Optional[QAPISchemaObjectType], boxed: bool, event_enum_name: str, event_emit: str) -> str: @@ -99,6 +99,7 @@ def gen_event_send(name: str, proto=build_event_send_proto(name, arg_type, boxed)) if have_args: + assert arg_type is not None ret += mcgen(''' QObject *obj; Visitor *v; @@ -114,6 +115,7 @@ def gen_event_send(name: str, name=name) if have_args: + assert arg_type is not None ret += mcgen(''' v = qobject_output_visitor_new(&obj); ''') @@ -189,7 +191,7 @@ class QAPISchemaGenEventVisitor(QAPISchemaModularCVisitor): types=types)) def visit_end(self) -> None: - self._add_system_module('emit', ' * QAPI Events emission') + self._add_module('./emit', ' * QAPI Events emission') self._genc.preamble_add(mcgen(''' #include "qemu/osdep.h" #include "%(prefix)sqapi-emit-events.h" @@ -211,10 +213,10 @@ void %(event_emit)s(%(event_enum)s event, QDict *qdict); def visit_event(self, name: str, - info: QAPISourceInfo, + info: Optional[QAPISourceInfo], ifcond: List[str], features: List[QAPISchemaFeature], - arg_type: QAPISchemaObjectType, + arg_type: Optional[QAPISchemaObjectType], boxed: bool) -> None: with ifcontext(ifcond, self._genh, self._genc): self._genh.add(gen_event_send_decl(name, arg_type, boxed)) diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py index b40f18eee3..63549cc8d4 100644 --- a/scripts/qapi/gen.py +++ b/scripts/qapi/gen.py @@ -31,12 +31,16 @@ from .common import ( guardstart, mcgen, ) -from .schema import QAPISchemaObjectType, QAPISchemaVisitor +from .schema import ( + QAPISchemaModule, + QAPISchemaObjectType, + QAPISchemaVisitor, +) from .source import QAPISourceInfo class QAPIGen: - def __init__(self, fname: Optional[str]): + def __init__(self, fname: str): self.fname = fname self._preamble = '' self._body = '' @@ -121,7 +125,7 @@ def build_params(arg_type: Optional[QAPISchemaObjectType], class QAPIGenCCode(QAPIGen): - def __init__(self, fname: Optional[str]): + def __init__(self, fname: str): super().__init__(fname) self._start_if: Optional[Tuple[List[str], str, str]] = None @@ -130,15 +134,12 @@ class QAPIGenCCode(QAPIGen): self._start_if = (ifcond, self._body, self._preamble) def end_if(self) -> None: - assert self._start_if - self._wrap_ifcond() - self._start_if = None - - def _wrap_ifcond(self) -> None: + assert self._start_if is not None self._body = _wrap_ifcond(self._start_if[0], self._start_if[1], self._body) self._preamble = _wrap_ifcond(self._start_if[0], self._start_if[2], self._preamble) + self._start_if = None def get_content(self) -> str: assert self._start_if is None @@ -243,85 +244,88 @@ class QAPISchemaModularCVisitor(QAPISchemaVisitor): self._user_blurb = user_blurb self._builtin_blurb = builtin_blurb self._pydoc = pydoc - self._genc: 
Optional[QAPIGenC] = None - self._genh: Optional[QAPIGenH] = None - self._module: Dict[Optional[str], Tuple[QAPIGenC, QAPIGenH]] = {} + self._current_module: Optional[str] = None + self._module: Dict[str, Tuple[QAPIGenC, QAPIGenH]] = {} self._main_module: Optional[str] = None - @staticmethod - def _is_user_module(name: Optional[str]) -> bool: - return bool(name and not name.startswith('./')) + @property + def _genc(self) -> QAPIGenC: + assert self._current_module is not None + return self._module[self._current_module][0] - @staticmethod - def _is_builtin_module(name: Optional[str]) -> bool: - return not name + @property + def _genh(self) -> QAPIGenH: + assert self._current_module is not None + return self._module[self._current_module][1] - def _module_dirname(self, name: Optional[str]) -> str: - if self._is_user_module(name): + @staticmethod + def _module_dirname(name: str) -> str: + if QAPISchemaModule.is_user_module(name): return os.path.dirname(name) return '' - def _module_basename(self, what: str, name: Optional[str]) -> str: - ret = '' if self._is_builtin_module(name) else self._prefix - if self._is_user_module(name): + def _module_basename(self, what: str, name: str) -> str: + ret = '' if QAPISchemaModule.is_builtin_module(name) else self._prefix + if QAPISchemaModule.is_user_module(name): basename = os.path.basename(name) ret += what if name != self._main_module: ret += '-' + os.path.splitext(basename)[0] else: - name = name[2:] if name else 'builtin' - ret += re.sub(r'-', '-' + name + '-', what) + assert QAPISchemaModule.is_system_module(name) + ret += re.sub(r'-', '-' + name[2:] + '-', what) return ret - def _module_filename(self, what: str, name: Optional[str]) -> str: + def _module_filename(self, what: str, name: str) -> str: return os.path.join(self._module_dirname(name), self._module_basename(what, name)) - def _add_module(self, name: Optional[str], blurb: str) -> None: + def _add_module(self, name: str, blurb: str) -> None: + if QAPISchemaModule.is_user_module(name): + if self._main_module is None: + self._main_module = name basename = self._module_filename(self._what, name) genc = QAPIGenC(basename + '.c', blurb, self._pydoc) genh = QAPIGenH(basename + '.h', blurb, self._pydoc) self._module[name] = (genc, genh) - self._genc, self._genh = self._module[name] - - def _add_user_module(self, name: str, blurb: str) -> None: - assert self._is_user_module(name) - if self._main_module is None: - self._main_module = name - self._add_module(name, blurb) + self._current_module = name - def _add_system_module(self, name: Optional[str], blurb: str) -> None: - self._add_module(name and './' + name, blurb) + @contextmanager + def _temp_module(self, name: str) -> Iterator[None]: + old_module = self._current_module + self._current_module = name + yield + self._current_module = old_module def write(self, output_dir: str, opt_builtins: bool = False) -> None: for name in self._module: - if self._is_builtin_module(name) and not opt_builtins: + if QAPISchemaModule.is_builtin_module(name) and not opt_builtins: continue (genc, genh) = self._module[name] genc.write(output_dir) genh.write(output_dir) - def _begin_system_module(self, name: None) -> None: + def _begin_builtin_module(self) -> None: pass def _begin_user_module(self, name: str) -> None: pass - def visit_module(self, name: Optional[str]) -> None: - if name is None: + def visit_module(self, name: str) -> None: + if QAPISchemaModule.is_builtin_module(name): if self._builtin_blurb: - self._add_system_module(None, self._builtin_blurb) - 
self._begin_system_module(name) + self._add_module(name, self._builtin_blurb) + self._begin_builtin_module() else: # The built-in module has not been created. No code may # be generated. - self._genc = None - self._genh = None + self._current_module = None else: - self._add_user_module(name, self._user_blurb) + assert QAPISchemaModule.is_user_module(name) + self._add_module(name, self._user_blurb) self._begin_user_module(name) - def visit_include(self, name: str, info: QAPISourceInfo) -> None: + def visit_include(self, name: str, info: Optional[QAPISourceInfo]) -> None: relname = os.path.relpath(self._module_filename(self._what, name), os.path.dirname(self._genh.fname)) self._genh.preamble_add(mcgen(''' diff --git a/scripts/qapi/main.py b/scripts/qapi/main.py index 42517210b8..703e7ed1ed 100644 --- a/scripts/qapi/main.py +++ b/scripts/qapi/main.py @@ -23,6 +23,8 @@ from .visit import gen_visit def invalid_prefix_char(prefix: str) -> Optional[str]: match = re.match(r'([A-Za-z_.-][A-Za-z0-9_.-]*)?', prefix) + # match cannot be None, but mypy cannot infer that. + assert match is not None if match.end() != len(prefix): return prefix[match.end()] return None diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini index 74fc6c8215..04bd5db527 100644 --- a/scripts/qapi/mypy.ini +++ b/scripts/qapi/mypy.ini @@ -1,6 +1,5 @@ [mypy] strict = True -strict_optional = False disallow_untyped_calls = False python_version = 3.6 diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py index 720449feee..353e8020a2 100644 --- a/scripts/qapi/schema.py +++ b/scripts/qapi/schema.py @@ -68,7 +68,8 @@ class QAPISchemaEntity: def _set_module(self, schema, info): assert self._checked - self._module = schema.module_by_fname(info and info.fname) + fname = info.fname if info else QAPISchemaModule.BUILTIN_MODULE_NAME + self._module = schema.module_by_fname(fname) self._module.add_entity(self) def set_module(self, schema): @@ -137,10 +138,40 @@ class QAPISchemaVisitor: class QAPISchemaModule: + + BUILTIN_MODULE_NAME = './builtin' + def __init__(self, name): self.name = name self._entity_list = [] + @staticmethod + def is_system_module(name: str) -> bool: + """ + System modules are internally defined modules. + + Their names start with the "./" prefix. + """ + return name.startswith('./') + + @classmethod + def is_user_module(cls, name: str) -> bool: + """ + User modules are those defined by the user in qapi JSON files. + + They do not start with the "./" prefix. + """ + return not cls.is_system_module(name) + + @classmethod + def is_builtin_module(cls, name: str) -> bool: + """ + The built-in module is a single System module for the built-in types. + + It is always "./builtin". 
+ """ + return name == cls.BUILTIN_MODULE_NAME + def add_entity(self, ent): self._entity_list.append(ent) @@ -825,7 +856,7 @@ class QAPISchema: self._entity_dict = {} self._module_dict = OrderedDict() self._schema_dir = os.path.dirname(fname) - self._make_module(None) # built-ins + self._make_module(QAPISchemaModule.BUILTIN_MODULE_NAME) self._make_module(fname) self._predefining = True self._def_predefineds() @@ -870,9 +901,9 @@ class QAPISchema: info, "%s uses unknown type '%s'" % (what, name)) return typ - def _module_name(self, fname): - if fname is None: - return None + def _module_name(self, fname: str) -> str: + if QAPISchemaModule.is_system_module(fname): + return fname return os.path.relpath(fname, self._schema_dir) def _make_module(self, fname): @@ -883,7 +914,6 @@ class QAPISchema: def module_by_fname(self, fname): name = self._module_name(fname) - assert name in self._module_dict return self._module_dict[name] def _def_include(self, expr, info, doc): diff --git a/scripts/qapi/types.py b/scripts/qapi/types.py index 2b4916cdaa..2bdd626847 100644 --- a/scripts/qapi/types.py +++ b/scripts/qapi/types.py @@ -271,7 +271,7 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor): prefix, 'qapi-types', ' * Schema-defined QAPI types', ' * Built-in QAPI types', __doc__) - def _begin_system_module(self, name: None) -> None: + def _begin_builtin_module(self) -> None: self._genc.preamble_add(mcgen(''' #include "qemu/osdep.h" #include "qapi/dealloc-visitor.h" @@ -350,7 +350,7 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor): def visit_alternate_type(self, name: str, - info: QAPISourceInfo, + info: Optional[QAPISourceInfo], ifcond: List[str], features: List[QAPISchemaFeature], variants: QAPISchemaVariants) -> None: diff --git a/scripts/qapi/visit.py b/scripts/qapi/visit.py index 339f152152..22e62df901 100644 --- a/scripts/qapi/visit.py +++ b/scripts/qapi/visit.py @@ -305,7 +305,7 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor): prefix, 'qapi-visit', ' * Schema-defined QAPI visitors', ' * Built-in QAPI visitors', __doc__) - def _begin_system_module(self, name: None) -> None: + def _begin_builtin_module(self) -> None: self._genc.preamble_add(mcgen(''' #include "qemu/osdep.h" #include "qapi/error.h" @@ -336,7 +336,7 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor): def visit_enum_type(self, name: str, - info: QAPISourceInfo, + info: Optional[QAPISourceInfo], ifcond: List[str], features: List[QAPISchemaFeature], members: List[QAPISchemaEnumMember], @@ -378,7 +378,7 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor): def visit_alternate_type(self, name: str, - info: QAPISourceInfo, + info: Optional[QAPISourceInfo], ifcond: List[str], features: List[QAPISchemaFeature], variants: QAPISchemaVariants) -> None: diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py new file mode 100755 index 0000000000..0684be4e04 --- /dev/null +++ b/scripts/userfaultfd-wrlat.py @@ -0,0 +1,122 @@ +#!/usr/bin/python3 +# +# userfaultfd-wrlat Summarize userfaultfd write fault latencies. +# Events are continuously accumulated for the +# run, while latency distribution histogram is +# dumped each 'interval' seconds. +# +# For Linux, uses BCC, eBPF. +# +# USAGE: userfaultfd-lat [interval [count]] +# +# Copyright Virtuozzo GmbH, 2020 +# +# Authors: +# Andrey Gruzdev <andrey.gruzdev@virtuozzo.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. 
+ +from __future__ import print_function +from bcc import BPF +from ctypes import c_ushort, c_int, c_ulonglong +from time import sleep +from sys import argv + +def usage(): + print("USAGE: %s [interval [count]]" % argv[0]) + exit() + +# define BPF program +bpf_text = """ +#include <uapi/linux/ptrace.h> +#include <linux/mm.h> + +BPF_HASH(ev_start, u32, u64); +BPF_HISTOGRAM(ev_delta_hist, u64); + +/* Trace UFFD page fault start event. */ +static void do_event_start() +{ + /* Using "(u32)" to drop group ID which is upper 32 bits */ + u32 tid = (u32) bpf_get_current_pid_tgid(); + u64 ts = bpf_ktime_get_ns(); + + ev_start.update(&tid, &ts); +} + +/* Trace UFFD page fault end event. */ +static void do_event_end() +{ + /* Using "(u32)" to drop group ID which is upper 32 bits */ + u32 tid = (u32) bpf_get_current_pid_tgid(); + u64 ts = bpf_ktime_get_ns(); + u64 *tsp; + + tsp = ev_start.lookup(&tid); + if (tsp) { + u64 delta = ts - (*tsp); + /* Transform time delta to milliseconds */ + ev_delta_hist.increment(bpf_log2l(delta / 1000000)); + ev_start.delete(&tid); + } +} + +/* KPROBE for handle_userfault(). */ +int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf, + unsigned long reason) +{ + /* Trace only UFFD write faults. */ + if (reason & VM_UFFD_WP) { + do_event_start(); + } + return 0; +} + +/* KRETPROBE for handle_userfault(). */ +int retprobe_handle_userfault(struct pt_regs *ctx) +{ + do_event_end(); + return 0; +} +""" + +# arguments +interval = 10 +count = -1 +if len(argv) > 1: + try: + interval = int(argv[1]) + if interval == 0: + raise + if len(argv) > 2: + count = int(argv[2]) + except: # also catches -h, --help + usage() + +# load BPF program +b = BPF(text=bpf_text) +# attach KRPOBEs +b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault") +b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault") + +# header +print("Tracing UFFD-WP write fault latency... 
Hit Ctrl-C to end.") + +# output +loop = 0 +do_exit = 0 +while (1): + if count > 0: + loop += 1 + if loop > count: + exit() + try: + sleep(interval) + except KeyboardInterrupt: + pass; do_exit = 1 + + print() + b["ev_delta_hist"].print_log2_hist("msecs") + if do_exit: + exit() diff --git a/softmmu/vl.c b/softmmu/vl.c index 9eb9dab1fc..b219ce1f35 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -2545,7 +2545,7 @@ void qmp_x_exit_preconfig(Error **errp) if (loadvm) { Error *local_err = NULL; - if (load_snapshot(loadvm, &local_err) < 0) { + if (!load_snapshot(loadvm, NULL, false, NULL, &local_err)) { error_report_err(local_err); autostart = 0; exit(1); diff --git a/tests/Makefile.include b/tests/Makefile.include index ceaf3f0d6e..d34254fb29 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -12,7 +12,7 @@ check-help: @echo " $(MAKE) check-speed Run qobject speed tests" @echo " $(MAKE) check-qapi-schema Run QAPI schema tests" @echo " $(MAKE) check-block Run block tests" -ifeq ($(CONFIG_TCG),y) +ifneq ($(filter $(all-check-targets), check-softfloat),) @echo " $(MAKE) check-tcg Run TCG tests" @echo " $(MAKE) check-softfloat Run FPU emulation tests" endif @@ -40,11 +40,13 @@ SYSEMU_TARGET_LIST := $(subst -softmmu.mak,,$(notdir \ SPEED = quick -# Per guest TCG tests +# Build up our target list from the filtered list of ninja targets +TARGETS=$(patsubst libqemu-%.fa, %, $(filter libqemu-%.fa, $(ninja-targets))) -BUILD_TCG_TARGET_RULES=$(patsubst %,build-tcg-tests-%, $(TARGET_DIRS)) -CLEAN_TCG_TARGET_RULES=$(patsubst %,clean-tcg-tests-%, $(TARGET_DIRS)) -RUN_TCG_TARGET_RULES=$(patsubst %,run-tcg-tests-%, $(TARGET_DIRS)) +# Per guest TCG tests +BUILD_TCG_TARGET_RULES=$(patsubst %,build-tcg-tests-%, $(TARGETS)) +CLEAN_TCG_TARGET_RULES=$(patsubst %,clean-tcg-tests-%, $(TARGETS)) +RUN_TCG_TARGET_RULES=$(patsubst %,run-tcg-tests-%, $(TARGETS)) # Probe for the Docker Builds needed for each build $(foreach PROBE_TARGET,$(TARGET_DIRS), \ diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py index 1da4a53d6a..bcd923bb4a 100644 --- a/tests/acceptance/boot_linux.py +++ b/tests/acceptance/boot_linux.py @@ -57,7 +57,7 @@ class BootLinuxBase(Test): self.cancel('Failed to download/prepare boot image') return boot.path - def download_cloudinit(self, ssh_pubkey=None): + def prepare_cloudinit(self, ssh_pubkey=None): self.log.info('Preparing cloudinit image') try: cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso') @@ -70,7 +70,7 @@ class BootLinuxBase(Test): phone_home_port=self.phone_home_port, authorized_key=ssh_pubkey) except Exception: - self.cancel('Failed to prepared cloudinit image') + self.cancel('Failed to prepare the cloudinit image') return cloudinit_iso class BootLinux(BootLinuxBase): @@ -85,15 +85,15 @@ class BootLinux(BootLinuxBase): super(BootLinux, self).setUp() self.vm.add_args('-smp', '2') self.vm.add_args('-m', '1024') - self.prepare_boot() - self.prepare_cloudinit(ssh_pubkey) + self.set_up_boot() + self.set_up_cloudinit(ssh_pubkey) - def prepare_boot(self): + def set_up_boot(self): path = self.download_boot() self.vm.add_args('-drive', 'file=%s' % path) - def prepare_cloudinit(self, ssh_pubkey=None): - cloudinit_iso = self.download_cloudinit(ssh_pubkey) + def set_up_cloudinit(self, ssh_pubkey=None): + cloudinit_iso = self.prepare_cloudinit(ssh_pubkey) self.vm.add_args('-drive', 'file=%s,format=raw' % cloudinit_iso) def launch_and_wait(self): diff --git a/tests/acceptance/boot_linux_console.py b/tests/acceptance/boot_linux_console.py index 
fb41bb7144..eb01286799 100644 --- a/tests/acceptance/boot_linux_console.py +++ b/tests/acceptance/boot_linux_console.py @@ -802,27 +802,7 @@ class BootLinuxConsole(LinuxKernelTest): # Wait for VM to shut down gracefully self.vm.wait() - @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'), - 'Test artifacts fetched from unreliable dl.armbian.com') - @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited') - @skipUnless(P7ZIP_AVAILABLE, '7z not installed') - def test_arm_orangepi_bionic(self): - """ - :avocado: tags=arch:arm - :avocado: tags=machine:orangepi-pc - :avocado: tags=device:sd - """ - - # This test download a 196MB compressed image and expand it to 1GB - image_url = ('https://dl.armbian.com/orangepipc/archive/' - 'Armbian_19.11.3_Orangepipc_bionic_current_5.3.9.7z') - image_hash = '196a8ffb72b0123d92cea4a070894813d305c71e' - image_path_7z = self.fetch_asset(image_url, asset_hash=image_hash) - image_name = 'Armbian_19.11.3_Orangepipc_bionic_current_5.3.9.img' - image_path = os.path.join(self.workdir, image_name) - process.run("7z e -o%s %s" % (self.workdir, image_path_7z)) - image_pow2ceil_expand(image_path) - + def do_test_arm_orangepi_uboot_armbian(self, image_path): self.vm.set_console() self.vm.add_args('-drive', 'file=' + image_path + ',if=sd,format=raw', '-nic', 'user', @@ -848,6 +828,54 @@ class BootLinuxConsole(LinuxKernelTest): 'to <orangepipc>') self.wait_for_console_pattern('Starting Load Kernel Modules...') + @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'), + 'Test artifacts fetched from unreliable apt.armbian.com') + @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited') + @skipUnless(P7ZIP_AVAILABLE, '7z not installed') + def test_arm_orangepi_bionic_19_11(self): + """ + :avocado: tags=arch:arm + :avocado: tags=machine:orangepi-pc + :avocado: tags=device:sd + """ + + # This test download a 196MB compressed image and expand it to 1GB + image_url = ('https://dl.armbian.com/orangepipc/archive/' + 'Armbian_19.11.3_Orangepipc_bionic_current_5.3.9.7z') + image_hash = '196a8ffb72b0123d92cea4a070894813d305c71e' + image_path_7z = self.fetch_asset(image_url, asset_hash=image_hash) + image_name = 'Armbian_19.11.3_Orangepipc_bionic_current_5.3.9.img' + image_path = os.path.join(self.workdir, image_name) + process.run("7z e -o%s %s" % (self.workdir, image_path_7z)) + image_pow2ceil_expand(image_path) + + self.do_test_arm_orangepi_uboot_armbian(image_path) + + @skipUnless(os.getenv('ARMBIAN_ARTIFACTS_CACHED'), + 'Test artifacts fetched from unreliable apt.armbian.com') + @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited') + def test_arm_orangepi_bionic_20_08(self): + """ + :avocado: tags=arch:arm + :avocado: tags=machine:orangepi-pc + :avocado: tags=device:sd + """ + + # This test download a 275 MiB compressed image and expand it + # to 1036 MiB, but the underlying filesystem is 1552 MiB... + # As we expand it to 2 GiB we are safe. 
+ + image_url = ('https://dl.armbian.com/orangepipc/archive/' + 'Armbian_20.08.1_Orangepipc_bionic_current_5.8.5.img.xz') + image_hash = ('b4d6775f5673486329e45a0586bf06b6' + 'dbe792199fd182ac6b9c7bb6c7d3e6dd') + image_path_xz = self.fetch_asset(image_url, asset_hash=image_hash, + algorithm='sha256') + image_path = archive.extract(image_path_xz, self.workdir) + image_pow2ceil_expand(image_path) + + self.do_test_arm_orangepi_uboot_armbian(image_path) + @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited') def test_arm_orangepi_uboot_netbsd9(self): """ @@ -976,25 +1004,6 @@ class BootLinuxConsole(LinuxKernelTest): console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) - def test_ppc64_pseries(self): - """ - :avocado: tags=arch:ppc64 - :avocado: tags=machine:pseries - """ - kernel_url = ('https://archives.fedoraproject.org/pub/archive' - '/fedora-secondary/releases/29/Everything/ppc64le/os' - '/ppc/ppc64/vmlinuz') - kernel_hash = '3fe04abfc852b66653b8c3c897a59a689270bc77' - kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) - - self.vm.set_console() - kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0' - self.vm.add_args('-kernel', kernel_path, - '-append', kernel_command_line) - self.vm.launch() - console_pattern = 'Kernel command line: %s' % kernel_command_line - self.wait_for_console_pattern(console_pattern) - def test_m68k_q800(self): """ :avocado: tags=arch:m68k @@ -1047,15 +1056,6 @@ class BootLinuxConsole(LinuxKernelTest): tar_hash = 'ac688fd00561a2b6ce1359f9ff6aa2b98c9a570c' self.do_test_advcal_2018('07', tar_hash, 'sanity-clause.elf') - @skip("Test currently broken") # Console stuck as of 5.2-rc1 - def test_microblaze_s3adsp1800(self): - """ - :avocado: tags=arch:microblaze - :avocado: tags=machine:petalogix-s3adsp1800 - """ - tar_hash = '08bf3e3bfb6b6c7ce1e54ab65d54e189f2caf13f' - self.do_test_advcal_2018('17', tar_hash, 'ballerina.bin') - def test_or1k_sim(self): """ :avocado: tags=arch:or1k diff --git a/tests/acceptance/linux_ssh_mips_malta.py b/tests/acceptance/linux_ssh_mips_malta.py index 25c5c5f741..6dbd02d49d 100644 --- a/tests/acceptance/linux_ssh_mips_malta.py +++ b/tests/acceptance/linux_ssh_mips_malta.py @@ -91,7 +91,7 @@ class LinuxSSH(Test): except: time.sleep(4) pass - self.fail("sshd timeout") + self.fail("ssh connection timeout") def ssh_disconnect_vm(self): self.ssh_session.quit() diff --git a/tests/acceptance/machine_m68k_nextcube.py b/tests/acceptance/machine_m68k_nextcube.py index 2baba5fdc2..09e2745cc5 100644 --- a/tests/acceptance/machine_m68k_nextcube.py +++ b/tests/acceptance/machine_m68k_nextcube.py @@ -1,19 +1,17 @@ # Functional test that boots a VM and run OCR on the framebuffer # -# Copyright (c) Philippe Mathieu-Daudé <f4bug@amsat.org> +# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org> # # This work is licensed under the terms of the GNU GPL, version 2 or # later. See the COPYING file in the top-level directory. 
import os -import re import time -import logging from avocado_qemu import Test from avocado import skipUnless -from avocado.utils import process -from avocado.utils.path import find_command, CmdNotFoundError + +from tesseract_utils import tesseract_available, tesseract_ocr PIL_AVAILABLE = True try: @@ -22,25 +20,6 @@ except ImportError: PIL_AVAILABLE = False -def tesseract_available(expected_version): - try: - find_command('tesseract') - except CmdNotFoundError: - return False - res = process.run('tesseract --version') - try: - version = res.stdout_text.split()[1] - except IndexError: - version = res.stderr_text.split()[1] - return int(version.split('.')[0]) == expected_version - - match = re.match(r'tesseract\s(\d)', res) - if match is None: - return False - # now this is guaranteed to be a digit - return int(match.groups()[0]) == expected_version - - class NextCubeMachine(Test): """ :avocado: tags=arch:m68k @@ -80,12 +59,8 @@ class NextCubeMachine(Test): def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): screenshot_path = os.path.join(self.workdir, "dump.ppm") self.check_bootrom_framebuffer(screenshot_path) - - console_logger = logging.getLogger('console') - text = process.run("tesseract %s stdout" % screenshot_path).stdout_text - for line in text.split('\n'): - if len(line): - console_logger.debug(line) + lines = tesseract_ocr(screenshot_path, tesseract_version=3) + text = '\n'.join(lines) self.assertIn('Backplane', text) self.assertIn('Ethernet address', text) @@ -96,13 +71,8 @@ class NextCubeMachine(Test): def test_bootrom_framebuffer_ocr_with_tesseract_v4(self): screenshot_path = os.path.join(self.workdir, "dump.ppm") self.check_bootrom_framebuffer(screenshot_path) - - console_logger = logging.getLogger('console') - proc = process.run("tesseract --oem 1 %s stdout" % screenshot_path) - text = proc.stdout_text - for line in text.split('\n'): - if len(line): - console_logger.debug(line) + lines = tesseract_ocr(screenshot_path, tesseract_version=4) + text = '\n'.join(lines) self.assertIn('Testing the FPU, SCC', text) self.assertIn('System test failed. Error code', text) self.assertIn('Boot command', text) diff --git a/tests/acceptance/machine_microblaze.py b/tests/acceptance/machine_microblaze.py new file mode 100644 index 0000000000..7f6d18495d --- /dev/null +++ b/tests/acceptance/machine_microblaze.py @@ -0,0 +1,35 @@ +# Functional test that boots a microblaze Linux kernel and checks the console +# +# Copyright (c) 2018, 2021 Red Hat, Inc. +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +from avocado_qemu import Test +from avocado_qemu import wait_for_console_pattern +from avocado.utils import archive + +class MicroblazeMachine(Test): + + timeout = 90 + + def test_microblaze_s3adsp1800(self): + """ + :avocado: tags=arch:microblaze + :avocado: tags=machine:petalogix-s3adsp1800 + """ + + tar_url = ('https://www.qemu-advent-calendar.org' + '/2018/download/day17.tar.xz') + tar_hash = '08bf3e3bfb6b6c7ce1e54ab65d54e189f2caf13f' + file_path = self.fetch_asset(tar_url, asset_hash=tar_hash) + archive.extract(file_path, self.workdir) + self.vm.set_console() + self.vm.add_args('-kernel', self.workdir + '/day17/ballerina.bin') + self.vm.launch() + wait_for_console_pattern(self, 'This architecture does not have ' + 'kernel memory protection') + # Note: + # The kernel sometimes gets stuck after the "This architecture ..." + # message, that's why we don't test for a later string here. 
This + # needs some investigation by a microblaze wizard one day... diff --git a/tests/acceptance/machine_ppc.py b/tests/acceptance/machine_ppc.py new file mode 100644 index 0000000000..a836e2496f --- /dev/null +++ b/tests/acceptance/machine_ppc.py @@ -0,0 +1,69 @@ +# Test that Linux kernel boots on ppc machines and check the console +# +# Copyright (c) 2018, 2020 Red Hat, Inc. +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +from avocado.utils import archive +from avocado_qemu import Test +from avocado_qemu import wait_for_console_pattern + +class PpcMachine(Test): + + timeout = 90 + KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' + panic_message = 'Kernel panic - not syncing' + + def test_ppc64_pseries(self): + """ + :avocado: tags=arch:ppc64 + :avocado: tags=machine:pseries + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive' + '/fedora-secondary/releases/29/Everything/ppc64le/os' + '/ppc/ppc64/vmlinuz') + kernel_hash = '3fe04abfc852b66653b8c3c897a59a689270bc77' + kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) + + self.vm.set_console() + kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0' + self.vm.add_args('-kernel', kernel_path, + '-append', kernel_command_line) + self.vm.launch() + console_pattern = 'Kernel command line: %s' % kernel_command_line + wait_for_console_pattern(self, console_pattern, self.panic_message) + + def test_ppc_mpc8544ds(self): + """ + :avocado: tags=arch:ppc + :avocado: tags=machine:mpc8544ds + """ + tar_url = ('https://www.qemu-advent-calendar.org' + '/2020/download/day17.tar.gz') + tar_hash = '7a5239542a7c4257aa4d3b7f6ddf08fb6775c494' + file_path = self.fetch_asset(tar_url, asset_hash=tar_hash) + archive.extract(file_path, self.workdir) + self.vm.set_console() + self.vm.add_args('-kernel', self.workdir + '/creek/creek.bin') + self.vm.launch() + wait_for_console_pattern(self, 'QEMU advent calendar 2020', + self.panic_message) + + def test_ppc_virtex_ml507(self): + """ + :avocado: tags=arch:ppc + :avocado: tags=machine:virtex-ml507 + """ + tar_url = ('https://www.qemu-advent-calendar.org' + '/2020/download/hippo.tar.gz') + tar_hash = '306b95bfe7d147f125aa176a877e266db8ef914a' + file_path = self.fetch_asset(tar_url, asset_hash=tar_hash) + archive.extract(file_path, self.workdir) + self.vm.set_console() + self.vm.add_args('-kernel', self.workdir + '/hippo/hippo.linux', + '-dtb', self.workdir + '/hippo/virtex440-ml507.dtb', + '-m', '512') + self.vm.launch() + wait_for_console_pattern(self, 'QEMU advent calendar 2020', + self.panic_message) diff --git a/tests/acceptance/replay_kernel.py b/tests/acceptance/replay_kernel.py index 772633b01d..c1cb862468 100644 --- a/tests/acceptance/replay_kernel.py +++ b/tests/acceptance/replay_kernel.py @@ -31,7 +31,7 @@ class ReplayKernelBase(LinuxKernelTest): terminates. """ - timeout = 90 + timeout = 120 KERNEL_COMMON_COMMAND_LINE = 'printk.time=1 panic=-1 ' def run_vm(self, kernel_path, kernel_command_line, console_pattern, diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py new file mode 100644 index 0000000000..72cd9ab798 --- /dev/null +++ b/tests/acceptance/tesseract_utils.py @@ -0,0 +1,46 @@ +# ... +# +# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. 
+ +import re +import logging + +from avocado.utils import process +from avocado.utils.path import find_command, CmdNotFoundError + +def tesseract_available(expected_version): + try: + find_command('tesseract') + except CmdNotFoundError: + return False + res = process.run('tesseract --version') + try: + version = res.stdout_text.split()[1] + except IndexError: + version = res.stderr_text.split()[1] + return int(version.split('.')[0]) == expected_version + + match = re.match(r'tesseract\s(\d)', res) + if match is None: + return False + # now this is guaranteed to be a digit + return int(match.groups()[0]) == expected_version + + +def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3): + console_logger = logging.getLogger('tesseract') + console_logger.debug(image_path) + if tesseract_version == 4: + tesseract_args += ' --oem 1' + proc = process.run("tesseract {} {} stdout".format(tesseract_args, + image_path)) + lines = [] + for line in proc.stdout_text.split('\n'): + sline = line.strip() + if len(sline): + console_logger.debug(sline) + lines += [sline] + return lines diff --git a/tests/acceptance/virtiofs_submounts.py b/tests/acceptance/virtiofs_submounts.py index 361e5990b6..949ca87a83 100644 --- a/tests/acceptance/virtiofs_submounts.py +++ b/tests/acceptance/virtiofs_submounts.py @@ -83,20 +83,21 @@ class VirtiofsSubmountsTest(BootLinux): command_line='info usernet') for line in res.split('\r\n'): match = \ - re.search(r'TCP.HOST_FORWARD.*127\.0\.0\.1\s*(\d+)\s+10\.', + re.search(r'TCP.HOST_FORWARD.*127\.0\.0\.1\s+(\d+)\s+10\.', line) if match is not None: - port = match[1] + port = int(match[1]) break self.assertIsNotNone(port) - self.log.debug('sshd listening on port: ' + port) + self.assertGreater(port, 0) + self.log.debug('sshd listening on port: %d', port) return port def ssh_connect(self, username, keyfile): self.ssh_logger = logging.getLogger('ssh') port = self.get_portfwd() - self.ssh_session = ssh.Session('127.0.0.1', port=int(port), + self.ssh_session = ssh.Session('127.0.0.1', port=port, user=username, key=keyfile) for i in range(10): try: @@ -105,7 +106,7 @@ class VirtiofsSubmountsTest(BootLinux): except: time.sleep(4) pass - self.fail('sshd timeout') + self.fail('ssh connection timeout') def ssh_command(self, command): self.ssh_logger.info(command) @@ -136,8 +137,7 @@ class VirtiofsSubmountsTest(BootLinux): return (stdout, stderr, ret) def set_up_shared_dir(self): - atwd = os.getenv('AVOCADO_TEST_WORKDIR') - self.shared_dir = os.path.join(atwd, 'virtiofs-shared') + self.shared_dir = os.path.join(self.workdir, 'virtiofs-shared') os.mkdir(self.shared_dir) @@ -234,10 +234,9 @@ class VirtiofsSubmountsTest(BootLinux): self.seed = self.params.get('seed') - atwd = os.getenv('AVOCADO_TEST_WORKDIR') - self.ssh_key = os.path.join(atwd, 'id_ed25519') + self.ssh_key = os.path.join(self.workdir, 'id_ed25519') - self.run(('ssh-keygen', '-t', 'ed25519', '-f', self.ssh_key)) + self.run(('ssh-keygen', '-N', '', '-t', 'ed25519', '-f', self.ssh_key)) pubkey = open(self.ssh_key + '.pub').read() @@ -249,7 +248,7 @@ class VirtiofsSubmountsTest(BootLinux): # Allow us to connect to SSH self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22', - '-device', 'e1000,netdev=vnet') + '-device', 'virtio-net,netdev=vnet') if not kvm_available(self.arch, self.qemu_bin): self.cancel(KVM_NOT_AVAILABLE) diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index 0779dab5b9..93b29ad823 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ 
-1,6 +1,6 @@ # Makefile for Docker tests -.PHONY: docker docker-test docker-clean docker-image docker-qemu-src +.PHONY: docker docker-help docker-test docker-clean docker-image docker-qemu-src NULL := SPACE := $(NULL) # @@ -11,7 +11,7 @@ HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m)) DOCKER_SUFFIX := .docker DOCKER_FILES_DIR := $(SRC_PATH)/tests/docker/dockerfiles # we don't run tests on intermediate images (used as base by another image) -DOCKER_PARTIAL_IMAGES := debian10 debian11 debian-bootstrap +DOCKER_PARTIAL_IMAGES := debian10 debian11 debian-bootstrap empty DOCKER_IMAGES := $(sort $(notdir $(basename $(wildcard $(DOCKER_FILES_DIR)/*.docker)))) DOCKER_TARGETS := $(patsubst %,docker-image-%,$(DOCKER_IMAGES)) # Use a global constant ccache directory to speed up repetitive builds @@ -92,6 +92,24 @@ docker-binfmt-image-debian-%: $(DOCKER_FILES_DIR)/debian-bootstrap.docker { echo "You will need to build $(EXECUTABLE)"; exit 1;},\ "CHECK", "debian-$* exists")) +# These are test targets +USER_TCG_TARGETS=$(patsubst %-linux-user,qemu-%,$(filter %-linux-user,$(TARGET_DIRS))) +EXEC_COPY_TESTS=$(patsubst %,docker-exec-copy-test-%, $(USER_TCG_TARGETS)) + +$(EXEC_COPY_TESTS): docker-exec-copy-test-%: $(DOCKER_FILES_DIR)/empty.docker + $(call quiet-command, \ + $(DOCKER_SCRIPT) build -t qemu/exec-copy-test-$* -f $< \ + $(if $V,,--quiet) --no-cache \ + --include-executable=$* \ + --skip-binfmt, \ + "TEST","copy $* to container") + $(call quiet-command, \ + $(DOCKER_SCRIPT) run qemu/exec-copy-test-$* \ + /$* -version > tests/docker-exec-copy-test-$*.out, \ + "TEST","check $* works in container") + +docker-exec-copy-test: $(EXEC_COPY_TESTS) + endif # Enforce dependencies for composite images @@ -209,7 +227,7 @@ endif @echo ' before running the command.' @echo ' NETWORK=1 Enable virtual network interface with default backend.' @echo ' NETWORK=$$BACKEND Enable virtual network interface with $$BACKEND.' - @echo ' NOUSER Define to disable adding current user to containers passwd.' + @echo ' NOUSER=1 Define to disable adding current user to containers passwd.' @echo ' NOCACHE=1 Ignore cache when build images.' @echo ' EXECUTABLE=<path> Include executable in image.' @echo ' EXTRA_FILES="<path> [... <path>]"' @@ -218,6 +236,8 @@ endif @echo ' Specify which container engine to run.' @echo ' REGISTRY=url Cache builds from registry (default:$(DOCKER_REGISTRY))' +docker-help: docker + # This rule if for directly running against an arbitrary docker target. # It is called by the expanded docker targets (e.g. make # docker-test-foo@bar) which will do additional verification. 
diff --git a/tests/docker/docker.py b/tests/docker/docker.py index 884dfeb29c..d28df4c140 100755 --- a/tests/docker/docker.py +++ b/tests/docker/docker.py @@ -93,7 +93,7 @@ def _guess_engine_command(): commands_txt) -def _copy_with_mkdir(src, root_dir, sub_path='.'): +def _copy_with_mkdir(src, root_dir, sub_path='.', name=None): """Copy src into root_dir, creating sub_path as needed.""" dest_dir = os.path.normpath("%s/%s" % (root_dir, sub_path)) try: @@ -102,8 +102,13 @@ def _copy_with_mkdir(src, root_dir, sub_path='.'): # we can safely ignore already created directories pass - dest_file = "%s/%s" % (dest_dir, os.path.basename(src)) - copy(src, dest_file) + dest_file = "%s/%s" % (dest_dir, name if name else os.path.basename(src)) + + try: + copy(src, dest_file) + except FileNotFoundError: + print("Couldn't copy %s to %s" % (src, dest_file)) + pass def _get_so_libs(executable): @@ -120,7 +125,7 @@ def _get_so_libs(executable): search = ldd_re.search(line) if search: try: - libs.append(s.group(1)) + libs.append(search.group(1)) except IndexError: pass except subprocess.CalledProcessError: @@ -150,8 +155,9 @@ def _copy_binary_with_libs(src, bin_dest, dest_dir): if libs: for l in libs: so_path = os.path.dirname(l) + name = os.path.basename(l) real_l = os.path.realpath(l) - _copy_with_mkdir(real_l, dest_dir, so_path) + _copy_with_mkdir(real_l, dest_dir, so_path, name) def _check_binfmt_misc(executable): @@ -432,6 +438,9 @@ class BuildCommand(SubCommand): help="""Specify a binary that will be copied to the container together with all its dependent libraries""") + parser.add_argument("--skip-binfmt", + action="store_true", + help="""Skip binfmt entry check (used for testing)""") parser.add_argument("--extra-files", nargs='*', help="""Specify files that will be copied in the Docker image, fulfilling the ADD directive from the @@ -460,7 +469,9 @@ class BuildCommand(SubCommand): docker_dir = tempfile.mkdtemp(prefix="docker_build") # Validate binfmt_misc will work - if args.include_executable: + if args.skip_binfmt: + qpath = args.include_executable + elif args.include_executable: qpath, enabled = _check_binfmt_misc(args.include_executable) if not enabled: return 1 diff --git a/tests/docker/dockerfiles/empty.docker b/tests/docker/dockerfiles/empty.docker new file mode 100644 index 0000000000..9ba980f1a8 --- /dev/null +++ b/tests/docker/dockerfiles/empty.docker @@ -0,0 +1,8 @@ +# +# Empty Dockerfile +# + +FROM scratch + +# Add everything from the context into the container +ADD . 
/ diff --git a/tests/qapi-schema/comments.out b/tests/qapi-schema/comments.out index 273f0f54e1..ce4f6a4f0f 100644 --- a/tests/qapi-schema/comments.out +++ b/tests/qapi-schema/comments.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out index 419284dae2..715b0bbc1a 100644 --- a/tests/qapi-schema/doc-good.out +++ b/tests/qapi-schema/doc-good.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/empty.out b/tests/qapi-schema/empty.out index 69666c39ad..3feb3f69d3 100644 --- a/tests/qapi-schema/empty.out +++ b/tests/qapi-schema/empty.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/event-case.out b/tests/qapi-schema/event-case.out index 42ae519656..9ae44052ac 100644 --- a/tests/qapi-schema/event-case.out +++ b/tests/qapi-schema/event-case.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/include-repetition.out b/tests/qapi-schema/include-repetition.out index 0b654ddebb..16dbd9b819 100644 --- a/tests/qapi-schema/include-repetition.out +++ b/tests/qapi-schema/include-repetition.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/include-simple.out b/tests/qapi-schema/include-simple.out index 061f81e509..48e923bfbc 100644 --- a/tests/qapi-schema/include-simple.out +++ b/tests/qapi-schema/include-simple.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/indented-expr.out b/tests/qapi-schema/indented-expr.out index 04356775cd..6a30ded3fa 100644 --- a/tests/qapi-schema/indented-expr.out +++ b/tests/qapi-schema/indented-expr.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qapi-schema/qapi-schema-test.out b/tests/qapi-schema/qapi-schema-test.out index 8868ca0dca..3b1387d9f1 100644 --- a/tests/qapi-schema/qapi-schema-test.out +++ b/tests/qapi-schema/qapi-schema-test.out @@ -1,4 +1,4 @@ -module None +module ./builtin object q_empty enum QType prefix QTYPE diff --git a/tests/qemu-iotests/267.out b/tests/qemu-iotests/267.out index 27471ffae8..7176e376e1 100644 --- a/tests/qemu-iotests/267.out +++ b/tests/qemu-iotests/267.out @@ -6,11 +6,11 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 Testing: QEMU X.Y.Z monitor - type 'help' for more information (qemu) savevm snap0 -Error: No block device can accept snapshots +Error: no block device can store vmstate for snapshot (qemu) info snapshots -No available block device supports snapshots +no block device can store vmstate for snapshot (qemu) loadvm snap0 -Error: No block device supports snapshots +Error: no block device can store vmstate for snapshot (qemu) quit @@ -22,7 +22,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) savevm snap0 Error: Device 'none0' is writable but does not support snapshots (qemu) info snapshots -No available block device supports snapshots +no block device can store vmstate for snapshot (qemu) loadvm snap0 Error: Device 'none0' is writable but does not support snapshots (qemu) quit @@ -58,7 +58,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) savevm snap0 Error: Device 'virtio0' is writable but does not support snapshots (qemu) info snapshots -No available block device 
supports snapshots +no block device can store vmstate for snapshot (qemu) loadvm snap0 Error: Device 'virtio0' is writable but does not support snapshots (qemu) quit @@ -83,7 +83,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) savevm snap0 Error: Device 'file' is writable but does not support snapshots (qemu) info snapshots -No available block device supports snapshots +no block device can store vmstate for snapshot (qemu) loadvm snap0 Error: Device 'file' is writable but does not support snapshots (qemu) quit diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index ef105dfc39..0fc52d20d7 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -53,6 +53,15 @@ _in_fd=4 # If $mismatch_only is set, only non-matching responses will # be echoed. # +# If $capture_events is non-empty, then any QMP event names it lists +# will not be echoed out, but instead collected in the $QEMU_EVENTS +# variable. The _wait_event function can later be used to receive +# the cached events. +# +# If $only_capture_events is set to anything but an empty string, +# then an error will be raised if a QMP message is seen which is +# not an event listed in $capture_events. +# # If $success_or_failure is set, the meaning of the arguments is # changed as follows: # $2: A string to search for in the response; if found, this indicates @@ -78,6 +87,31 @@ _timed_wait_for() QEMU_STATUS[$h]=0 while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]} do + if [ -n "$capture_events" ]; then + capture=0 + local evname + for evname in $capture_events + do + case ${resp} in + *\"event\":\ \"${evname}\"* ) capture=1 ;; + esac + done + if [ $capture = 1 ]; + then + ev=$(echo "${resp}" | tr -d '\r' | tr % .) + QEMU_EVENTS="${QEMU_EVENTS:+${QEMU_EVENTS}%}${ev}" + if [ -n "$only_capture_events" ]; then + return + else + continue + fi + fi + fi + if [ -n "$only_capture_events" ]; then + echo "Only expected $capture_events but got ${resp}" + exit 1 + fi + if [ -z "${silent}" ] && [ -z "${mismatch_only}" ]; then echo "${resp}" | _filter_testdir | _filter_qemu \ | _filter_qemu_io | _filter_qmp | _filter_hmp @@ -172,12 +206,82 @@ _send_qemu_cmd() let count--; done if [ ${QEMU_STATUS[$h]} -ne 0 ] && [ -z "${qemu_error_no_exit}" ]; then - echo "Timeout waiting for ${1} on handle ${h}" + echo "Timeout waiting for command ${1} response on handle ${h}" exit 1 #Timeout means the test failed fi } +# Check event cache for a named QMP event +# +# Input parameters: +# $1: Name of the QMP event to check for +# +# Checks if the named QMP event was previously captured +# into $QEMU_EVENTS. When matched, the QMP event will be echoed +# and the $matched variable set to 1. +# +# _wait_event is more suitable for test usage in most cases +_check_cached_events() +{ + local evname=${1} + + local match="\"event\": \"$evname\"" + + matched=0 + if [ -n "$QEMU_EVENTS" ]; then + CURRENT_QEMU_EVENTS=$QEMU_EVENTS + QEMU_EVENTS= + old_IFS=$IFS + IFS="%" + for ev in $CURRENT_QEMU_EVENTS + do + grep -q "$match" < <(echo "${ev}") + if [ $? -eq 0 ] && [ $matched = 0 ]; then + echo "${ev}" | _filter_testdir | _filter_qemu \ + | _filter_qemu_io | _filter_qmp | _filter_hmp + matched=1 + else + QEMU_EVENTS="${QEMU_EVENTS:+${QEMU_EVENTS}%}${ev}" + fi + done + IFS=$old_IFS + fi +} + +# Wait for a named QMP event +# +# Input parameters: +# $1: QEMU handle to use +# $2: Name of the QMP event to wait for +# +# Checks if the named QMP event was previously captured +# into $QEMU_EVENTS.
If none are present, then waits for the +# event to arrive on the QMP channel. When matched, the QMP +# event will be echoed +_wait_event() +{ + local h=${1} + local evname=${2} + + while true + do + _check_cached_events $evname + + if [ $matched = 1 ]; + then + return + fi + + only_capture_events=1 qemu_error_no_exit=1 _timed_wait_for ${h} + + if [ ${QEMU_STATUS[$h]} -ne 0 ] ; then + echo "Timeout waiting for event ${evname} on handle ${h}" + exit 1 #Timeout means the test failed + fi + done +} + # Launch a QEMU process. # # Input parameters: diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 297acf9b6a..77c37e8312 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -109,8 +109,14 @@ peek_file_raw() dd if="$1" bs=1 skip="$2" count="$3" status=none } - -if ! . ./common.config +config=common.config +test -f $config || config=../common.config +if ! test -f $config +then + echo "$0: failed to find common.config" + exit 1 +fi +if ! . $config then echo "$0: failed to source common.config" exit 1 diff --git a/tests/tcg/Makefile.qemu b/tests/tcg/Makefile.qemu index c096c611a2..a56564660c 100644 --- a/tests/tcg/Makefile.qemu +++ b/tests/tcg/Makefile.qemu @@ -90,11 +90,11 @@ run-guest-tests: guest-tests else guest-tests: - $(call quiet-command, /bin/true, "BUILD", \ + $(call quiet-command, true, "BUILD", \ "$(TARGET) guest-tests SKIPPED") run-guest-tests: - $(call quiet-command, /bin/true, "RUN", \ + $(call quiet-command, true, "RUN", \ "tests for $(TARGET) SKIPPED") endif diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 1dd0f64d23..abbdb2e126 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -63,8 +63,11 @@ run-gdbstub-qxfer-auxv-read: sha1 --bin $< --test $(MULTIARCH_SRC)/gdbstub/test-qxfer-auxv-read.py, \ "basic gdbstub qXfer:auxv:read support") -EXTRA_RUNS += run-gdbstub-sha1 run-gdbstub-qxfer-auxv-read +else +run-gdbstub-%: + $(call skip-test, "gdbstub test $*", "need working gdb") endif +EXTRA_RUNS += run-gdbstub-sha1 run-gdbstub-qxfer-auxv-read # Update TESTS diff --git a/util/meson.build b/util/meson.build index 3eccdbe596..984fba965f 100644 --- a/util/meson.build +++ b/util/meson.build @@ -52,6 +52,7 @@ if have_system util_ss.add(files('crc-ccitt.c')) util_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus.c'), gio]) util_ss.add(files('yank.c')) + util_ss.add(when: 'CONFIG_LINUX', if_true: files('userfaultfd.c')) endif if have_block diff --git a/util/trace-events b/util/trace-events index 61e0d4bcdf..bac0924899 100644 --- a/util/trace-events +++ b/util/trace-events @@ -91,3 +91,12 @@ qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uin qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")" qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32 qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" + +#userfaultfd.c +uffd_query_features_nosys(int err) "errno: %i" +uffd_query_features_api_failed(int err) "errno: %i" +uffd_create_fd_nosys(int err) "errno: %i" +uffd_create_fd_api_failed(int err) "errno: %i" +uffd_create_fd_api_noioctl(uint64_t ioctl_req, uint64_t ioctl_supp) 
"ioctl_req: 0x%" PRIx64 "ioctl_supp: 0x%" PRIx64 +uffd_register_memory_failed(void *addr, uint64_t length, uint64_t mode, int err) "addr: %p length: %" PRIu64 " mode: 0x%" PRIx64 " errno: %i" +uffd_unregister_memory_failed(void *addr, uint64_t length, int err) "addr: %p length: %" PRIu64 " errno: %i" diff --git a/util/userfaultfd.c b/util/userfaultfd.c new file mode 100644 index 0000000000..f1cd6af2b1 --- /dev/null +++ b/util/userfaultfd.c @@ -0,0 +1,345 @@ +/* + * Linux UFFD-WP support + * + * Copyright Virtuozzo GmbH, 2020 + * + * Authors: + * Andrey Gruzdev <andrey.gruzdev@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/bitops.h" +#include "qemu/error-report.h" +#include "qemu/userfaultfd.h" +#include "trace.h" +#include <poll.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> + +/** + * uffd_query_features: query UFFD features + * + * Returns: 0 on success, negative value in case of an error + * + * @features: parameter to receive 'uffdio_api.features' + */ +int uffd_query_features(uint64_t *features) +{ + int uffd_fd; + struct uffdio_api api_struct = { 0 }; + int ret = -1; + + uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); + if (uffd_fd < 0) { + trace_uffd_query_features_nosys(errno); + return -1; + } + + api_struct.api = UFFD_API; + api_struct.features = 0; + + if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) { + trace_uffd_query_features_api_failed(errno); + goto out; + } + *features = api_struct.features; + ret = 0; + +out: + close(uffd_fd); + return ret; +} + +/** + * uffd_create_fd: create UFFD file descriptor + * + * Returns non-negative file descriptor or negative value in case of an error + * + * @features: UFFD features to request + * @non_blocking: create UFFD file descriptor for non-blocking operation + */ +int uffd_create_fd(uint64_t features, bool non_blocking) +{ + int uffd_fd; + int flags; + struct uffdio_api api_struct = { 0 }; + uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); + + flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0); + uffd_fd = syscall(__NR_userfaultfd, flags); + if (uffd_fd < 0) { + trace_uffd_create_fd_nosys(errno); + return -1; + } + + api_struct.api = UFFD_API; + api_struct.features = features; + if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) { + trace_uffd_create_fd_api_failed(errno); + goto fail; + } + if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) { + trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls); + goto fail; + } + + return uffd_fd; + +fail: + close(uffd_fd); + return -1; +} + +/** + * uffd_close_fd: close UFFD file descriptor + * + * @uffd_fd: UFFD file descriptor + */ +void uffd_close_fd(int uffd_fd) +{ + assert(uffd_fd >= 0); + close(uffd_fd); +} + +/** + * uffd_register_memory: register memory range via UFFD-IO + * + * Returns 0 in case of success, negative value in case of an error + * + * @uffd_fd: UFFD file descriptor + * @addr: base address of memory range + * @length: length of memory range + * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...) 
+ * @ioctls: optional pointer to receive supported IOCTL mask + */ +int uffd_register_memory(int uffd_fd, void *addr, uint64_t length, + uint64_t mode, uint64_t *ioctls) +{ + struct uffdio_register uffd_register; + + uffd_register.range.start = (uintptr_t) addr; + uffd_register.range.len = length; + uffd_register.mode = mode; + + if (ioctl(uffd_fd, UFFDIO_REGISTER, &uffd_register)) { + trace_uffd_register_memory_failed(addr, length, mode, errno); + return -1; + } + if (ioctls) { + *ioctls = uffd_register.ioctls; + } + + return 0; +} + +/** + * uffd_unregister_memory: un-register memory range with UFFD-IO + * + * Returns 0 in case of success, negative value in case of an error + * + * @uffd_fd: UFFD file descriptor + * @addr: base address of memory range + * @length: length of memory range + */ +int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length) +{ + struct uffdio_range uffd_range; + + uffd_range.start = (uintptr_t) addr; + uffd_range.len = length; + + if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &uffd_range)) { + trace_uffd_unregister_memory_failed(addr, length, errno); + return -1; + } + + return 0; +} + +/** + * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO + * + * Returns 0 on success, negative value in case of error + * + * @uffd_fd: UFFD file descriptor + * @addr: base address of memory range + * @length: length of memory range + * @wp: write-protect/unprotect + * @dont_wake: do not wake threads waiting on wr-protected page + */ +int uffd_change_protection(int uffd_fd, void *addr, uint64_t length, + bool wp, bool dont_wake) +{ + struct uffdio_writeprotect uffd_writeprotect; + + uffd_writeprotect.range.start = (uintptr_t) addr; + uffd_writeprotect.range.len = length; + if (!wp && dont_wake) { + /* DONTWAKE is meaningful only on protection release */ + uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE; + } else { + uffd_writeprotect.mode = (wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0); + } + + if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { + error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64 + " mode=%" PRIx64 " errno=%i", addr, length, + (uint64_t) uffd_writeprotect.mode, errno); + return -1; + } + + return 0; +} + +/** + * uffd_copy_page: copy range of pages to destination via UFFD-IO + * + * Copy range of source pages to the destination to resolve + * missing page fault somewhere in the destination range. + * + * Returns 0 on success, negative value in case of an error + * + * @uffd_fd: UFFD file descriptor + * @dst_addr: destination base address + * @src_addr: source base address + * @length: length of the range to copy + * @dont_wake: do not wake threads waiting on missing page + */ +int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr, + uint64_t length, bool dont_wake) +{ + struct uffdio_copy uffd_copy; + + uffd_copy.dst = (uintptr_t) dst_addr; + uffd_copy.src = (uintptr_t) src_addr; + uffd_copy.len = length; + uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0; + + if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) { + error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64 + " mode=%" PRIx64 " errno=%i", dst_addr, src_addr, + length, (uint64_t) uffd_copy.mode, errno); + return -1; + } + + return 0; +} + +/** + * uffd_zero_page: fill range of pages with zeroes via UFFD-IO + * + * Fill range pages with zeroes to resolve missing page fault within the range. 
+ * + * Returns 0 on success, negative value in case of an error + * + * @uffd_fd: UFFD file descriptor + * @addr: base address + * @length: length of the range to fill with zeroes + * @dont_wake: do not wake threads waiting on missing page + */ +int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake) +{ + struct uffdio_zeropage uffd_zeropage; + + uffd_zeropage.range.start = (uintptr_t) addr; + uffd_zeropage.range.len = length; + uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0; + + if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) { + error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64 + " mode=%" PRIx64 " errno=%i", addr, length, + (uint64_t) uffd_zeropage.mode, errno); + return -1; + } + + return 0; +} + +/** + * uffd_wakeup: wake up threads waiting on UFFD-managed page fault resolution + * + * Wake up threads waiting on any page/pages from the designated range. + * The main use case is when, during some period, page faults are resolved + * via UFFD-IO IOCTLs with the MODE_DONTWAKE flag set; after that, all waits + * for the whole memory range are satisfied in a single call to uffd_wakeup(). + * + * Returns 0 on success, negative value in case of an error + * + * @uffd_fd: UFFD file descriptor + * @addr: base address + * @length: length of the range + */ +int uffd_wakeup(int uffd_fd, void *addr, uint64_t length) +{ + struct uffdio_range uffd_range; + + uffd_range.start = (uintptr_t) addr; + uffd_range.len = length; + + if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) { + error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i", + addr, length, errno); + return -1; + } + + return 0; +} + +/** + * uffd_read_events: read pending UFFD events + * + * Returns the number of fetched messages, 0 if none are available, or + * a negative value in case of an error + * + * @uffd_fd: UFFD file descriptor + * @msgs: pointer to message buffer + * @count: number of messages that can fit in the buffer + */ +int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count) +{ + ssize_t res; + do { + res = read(uffd_fd, msgs, count * sizeof(struct uffd_msg)); + } while (res < 0 && errno == EINTR); + + if ((res < 0 && errno == EAGAIN)) { + return 0; + } + if (res < 0) { + error_report("uffd_read_events() failed: errno=%i", errno); + return -1; + } + + return (int) (res / sizeof(struct uffd_msg)); +} + +/** + * uffd_poll_events: poll UFFD file descriptor for read + * + * Returns true if events are available for read, false otherwise + * + * @uffd_fd: UFFD file descriptor + * @tmo: timeout value + */ +bool uffd_poll_events(int uffd_fd, int tmo) +{ + int res; + struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 }; + + do { + res = poll(&poll_fd, 1, tmo); + } while (res < 0 && errno == EINTR); + + if (res == 0) { + return false; + } + if (res < 0) { + error_report("uffd_poll_events() failed: errno=%i", errno); + return false; + } + + return (poll_fd.revents & POLLIN) != 0; +}
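
A rough usage sketch, not part of the series above: it shows how the new uffd_* helpers could be chained to write-protect a buffer and react to the first write fault. It assumes the matching declarations from the new qemu/userfaultfd.h header, a kernel that advertises UFFD_FEATURE_PAGEFAULT_FLAG_WP, and that buf/len describe a page-aligned mapping; the function name watch_first_write is invented purely for illustration.

/*
 * Illustrative sketch only, not QEMU code: write-protect a page-aligned
 * buffer with the uffd_* helpers and wait for the first write fault.
 */
#include "qemu/osdep.h"
#include "qemu/userfaultfd.h"
#include <linux/userfaultfd.h>

static int watch_first_write(void *buf, uint64_t len)
{
    uint64_t features = 0;
    struct uffd_msg msg;
    int uffd;

    /* Bail out if the kernel does not offer UFFD write-protection. */
    if (uffd_query_features(&features) < 0 ||
        !(features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
        return -1;
    }

    /* Non-blocking descriptor so the fault can be polled with a timeout. */
    uffd = uffd_create_fd(UFFD_FEATURE_PAGEFAULT_FLAG_WP, true);
    if (uffd < 0) {
        return -1;
    }

    /* Track the range, then turn on write protection for it. */
    if (uffd_register_memory(uffd, buf, len, UFFDIO_REGISTER_MODE_WP, NULL) < 0 ||
        uffd_change_protection(uffd, buf, len, true, false) < 0) {
        uffd_close_fd(uffd);
        return -1;
    }

    /* Wait for the first fault, then drop the protection again. */
    while (!uffd_poll_events(uffd, 100)) {
        /* other work could happen between polls */
    }
    if (uffd_read_events(uffd, &msg, 1) == 1 &&
        msg.event == UFFD_EVENT_PAGEFAULT) {
        uffd_change_protection(uffd, buf, len, false, false);
    }

    uffd_unregister_memory(uffd, buf, len);
    uffd_close_fd(uffd);
    return 0;
}

The single-message read and the bare polling loop keep the sketch short; a real caller would size the uffd_msg buffer for more than one event and distinguish poll timeouts from errors.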