diff options
author | William Roche <william.roche@oracle.com> | 2024-01-30 19:06:40 +0000 |
---|---|---|
committer | Peter Xu <peterx@redhat.com> | 2024-02-05 14:41:58 +0800 |
commit | 06152b89db64bc5ccec1e54576706ba891654df9 (patch) | |
tree | df0fe21a342e770e23e964332630f18aa0a0221a | |
parent | 39a6e4f87e7b75a45b08d6dc8b8b7c2954c87440 (diff) |
migration: prevent migration when VM has poisoned memory
A memory page poisoned from the hypervisor level is no longer readable.
The migration of a VM will crash Qemu when it tries to read the
memory address space and stumbles on the poisoned page with a similar
stack trace:
Program terminated with signal SIGBUS, Bus error.
#0 _mm256_loadu_si256
#1 buffer_zero_avx2
#2 select_accel_fn
#3 buffer_is_zero
#4 save_zero_page
#5 ram_save_target_page_legacy
#6 ram_save_host_page
#7 ram_find_and_save_block
#8 ram_save_iterate
#9 qemu_savevm_state_iterate
#10 migration_iteration_run
#11 migration_thread
#12 qemu_thread_start
To avoid this VM crash during the migration, prevent the migration
when a known hardware poison exists on the VM.
Signed-off-by: William Roche <william.roche@oracle.com>
Link: https://lore.kernel.org/r/20240130190640.139364-2-william.roche@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
-rw-r--r-- | accel/kvm/kvm-all.c | 10 | ||||
-rw-r--r-- | accel/stubs/kvm-stub.c | 5 | ||||
-rw-r--r-- | include/sysemu/kvm.h | 6 | ||||
-rw-r--r-- | migration/migration.c | 7 |
4 files changed, 28 insertions, 0 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 49e755ec4a..a8cecd040e 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1119,6 +1119,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension) return ret; } +/* + * We track the poisoned pages to be able to: + * - replace them on VM reset + * - block a migration for a VM with a poisoned page + */ typedef struct HWPoisonPage { ram_addr_t ram_addr; QLIST_ENTRY(HWPoisonPage) list; @@ -1152,6 +1157,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr) QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); } +bool kvm_hwpoisoned_mem(void) +{ + return !QLIST_EMPTY(&hwpoison_page_list); +} + static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) { #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 1b37d9a302..ca38172884 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void) { return 0; } + +bool kvm_hwpoisoned_mem(void) +{ + return false; +} diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index d614878164..fad9a7e8ff 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -538,4 +538,10 @@ bool kvm_arch_cpu_check_are_resettable(void); bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); + +/** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. + */ +bool kvm_hwpoisoned_mem(void); #endif diff --git a/migration/migration.c b/migration/migration.c index d5f705ceef..b574e66f7b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -67,6 +67,7 @@ #include "options.h" #include "sysemu/dirtylimit.h" #include "qemu/sockets.h" +#include "sysemu/kvm.h" static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -1906,6 +1907,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (kvm_hwpoisoned_mem()) { + error_setg(errp, "Can't migrate this vm with hardware poisoned memory, " + "please reboot the vm and try again"); + return false; + } + if (migration_is_blocked(errp)) { return false; } |