aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Roche <william.roche@oracle.com>2024-01-30 19:06:40 +0000
committerPeter Xu <peterx@redhat.com>2024-02-05 14:41:58 +0800
commit06152b89db64bc5ccec1e54576706ba891654df9 (patch)
treedf0fe21a342e770e23e964332630f18aa0a0221a
parent39a6e4f87e7b75a45b08d6dc8b8b7c2954c87440 (diff)
migration: prevent migration when VM has poisoned memory
A memory page poisoned from the hypervisor level is no longer readable. The migration of a VM will crash Qemu when it tries to read the memory address space and stumbles on the poisoned page with a similar stack trace: Program terminated with signal SIGBUS, Bus error. #0 _mm256_loadu_si256 #1 buffer_zero_avx2 #2 select_accel_fn #3 buffer_is_zero #4 save_zero_page #5 ram_save_target_page_legacy #6 ram_save_host_page #7 ram_find_and_save_block #8 ram_save_iterate #9 qemu_savevm_state_iterate #10 migration_iteration_run #11 migration_thread #12 qemu_thread_start To avoid this VM crash during the migration, prevent the migration when a known hardware poison exists on the VM. Signed-off-by: William Roche <william.roche@oracle.com> Link: https://lore.kernel.org/r/20240130190640.139364-2-william.roche@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
-rw-r--r--accel/kvm/kvm-all.c10
-rw-r--r--accel/stubs/kvm-stub.c5
-rw-r--r--include/sysemu/kvm.h6
-rw-r--r--migration/migration.c7
4 files changed, 28 insertions, 0 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 49e755ec4a..a8cecd040e 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1119,6 +1119,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
return ret;
}
+/*
+ * We track the poisoned pages to be able to:
+ * - replace them on VM reset
+ * - block a migration for a VM with a poisoned page
+ */
typedef struct HWPoisonPage {
ram_addr_t ram_addr;
QLIST_ENTRY(HWPoisonPage) list;
@@ -1152,6 +1157,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
}
+bool kvm_hwpoisoned_mem(void)
+{
+ return !QLIST_EMPTY(&hwpoison_page_list);
+}
+
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 1b37d9a302..ca38172884 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void)
{
return 0;
}
+
+bool kvm_hwpoisoned_mem(void)
+{
+ return false;
+}
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index d614878164..fad9a7e8ff 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -538,4 +538,10 @@ bool kvm_arch_cpu_check_are_resettable(void);
bool kvm_dirty_ring_enabled(void);
uint32_t kvm_dirty_ring_size(void);
+
+/**
+ * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
+ * reported for the VM.
+ */
+bool kvm_hwpoisoned_mem(void);
#endif
diff --git a/migration/migration.c b/migration/migration.c
index d5f705ceef..b574e66f7b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -67,6 +67,7 @@
#include "options.h"
#include "sysemu/dirtylimit.h"
#include "qemu/sockets.h"
+#include "sysemu/kvm.h"
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -1906,6 +1907,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
+ if (kvm_hwpoisoned_mem()) {
+ error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
+ "please reboot the vm and try again");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}