aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-08-21 10:23:53 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-08-21 10:23:53 +0100
commit55f4e79d794d94b2ab22b0dc99c6b05abc628656 (patch)
tree67f9e9097c5aec4b238a0d69f59ff4f718d1a57d
parent90b9508e211360f243a32474d67a4d49edb80d6a (diff)
parent56eb90af39abf66c0e80588a9f50c31e7df7320b (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc: fixes This includes nvdimm persistence fixes queued before the release. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Mon 20 Aug 2018 11:38:11 BST # gpg: using RSA key 281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: migration/ram: ensure write persistence on loading all data to PMEM. migration/ram: Add check and info message to nvdimm post copy. mem/nvdimm: ensure write persistence to PMEM in label emulation hostmem-file: add the 'pmem' option configure: add libpmem support memory, exec: switch file ram allocation functions to 'flags' parameters memory, exec: Expose all memory block related flags. Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--backends/hostmem-file.c44
-rwxr-xr-xconfigure29
-rw-r--r--docs/nvdimm.txt22
-rw-r--r--exec.c38
-rw-r--r--hw/mem/nvdimm.c9
-rw-r--r--include/exec/memory.h31
-rw-r--r--include/exec/ram_addr.h28
-rw-r--r--include/qemu/pmem.h36
-rw-r--r--memory.c8
-rw-r--r--migration/ram.c17
-rw-r--r--numa.c2
-rw-r--r--qemu-options.hx7
12 files changed, 235 insertions, 36 deletions
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 134b08d63a..2476dcb435 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -12,6 +12,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
+#include "qemu/error-report.h"
#include "sysemu/hostmem.h"
#include "sysemu/sysemu.h"
#include "qom/object_interfaces.h"
@@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
/*
 * State of a file-backed host memory backend.
 * mem_path and align are handed to memory_region_init_ram_from_file() when
 * the backend memory is allocated; is_pmem is set through the 'pmem'
 * property and selects the RAM_PMEM flag for that allocation.
 */
struct HostMemoryBackendFile {
HostMemoryBackend parent_obj;
- bool discard_data;
char *mem_path;
uint64_t align;
+ bool discard_data;
+ bool is_pmem;
};
static void
@@ -58,7 +60,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
path = object_get_canonical_path(OBJECT(backend));
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
path,
- backend->size, fb->align, backend->share,
+ backend->size, fb->align,
+ (backend->share ? RAM_SHARED : 0) |
+ (fb->is_pmem ? RAM_PMEM : 0),
fb->mem_path, errp);
g_free(path);
}
@@ -130,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
error_propagate(errp, local_err);
}
/*
 * Getter for the 'pmem' property: returns the backend's is_pmem flag.
 * @errp is not used.
 */
+static bool file_memory_backend_get_pmem(Object *o, Error **errp)
+{
+ return MEMORY_BACKEND_FILE(o)->is_pmem;
+}
+
/*
 * Setter for the 'pmem' property.
 * Rejects the change once host_memory_backend_mr_inited() reports true, and
 * rejects pmem=on when QEMU was built without CONFIG_LIBPMEM; otherwise
 * stores @value in fb->is_pmem.  Failures are reported through @errp and
 * leave is_pmem unchanged.
 */
+static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+ if (host_memory_backend_mr_inited(backend)) {
/*
 * NOTE(review): if object_get_canonical_path_component() returns a newly
 * allocated string in this tree, it is never freed here or in the
 * CONFIG_LIBPMEM branch below -- confirm ownership.
 */
+ error_setg(errp, "cannot change property 'pmem' of %s '%s'",
+ object_get_typename(o),
+ object_get_canonical_path_component(o));
+ return;
+ }
+
+#ifndef CONFIG_LIBPMEM
/* Without libpmem, refuse pmem=on; pmem=off is still accepted below. */
+ if (value) {
+ Error *local_err = NULL;
+ error_setg(&local_err,
+ "Lack of libpmem support while setting the 'pmem=on'"
+ " of %s '%s'. We can't ensure data persistence.",
+ object_get_typename(o),
+ object_get_canonical_path_component(o));
+ error_propagate(errp, local_err);
+ return;
+ }
+#endif
+
+ fb->is_pmem = value;
+}
+
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -161,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
file_memory_backend_get_align,
file_memory_backend_set_align,
NULL, NULL, &error_abort);
+ object_class_property_add_bool(oc, "pmem",
+ file_memory_backend_get_pmem, file_memory_backend_set_pmem,
+ &error_abort);
}
static void file_backend_instance_finalize(Object *o)
diff --git a/configure b/configure
index 7d9a63636c..e7bddc04b0 100755
--- a/configure
+++ b/configure
@@ -476,6 +476,7 @@ vxhs=""
libxml2=""
docker="no"
debug_mutex="no"
+libpmem=""
# cross compilers defaults, can be overridden with --cross-cc-ARCH
cross_cc_aarch64="aarch64-linux-gnu-gcc"
@@ -1440,6 +1441,10 @@ for opt do
;;
--disable-debug-mutex) debug_mutex=no
;;
+ --enable-libpmem) libpmem=yes
+ ;;
+ --disable-libpmem) libpmem=no
+ ;;
*)
echo "ERROR: unknown option $opt"
echo "Try '$0 --help' for more information"
@@ -1716,6 +1721,7 @@ disabled with --disable-FEATURE, default is enabled if available:
vhost-user vhost-user support
capstone capstone disassembler support
debug-mutex mutex debugging support
+ libpmem libpmem support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -5594,6 +5600,24 @@ if has "docker"; then
fi
##########################################
+# check for libpmem
+
+if test "$libpmem" != "no"; then
+ if $pkg_config --exists "libpmem"; then
+ libpmem="yes"
+ libpmem_libs=$($pkg_config --libs libpmem)
+ libpmem_cflags=$($pkg_config --cflags libpmem)
+ libs_softmmu="$libs_softmmu $libpmem_libs"
+ QEMU_CFLAGS="$QEMU_CFLAGS $libpmem_cflags"
+ else
+ if test "$libpmem" = "yes" ; then
+ feature_not_found "libpmem" "Install nvml or pmdk"
+ fi
+ libpmem="no"
+ fi
+fi
+
+##########################################
# End of CC checks
# After here, no more $cc or $ld runs
@@ -6059,6 +6083,7 @@ echo "replication support $replication"
echo "VxHS block device $vxhs"
echo "capstone $capstone"
echo "docker $docker"
+echo "libpmem support $libpmem"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -6816,6 +6841,10 @@ if test "$vxhs" = "yes" ; then
echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
fi
+if test "$libpmem" = "yes" ; then
+ echo "CONFIG_LIBPMEM=y" >> $config_host_mak
+fi
+
if test "$tcg_interpreter" = "yes"; then
QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
elif test "$ARCH" = "sparc64" ; then
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index 24b443b655..5f158a6170 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -173,3 +173,25 @@ There are currently two valid values for this option:
the NVDIMMs in the event of power loss. This implies that the
platform also supports flushing dirty data through the memory
controller on power loss.
+
+If the vNVDIMM backend is in host persistent memory that can be accessed using
+the SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
+the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
+is built with libpmem [2] support (configured with --enable-libpmem), QEMU
+will take the necessary steps to guarantee the persistence of its own writes
+to the vNVDIMM backend (e.g., in vNVDIMM label emulation and live migration).
+If 'pmem' is 'on' while there is no libpmem support, QEMU will exit and report
+a "lack of libpmem support" message, because it cannot ensure data persistence.
+For example, if we want to ensure the persistence for some backend file,
+use the QEMU command line:
+
+ -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
+
+References
+----------
+
+[1] NVM Programming Model (NPM)
+ Version 1.2
+ https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
+[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
+ http://pmem.io/pmdk/
diff --git a/exec.c b/exec.c
index e7be0761c2..6826c8337d 100644
--- a/exec.c
+++ b/exec.c
@@ -87,26 +87,6 @@ AddressSpace address_space_memory;
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
-
-/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
-#define RAM_PREALLOC (1 << 0)
-
-/* RAM is mmap-ed with MAP_SHARED */
-#define RAM_SHARED (1 << 1)
-
-/* Only a portion of RAM (used_length) is actually used, and migrated.
- * This used_length size can change across reboots.
- */
-#define RAM_RESIZEABLE (1 << 2)
-
-/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
- * zero the page and wake waiting processes.
- * (Set during postcopy)
- */
-#define RAM_UF_ZEROPAGE (1 << 3)
-
-/* RAM can be migrated */
-#define RAM_MIGRATABLE (1 << 4)
#endif
#ifdef TARGET_PAGE_BITS_VARY
@@ -2252,13 +2232,16 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
#ifdef __linux__
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
- bool share, int fd,
+ uint32_t ram_flags, int fd,
Error **errp)
{
RAMBlock *new_block;
Error *local_err = NULL;
int64_t file_size;
+ /* Only these ram flags are supported for now. */
+ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
+
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
return NULL;
@@ -2294,14 +2277,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block->mr = mr;
new_block->used_length = size;
new_block->max_length = size;
- new_block->flags = share ? RAM_SHARED : 0;
+ new_block->flags = ram_flags;
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
if (!new_block->host) {
g_free(new_block);
return NULL;
}
- ram_block_add(new_block, &local_err, share);
+ ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED);
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
@@ -2313,7 +2296,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
- bool share, const char *mem_path,
+ uint32_t ram_flags, const char *mem_path,
Error **errp)
{
int fd;
@@ -2325,7 +2308,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
return NULL;
}
- block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+ block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp);
if (!block) {
if (created) {
unlink(mem_path);
@@ -4086,6 +4069,11 @@ err:
return ret;
}
/* Returns true when the RAM_PMEM bit is set in @rb->flags. */
+bool ramblock_is_pmem(RAMBlock *rb)
+{
+ return rb->flags & RAM_PMEM;
+}
+
#endif
void page_size_init(void)
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 021d1c3997..1c6674c4ed 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/pmem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "hw/mem/nvdimm.h"
@@ -164,11 +165,17 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
{
MemoryRegion *mr;
PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+ bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem),
+ "pmem", NULL);
uint64_t backend_offset;
nvdimm_validate_rw_label_data(nvdimm, size, offset);
- memcpy(nvdimm->label_data + offset, buf, size);
+ if (!is_pmem) {
+ memcpy(nvdimm->label_data + offset, buf, size);
+ } else {
+ pmem_memcpy_persist(nvdimm->label_data + offset, buf, size);
+ }
mr = host_memory_backend_get_memory(dimm->hostmem);
backend_offset = memory_region_size(mr) - nvdimm->label_size + offset;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 6863656182..eb4f2fb249 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -103,6 +103,29 @@ struct IOMMUNotifier {
};
typedef struct IOMMUNotifier IOMMUNotifier;
+/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
+#define RAM_PREALLOC (1 << 0)
+
+/* RAM is mmap-ed with MAP_SHARED */
+#define RAM_SHARED (1 << 1)
+
+/* Only a portion of RAM (used_length) is actually used, and migrated.
+ * This used_length size can change across reboots.
+ */
+#define RAM_RESIZEABLE (1 << 2)
+
+/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
+ * zero the page and wake waiting processes.
+ * (Set during postcopy)
+ */
+#define RAM_UF_ZEROPAGE (1 << 3)
+
+/* RAM can be migrated */
+#define RAM_MIGRATABLE (1 << 4)
+
+/* RAM is persistent memory */
+#define RAM_PMEM (1 << 5)
+
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end,
@@ -611,6 +634,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
void *host),
Error **errp);
#ifdef __linux__
+
/**
* memory_region_init_ram_from_file: Initialize RAM memory region with a
* mmap-ed backend.
@@ -622,7 +646,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
* @size: size of the region.
* @align: alignment of the region base address; if 0, the default alignment
* (getpagesize()) will be used.
- * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @ram_flags: Memory region features:
+ * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
+ * - RAM_PMEM: the memory is persistent memory
+ * No other bits may be set; qemu_ram_alloc_from_fd() asserts this.
* @path: the path in which to allocate the RAM.
* @errp: pointer to Error*, to store an error if it happens.
*
@@ -634,7 +661,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
const char *name,
uint64_t size,
uint64_t align,
- bool share,
+ uint32_t ram_flags,
const char *path,
Error **errp);
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index cf4ce06248..3abb639056 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -70,13 +70,37 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
return host_addr_offset >> TARGET_PAGE_BITS;
}
+bool ramblock_is_pmem(RAMBlock *rb);
+
long qemu_getrampagesize(void);
+
+/**
+ * qemu_ram_alloc_from_file,
+ * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
+ * file or device
+ *
+ * Parameters:
+ * @size: the size in bytes of the ram block
+ * @mr: the memory region where the ram block is
+ * @ram_flags: specify the properties of the ram block, which can be one
+ * or bit-or of following values
+ * - RAM_SHARED: mmap the backing file or device with MAP_SHARED
+ * - RAM_PMEM: the backend @mem_path or @fd is persistent memory
+ * No other bits may be set; qemu_ram_alloc_from_fd() asserts this.
+ * @mem_path or @fd: specify the backing file or device
+ * @errp: pointer to Error*, to store an error if it happens
+ *
+ * Return:
+ * On success, return a pointer to the ram block.
+ * On failure, return NULL.
+ */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
- bool share, const char *mem_path,
+ uint32_t ram_flags, const char *mem_path,
Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
- bool share, int fd,
+ uint32_t ram_flags, int fd,
Error **errp);
+
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
new file mode 100644
index 0000000000..dfb6d0da62
--- /dev/null
+++ b/include/qemu/pmem.h
@@ -0,0 +1,36 @@
+/*
+ * QEMU header file for libpmem.
+ *
+ * Copyright (c) 2018 Intel Corporation.
+ *
+ * Author: Haozhong Zhang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_PMEM_H
+#define QEMU_PMEM_H
+
+#ifdef CONFIG_LIBPMEM
+#include <libpmem.h>
+#else /* !CONFIG_LIBPMEM */
+
/*
 * Stand-in for libpmem's pmem_memcpy_persist() used when CONFIG_LIBPMEM is
 * not defined.  It performs no copy; it only asserts that it is never reached.
 */
+static inline void *
+pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
+{
+ /* If 'pmem' option is 'on', we should always have libpmem support,
+ or qemu will report an error and exit, never come here. */
+ g_assert_not_reached();
+ return NULL;
+}
+
/*
 * Stand-in for libpmem's pmem_persist() used when CONFIG_LIBPMEM is not
 * defined.  It flushes nothing; it only asserts that it is never reached.
 */
+static inline void
+pmem_persist(const void *addr, size_t len)
+{
+ g_assert_not_reached();
+}
+
+#endif /* CONFIG_LIBPMEM */
+
+#endif /* !QEMU_PMEM_H */
diff --git a/memory.c b/memory.c
index 8b44672c13..9b73892768 100644
--- a/memory.c
+++ b/memory.c
@@ -1551,7 +1551,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
const char *name,
uint64_t size,
uint64_t align,
- bool share,
+ uint32_t ram_flags,
const char *path,
Error **errp)
{
@@ -1560,7 +1560,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->align = align;
- mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
+ mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
@@ -1576,7 +1576,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
- mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+ mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
+ share ? RAM_SHARED : 0,
+ fd, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
#endif
diff --git a/migration/ram.c b/migration/ram.c
index 24dea2730c..fa79d0a5b9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -33,6 +33,7 @@
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
+#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
@@ -3547,6 +3548,13 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
static int ram_load_cleanup(void *opaque)
{
RAMBlock *rb;
+
+ RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+ if (ramblock_is_pmem(rb)) {
+ pmem_persist(rb->host, rb->used_length);
+ }
+ }
+
xbzrle_load_cleanup();
compress_threads_load_cleanup();
@@ -3906,6 +3914,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
/*
 * Report whether postcopy can be used for RAM.
 * Returns false, after logging an info message, as soon as a migratable
 * RAMBlock flagged as pmem is found; otherwise defers to
 * migrate_postcopy_ram().  @opaque is not used.
 */
static bool ram_has_postcopy(void *opaque)
{
+ RAMBlock *rb;
+ RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+ if (ramblock_is_pmem(rb)) {
/*
 * The two adjacent literals are concatenated by the compiler, so the
 * first one must end in a space to keep "postcopy is" as two words.
 */
+ info_report("Block: %s, host: %p is a nvdimm memory, postcopy "
+ "is not supported now!", rb->idstr, rb->host);
+ return false;
+ }
+ }
+
return migrate_postcopy_ram();
}
diff --git a/numa.c b/numa.c
index 5f6367b989..81542d4ebb 100644
--- a/numa.c
+++ b/numa.c
@@ -479,7 +479,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
if (mem_path) {
#ifdef __linux__
Error *err = NULL;
- memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
+ memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0,
mem_path, &err);
if (err) {
error_report_err(err);
diff --git a/qemu-options.hx b/qemu-options.hx
index 4efdedfdbb..5515dfaba5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4048,6 +4048,13 @@ requires an alignment different than the default one used by QEMU, eg
the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
such cases, users can specify the required alignment via this option.
+The @option{pmem} option specifies whether the backing file specified
+by @option{mem-path} is in host persistent memory that can be accessed
+using the SNIA NVM programming model (e.g. Intel NVDIMM).
+If @option{pmem} is set to 'on', QEMU will take necessary operations to
+guarantee the persistence of its own writes to @option{mem-path}
+(e.g. in vNVDIMM label emulation and live migration).
+
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
Creates a memory backend object, which can be used to back the guest RAM.