diff options
31 files changed, 591 insertions, 75 deletions
diff --git a/bsd-user/main.c b/bsd-user/main.c index a8c807e8df..6192e9d91e 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -905,7 +905,7 @@ int main(int argc, char **argv) /* init tcg before creating CPUs and to get qemu_host_page_size */ tcg_exec_init(0); - cpu_type = parse_cpu_model(cpu_model); + cpu_type = parse_cpu_option(cpu_model); cpu = cpu_create(cpu_type); env = cpu->env_ptr; #if defined(TARGET_SPARC) || defined(TARGET_PPC) diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index 7231c2d78f..b531cacd35 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -144,9 +144,25 @@ Guest Data Persistence ---------------------- Though QEMU supports multiple types of vNVDIMM backends on Linux, -currently the only one that can guarantee the guest write persistence -is the device DAX on the real NVDIMM device (e.g., /dev/dax0.0), to -which all guest access do not involve any host-side kernel cache. +the only backend that can guarantee the guest write persistence is: + +A. DAX device (e.g., /dev/dax0.0, ) or +B. DAX file(mounted with dax option) + +When using B (A file supporting direct mapping of persistent memory) +as a backend, write persistence is guaranteed if the host kernel has +support for the MAP_SYNC flag in the mmap system call (available +since Linux 4.15 and on certain distro kernels) and additionally +both 'pmem' and 'share' flags are set to 'on' on the backend. + +If these conditions are not satisfied i.e. if either 'pmem' or 'share' +are not set, if the backend file does not support DAX or if MAP_SYNC +is not supported by the host kernel, write persistence is not +guaranteed after a system crash. For compatibility reasons, these +conditions are ignored if not satisfied. Currently, no way is +provided to test for them. +For more details, please reference mmap(2) man page: +http://man7.org/linux/man-pages/man2/mmap.2.html. When using other types of backends, it's suggested to set 'unarmed' option of '-device nvdimm' to 'on', which sets the unarmed flag of the @@ -983,14 +983,18 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) #endif } -const char *parse_cpu_model(const char *cpu_model) +const char *parse_cpu_option(const char *cpu_option) { ObjectClass *oc; CPUClass *cc; gchar **model_pieces; const char *cpu_type; - model_pieces = g_strsplit(cpu_model, ",", 2); + model_pieces = g_strsplit(cpu_option, ",", 2); + if (!model_pieces[0]) { + error_report("-cpu option cannot be empty"); + exit(1); + } oc = cpu_class_by_name(CPU_RESOLVING_TYPE, model_pieces[0]); if (oc == NULL) { @@ -1915,7 +1919,7 @@ static void *file_ram_alloc(RAMBlock *block, } area = qemu_ram_mmap(fd, memory, block->mr->align, - block->flags & RAM_SHARED); + block->flags & RAM_SHARED, block->flags & RAM_PMEM); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for guest RAM"); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index ce2664a30b..16ba67f7a7 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1978,10 +1978,17 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_4_1_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) + static void virt_machine_4_0_options(MachineClass *mc) { + virt_machine_4_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); } -DEFINE_VIRT_MACHINE_AS_LATEST(4, 0) +DEFINE_VIRT_MACHINE(4, 0) static void virt_machine_3_1_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index 743fef2898..5d046a43e3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -24,6 +24,9 @@ #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" +GlobalProperty hw_compat_4_0[] = {}; +const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + GlobalProperty hw_compat_3_1[] = { { "pcie-root-port", "x-speed", "2_5" }, { "pcie-root-port", "x-width", "1" }, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f2c15bf1f2..d98b737b8f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -115,6 +115,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; /* Physical Address of PVH entry point read from kernel ELF NOTE */ static size_t pvh_start_addr; +GlobalProperty pc_compat_4_0[] = {}; +const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0); + GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8ad8e885c6..2a7700b564 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -428,13 +428,25 @@ static void pc_i440fx_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); } -static void pc_i440fx_4_0_machine_options(MachineClass *m) +static void pc_i440fx_4_1_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); + +static void pc_i440fx_4_0_machine_options(MachineClass *m) +{ + pc_i440fx_4_1_machine_options(m); + m->alias = NULL; + m->is_default = 0; + compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); +} + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, pc_i440fx_4_0_machine_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 372c6b73be..37dd350511 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -365,12 +365,23 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_4_0_machine_options(MachineClass *m) +static void pc_q35_4_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL, + pc_q35_4_1_machine_options); + +static void pc_q35_4_0_machine_options(MachineClass *m) +{ + pc_q35_4_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); +} + DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL, pc_q35_4_0_machine_options); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b52b82d298..8438741ec2 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4345,14 +4345,25 @@ static const TypeInfo spapr_machine_info = { type_init(spapr_machine_register_##suffix) /* + * pseries-4.1 + */ +static void spapr_machine_4_1_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(4_1, "4.1", true); + +/* * pseries-4.0 */ static void spapr_machine_4_0_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); } -DEFINE_SPAPR_MACHINE(4_0, "4.0", true); +DEFINE_SPAPR_MACHINE(4_0, "4.0", false); /* * pseries-3.1 diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 7e256d3d31..bbc6e8fa0b 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -658,14 +658,26 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_4_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_4_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(4_1, "4.1", true); + static void ccw_machine_4_0_instance_options(MachineState *machine) { + ccw_machine_4_1_instance_options(machine); } static void ccw_machine_4_0_class_options(MachineClass *mc) { + ccw_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); } -DEFINE_CCW_MACHINE(4_0, "4.0", true); +DEFINE_CCW_MACHINE(4_0, "4.0", false); static void ccw_machine_3_1_instance_options(MachineState *machine) { diff --git a/include/hw/boards.h b/include/hw/boards.h index e231860666..6f7916f88f 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -57,7 +57,6 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, #define MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(MachineClass, (klass), TYPE_MACHINE) -MachineClass *find_default_machine(void); extern MachineState *current_machine; void machine_run_board_init(MachineState *machine); @@ -293,6 +292,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_4_0[]; +extern const size_t hw_compat_4_0_len; + extern GlobalProperty hw_compat_3_1[]; extern const size_t hw_compat_3_1_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ca65ef18af..43df7230a2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +extern GlobalProperty pc_compat_4_0[]; +extern const size_t pc_compat_4_0_len; + extern GlobalProperty pc_compat_3_1[]; extern const size_t pc_compat_3_1_len; diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index ef04f0ed5b..eec98d82c1 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -7,7 +7,26 @@ size_t qemu_fd_getpagesize(int fd); size_t qemu_mempath_getpagesize(const char *mem_path); -void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); +/** + * qemu_ram_mmap: mmap the specified file or device. + * + * Parameters: + * @fd: the file or the device to mmap + * @size: the number of bytes to be mmaped + * @align: if not zero, specify the alignment of the starting mapping address; + * otherwise, the alignment in use will be determined by QEMU. + * @shared: map has RAM_SHARED flag. + * @is_pmem: map has RAM_PMEM flag. + * + * Return: + * On success, return a pointer to the mapped area. + * On failure, return MAP_FAILED. + */ +void *qemu_ram_mmap(int fd, + size_t size, + size_t align, + bool shared, + bool is_pmem); void qemu_ram_munmap(int fd, void *ptr, size_t size); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index e9bec3a5bc..08abcbd3fe 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -681,15 +681,15 @@ ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model); CPUState *cpu_create(const char *typename); /** - * parse_cpu_model: - * @cpu_model: The model string including optional parameters. + * parse_cpu_option: + * @cpu_option: The -cpu option including optional parameters. * * processes optional parameters and registers them as global properties * * Returns: type of CPU to create or prints error and terminates process * if an error occurred. */ -const char *parse_cpu_model(const char *cpu_model); +const char *parse_cpu_option(const char *cpu_option); /** * cpu_has_work: diff --git a/linux-headers/asm-arm/mman.h b/linux-headers/asm-arm/mman.h new file mode 100644 index 0000000000..41f99c573b --- /dev/null +++ b/linux-headers/asm-arm/mman.h @@ -0,0 +1,4 @@ +#include <asm-generic/mman.h> + +#define arch_mmap_check(addr, len, flags) \ + (((flags) & MAP_FIXED && (addr) < FIRST_USER_ADDRESS) ? -EINVAL : 0) diff --git a/linux-headers/asm-arm64/mman.h b/linux-headers/asm-arm64/mman.h new file mode 100644 index 0000000000..8eebf89f5a --- /dev/null +++ b/linux-headers/asm-arm64/mman.h @@ -0,0 +1 @@ +#include <asm-generic/mman.h> diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h new file mode 100644 index 0000000000..b0f8e87235 --- /dev/null +++ b/linux-headers/asm-generic/hugetlb_encode.h @@ -0,0 +1,36 @@ +#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_ +#define _ASM_GENERIC_HUGETLB_ENCODE_H_ + +/* + * Several system calls take a flag to request "hugetlb" huge pages. + * Without further specification, these system calls will use the + * system's default huge page size. If a system supports multiple + * huge page sizes, the desired huge page size can be specified in + * bits [26:31] of the flag arguments. The value in these 6 bits + * will encode the log2 of the huge page size. + * + * The following definitions are associated with this huge page size + * encoding in flag arguments. System call specific header files + * that use this encoding should include this file. They can then + * provide definitions based on these with their own specific prefix. + * for example: + * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT + */ + +#define HUGETLB_FLAG_ENCODE_SHIFT 26 +#define HUGETLB_FLAG_ENCODE_MASK 0x3f + +#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) + +#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h new file mode 100644 index 0000000000..e7ee32861d --- /dev/null +++ b/linux-headers/asm-generic/mman-common.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_MMAN_COMMON_H +#define __ASM_GENERIC_MMAN_COMMON_H + +/* + Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED +# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ +#else +# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ +#endif + +/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_HWPOISON 100 /* poison a page for testing */ +#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + +#endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/linux-headers/asm-generic/mman.h b/linux-headers/asm-generic/mman.h new file mode 100644 index 0000000000..653687d977 --- /dev/null +++ b/linux-headers/asm-generic/mman.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H + +#include <asm-generic/mman-common.h> + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */ + +/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h new file mode 100644 index 0000000000..3035ca499c --- /dev/null +++ b/linux-headers/asm-mips/mman.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2002 by Ralf Baechle + */ +#ifndef _ASM_MMAN_H +#define _ASM_MMAN_H + +/* + * Protections are chosen from these bits, OR'd together. The + * implementation does not necessarily support PROT_EXEC or PROT_WRITE + * without PROT_READ. The only guarantees are that no writing will be + * allowed without PROT_WRITE and no access will be allowed for PROT_NONE. + */ +#define PROT_NONE 0x00 /* page can not be accessed */ +#define PROT_READ 0x01 /* page can be read */ +#define PROT_WRITE 0x02 /* page can be written */ +#define PROT_EXEC 0x04 /* page can be executed */ +/* 0x08 reserved for PROT_EXEC_NOFLUSH */ +#define PROT_SEM 0x10 /* page may be used for atomic ops */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +/* + * Flags for mmap + */ +#define MAP_SHARED 0x001 /* Share changes */ +#define MAP_PRIVATE 0x002 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ +#define MAP_TYPE 0x00f /* Mask for type of mapping */ +#define MAP_FIXED 0x010 /* Interpret addr exactly */ + +/* not used by linux, but here to make sure we don't clash with ABI defines */ +#define MAP_RENAME 0x020 /* Assign page to file */ +#define MAP_AUTOGROW 0x040 /* File may grow by writing */ +#define MAP_LOCAL 0x080 /* Copy on fork/sproc */ +#define MAP_AUTORSRV 0x100 /* Logical swap reserved on demand */ + +/* These are linux-specific */ +#define MAP_NORESERVE 0x0400 /* don't check for reservations */ +#define MAP_ANONYMOUS 0x0800 /* don't use a file */ +#define MAP_GROWSDOWN 0x1000 /* stack-like segment */ +#define MAP_DENYWRITE 0x2000 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x4000 /* mark it as an executable */ +#define MAP_LOCKED 0x8000 /* pages are locked */ +#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x20000 /* do not block on IO */ +#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + +/* + * Flags for msync + */ +#define MS_ASYNC 0x0001 /* sync memory asynchronously */ +#define MS_INVALIDATE 0x0002 /* invalidate mappings & caches */ +#define MS_SYNC 0x0004 /* synchronous memory sync */ + +/* + * Flags for mlockall + */ +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ +#define MADV_HWPOISON 100 /* poison a page for testing */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + +#endif /* _ASM_MMAN_H */ diff --git a/linux-headers/asm-powerpc/mman.h b/linux-headers/asm-powerpc/mman.h new file mode 100644 index 0000000000..1c2b3fca05 --- /dev/null +++ b/linux-headers/asm-powerpc/mman.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _ASM_POWERPC_MMAN_H +#define _ASM_POWERPC_MMAN_H + +#include <asm-generic/mman-common.h> + + +#define PROT_SAO 0x10 /* Strong Access Ordering */ + +#define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ +#define MAP_NORESERVE 0x40 /* don't reserve swap pages */ +#define MAP_LOCKED 0x80 + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ + +#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ +#define MCL_FUTURE 0x4000 /* lock all additions to address space */ +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ + +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ + +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_EXECUTE) +#endif /* _ASM_POWERPC_MMAN_H */ diff --git a/linux-headers/asm-s390/mman.h b/linux-headers/asm-s390/mman.h new file mode 100644 index 0000000000..8eebf89f5a --- /dev/null +++ b/linux-headers/asm-s390/mman.h @@ -0,0 +1 @@ +#include <asm-generic/mman.h> diff --git a/linux-headers/asm-x86/mman.h b/linux-headers/asm-x86/mman.h new file mode 100644 index 0000000000..d4a8d0424b --- /dev/null +++ b/linux-headers/asm-x86/mman.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_MMAN_H +#define _ASM_X86_MMAN_H + +#define MAP_32BIT 0x40 /* only give out 32bit addresses */ + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +/* + * Take the 4 protection key bits out of the vma->vm_flags + * value and turn them in to the bits that we can put in + * to a pte. + * + * Only override these if Protection Keys are available + * (which is only on 64-bit). + */ +#define arch_vm_get_page_prot(vm_flags) __pgprot( \ + ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ + ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ + ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ + ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) + +#define arch_calc_vm_prot_bits(prot, key) ( \ + ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ + ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ + ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ + ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) +#endif + +#include <asm-generic/mman.h> + +#endif /* _ASM_X86_MMAN_H */ diff --git a/linux-headers/linux/mman.h b/linux-headers/linux/mman.h new file mode 100644 index 0000000000..3c44b6f480 --- /dev/null +++ b/linux-headers/linux/mman.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_MMAN_H +#define _LINUX_MMAN_H + +#include <asm/mman.h> +#include <asm-generic/hugetlb_encode.h> + +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 + +#define OVERCOMMIT_GUESS 0 +#define OVERCOMMIT_ALWAYS 1 +#define OVERCOMMIT_NEVER 2 + +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + +#endif /* _LINUX_MMAN_H */ diff --git a/linux-user/main.c b/linux-user/main.c index 17387166ab..3d2230320b 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -662,7 +662,7 @@ int main(int argc, char **argv, char **envp) if (cpu_model == NULL) { cpu_model = cpu_get_model(get_elf_eflags(execfd)); } - cpu_type = parse_cpu_model(cpu_model); + cpu_type = parse_cpu_option(cpu_model); /* init tcg before creating CPUs and to get qemu_host_page_size */ tcg_exec_init(0); diff --git a/qemu-options.hx b/qemu-options.hx index 08749a3391..bdc74c0620 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4233,6 +4233,11 @@ using the SNIA NVM programming model (e.g. Intel NVDIMM). If @option{pmem} is set to 'on', QEMU will take necessary operations to guarantee the persistence of its own writes to @option{mem-path} (e.g. in vNVDIMM label emulation and live migration). +Also, we will map the backend-file with MAP_SYNC flag, which ensures the +file metadata is in sync for @option{mem-path} in case of host crash +or a power failure. MAP_SYNC requires support from both the host kernel +(since Linux kernel 4.15) and the filesystem of @option{mem-path} mounted +with DAX option. @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave} diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index a310a9072b..c3819d2b98 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -95,7 +95,7 @@ for arch in $ARCHLIST; do rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" - for header in kvm.h unistd.h bitsperlong.h; do + for header in kvm.h unistd.h bitsperlong.h mman.h; do cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" done @@ -139,13 +139,13 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in kvm.h vfio.h vfio_ccw.h vhost.h \ - psci.h psp-sev.h userfaultfd.h; do + psci.h psp-sev.h userfaultfd.h mman.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done rm -rf "$output/linux-headers/asm-generic" mkdir -p "$output/linux-headers/asm-generic" -for header in unistd.h bitsperlong.h; do +for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" done diff --git a/tests/acceptance/empty_cpu_model.py b/tests/acceptance/empty_cpu_model.py new file mode 100644 index 0000000000..3f4f663582 --- /dev/null +++ b/tests/acceptance/empty_cpu_model.py @@ -0,0 +1,19 @@ +# Check for crash when using empty -cpu option +# +# Copyright (c) 2019 Red Hat, Inc. +# +# Author: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +import subprocess +from avocado_qemu import Test + +class EmptyCPUModel(Test): + def test(self): + cmd = [self.qemu_bin, '-S', '-display', 'none', '-machine', 'none', '-cpu', ''] + r = subprocess.run(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + self.assertEquals(r.returncode, 1, "QEMU exit code should be 1") + self.assertEquals(r.stdout, b'', "QEMU stdout should be empty") + self.assertNotEquals(r.stderr, b'', "QEMU stderr shouldn't be empty") diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index 8565885420..f7f177d0ea 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -10,6 +10,13 @@ * later. See the COPYING file in the top-level directory. */ +#ifdef CONFIG_LINUX +#include <linux/mman.h> +#else /* !CONFIG_LINUX */ +#define MAP_SYNC 0x0 +#define MAP_SHARED_VALIDATE 0x0 +#endif /* CONFIG_LINUX */ + #include "qemu/osdep.h" #include "qemu/mmap-alloc.h" #include "qemu/host-utils.h" @@ -75,9 +82,14 @@ size_t qemu_mempath_getpagesize(const char *mem_path) return getpagesize(); } -void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) +void *qemu_ram_mmap(int fd, + size_t size, + size_t align, + bool shared, + bool is_pmem) { int flags; + int map_sync_flags = 0; int guardfd; size_t offset; size_t pagesize; @@ -128,9 +140,40 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) flags = MAP_FIXED; flags |= fd == -1 ? MAP_ANONYMOUS : 0; flags |= shared ? MAP_SHARED : MAP_PRIVATE; + if (shared && is_pmem) { + map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE; + } + offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr; - ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0); + ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, + flags | map_sync_flags, fd, 0); + + if (ptr == MAP_FAILED && map_sync_flags) { + if (errno == ENOTSUP) { + char *proc_link, *file_name; + int len; + proc_link = g_strdup_printf("/proc/self/fd/%d", fd); + file_name = g_malloc0(PATH_MAX); + len = readlink(proc_link, file_name, PATH_MAX - 1); + if (len < 0) { + len = 0; + } + file_name[len] = '\0'; + fprintf(stderr, "Warning: requesting persistence across crashes " + "for backend file %s failed. Proceeding without " + "persistence, data might become corrupted in case of host " + "crash.\n", file_name); + g_free(proc_link); + g_free(file_name); + } + /* + * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC, + * we will remove these flags to handle compatibility. + */ + ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, + flags, fd, 0); + } if (ptr == MAP_FAILED) { munmap(guardptr, total); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 88dda9cd39..d97b1717d5 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -203,7 +203,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared) { size_t align = QEMU_VMALLOC_ALIGN; - void *ptr = qemu_ram_mmap(-1, size, align, shared); + void *ptr = qemu_ram_mmap(-1, size, align, shared, false); if (ptr == MAP_FAILED) { return NULL; @@ -1465,45 +1465,34 @@ static int usb_parse(const char *cmdline) MachineState *current_machine; -static MachineClass *find_machine(const char *name) +static MachineClass *find_machine(const char *name, GSList *machines) { - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); - MachineClass *mc = NULL; + GSList *el; for (el = machines; el; el = el->next) { - MachineClass *temp = el->data; + MachineClass *mc = el->data; - if (!strcmp(temp->name, name)) { - mc = temp; - break; - } - if (temp->alias && - !strcmp(temp->alias, name)) { - mc = temp; - break; + if (!strcmp(mc->name, name) || !g_strcmp0(mc->alias, name)) { + return mc; } } - g_slist_free(machines); - return mc; + return NULL; } -MachineClass *find_default_machine(void) +static MachineClass *find_default_machine(GSList *machines) { - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); - MachineClass *mc = NULL; + GSList *el; for (el = machines; el; el = el->next) { - MachineClass *temp = el->data; + MachineClass *mc = el->data; - if (temp->is_default) { - mc = temp; - break; + if (mc->is_default) { + return mc; } } - g_slist_free(machines); - return mc; + return NULL; } MachineInfoList *qmp_query_machines(Error **errp) @@ -2585,22 +2574,12 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b) object_class_get_name(OBJECT_CLASS(mc1))); } - static MachineClass *machine_parse(const char *name) +static MachineClass *machine_parse(const char *name, GSList *machines) { - MachineClass *mc = NULL; - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); + MachineClass *mc; + GSList *el; - if (name) { - mc = find_machine(name); - } - if (mc) { - g_slist_free(machines); - return mc; - } - if (name && !is_help_option(name)) { - error_report("unsupported machine type"); - error_printf("Use -machine help to list supported machines\n"); - } else { + if (is_help_option(name)) { printf("Supported machines are:\n"); machines = g_slist_sort(machines, machine_class_cmp); for (el = machines; el; el = el->next) { @@ -2612,10 +2591,16 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b) mc->is_default ? " (default)" : "", mc->deprecation_reason ? " (deprecated)" : ""); } + exit(0); } - g_slist_free(machines); - exit(!name || !is_help_option(name)); + mc = find_machine(name, machines); + if (!mc) { + error_report("unsupported machine type"); + error_printf("Use -machine help to list supported machines\n"); + exit(1); + } + return mc; } void qemu_add_exit_notifier(Notifier *notify) @@ -2706,7 +2691,8 @@ static const QEMUOption *lookup_opt(int argc, char **argv, static MachineClass *select_machine(void) { - MachineClass *machine_class = find_default_machine(); + GSList *machines = object_class_get_list(TYPE_MACHINE, false); + MachineClass *machine_class = find_default_machine(machines); const char *optarg; QemuOpts *opts; Location loc; @@ -2718,7 +2704,7 @@ static MachineClass *select_machine(void) optarg = qemu_opt_get(opts, "type"); if (optarg) { - machine_class = machine_parse(optarg); + machine_class = machine_parse(optarg, machines); } if (!machine_class) { @@ -2728,6 +2714,7 @@ static MachineClass *select_machine(void) } loc_pop(&loc); + g_slist_free(machines); return machine_class; } @@ -3002,7 +2989,7 @@ int main(int argc, char **argv, char **envp) const char *optarg; const char *loadvm = NULL; MachineClass *machine_class; - const char *cpu_model; + const char *cpu_option; const char *vga_model = NULL; const char *qtest_chrdev = NULL; const char *qtest_log = NULL; @@ -3081,7 +3068,7 @@ int main(int argc, char **argv, char **envp) QLIST_INIT (&vm_change_state_head); os_setup_early_signal_handling(); - cpu_model = NULL; + cpu_option = NULL; snapshot = 0; nb_nics = 0; @@ -3133,7 +3120,7 @@ int main(int argc, char **argv, char **envp) switch(popt->index) { case QEMU_OPTION_cpu: /* hw initialization will check this */ - cpu_model = optarg; + cpu_option = optarg; break; case QEMU_OPTION_hda: case QEMU_OPTION_hdb: @@ -4050,8 +4037,8 @@ int main(int argc, char **argv, char **envp) qemu_set_hw_version(machine_class->hw_version); } - if (cpu_model && is_help_option(cpu_model)) { - list_cpus(cpu_model); + if (cpu_option && is_help_option(cpu_option)) { + list_cpus(cpu_option); exit(0); } @@ -4299,9 +4286,9 @@ int main(int argc, char **argv, char **envp) * Global properties get set up by qdev_prop_register_global(), * called from user_register_global_props(), and certain option * desugaring. Also in CPU feature desugaring (buried in - * parse_cpu_model()), which happens below this point, but may + * parse_cpu_option()), which happens below this point, but may * only target the CPU type, which can only be created after - * parse_cpu_model() returned the type. + * parse_cpu_option() returned the type. * * Machine compat properties: object_set_machine_compat_props(). * Accelerator compat props: object_set_accelerator_compat_props(), @@ -4465,8 +4452,8 @@ int main(int argc, char **argv, char **envp) /* parse features once if machine provides default cpu_type */ current_machine->cpu_type = machine_class->default_cpu_type; - if (cpu_model) { - current_machine->cpu_type = parse_cpu_model(cpu_model); + if (cpu_option) { + current_machine->cpu_type = parse_cpu_option(cpu_option); } parse_numa_opts(current_machine); |