From f93b9953703be41408d5f0e09a871775d4be3c36 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 15:58:52 -1000 Subject: linux-user/elfload: Disable core dump if getrlimit fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not dump core at all if getrlimit fails; this ensures that dumpsize is valid throughout the function, not just for the initial test vs rlim_cur. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- linux-user/elfload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index b8eef893d0..fb47fe39c9 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4673,7 +4673,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) return 0; } - if (getrlimit(RLIMIT_CORE, &dumpsize) == 0 && dumpsize.rlim_cur == 0) { + if (getrlimit(RLIMIT_CORE, &dumpsize) < 0 || dumpsize.rlim_cur == 0) { return 0; } -- cgit v1.2.3 From b1beea6ba5c3eb9938897ae1c4042515c3e4b0c2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 16:03:06 -1000 Subject: linux-user/elfload: Merge init_note_info and fill_note_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- linux-user/elfload.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index fb47fe39c9..7b3a2c20f2 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4514,16 +4514,6 @@ static void fill_thread_info(struct elf_note_info *info, const CPUArchState *env info->notes_size += note_size(&ets->notes[0]); } -static void init_note_info(struct elf_note_info *info) -{ - /* Initialize the elf_note_info structure so that it is at - * least safe to call free_note_info() on it. Must be - * called before calling fill_note_info(). - */ - memset(info, 0, sizeof (*info)); - QTAILQ_INIT(&info->thread_list); -} - static int fill_note_info(struct elf_note_info *info, long signr, const CPUArchState *env) { @@ -4532,6 +4522,9 @@ static int fill_note_info(struct elf_note_info *info, TaskState *ts = (TaskState *)cpu->opaque; int i; + memset(info, 0, sizeof (*info)); + QTAILQ_INIT(&info->thread_list); + info->notes = g_new0(struct memelfnote, NUMNOTES); if (info->notes == NULL) return (-ENOMEM); @@ -4665,8 +4658,6 @@ static int elf_core_dump(int signr, const CPUArchState *env) int segs = 0; int fd = -1; - init_note_info(&info); - errno = 0; if (prctl(PR_GET_DUMPABLE) == 0) { -- cgit v1.2.3 From 6a202944354d077141bb97a21d35f8ab16b6a127 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 10:49:21 -1000 Subject: linux-user/elfload: Tidy fill_note_info and struct elf_note_info In fill_note_info, there were unnecessary checks for success of g_new/g_malloc. But these structures do not need to be dynamically allocated at all, and can in fact be statically allocated within the parent structure. This removes all error paths from fill_note_info, so change the return type to void. Change type of signr to match both caller (elf_core_dump) and callee (fill_prstatus), which both use int for signr.
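As background for why those allocation checks were dead code: glib's allocators abort the process on failure instead of returning NULL, so a guard like the following can never take its error branch (a minimal sketch relying only on documented glib behaviour):

    #include <glib.h>

    struct memelfnote *notes = g_new0(struct memelfnote, 3);
    if (notes == NULL) {
        /* Unreachable: g_new0() is built on g_malloc0(), which aborts
         * on allocation failure rather than returning NULL. */
        return -ENOMEM;
    }

Embedding the array and the prstatus/psinfo structures directly in struct elf_note_info, as the diff below does, removes the allocations along with the unreachable checks.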
Signed-off-by: Richard Henderson --- linux-user/elfload.c | 48 ++++++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 7b3a2c20f2..cc43487a37 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4066,10 +4066,12 @@ struct elf_thread_status { int num_notes; }; +#define NUMNOTES 3 + struct elf_note_info { - struct memelfnote *notes; - struct target_elf_prstatus *prstatus; /* NT_PRSTATUS */ - struct target_elf_prpsinfo *psinfo; /* NT_PRPSINFO */ + struct memelfnote notes[NUMNOTES]; + struct target_elf_prstatus prstatus; /* NT_PRSTATUS */ + struct target_elf_prpsinfo psinfo; /* NT_PRPSINFO */ QTAILQ_HEAD(, elf_thread_status) thread_list; #if 0 @@ -4117,7 +4119,7 @@ static void fill_auxv_note(struct memelfnote *, const TaskState *); static void fill_elf_note_phdr(struct elf_phdr *, int, off_t); static size_t note_size(const struct memelfnote *); static void free_note_info(struct elf_note_info *); -static int fill_note_info(struct elf_note_info *, long, const CPUArchState *); +static void fill_note_info(struct elf_note_info *, int, const CPUArchState *); static void fill_thread_info(struct elf_note_info *, const CPUArchState *); static int dump_write(int, const void *, size_t); @@ -4514,44 +4516,33 @@ static void fill_thread_info(struct elf_note_info *info, const CPUArchState *env info->notes_size += note_size(&ets->notes[0]); } -static int fill_note_info(struct elf_note_info *info, - long signr, const CPUArchState *env) +static void fill_note_info(struct elf_note_info *info, + int signr, const CPUArchState *env) { -#define NUMNOTES 3 CPUState *cpu = env_cpu((CPUArchState *)env); TaskState *ts = (TaskState *)cpu->opaque; - int i; memset(info, 0, sizeof (*info)); QTAILQ_INIT(&info->thread_list); - info->notes = g_new0(struct memelfnote, NUMNOTES); - if (info->notes == NULL) - return (-ENOMEM); - info->prstatus = g_malloc0(sizeof (*info->prstatus)); - if (info->prstatus == NULL) - return (-ENOMEM); - info->psinfo = g_malloc0(sizeof (*info->psinfo)); - if (info->prstatus == NULL) - return (-ENOMEM); - /* * First fill in status (and registers) of current thread * including process info & aux vector. 
*/ - fill_prstatus(info->prstatus, ts, signr); - elf_core_copy_regs(&info->prstatus->pr_reg, env); + fill_prstatus(&info->prstatus, ts, signr); + elf_core_copy_regs(&info->prstatus.pr_reg, env); fill_note(&info->notes[0], "CORE", NT_PRSTATUS, - sizeof (*info->prstatus), info->prstatus); - fill_psinfo(info->psinfo, ts); + sizeof(info->prstatus), &info->prstatus); + fill_psinfo(&info->psinfo, ts); fill_note(&info->notes[1], "CORE", NT_PRPSINFO, - sizeof (*info->psinfo), info->psinfo); + sizeof(info->psinfo), &info->psinfo); fill_auxv_note(&info->notes[2], ts); info->numnote = 3; info->notes_size = 0; - for (i = 0; i < info->numnote; i++) + for (int i = 0; i < info->numnote; i++) { info->notes_size += note_size(&info->notes[i]); + } /* read and fill status of all threads */ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { @@ -4562,8 +4553,6 @@ static int fill_note_info(struct elf_note_info *info, fill_thread_info(info, cpu_env(cpu)); } } - - return (0); } static void free_note_info(struct elf_note_info *info) @@ -4575,10 +4564,6 @@ static void free_note_info(struct elf_note_info *info) QTAILQ_REMOVE(&info->thread_list, ets, ets_link); g_free(ets); } - - g_free(info->prstatus); - g_free(info->psinfo); - g_free(info->notes); } static int write_note_info(struct elf_note_info *info, int fd) @@ -4694,8 +4679,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) goto out; /* fill in the in-memory version of notes */ - if (fill_note_info(&info, signr, env) < 0) - goto out; + fill_note_info(&info, signr, env); offset += sizeof (elf); /* elf header */ offset += (segs + 1) * sizeof (struct elf_phdr); /* program headers */ -- cgit v1.2.3 From 0af22a6abf8397f966701f120eee8a5bdf22ab19 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 16:50:32 -1000 Subject: linux-user/elfload: Stack allocate struct mm_struct Ignoring the fact that g_malloc cannot fail, the structure is quite small and might as well be allocated locally. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index cc43487a37..98b82b1a49 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4099,7 +4099,7 @@ struct mm_struct { int mm_count; /* number of mappings */ }; -static struct mm_struct *vma_init(void); +static void vma_init(struct mm_struct *); static void vma_delete(struct mm_struct *); static int vma_add_mapping(struct mm_struct *, target_ulong, target_ulong, abi_ulong); @@ -4174,17 +4174,10 @@ static inline void bswap_note(struct elf_note *en) { } * thread that received the signal is stopped. 
*/ -static struct mm_struct *vma_init(void) +static void vma_init(struct mm_struct *mm) { - struct mm_struct *mm; - - if ((mm = g_malloc(sizeof (*mm))) == NULL) - return (NULL); - mm->mm_count = 0; QTAILQ_INIT(&mm->mm_mmap); - - return (mm); } static void vma_delete(struct mm_struct *mm) @@ -4195,7 +4188,6 @@ static void vma_delete(struct mm_struct *mm) QTAILQ_REMOVE(&mm->mm_mmap, vma, vma_link); g_free(vma); } - g_free(mm); } static int vma_add_mapping(struct mm_struct *mm, target_ulong start, @@ -4638,7 +4630,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) struct elfhdr elf; struct elf_phdr phdr; struct rlimit dumpsize; - struct mm_struct *mm = NULL; + struct mm_struct mm; off_t offset = 0, data_offset = 0; int segs = 0; int fd = -1; @@ -4664,11 +4656,10 @@ static int elf_core_dump(int signr, const CPUArchState *env) * set up structure containing this information. After * this point vma_xxx functions can be used. */ - if ((mm = vma_init()) == NULL) - goto out; + vma_init(&mm); - walk_memory_regions(mm, vma_walker); - segs = vma_get_mapping_count(mm); + walk_memory_regions(&mm, vma_walker); + segs = vma_get_mapping_count(&mm); /* * Construct valid coredump ELF header. We also @@ -4701,7 +4692,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) * Write program headers for memory regions mapped in * the target process. */ - for (vma = vma_first(mm); vma != NULL; vma = vma_next(vma)) { + for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { (void) memset(&phdr, 0, sizeof (phdr)); phdr.p_type = PT_LOAD; @@ -4738,7 +4729,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) /* * Finally we can dump process memory into corefile as well. */ - for (vma = vma_first(mm); vma != NULL; vma = vma_next(vma)) { + for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { abi_ulong addr; abi_ulong end; @@ -4767,8 +4758,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) out: free_note_info(&info); - if (mm != NULL) - vma_delete(mm); + vma_delete(&mm); (void) close(fd); if (errno != 0) -- cgit v1.2.3 From ccb6f3eee0f746961f95e9956fa20decd1f46da3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 16:58:02 -1000 Subject: linux-user/elfload: Latch errno before cleanup in elf_core_dump On the off-chance that one of the cleanup functions changes errno, latch the errno that we want to return beforehand. Flush errno to 0 upon success, rather than at the beginning. No need to avoid negation of 0. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 98b82b1a49..39d9ef9acc 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4634,8 +4634,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) off_t offset = 0, data_offset = 0; int segs = 0; int fd = -1; - - errno = 0; + int ret; if (prctl(PR_GET_DUMPABLE) == 0) { return 0; @@ -4755,15 +4754,14 @@ static int elf_core_dump(int signr, const CPUArchState *env) goto out; } } + errno = 0; out: + ret = -errno; free_note_info(&info); vma_delete(&mm); - (void) close(fd); - - if (errno != 0) - return (-errno); - return (0); + close(fd); + return ret; } #endif /* USE_ELF_CORE_DUMP */ -- cgit v1.2.3 From 106f8da6643634f141b39198576156a4c5bd6e60 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 17:03:19 -1000 Subject: linux-user/elfload: Open core file after vma_init Swap the ordering of vma_init and open. 
This will be necessary for further changes, and adjusts the error cleanup path. Narrow the scope of corefile, as the variable can be freed immediately after use in open(). Signed-off-by: Richard Henderson --- linux-user/elfload.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 39d9ef9acc..877799e9c7 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4625,7 +4625,6 @@ static int elf_core_dump(int signr, const CPUArchState *env) const CPUState *cpu = env_cpu((CPUArchState *)env); const TaskState *ts = (const TaskState *)cpu->opaque; struct vm_area_struct *vma = NULL; - g_autofree char *corefile = NULL; struct elf_note_info info; struct elfhdr elf; struct elf_phdr phdr; @@ -4644,12 +4643,6 @@ static int elf_core_dump(int signr, const CPUArchState *env) return 0; } - corefile = core_dump_filename(ts); - - if ((fd = open(corefile, O_WRONLY | O_CREAT, - S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) - return (-errno); - /* * Walk through target process memory mappings and * set up structure containing this information. After @@ -4657,6 +4650,15 @@ static int elf_core_dump(int signr, const CPUArchState *env) */ vma_init(&mm); + { + g_autofree char *corefile = core_dump_filename(ts); + fd = open(corefile, O_WRONLY | O_CREAT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + } + if (fd < 0) { + goto out; + } + walk_memory_regions(&mm, vma_walker); segs = vma_get_mapping_count(&mm); -- cgit v1.2.3 From e0add9a835c6259df211eb4874d3d00d9d016796 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 17:07:38 -1000 Subject: linux-user/elfload: Truncate core file on open While we usually create a new corefile, truncate otherwise. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 877799e9c7..16dd08a828 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4652,7 +4652,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) { g_autofree char *corefile = core_dump_filename(ts); - fd = open(corefile, O_WRONLY | O_CREAT, + fd = open(corefile, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); } if (fd < 0) { -- cgit v1.2.3 From b5262077655ab586d90f80a0f1b9a536bbc999ba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 17:14:22 -1000 Subject: linux-user/elfload: Lock cpu list and mmap during elf_core_dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not allow changes to the set of cpus and memory regions while we are dumping core. 
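Distilled, the locking discipline this patch establishes looks like the sketch below (not the full function): both locks are taken up front, every failure path funnels to one label, and the unlocks run in reverse order of acquisition. Because elf_core_dump() now holds qemu_cpu_list_lock for the whole dump, the WITH_QEMU_LOCK_GUARD around CPU_FOREACH in fill_note_info() has to go, as the first hunk below shows; the lock is an ordinary non-recursive mutex, so re-taking it in the callee would deadlock.

    cpu_list_lock();
    mmap_lock();
    /* ... walk memory regions, fill notes, write the core file ... */
out:
    mmap_unlock();
    cpu_list_unlock();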
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- linux-user/elfload.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 16dd08a828..6f9da721d7 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4537,13 +4537,11 @@ static void fill_note_info(struct elf_note_info *info, } /* read and fill status of all threads */ - WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { - CPU_FOREACH(cpu) { - if (cpu == thread_cpu) { - continue; - } - fill_thread_info(info, cpu_env(cpu)); + CPU_FOREACH(cpu) { + if (cpu == thread_cpu) { + continue; } + fill_thread_info(info, cpu_env(cpu)); } } @@ -4643,6 +4641,9 @@ static int elf_core_dump(int signr, const CPUArchState *env) return 0; } + cpu_list_lock(); + mmap_lock(); + /* * Walk through target process memory mappings and * set up structure containing this information. After @@ -4760,6 +4761,8 @@ static int elf_core_dump(int signr, const CPUArchState *env) out: ret = -errno; + mmap_unlock(); + cpu_list_unlock(); free_note_info(&info); vma_delete(&mm); close(fd); -- cgit v1.2.3 From 2410d28dc992082ec3348d6544ef30c1f950c8b9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 26 Feb 2024 18:18:57 -1000 Subject: linux-user/elfload: Size corefile before opening Verify the size of the corefile vs the rlimit before opening and creating the core file at all. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 83 ++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 6f9da721d7..bad01bd2ef 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4270,6 +4270,16 @@ static int vma_walker(void *priv, target_ulong start, target_ulong end, return (0); } +static size_t size_note(const char *name, size_t datasz) +{ + size_t namesz = strlen(name) + 1; + + namesz = ROUND_UP(namesz, 4); + datasz = ROUND_UP(datasz, 4); + + return sizeof(struct elf_note) + namesz + datasz; +} + static void fill_note(struct memelfnote *note, const char *name, int type, unsigned int sz, void *data) { @@ -4428,27 +4438,9 @@ static int dump_write(int fd, const void *ptr, size_t size) { const char *bufp = (const char *)ptr; ssize_t bytes_written, bytes_left; - struct rlimit dumpsize; - off_t pos; bytes_written = 0; - getrlimit(RLIMIT_CORE, &dumpsize); - if ((pos = lseek(fd, 0, SEEK_CUR))==-1) { - if (errno == ESPIPE) { /* not a seekable stream */ - bytes_left = size; - } else { - return pos; - } - } else { - if (dumpsize.rlim_cur <= pos) { - return -1; - } else if (dumpsize.rlim_cur == RLIM_INFINITY) { - bytes_left = size; - } else { - size_t limit_left=dumpsize.rlim_cur - pos; - bytes_left = limit_left >= size ? 
size : limit_left ; } - } + bytes_left = size; /* * In normal conditions, single write(2) should do but @@ -4622,16 +4614,15 @@ static int elf_core_dump(int signr, const CPUArchState *env) { const CPUState *cpu = env_cpu((CPUArchState *)env); const TaskState *ts = (const TaskState *)cpu->opaque; - struct vm_area_struct *vma = NULL; + struct vm_area_struct *vma; struct elf_note_info info; struct elfhdr elf; struct elf_phdr phdr; struct rlimit dumpsize; struct mm_struct mm; - off_t offset = 0, data_offset = 0; - int segs = 0; + off_t offset, note_offset, data_offset; + int segs, cpus, ret; int fd = -1; - int ret; if (prctl(PR_GET_DUMPABLE) == 0) { return 0; } @@ -4646,10 +4637,36 @@ static int elf_core_dump(int signr, const CPUArchState *env) /* * Walk through target process memory mappings and - * set up structure containing this information. After - * this point vma_xxx functions can be used. + * set up structure containing this information. */ vma_init(&mm); + walk_memory_regions(&mm, vma_walker); + segs = vma_get_mapping_count(&mm); + + cpus = 0; + CPU_FOREACH(cpu) { + cpus++; + } + + offset = sizeof(struct elfhdr); + offset += (segs + 1) * sizeof(struct elf_phdr); + note_offset = offset; + + offset += size_note("CORE", ts->info->auxv_len); + offset += size_note("CORE", sizeof(struct target_elf_prpsinfo)); + offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus; + offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE); + data_offset = offset; + + for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { + offset += vma_dump_size(vma); + } + + /* Do not dump if the corefile size exceeds the limit. */ + if (dumpsize.rlim_cur != RLIM_INFINITY && dumpsize.rlim_cur < offset) { + errno = 0; + goto out; + } { g_autofree char *corefile = core_dump_filename(ts); @@ -4660,9 +4677,6 @@ static int elf_core_dump(int signr, const CPUArchState *env) goto out; } - walk_memory_regions(&mm, vma_walker); - segs = vma_get_mapping_count(&mm); - /* * Construct valid coredump ELF header. We also * add one more segment for notes. @@ -4674,26 +4688,17 @@ static int elf_core_dump(int signr, const CPUArchState *env) /* fill in the in-memory version of notes */ fill_note_info(&info, signr, env); - offset += sizeof (elf); /* elf header */ - offset += (segs + 1) * sizeof (struct elf_phdr); /* program headers */ - /* write out notes program header */ - fill_elf_note_phdr(&phdr, info.notes_size, offset); + fill_elf_note_phdr(&phdr, info.notes_size, note_offset); - offset += info.notes_size; if (dump_write(fd, &phdr, sizeof (phdr)) != 0) goto out; - /* - * ELF specification wants data to start at page boundary so - * we align it here. - */ - data_offset = offset = roundup(offset, ELF_EXEC_PAGESIZE); - /* * Write program headers for memory regions mapped in * the target process. */ + offset = data_offset; for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { (void) memset(&phdr, 0, sizeof (phdr)); -- cgit v1.2.3 From 243c47066253c4236b8792ee158f9971d1c27bf9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Feb 2024 06:34:18 -1000 Subject: linux-user/elfload: Write corefile elf header in one block Fixes a bug in which write_note() wrote namesz_rounded and datasz_rounded bytes, even though name and data pointers contain only the unrounded number of bytes. Instead of many small writes, allocate a block to contain all of the elf headers and all of the notes. Copy the data into the block piecemeal and then write it to the file as a chunk.
This also avoids the need to lseek forward for alignment. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 416 ++++++++++++++------------------------------------- 1 file changed, 116 insertions(+), 300 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index bad01bd2ef..b8d07d8054 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4002,18 +4002,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) * Example for ARM target is provided in this file. */ -/* An ELF note in memory */ -struct memelfnote { - const char *name; - size_t namesz; - size_t namesz_rounded; - int type; - size_t datasz; - size_t datasz_rounded; - void *data; - size_t notesz; -}; - struct target_elf_siginfo { abi_int si_signo; /* signal number */ abi_int si_code; /* extra code */ @@ -4053,40 +4041,6 @@ struct target_elf_prpsinfo { char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -/* Here is the structure in which status of each thread is captured. */ -struct elf_thread_status { - QTAILQ_ENTRY(elf_thread_status) ets_link; - struct target_elf_prstatus prstatus; /* NT_PRSTATUS */ -#if 0 - elf_fpregset_t fpu; /* NT_PRFPREG */ - struct task_struct *thread; - elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */ -#endif - struct memelfnote notes[1]; - int num_notes; -}; - -#define NUMNOTES 3 - -struct elf_note_info { - struct memelfnote notes[NUMNOTES]; - struct target_elf_prstatus prstatus; /* NT_PRSTATUS */ - struct target_elf_prpsinfo psinfo; /* NT_PRPSINFO */ - - QTAILQ_HEAD(, elf_thread_status) thread_list; -#if 0 - /* - * Current version of ELF coredump doesn't support - * dumping fp regs etc. - */ - elf_fpregset_t *fpu; - elf_fpxregset_t *xfpu; - int thread_status_size; -#endif - int notes_size; - int numnote; -}; - struct vm_area_struct { target_ulong vma_start; /* start vaddr of memory region */ target_ulong vma_end; /* end vaddr of memory region */ @@ -4110,22 +4064,6 @@ static abi_ulong vma_dump_size(const struct vm_area_struct *); static int vma_walker(void *priv, target_ulong start, target_ulong end, unsigned long flags); -static void fill_elf_header(struct elfhdr *, int, uint16_t, uint32_t); -static void fill_note(struct memelfnote *, const char *, int, - unsigned int, void *); -static void fill_prstatus(struct target_elf_prstatus *, const TaskState *, int); -static int fill_psinfo(struct target_elf_prpsinfo *, const TaskState *); -static void fill_auxv_note(struct memelfnote *, const TaskState *); -static void fill_elf_note_phdr(struct elf_phdr *, int, off_t); -static size_t note_size(const struct memelfnote *); -static void free_note_info(struct elf_note_info *); -static void fill_note_info(struct elf_note_info *, int, const CPUArchState *); -static void fill_thread_info(struct elf_note_info *, const CPUArchState *); - -static int dump_write(int, const void *, size_t); -static int write_note(struct memelfnote *, int); -static int write_note_info(struct elf_note_info *, int); - #ifdef BSWAP_NEEDED static void bswap_prstatus(struct target_elf_prstatus *prstatus) { @@ -4280,35 +4218,32 @@ static size_t size_note(const char *name, size_t datasz) return sizeof(struct elf_note) + namesz + datasz; } -static void fill_note(struct memelfnote *note, const char *name, int type, - unsigned int sz, void *data) +static void *fill_note(void **pptr, int type, const char *name, size_t datasz) { - unsigned int namesz; + void *ptr = *pptr; + struct elf_note *n = ptr; + size_t namesz = strlen(name) + 1; - namesz = strlen(name) + 1; - note->name = name; - 
note->namesz = namesz; - note->namesz_rounded = roundup(namesz, sizeof (int32_t)); - note->type = type; - note->datasz = sz; - note->datasz_rounded = roundup(sz, sizeof (int32_t)); + n->n_namesz = namesz; + n->n_descsz = datasz; + n->n_type = type; + bswap_note(n); - note->data = data; + ptr += sizeof(*n); + memcpy(ptr, name, namesz); - /* - * We calculate rounded up note size here as specified by - * ELF document. - */ - note->notesz = sizeof (struct elf_note) + - note->namesz_rounded + note->datasz_rounded; + namesz = ROUND_UP(namesz, 4); + datasz = ROUND_UP(datasz, 4); + + *pptr = ptr + namesz + datasz; + return ptr + namesz; } static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, uint32_t flags) { - (void) memset(elf, 0, sizeof(*elf)); + memcpy(elf->e_ident, ELFMAG, SELFMAG); - (void) memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; elf->e_ident[EI_DATA] = ELF_DATA; elf->e_ident[EI_VERSION] = EV_CURRENT; @@ -4326,95 +4261,79 @@ static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, bswap_ehdr(elf); } -static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) +static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset) { phdr->p_type = PT_NOTE; phdr->p_offset = offset; - phdr->p_vaddr = 0; - phdr->p_paddr = 0; phdr->p_filesz = sz; - phdr->p_memsz = 0; - phdr->p_flags = 0; - phdr->p_align = 0; bswap_phdr(phdr, 1); } -static size_t note_size(const struct memelfnote *note) +static void fill_prstatus_note(void *data, const TaskState *ts, + CPUState *cpu, int signr) { - return (note->notesz); -} + /* + * Because note memory is only aligned to 4, and target_elf_prstatus + * may well have higher alignment requirements, fill locally and + * memcpy to the destination afterward. + */ + struct target_elf_prstatus prstatus = { + .pr_info.si_signo = signr, + .pr_cursig = signr, + .pr_pid = ts->ts_tid, + .pr_ppid = getppid(), + .pr_pgrp = getpgrp(), + .pr_sid = getsid(0), + }; -static void fill_prstatus(struct target_elf_prstatus *prstatus, - const TaskState *ts, int signr) -{ - (void) memset(prstatus, 0, sizeof (*prstatus)); - prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; - prstatus->pr_pid = ts->ts_tid; - prstatus->pr_ppid = getppid(); - prstatus->pr_pgrp = getpgrp(); - prstatus->pr_sid = getsid(0); - - bswap_prstatus(prstatus); + elf_core_copy_regs(&prstatus.pr_reg, cpu_env(cpu)); + bswap_prstatus(&prstatus); + memcpy(data, &prstatus, sizeof(prstatus)); } -static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts) +static void fill_prpsinfo_note(void *data, const TaskState *ts) { + /* + * Because note memory is only aligned to 4, and target_elf_prpsinfo + * may well have higher alignment requirements, fill locally and + * memcpy to the destination afterward. 
+ */ + struct target_elf_prpsinfo psinfo; char *base_filename; - unsigned int i, len; - - (void) memset(psinfo, 0, sizeof (*psinfo)); + size_t len; len = ts->info->env_strings - ts->info->arg_strings; - if (len >= ELF_PRARGSZ) - len = ELF_PRARGSZ - 1; - if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_strings, len)) { - return -EFAULT; - } - for (i = 0; i < len; i++) - if (psinfo->pr_psargs[i] == 0) - psinfo->pr_psargs[i] = ' '; - psinfo->pr_psargs[len] = 0; - - psinfo->pr_pid = getpid(); - psinfo->pr_ppid = getppid(); - psinfo->pr_pgrp = getpgrp(); - psinfo->pr_sid = getsid(0); - psinfo->pr_uid = getuid(); - psinfo->pr_gid = getgid(); + len = MIN(len, ELF_PRARGSZ); + memcpy(&psinfo.pr_psargs, g2h_untagged(ts->info->arg_strings), len); + for (size_t i = 0; i < len; i++) { + if (psinfo.pr_psargs[i] == 0) { + psinfo.pr_psargs[i] = ' '; + } + } + + psinfo.pr_pid = getpid(); + psinfo.pr_ppid = getppid(); + psinfo.pr_pgrp = getpgrp(); + psinfo.pr_sid = getsid(0); + psinfo.pr_uid = getuid(); + psinfo.pr_gid = getgid(); base_filename = g_path_get_basename(ts->bprm->filename); /* * Using strncpy here is fine: at max-length, * this field is not NUL-terminated. */ - (void) strncpy(psinfo->pr_fname, base_filename, - sizeof(psinfo->pr_fname)); - + strncpy(psinfo.pr_fname, base_filename, sizeof(psinfo.pr_fname)); g_free(base_filename); - bswap_psinfo(psinfo); - return (0); + + bswap_psinfo(&psinfo); + memcpy(data, &psinfo, sizeof(psinfo)); } -static void fill_auxv_note(struct memelfnote *note, const TaskState *ts) +static void fill_auxv_note(void *data, const TaskState *ts) { - elf_addr_t auxv = (elf_addr_t)ts->info->saved_auxv; - elf_addr_t orig_auxv = auxv; - void *ptr; - int len = ts->info->auxv_len; - - /* - * Auxiliary vector is stored in target process stack. It contains - * {type, value} pairs that we need to dump into note. This is not - * strictly necessary but we do it here for sake of completeness. 
- */ - - /* read in whole auxv vector and copy it to memelfnote */ - ptr = lock_user(VERIFY_READ, orig_auxv, len, 0); - if (ptr != NULL) { - fill_note(note, "CORE", NT_AUXV, len, ptr); - unlock_user(ptr, auxv, len); - } + memcpy(data, g2h_untagged(ts->info->saved_auxv), ts->info->auxv_len); } /* @@ -4462,111 +4381,6 @@ static int dump_write(int fd, const void *ptr, size_t size) return (0); } -static int write_note(struct memelfnote *men, int fd) -{ - struct elf_note en; - - en.n_namesz = men->namesz; - en.n_type = men->type; - en.n_descsz = men->datasz; - - bswap_note(&en); - - if (dump_write(fd, &en, sizeof(en)) != 0) - return (-1); - if (dump_write(fd, men->name, men->namesz_rounded) != 0) - return (-1); - if (dump_write(fd, men->data, men->datasz_rounded) != 0) - return (-1); - - return (0); -} - -static void fill_thread_info(struct elf_note_info *info, const CPUArchState *env) -{ - CPUState *cpu = env_cpu((CPUArchState *)env); - TaskState *ts = (TaskState *)cpu->opaque; - struct elf_thread_status *ets; - - ets = g_malloc0(sizeof (*ets)); - ets->num_notes = 1; /* only prstatus is dumped */ - fill_prstatus(&ets->prstatus, ts, 0); - elf_core_copy_regs(&ets->prstatus.pr_reg, env); - fill_note(&ets->notes[0], "CORE", NT_PRSTATUS, sizeof (ets->prstatus), - &ets->prstatus); - - QTAILQ_INSERT_TAIL(&info->thread_list, ets, ets_link); - - info->notes_size += note_size(&ets->notes[0]); -} - -static void fill_note_info(struct elf_note_info *info, - int signr, const CPUArchState *env) -{ - CPUState *cpu = env_cpu((CPUArchState *)env); - TaskState *ts = (TaskState *)cpu->opaque; - - memset(info, 0, sizeof (*info)); - QTAILQ_INIT(&info->thread_list); - - /* - * First fill in status (and registers) of current thread - * including process info & aux vector. - */ - fill_prstatus(&info->prstatus, ts, signr); - elf_core_copy_regs(&info->prstatus.pr_reg, env); - fill_note(&info->notes[0], "CORE", NT_PRSTATUS, - sizeof(info->prstatus), &info->prstatus); - fill_psinfo(&info->psinfo, ts); - fill_note(&info->notes[1], "CORE", NT_PRPSINFO, - sizeof(info->psinfo), &info->psinfo); - fill_auxv_note(&info->notes[2], ts); - info->numnote = 3; - - info->notes_size = 0; - for (int i = 0; i < info->numnote; i++) { - info->notes_size += note_size(&info->notes[i]); - } - - /* read and fill status of all threads */ - CPU_FOREACH(cpu) { - if (cpu == thread_cpu) { - continue; - } - fill_thread_info(info, cpu_env(cpu)); - } -} - -static void free_note_info(struct elf_note_info *info) -{ - struct elf_thread_status *ets; - - while (!QTAILQ_EMPTY(&info->thread_list)) { - ets = QTAILQ_FIRST(&info->thread_list); - QTAILQ_REMOVE(&info->thread_list, ets, ets_link); - g_free(ets); - } -} - -static int write_note_info(struct elf_note_info *info, int fd) -{ - struct elf_thread_status *ets; - int i, error = 0; - - /* write prstatus, psinfo and auxv for current thread */ - for (i = 0; i < info->numnote; i++) - if ((error = write_note(&info->notes[i], fd)) != 0) - return (error); - - /* write prstatus for each thread */ - QTAILQ_FOREACH(ets, &info->thread_list, ets_link) { - if ((error = write_note(&ets->notes[0], fd)) != 0) - return (error); - } - - return (0); -} - /* * Write out ELF coredump. 
* @@ -4615,14 +4429,13 @@ static int elf_core_dump(int signr, const CPUArchState *env) const CPUState *cpu = env_cpu((CPUArchState *)env); const TaskState *ts = (const TaskState *)cpu->opaque; struct vm_area_struct *vma; - struct elf_note_info info; - struct elfhdr elf; - struct elf_phdr phdr; struct rlimit dumpsize; struct mm_struct mm; off_t offset, note_offset, data_offset; + size_t note_size; int segs, cpus, ret; int fd = -1; + CPUState *cpu_iter; if (prctl(PR_GET_DUMPABLE) == 0) { return 0; @@ -4644,7 +4457,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) segs = vma_get_mapping_count(&mm); cpus = 0; - CPU_FOREACH(cpu) { + CPU_FOREACH(cpu_iter) { cpus++; } @@ -4655,6 +4468,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) offset += size_note("CORE", ts->info->auxv_len); offset += size_note("CORE", sizeof(struct target_elf_prpsinfo)); offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus; + note_size = offset - note_offset; offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE); data_offset = offset; @@ -4678,61 +4492,64 @@ static int elf_core_dump(int signr, const CPUArchState *env) } /* - * Construct valid coredump ELF header. We also - * add one more segment for notes. + * There is a fair amount of alignment padding within the notes + * as well as preceeding the process memory. Allocate a zeroed + * block to hold it all. Write all of the headers directly into + * this buffer and then write it out as a block. */ - fill_elf_header(&elf, segs + 1, ELF_MACHINE, 0); - if (dump_write(fd, &elf, sizeof (elf)) != 0) - goto out; + { + g_autofree void *header = g_malloc0(data_offset); + void *hptr, *dptr; + + /* Create elf file header. */ + hptr = header; + fill_elf_header(hptr, segs + 1, ELF_MACHINE, 0); + hptr += sizeof(struct elfhdr); + + /* Create elf program headers. */ + fill_elf_note_phdr(hptr, note_size, note_offset); + hptr += sizeof(struct elf_phdr); + + offset = data_offset; + for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { + struct elf_phdr *phdr = hptr; + + phdr->p_type = PT_LOAD; + phdr->p_offset = offset; + phdr->p_vaddr = vma->vma_start; + phdr->p_paddr = 0; + phdr->p_filesz = vma_dump_size(vma); + offset += phdr->p_filesz; + phdr->p_memsz = vma->vma_end - vma->vma_start; + phdr->p_flags = (vma->vma_flags & PROT_READ ? PF_R : 0) + | (vma->vma_flags & PROT_WRITE ? PF_W : 0) + | (vma->vma_flags & PROT_EXEC ? PF_X : 0); + phdr->p_align = ELF_EXEC_PAGESIZE; + + bswap_phdr(phdr, 1); + hptr += sizeof(struct elf_phdr); + } - /* fill in the in-memory version of notes */ - fill_note_info(&info, signr, env); + /* Create the notes. */ + dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len); + fill_auxv_note(dptr, ts); - /* write out notes program header */ - fill_elf_note_phdr(&phdr, info.notes_size, note_offset); + dptr = fill_note(&hptr, NT_PRPSINFO, "CORE", + sizeof(struct target_elf_prpsinfo)); + fill_prpsinfo_note(dptr, ts); - if (dump_write(fd, &phdr, sizeof (phdr)) != 0) - goto out; + CPU_FOREACH(cpu_iter) { + dptr = fill_note(&hptr, NT_PRSTATUS, "CORE", + sizeof(struct target_elf_prstatus)); + fill_prstatus_note(dptr, ts, cpu_iter, + cpu_iter == cpu ? signr : 0); + } - /* - * Write program headers for memory regions mapped in - * the target process. 
- */ - offset = data_offset; - for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { - (void) memset(&phdr, 0, sizeof (phdr)); - - phdr.p_type = PT_LOAD; - phdr.p_offset = offset; - phdr.p_vaddr = vma->vma_start; - phdr.p_paddr = 0; - phdr.p_filesz = vma_dump_size(vma); - offset += phdr.p_filesz; - phdr.p_memsz = vma->vma_end - vma->vma_start; - phdr.p_flags = vma->vma_flags & PROT_READ ? PF_R : 0; - if (vma->vma_flags & PROT_WRITE) - phdr.p_flags |= PF_W; - if (vma->vma_flags & PROT_EXEC) - phdr.p_flags |= PF_X; - phdr.p_align = ELF_EXEC_PAGESIZE; - - bswap_phdr(&phdr, 1); - if (dump_write(fd, &phdr, sizeof(phdr)) != 0) { + if (dump_write(fd, header, data_offset) < 0) { goto out; } } - /* - * Next we write notes just after program headers. No - * alignment needed here. - */ - if (write_note_info(&info, fd) < 0) - goto out; - - /* align data to page boundary */ - if (lseek(fd, data_offset, SEEK_SET) != data_offset) - goto out; - /* * Finally we can dump process memory into corefile as well. */ @@ -4768,7 +4585,6 @@ static int elf_core_dump(int signr, const CPUArchState *env) ret = -errno; mmap_unlock(); cpu_list_unlock(); - free_note_info(&info); vma_delete(&mm); close(fd); return ret; -- cgit v1.2.3 From b4c7ab816b48e0d2dd15753c0329bd4aa59f713f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Feb 2024 06:43:53 -1000 Subject: linux-user/elfload: Write process memory to core file in larger chunks We do not need to copy pages from guest memory before writing them out. Because vmas are contiguous in host memory, we can write them in one go. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index b8d07d8054..491e754f72 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4551,32 +4551,13 @@ static int elf_core_dump(int signr, const CPUArchState *env) } /* - * Finally we can dump process memory into corefile as well. + * Finally write process memory into the corefile as well. */ for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { - abi_ulong addr; - abi_ulong end; + size_t size = vma_dump_size(vma); - end = vma->vma_start + vma_dump_size(vma); - - for (addr = vma->vma_start; addr < end; - addr += TARGET_PAGE_SIZE) { - char page[TARGET_PAGE_SIZE]; - int error; - - /* - * Read in page from target process memory and - * write it to coredump file. - */ - error = copy_from_user(page, addr, sizeof (page)); - if (error != 0) { - (void) fprintf(stderr, "unable to dump " TARGET_ABI_FMT_lx "\n", - addr); - errno = -error; - goto out; - } - if (dump_write(fd, page, TARGET_PAGE_SIZE) < 0) - goto out; + if (size && dump_write(fd, g2h_untagged(vma->vma_start), size) < 0) { + goto out; } } errno = 0; -- cgit v1.2.3 From 1928d50bec7ef7956499e408ac3a501f02c57c47 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Feb 2024 06:47:49 -1000 Subject: linux-user/elfload: Simplify vma_dump_size Use the flags that we've already saved in order to test accessibility. Use g2h_untagged and compare guest memory directly instead of copy_from_user. 
Signed-off-by: Richard Henderson --- linux-user/elfload.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 491e754f72..47b5ce3005 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4166,37 +4166,23 @@ static int vma_get_mapping_count(const struct mm_struct *mm) */ static abi_ulong vma_dump_size(const struct vm_area_struct *vma) { - /* if we cannot even read the first page, skip it */ - if (!access_ok_untagged(VERIFY_READ, vma->vma_start, TARGET_PAGE_SIZE)) - return (0); + /* The area must be readable. */ + if (!(vma->vma_flags & PROT_READ)) { + return 0; + } /* * Usually we don't dump executable pages as they contain * non-writable code that debugger can read directly from - * target library etc. However, thread stacks are marked - * also executable so we read in first page of given region - * and check whether it contains elf header. If there is - * no elf header, we dump it. + * target library etc. If there is no elf header, we dump it. */ - if (vma->vma_flags & PROT_EXEC) { - char page[TARGET_PAGE_SIZE]; - - if (copy_from_user(page, vma->vma_start, sizeof (page))) { - return 0; - } - if ((page[EI_MAG0] == ELFMAG0) && - (page[EI_MAG1] == ELFMAG1) && - (page[EI_MAG2] == ELFMAG2) && - (page[EI_MAG3] == ELFMAG3)) { - /* - * Mappings are possibly from ELF binary. Don't dump - * them. - */ - return (0); - } + if (!(vma->vma_flags & PROT_WRITE) && + (vma->vma_flags & PROT_EXEC) && + memcmp(g2h_untagged(vma->vma_start), ELFMAG, SELFMAG) == 0) { + return 0; } - return (vma->vma_end - vma->vma_start); + return vma->vma_end - vma->vma_start; } static int vma_walker(void *priv, target_ulong start, target_ulong end, -- cgit v1.2.3 From 50e33f52fbc476f71eb006e0c8d3fa355539c3e1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Feb 2024 07:58:41 -1000 Subject: linux-user/elfload: Rely on walk_memory_regions for vmas Rather than creating new data structures for vma, rely on the IntervalTree used by walk_memory_regions. Use PAGE_* constants, per the page table api, rather than PROT_* constants, per the mmap api. Signed-off-by: Richard Henderson --- linux-user/elfload.c | 213 +++++++++++++++++++-------------------------------- 1 file changed, 78 insertions(+), 135 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 47b5ce3005..ae0abc4931 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4041,29 +4041,6 @@ struct target_elf_prpsinfo { char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -struct vm_area_struct { - target_ulong vma_start; /* start vaddr of memory region */ - target_ulong vma_end; /* end vaddr of memory region */ - abi_ulong vma_flags; /* protection etc. 
flags for the region */ - QTAILQ_ENTRY(vm_area_struct) vma_link; -}; - -struct mm_struct { - QTAILQ_HEAD(, vm_area_struct) mm_mmap; - int mm_count; /* number of mappings */ -}; - -static void vma_init(struct mm_struct *); -static void vma_delete(struct mm_struct *); -static int vma_add_mapping(struct mm_struct *, target_ulong, - target_ulong, abi_ulong); -static int vma_get_mapping_count(const struct mm_struct *); -static struct vm_area_struct *vma_first(const struct mm_struct *); -static struct vm_area_struct *vma_next(struct vm_area_struct *); -static abi_ulong vma_dump_size(const struct vm_area_struct *); -static int vma_walker(void *priv, target_ulong start, target_ulong end, - unsigned long flags); - #ifdef BSWAP_NEEDED static void bswap_prstatus(struct target_elf_prstatus *prstatus) { @@ -4105,69 +4082,14 @@ static inline void bswap_psinfo(struct target_elf_prpsinfo *p) {} static inline void bswap_note(struct elf_note *en) { } #endif /* BSWAP_NEEDED */ -/* - * Minimal support for linux memory regions. These are needed - * when we are finding out what memory exactly belongs to - * emulated process. No locks needed here, as long as - * thread that received the signal is stopped. - */ - -static void vma_init(struct mm_struct *mm) -{ - mm->mm_count = 0; - QTAILQ_INIT(&mm->mm_mmap); -} - -static void vma_delete(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - while ((vma = vma_first(mm)) != NULL) { - QTAILQ_REMOVE(&mm->mm_mmap, vma, vma_link); - g_free(vma); - } -} - -static int vma_add_mapping(struct mm_struct *mm, target_ulong start, - target_ulong end, abi_ulong flags) -{ - struct vm_area_struct *vma; - - if ((vma = g_malloc0(sizeof (*vma))) == NULL) - return (-1); - - vma->vma_start = start; - vma->vma_end = end; - vma->vma_flags = flags; - - QTAILQ_INSERT_TAIL(&mm->mm_mmap, vma, vma_link); - mm->mm_count++; - - return (0); -} - -static struct vm_area_struct *vma_first(const struct mm_struct *mm) -{ - return (QTAILQ_FIRST(&mm->mm_mmap)); -} - -static struct vm_area_struct *vma_next(struct vm_area_struct *vma) -{ - return (QTAILQ_NEXT(vma, vma_link)); -} - -static int vma_get_mapping_count(const struct mm_struct *mm) -{ - return (mm->mm_count); -} - /* * Calculate file (dump) size of given memory region. */ -static abi_ulong vma_dump_size(const struct vm_area_struct *vma) +static size_t vma_dump_size(target_ulong start, target_ulong end, + unsigned long flags) { /* The area must be readable. */ - if (!(vma->vma_flags & PROT_READ)) { + if (!(flags & PAGE_READ)) { return 0; } @@ -4176,22 +4098,13 @@ static abi_ulong vma_dump_size(const struct vm_area_struct *vma) * non-writable code that debugger can read directly from * target library etc. If there is no elf header, we dump it. 
*/ - if (!(vma->vma_flags & PROT_WRITE) && - (vma->vma_flags & PROT_EXEC) && - memcmp(g2h_untagged(vma->vma_start), ELFMAG, SELFMAG) == 0) { + if (!(flags & PAGE_WRITE_ORG) && + (flags & PAGE_EXEC) && + memcmp(g2h_untagged(start), ELFMAG, SELFMAG) == 0) { return 0; } - return vma->vma_end - vma->vma_start; -} - -static int vma_walker(void *priv, target_ulong start, target_ulong end, - unsigned long flags) -{ - struct mm_struct *mm = (struct mm_struct *)priv; - - vma_add_mapping(mm, start, end, flags); - return (0); + return end - start; } static size_t size_note(const char *name, size_t datasz) @@ -4367,6 +4280,61 @@ static int dump_write(int fd, const void *ptr, size_t size) return (0); } +typedef struct { + unsigned count; + size_t size; +} CountAndSizeRegions; + +static int wmr_count_and_size_regions(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) +{ + CountAndSizeRegions *css = opaque; + + css->count++; + css->size += vma_dump_size(start, end, flags); + return 0; +} + +typedef struct { + struct elf_phdr *phdr; + off_t offset; +} FillRegionPhdr; + +static int wmr_fill_region_phdr(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) +{ + FillRegionPhdr *d = opaque; + struct elf_phdr *phdr = d->phdr; + + phdr->p_type = PT_LOAD; + phdr->p_vaddr = start; + phdr->p_paddr = 0; + phdr->p_filesz = vma_dump_size(start, end, flags); + phdr->p_offset = d->offset; + d->offset += phdr->p_filesz; + phdr->p_memsz = end - start; + phdr->p_flags = (flags & PAGE_READ ? PF_R : 0) + | (flags & PAGE_WRITE_ORG ? PF_W : 0) + | (flags & PAGE_EXEC ? PF_X : 0); + phdr->p_align = ELF_EXEC_PAGESIZE; + + bswap_phdr(phdr, 1); + d->phdr = phdr + 1; + return 0; +} + +static int wmr_write_region(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) +{ + int fd = *(int *)opaque; + size_t size = vma_dump_size(start, end, flags); + + if (!size) { + return 0; + } + return dump_write(fd, g2h_untagged(start), size); +} + /* * Write out ELF coredump. * @@ -4414,12 +4382,11 @@ static int elf_core_dump(int signr, const CPUArchState *env) { const CPUState *cpu = env_cpu((CPUArchState *)env); const TaskState *ts = (const TaskState *)cpu->opaque; - struct vm_area_struct *vma; struct rlimit dumpsize; - struct mm_struct mm; + CountAndSizeRegions css; off_t offset, note_offset, data_offset; size_t note_size; - int segs, cpus, ret; + int cpus, ret; int fd = -1; CPUState *cpu_iter; @@ -4438,9 +4405,8 @@ static int elf_core_dump(int signr, const CPUArchState *env) * Walk through target process memory mappings and * set up structure containing this information. 
*/ - vma_init(&mm); - walk_memory_regions(&mm, vma_walker); - segs = vma_get_mapping_count(&mm); + memset(&css, 0, sizeof(css)); + walk_memory_regions(&css, wmr_count_and_size_regions); cpus = 0; CPU_FOREACH(cpu_iter) { @@ -4448,22 +4414,18 @@ static int elf_core_dump(int signr, const CPUArchState *env) } offset = sizeof(struct elfhdr); - offset += (segs + 1) * sizeof(struct elf_phdr); + offset += (css.count + 1) * sizeof(struct elf_phdr); note_offset = offset; offset += size_note("CORE", ts->info->auxv_len); offset += size_note("CORE", sizeof(struct target_elf_prpsinfo)); offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus; note_size = offset - note_offset; - offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE); - data_offset = offset; - - for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { - offset += vma_dump_size(vma); - } + data_offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE); /* Do not dump if the corefile size exceeds the limit. */ - if (dumpsize.rlim_cur != RLIM_INFINITY && dumpsize.rlim_cur < offset) { + if (dumpsize.rlim_cur != RLIM_INFINITY + && dumpsize.rlim_cur < data_offset + css.size) { errno = 0; goto out; } @@ -4485,36 +4447,22 @@ static int elf_core_dump(int signr, const CPUArchState *env) */ { g_autofree void *header = g_malloc0(data_offset); + FillRegionPhdr frp; void *hptr, *dptr; /* Create elf file header. */ hptr = header; - fill_elf_header(hptr, segs + 1, ELF_MACHINE, 0); + fill_elf_header(hptr, css.count + 1, ELF_MACHINE, 0); hptr += sizeof(struct elfhdr); /* Create elf program headers. */ fill_elf_note_phdr(hptr, note_size, note_offset); hptr += sizeof(struct elf_phdr); - offset = data_offset; - for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { - struct elf_phdr *phdr = hptr; - - phdr->p_type = PT_LOAD; - phdr->p_offset = offset; - phdr->p_vaddr = vma->vma_start; - phdr->p_paddr = 0; - phdr->p_filesz = vma_dump_size(vma); - offset += phdr->p_filesz; - phdr->p_memsz = vma->vma_end - vma->vma_start; - phdr->p_flags = (vma->vma_flags & PROT_READ ? PF_R : 0) - | (vma->vma_flags & PROT_WRITE ? PF_W : 0) - | (vma->vma_flags & PROT_EXEC ? PF_X : 0); - phdr->p_align = ELF_EXEC_PAGESIZE; - - bswap_phdr(phdr, 1); - hptr += sizeof(struct elf_phdr); - } + frp.phdr = hptr; + frp.offset = data_offset; + walk_memory_regions(&frp, wmr_fill_region_phdr); + hptr = frp.phdr; /* Create the notes. */ dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len); @@ -4539,12 +4487,8 @@ static int elf_core_dump(int signr, const CPUArchState *env) /* * Finally write process memory into the corefile as well. */ - for (vma = vma_first(&mm); vma != NULL; vma = vma_next(vma)) { - size_t size = vma_dump_size(vma); - - if (size && dump_write(fd, g2h_untagged(vma->vma_start), size) < 0) { - goto out; - } + if (walk_memory_regions(&fd, wmr_write_region) < 0) { + goto out; } errno = 0; @@ -4552,7 +4496,6 @@ static int elf_core_dump(int signr, const CPUArchState *env) ret = -errno; mmap_unlock(); cpu_list_unlock(); - vma_delete(&mm); close(fd); return ret; } -- cgit v1.2.3 From 41689bb30cb55f411a4d9f7a31f3e601292d3be2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Feb 2024 08:25:22 -1000 Subject: linux-user/elfload: Unprotect regions before core dump By unprotecting regions, we re-instate writability and unify regions that have been split, which may reduce the total number of regions. 
Signed-off-by: Richard Henderson --- linux-user/elfload.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index ae0abc4931..38bfc9ac67 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3963,6 +3963,8 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) } #ifdef USE_ELF_CORE_DUMP +#include "exec/translate-all.h" + /* * Definitions to generate Intel SVR4-like core files. * These mostly have the same names as the SVR4 types with "target_elf_" @@ -4280,6 +4282,23 @@ static int dump_write(int fd, const void *ptr, size_t size) return (0); } +static int wmr_page_unprotect_regions(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) +{ + if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) { + size_t step = MAX(TARGET_PAGE_SIZE, qemu_host_page_size); + + while (1) { + page_unprotect(start, 0); + if (end - start <= step) { + break; + } + start += step; + } + } + return 0; +} + typedef struct { unsigned count; size_t size; } CountAndSizeRegions; @@ -4401,6 +4420,9 @@ static int elf_core_dump(int signr, const CPUArchState *env) cpu_list_lock(); mmap_lock(); + /* By unprotecting, we merge vmas that might be split. */ + walk_memory_regions(NULL, wmr_page_unprotect_regions); + /* * Walk through target process memory mappings and * set up structure containing this information. -- cgit v1.2.3 From 7f89fdf8ebe6ef8df48f0a05f44e1020c713a94e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Feb 2024 15:40:11 -0600 Subject: tcg/aarch64: Apple does not align __int128_t in even registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms When passing an argument with 16-byte alignment in integer registers, Apple platforms allow the argument to start in an odd-numbered xN register. The standard ABI requires it to begin in an even-numbered xN register. Cc: qemu-stable@nongnu.org Fixes: 5427a9a7604 ("tcg: Add TCG_TARGET_CALL_{RET,ARG}_I128") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2169 Signed-off-by: Richard Henderson Message-Id: <9fc0c2c7-dd57-459e-aecb-528edb74b4a7@linaro.org> Reviewed-by: Philippe Mathieu-Daudé --- tcg/aarch64/tcg-target.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index ef5ebe91bd..85d5746e47 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -55,7 +55,11 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL -#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#ifdef CONFIG_DARWIN +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#else +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#endif #define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL #define have_lse (cpuinfo & CPUINFO_LSE) -- cgit v1.2.3 From 62bcba836cb199bb0d9b5aa160919c863393859f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 19 Feb 2024 17:31:51 +0000 Subject: accel/tcg: Set can_do_io at the start of lookup_tb_ptr helper If a page table is in IO memory and lookup_tb_ptr probes the TLB it can result in a page table walk for the instruction fetch. If this hits IO memory, io_prepare falsely assumes it needs to do a TLB recompile. Avoid that by setting can_do_io at the start of lookup_tb_ptr.
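The ordering constraint in miniature (a condensed sketch of the hunk below, not new logic): the flag has to be raised before anything that can probe the TLB, because the page-table walk happens inside the lookup itself.

    cpu->neg.can_do_io = true;   /* first: MMIO accesses are now permitted */
    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
    /* ... */
    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);  /* may walk page tables */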
Link: https://lore.kernel.org/qemu-devel/CAFEAcA_a_AyQ=Epz3_+CheAT8Crsk9mOu894wbNW_FywamkZiw@mail.gmail.com/#t Reviewed-by: Richard Henderson Signed-off-by: Peter Maydell Signed-off-by: Jonathan Cameron Message-Id: <20240219173153.12114-2-Jonathan.Cameron@huawei.com> Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 977576ca14..52239a441f 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -396,6 +396,14 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) uint64_t cs_base; uint32_t flags, cflags; + /* + * By definition we've just finished a TB, so I/O is OK. + * Avoid the possibility of calling cpu_io_recompile() if + * a page table walk triggered by tb_lookup() calling + * probe_access_internal() happens to touch an MMIO device. + * The next TB, if we chain to it, will clear the flag again. + */ + cpu->neg.can_do_io = true; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); cflags = curr_cflags(cpu); -- cgit v1.2.3 From 6aba908d2b2d4c6c6e1e0c57a908b6ad9730525d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 19 Feb 2024 17:31:53 +0000 Subject: tcg: Avoid double lock if page tables happen to be in mmio memory. On i386, after fixing the page walking code to work with pages in MMIO memory (specifically CXL emulated interleaved memory), a crash was seen in an interrupt handling path. Useful part of the backtrace:
7  0x0000555555ab1929 in bql_lock_impl (file=0x555556049122 "../../accel/tcg/cputlb.c", line=2033) at ../../system/cpus.c:524
8  bql_lock_impl (file=file@entry=0x555556049122 "../../accel/tcg/cputlb.c", line=line@entry=2033) at ../../system/cpus.c:520
9  0x0000555555c9f7d6 in do_ld_mmio_beN (cpu=0x5555578e0cb0, full=0x7ffe88012950, ret_be=ret_be@entry=0, addr=19595792376, size=size@entry=8, mmu_idx=4, type=MMU_DATA_LOAD, ra=0) at ../../accel/tcg/cputlb.c:2033
10 0x0000555555ca0fbd in do_ld_8 (cpu=cpu@entry=0x5555578e0cb0, p=p@entry=0x7ffff4efd1d0, mmu_idx=<optimized out>, type=type@entry=MMU_DATA_LOAD, memop=<optimized out>, ra=ra@entry=0) at ../../accel/tcg/cputlb.c:2356
11 0x0000555555ca341f in do_ld8_mmu (cpu=cpu@entry=0x5555578e0cb0, addr=addr@entry=19595792376, oi=oi@entry=52, ra=0, ra@entry=52, access_type=access_type@entry=MMU_DATA_LOAD) at ../../accel/tcg/cputlb.c:2439
12 0x0000555555ca5f59 in cpu_ldq_mmu (ra=52, oi=52, addr=19595792376, env=0x5555578e3470) at ../../accel/tcg/ldst_common.c.inc:169
13 cpu_ldq_le_mmuidx_ra (env=0x5555578e3470, addr=19595792376, mmu_idx=<optimized out>, ra=ra@entry=0) at ../../accel/tcg/ldst_common.c.inc:301
14 0x0000555555b4b5fc in ptw_ldq (ra=0, in=0x7ffff4efd320) at ../../target/i386/tcg/sysemu/excp_helper.c:98
15 ptw_ldq (ra=0, in=0x7ffff4efd320) at ../../target/i386/tcg/sysemu/excp_helper.c:93
16 mmu_translate (env=env@entry=0x5555578e3470, in=0x7ffff4efd3e0, out=0x7ffff4efd3b0, err=err@entry=0x7ffff4efd3c0, ra=ra@entry=0) at ../../target/i386/tcg/sysemu/excp_helper.c:174
17 0x0000555555b4c4b3 in get_physical_address (ra=0, err=0x7ffff4efd3c0, out=0x7ffff4efd3b0, mmu_idx=0, access_type=MMU_DATA_LOAD, addr=18446741874686299840, env=0x5555578e3470) at ../../target/i386/tcg/sysemu/excp_helper.c:580
18 x86_cpu_tlb_fill (cs=0x5555578e0cb0, addr=18446741874686299840, size=<optimized out>, access_type=MMU_DATA_LOAD, mmu_idx=0, probe=<optimized out>, retaddr=0) at ../../target/i386/tcg/sysemu/excp_helper.c:606
19 0x0000555555ca0ee9 in tlb_fill (retaddr=0, mmu_idx=0, access_type=MMU_DATA_LOAD, size=<optimized out>, addr=18446741874686299840, cpu=0x7ffff4efd540) at ../../accel/tcg/cputlb.c:1315
20 mmu_lookup1 (cpu=cpu@entry=0x5555578e0cb0, data=data@entry=0x7ffff4efd540, mmu_idx=0, access_type=access_type@entry=MMU_DATA_LOAD, ra=ra@entry=0) at ../../accel/tcg/cputlb.c:1713
21 0x0000555555ca2c61 in mmu_lookup (cpu=cpu@entry=0x5555578e0cb0, addr=addr@entry=18446741874686299840, oi=oi@entry=32, ra=ra@entry=0, type=type@entry=MMU_DATA_LOAD, l=l@entry=0x7ffff4efd540) at ../../accel/tcg/cputlb.c:1803
22 0x0000555555ca3165 in do_ld4_mmu (cpu=cpu@entry=0x5555578e0cb0, addr=addr@entry=18446741874686299840, oi=oi@entry=32, ra=ra@entry=0, access_type=access_type@entry=MMU_DATA_LOAD) at ../../accel/tcg/cputlb.c:2416
23 0x0000555555ca5ef9 in cpu_ldl_mmu (ra=0, oi=32, addr=18446741874686299840, env=0x5555578e3470) at ../../accel/tcg/ldst_common.c.inc:158
24 cpu_ldl_le_mmuidx_ra (env=env@entry=0x5555578e3470, addr=addr@entry=18446741874686299840, mmu_idx=<optimized out>, ra=ra@entry=0) at ../../accel/tcg/ldst_common.c.inc:294
25 0x0000555555bb6cdd in do_interrupt64 (is_hw=1, next_eip=18446744072399775809, error_code=0, is_int=0, intno=236, env=0x5555578e3470) at ../../target/i386/tcg/seg_helper.c:889
26 do_interrupt_all (cpu=cpu@entry=0x5555578e0cb0, intno=236, is_int=is_int@entry=0, error_code=error_code@entry=0, next_eip=next_eip@entry=0, is_hw=is_hw@entry=1) at ../../target/i386/tcg/seg_helper.c:1130
27 0x0000555555bb87da in do_interrupt_x86_hardirq (env=env@entry=0x5555578e3470, intno=<optimized out>, is_hw=is_hw@entry=1) at ../../target/i386/tcg/seg_helper.c:1162
28 0x0000555555b5039c in x86_cpu_exec_interrupt (cs=0x5555578e0cb0, interrupt_request=<optimized out>) at ../../target/i386/tcg/sysemu/seg_helper.c:197
29 0x0000555555c94480 in cpu_handle_interrupt (last_tb=<optimized out>, cpu=0x5555578e0cb0) at ../../accel/tcg/cpu-exec.c:844
Peter identified this as being due to the BQL already being held when the page table walker encounters MMIO memory and attempts to take the lock again. There are other examples of similar paths in TCG, so this follows the approach taken in those paths: simply check whether the lock is already held and, if it is, don't take it again.
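For reference, a guard with this behaviour can be built as below; this is a simplified sketch modelled on QEMU's BQL_LOCK_GUARD in include/qemu/main-loop.h, and the in-tree definition may differ in detail:

    typedef void BQLLockAuto;

    static inline BQLLockAuto *bql_auto_lock(const char *file, int line)
    {
        if (bql_locked()) {
            return NULL;    /* already held: lock nothing, unlock nothing */
        }
        bql_lock_impl(file, line);
        return (BQLLockAuto *)(uintptr_t)1;    /* any non-NULL token */
    }

    static inline void bql_auto_unlock(BQLLockAuto *l)
    {
        bql_unlock();
    }
    G_DEFINE_AUTOPTR_CLEANUP_FUNC(BQLLockAuto, bql_auto_unlock)

    #define BQL_LOCK_GUARD() \
        g_autoptr(BQLLockAuto) _bql_lock_auto = bql_auto_lock(__FILE__, __LINE__)

Since the g_autoptr cleanup runs only for a non-NULL value, the guard unlocks only if it was the one to take the lock.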
Reviewed-by: Peter Maydell Suggested-by: Peter Maydell Signed-off-by: Jonathan Cameron Message-Id: <20240219173153.12114-4-Jonathan.Cameron@huawei.com> [rth: Use BQL_LOCK_GUARD] Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 047cd2cc0a..6243bcb179 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2022,7 +2022,6 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, MemoryRegion *mr; hwaddr mr_offset; MemTxAttrs attrs; - uint64_t ret; tcg_debug_assert(size > 0 && size <= 8); @@ -2030,12 +2029,9 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); - ret = int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, - type, ra, mr, mr_offset); - bql_unlock(); - - return ret; + BQL_LOCK_GUARD(); + return int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, + type, ra, mr, mr_offset); } static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, @@ -2054,13 +2050,11 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); + BQL_LOCK_GUARD(); a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset); b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset + size - 8); - bql_unlock(); - return int128_make128(b, a); } @@ -2569,7 +2563,6 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, hwaddr mr_offset; MemoryRegion *mr; MemTxAttrs attrs; - uint64_t ret; tcg_debug_assert(size > 0 && size <= 8); @@ -2577,12 +2570,9 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); - ret = int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, - ra, mr, mr_offset); - bql_unlock(); - - return ret; + BQL_LOCK_GUARD(); + return int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, + ra, mr, mr_offset); } static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, @@ -2593,7 +2583,6 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, MemoryRegion *mr; hwaddr mr_offset; MemTxAttrs attrs; - uint64_t ret; tcg_debug_assert(size > 8 && size <= 16); @@ -2601,14 +2590,11 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); + BQL_LOCK_GUARD(); int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8, mmu_idx, ra, mr, mr_offset); - ret = int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, - size - 8, mmu_idx, ra, mr, mr_offset + 8); - bql_unlock(); - - return ret; + return int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, + size - 8, mmu_idx, ra, mr, mr_offset + 8); } /* -- cgit v1.2.3 From a372d483f1ea6c1a37ce7ba5c44978bd53ca7938 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:36 +1100 Subject: accel/tcg: Remove qemu_host_page_size from page_protect/page_unprotect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use qemu_real_host_page_size instead. 
Except for the final mprotect within page_protect, we already handled host < target page size. Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-2-richard.henderson@linaro.org> --- accel/tcg/user-exec.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 68b252cb8e..69b7429e31 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -651,16 +651,17 @@ void page_protect(tb_page_addr_t address) { PageFlagsNode *p; target_ulong start, last; + int host_page_size = qemu_real_host_page_size(); int prot; assert_memory_lock(); - if (qemu_host_page_size <= TARGET_PAGE_SIZE) { + if (host_page_size <= TARGET_PAGE_SIZE) { start = address & TARGET_PAGE_MASK; last = start + TARGET_PAGE_SIZE - 1; } else { - start = address & qemu_host_page_mask; - last = start + qemu_host_page_size - 1; + start = address & -host_page_size; + last = start + host_page_size - 1; } p = pageflags_find(start, last); @@ -671,7 +672,7 @@ void page_protect(tb_page_addr_t address) if (unlikely(p->itree.last < last)) { /* More than one protection region covers the one host page. */ - assert(TARGET_PAGE_SIZE < qemu_host_page_size); + assert(TARGET_PAGE_SIZE < host_page_size); while ((p = pageflags_next(p, start, last)) != NULL) { prot |= p->flags; } @@ -679,7 +680,7 @@ void page_protect(tb_page_addr_t address) if (prot & PAGE_WRITE) { pageflags_set_clear(start, last, 0, PAGE_WRITE); - mprotect(g2h_untagged(start), qemu_host_page_size, + mprotect(g2h_untagged(start), last - start + 1, prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE); } } @@ -725,18 +726,19 @@ int page_unprotect(target_ulong address, uintptr_t pc) } #endif } else { + int host_page_size = qemu_real_host_page_size(); target_ulong start, len, i; int prot; - if (qemu_host_page_size <= TARGET_PAGE_SIZE) { + if (host_page_size <= TARGET_PAGE_SIZE) { start = address & TARGET_PAGE_MASK; len = TARGET_PAGE_SIZE; prot = p->flags | PAGE_WRITE; pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc); } else { - start = address & qemu_host_page_mask; - len = qemu_host_page_size; + start = address & -host_page_size; + len = host_page_size; prot = 0; for (i = 0; i < len; i += TARGET_PAGE_SIZE) { -- cgit v1.2.3 From f11c05c3b90a8b873ddecc1fc037858445aa9b9c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:37 +1100 Subject: linux-user: Adjust SVr4 NULL page mapping Use TARGET_PAGE_SIZE and MAP_FIXED_NOREPLACE. We really should be attending to this earlier during probe_guest_base, as well as better detection and emulation of various Linux personalities. Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-3-richard.henderson@linaro.org> --- linux-user/elfload.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 38bfc9ac67..a51518f817 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3912,8 +3912,9 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. 
*/ - target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); } #ifdef TARGET_MIPS info->interp_fp_abi = interp_info.fp_abi; -- cgit v1.2.3 From ae6bffe05efd73c284170a7709bce641c27ca9fb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:38 +1100 Subject: linux-user: Remove qemu_host_page_{size, mask} in probe_guest_base The host SHMLBA is by definition a multiple of the host page size. Thus the remaining component of qemu_host_page_size is the target page size. Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-4-richard.henderson@linaro.org> --- linux-user/elfload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index a51518f817..561c11ff37 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2893,7 +2893,7 @@ static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr, /* Add any HI_COMMPAGE not covered by reserved_va. */ if (reserved_va < HI_COMMPAGE) { - ga->bounds[n][0] = HI_COMMPAGE & qemu_host_page_mask; + ga->bounds[n][0] = HI_COMMPAGE & qemu_real_host_page_mask(); ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1; n++; } @@ -3075,7 +3075,7 @@ void probe_guest_base(const char *image_name, abi_ulong guest_loaddr, abi_ulong guest_hiaddr) { /* In order to use host shmat, we must be able to honor SHMLBA. */ - uintptr_t align = MAX(SHMLBA, qemu_host_page_size); + uintptr_t align = MAX(SHMLBA, TARGET_PAGE_SIZE); /* Sanity check the guest binary. */ if (reserved_va) { -- cgit v1.2.3 From d17b684c10e1707d535b9dfa685df610834ec210 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:39 +1100 Subject: linux-user: Remove qemu_host_page_size from create_elf_tables AT_PAGESZ is supposed to advertise the guest page size. The random adjustment made here using qemu_host_page_size does not match anything else within linux-user. The idea here is good, but should be done more systemically via adjustment to TARGET_PAGE_SIZE. Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-5-richard.henderson@linaro.org> --- linux-user/elfload.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 561c11ff37..0f135f6b6d 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2679,13 +2679,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff)); NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); - if ((info->alignment & ~qemu_host_page_mask) != 0) { - /* Target doesn't support host page size alignment */ - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); - } else { - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(MAX(TARGET_PAGE_SIZE, - qemu_host_page_size))); - } + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info ? 
interp_info->load_addr : 0)); NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); NEW_AUX_ENT(AT_ENTRY, info->entry); -- cgit v1.2.3 From d1fc62303e1447c516bf6c1fa1c6331715a88180 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:40 +1100 Subject: linux-user/hppa: Simplify init_guest_commpage If reserved_va, then we have already reserved the entire guest virtual address space; no need to remap page. If !reserved_va, then use MAP_FIXED_NOREPLACE. Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-6-richard.henderson@linaro.org> --- linux-user/elfload.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 0f135f6b6d..53b61aac77 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1970,16 +1970,20 @@ static inline void init_thread(struct target_pt_regs *regs, static bool init_guest_commpage(void) { - void *want = g2h_untagged(LO_COMMPAGE); - void *addr = mmap(want, qemu_host_page_size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); - - if (addr == MAP_FAILED) { - perror("Allocating guest commpage"); - exit(EXIT_FAILURE); - } - if (addr != want) { - return false; + /* If reserved_va, then we have already mapped 0 page on the host. */ + if (!reserved_va) { + void *want, *addr; + + want = g2h_untagged(LO_COMMPAGE); + addr = mmap(want, TARGET_PAGE_SIZE, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); + if (addr == MAP_FAILED) { + perror("Allocating guest commpage"); + exit(EXIT_FAILURE); + } + if (addr != want) { + return false; + } } /* -- cgit v1.2.3 From 51f8c9b8720f30ba1c4eb1d977ae7bce7884555e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:41 +1100 Subject: linux-user/nios2: Remove qemu_host_page_size from init_guest_commpage Use qemu_real_host_page_size. If !reserved_va, use MAP_FIXED_NOREPLACE. Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-7-richard.henderson@linaro.org> --- linux-user/elfload.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 53b61aac77..479acb4e7b 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1532,10 +1532,14 @@ static bool init_guest_commpage(void) 0x3a, 0x68, 0x3b, 0x00, /* trap 0 */ }; - void *want = g2h_untagged(LO_COMMPAGE & -qemu_host_page_size); - void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); - + int host_page_size = qemu_real_host_page_size(); + void *want, *addr; + + want = g2h_untagged(LO_COMMPAGE & -host_page_size); + addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + (reserved_va ? 
MAP_FIXED : MAP_FIXED_NOREPLACE), + -1, 0); if (addr == MAP_FAILED) { perror("Allocating guest commpage"); exit(EXIT_FAILURE); @@ -1544,9 +1548,9 @@ static bool init_guest_commpage(void) return false; } - memcpy(addr, kuser_page, sizeof(kuser_page)); + memcpy(g2h_untagged(LO_COMMPAGE), kuser_page, sizeof(kuser_page)); - if (mprotect(addr, qemu_host_page_size, PROT_READ)) { + if (mprotect(addr, host_page_size, PROT_READ)) { perror("Protecting guest commpage"); exit(EXIT_FAILURE); } -- cgit v1.2.3 From 2cd71515c4bef41c6348d87dfd3a709d3905ba77 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:42 +1100 Subject: linux-user/arm: Remove qemu_host_page_size from init_guest_commpage Use qemu_real_host_page_size. If the commpage is not within reserved_va, use MAP_FIXED_NOREPLACE. Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-8-richard.henderson@linaro.org> --- linux-user/elfload.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 479acb4e7b..d2919a411d 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -460,6 +460,7 @@ enum { static bool init_guest_commpage(void) { ARMCPU *cpu = ARM_CPU(thread_cpu); + int host_page_size = qemu_real_host_page_size(); abi_ptr commpage; void *want; void *addr; @@ -472,10 +473,12 @@ static bool init_guest_commpage(void) return true; } - commpage = HI_COMMPAGE & -qemu_host_page_size; + commpage = HI_COMMPAGE & -host_page_size; want = g2h_untagged(commpage); - addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + (commpage < reserved_va ? MAP_FIXED : MAP_FIXED_NOREPLACE), + -1, 0); if (addr == MAP_FAILED) { perror("Allocating guest commpage"); @@ -488,12 +491,12 @@ static bool init_guest_commpage(void) /* Set kernel helper versions; rest of page is 0. */ __put_user(5, (uint32_t *)g2h_untagged(0xffff0ffcu)); - if (mprotect(addr, qemu_host_page_size, PROT_READ)) { + if (mprotect(addr, host_page_size, PROT_READ)) { perror("Protecting guest commpage"); exit(EXIT_FAILURE); } - page_set_flags(commpage, commpage | ~qemu_host_page_mask, + page_set_flags(commpage, commpage | (host_page_size - 1), PAGE_READ | PAGE_EXEC | PAGE_VALID); return true; } -- cgit v1.2.3 From 2c796d230e0bb82134d034020026bc8b711adfe0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Feb 2024 08:55:57 -1000 Subject: linux-user: Remove qemu_host_page_size from elf_core_dump Used only once in wmr_page_unprotect_regions. 
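The surviving use selects the stride for the unprotect walk. With illustrative page sizes (assumed values, not from the patch):

    /* alpha guest (8K target pages) on a 4K x86-64 host: step == 8K;
     * any guest on a 16K Apple Silicon host: step == 16K. */
    size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size());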
Signed-off-by: Richard Henderson --- linux-user/elfload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index d2919a411d..cc2013c7b4 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4292,7 +4292,7 @@ static int wmr_page_unprotect_regions(void *opaque, target_ulong start, target_ulong end, unsigned long flags) { if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) { - size_t step = MAX(TARGET_PAGE_SIZE, qemu_host_page_size); + size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size()); while (1) { page_unprotect(start, 0); -- cgit v1.2.3 From 621ac47d3778b8e883affd04fe80e76e3c8b64e1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:43 +1100 Subject: linux-user: Remove qemu_host_page_{size, mask} from mmap.c Use qemu_real_host_page_size instead. Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-9-richard.henderson@linaro.org> --- linux-user/mmap.c | 66 +++++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 96c9433e27..4d3c8717b9 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -165,6 +165,7 @@ static int target_to_host_prot(int prot) /* NOTE: all the constants are the HOST ones, but addresses are target. */ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) { + int host_page_size = qemu_real_host_page_size(); abi_ulong starts[3]; abi_ulong lens[3]; int prots[3]; @@ -189,13 +190,13 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) } last = start + len - 1; - host_start = start & qemu_host_page_mask; + host_start = start & -host_page_size; host_last = HOST_PAGE_ALIGN(last) - 1; nranges = 0; mmap_lock(); - if (host_last - host_start < qemu_host_page_size) { + if (host_last - host_start < host_page_size) { /* Single host page contains all guest pages: sum the prot. */ prot1 = target_prot; for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) { @@ -205,7 +206,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) prot1 |= page_get_flags(a + 1); } starts[nranges] = host_start; - lens[nranges] = qemu_host_page_size; + lens[nranges] = host_page_size; prots[nranges] = prot1; nranges++; } else { @@ -218,10 +219,10 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) /* If the resulting sum differs, create a new range. */ if (prot1 != target_prot) { starts[nranges] = host_start; - lens[nranges] = qemu_host_page_size; + lens[nranges] = host_page_size; prots[nranges] = prot1; nranges++; - host_start += qemu_host_page_size; + host_start += host_page_size; } } @@ -233,9 +234,9 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) } /* If the resulting sum differs, create a new range. 
*/ if (prot1 != target_prot) { - host_last -= qemu_host_page_size; + host_last -= host_page_size; starts[nranges] = host_last + 1; - lens[nranges] = qemu_host_page_size; + lens[nranges] = host_page_size; prots[nranges] = prot1; nranges++; } @@ -270,6 +271,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, int prot, int flags, int fd, off_t offset) { + int host_page_size = qemu_real_host_page_size(); abi_ulong real_last; void *host_start; int prot_old, prot_new; @@ -286,7 +288,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, return false; } - real_last = real_start + qemu_host_page_size - 1; + real_last = real_start + host_page_size - 1; host_start = g2h_untagged(real_start); /* Get the protection of the target pages outside the mapping. */ @@ -304,12 +306,12 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, * outside of the fragment we need to map. Allocate a new host * page to cover, discarding whatever else may have been present. */ - void *p = mmap(host_start, qemu_host_page_size, + void *p = mmap(host_start, host_page_size, target_to_host_prot(prot), flags | MAP_ANONYMOUS, -1, 0); if (p != host_start) { if (p != MAP_FAILED) { - munmap(p, qemu_host_page_size); + munmap(p, host_page_size); errno = EEXIST; } return false; @@ -324,7 +326,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, /* Adjust protection to be able to write. */ if (!(host_prot_old & PROT_WRITE)) { host_prot_old |= PROT_WRITE; - mprotect(host_start, qemu_host_page_size, host_prot_old); + mprotect(host_start, host_page_size, host_prot_old); } /* Read or zero the new guest pages. */ @@ -338,7 +340,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, /* Put final protection */ if (host_prot_new != host_prot_old) { - mprotect(host_start, qemu_host_page_size, host_prot_new); + mprotect(host_start, host_page_size, host_prot_new); } return true; } @@ -373,17 +375,18 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size, */ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) { + int host_page_size = qemu_real_host_page_size(); void *ptr, *prev; abi_ulong addr; int wrapped, repeat; - align = MAX(align, qemu_host_page_size); + align = MAX(align, host_page_size); /* If 'start' == 0, then a default start address is used. */ if (start == 0) { start = mmap_next_start; } else { - start &= qemu_host_page_mask; + start &= -host_page_size; } start = ROUND_UP(start, align); @@ -492,6 +495,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, int flags, int fd, off_t offset) { + int host_page_size = qemu_real_host_page_size(); abi_ulong ret, last, real_start, real_last, retaddr, host_len; abi_ulong passthrough_start = -1, passthrough_last = 0; int page_flags; @@ -537,8 +541,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, } } - real_start = start & qemu_host_page_mask; - host_offset = offset & qemu_host_page_mask; + real_start = start & -host_page_size; + host_offset = offset & -host_page_size; /* * If the user is asking for the kernel to find a location, do that @@ -567,8 +571,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, * may need to truncate file maps at EOF and add extra anonymous pages * up to the targets page boundary. 
*/ - if ((qemu_real_host_page_size() < qemu_host_page_size) && - !(flags & MAP_ANONYMOUS)) { + if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) { struct stat sb; if (fstat(fd, &sb) == -1) { @@ -595,11 +598,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, host_len = HOST_PAGE_ALIGN(host_len); host_prot = target_to_host_prot(target_prot); - /* - * Note: we prefer to control the mapping address. It is - * especially important if qemu_host_page_size > - * qemu_real_host_page_size. - */ + /* Note: we prefer to control the mapping address. */ p = mmap(g2h_untagged(start), host_len, host_prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { @@ -665,7 +664,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, * aligned, so we read it */ if (!(flags & MAP_ANONYMOUS) && - (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) { + (offset & (host_page_size - 1)) != (start & (host_page_size - 1))) { /* * msync() won't work here, so we return an error if write is * possible while it is a shared mapping @@ -694,7 +693,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, /* handle the start of the mapping */ if (start > real_start) { - if (real_last == real_start + qemu_host_page_size - 1) { + if (real_last == real_start + host_page_size - 1) { /* one single host page */ if (!mmap_frag(real_start, start, last, target_prot, flags, fd, offset)) { @@ -703,21 +702,21 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, goto the_end1; } if (!mmap_frag(real_start, start, - real_start + qemu_host_page_size - 1, + real_start + host_page_size - 1, target_prot, flags, fd, offset)) { goto fail; } - real_start += qemu_host_page_size; + real_start += host_page_size; } /* handle the end of the mapping */ if (last < real_last) { - abi_ulong real_page = real_last - qemu_host_page_size + 1; + abi_ulong real_page = real_last - host_page_size + 1; if (!mmap_frag(real_page, real_page, last, target_prot, flags, fd, offset + real_page - start)) { goto fail; } - real_last -= qemu_host_page_size; + real_last -= host_page_size; } /* map the middle (easier) */ @@ -784,6 +783,7 @@ fail: static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) { + int host_page_size = qemu_real_host_page_size(); abi_ulong real_start; abi_ulong real_last; abi_ulong real_len; @@ -793,7 +793,7 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) int prot; last = start + len - 1; - real_start = start & qemu_host_page_mask; + real_start = start & -host_page_size; real_last = HOST_PAGE_ALIGN(last) - 1; /* @@ -802,7 +802,7 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) * The single page special case is required for the last page, * lest real_start overflow to zero. 
*/ - if (real_last - real_start < qemu_host_page_size) { + if (real_last - real_start < host_page_size) { prot = 0; for (a = real_start; a < start; a += TARGET_PAGE_SIZE) { prot |= page_get_flags(a); @@ -818,14 +818,14 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) prot |= page_get_flags(a); } if (prot != 0) { - real_start += qemu_host_page_size; + real_start += host_page_size; } for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) { prot |= page_get_flags(a + 1); } if (prot != 0) { - real_last -= qemu_host_page_size; + real_last -= host_page_size; } if (real_last < real_start) { -- cgit v1.2.3 From e56922abf0754df87fff949cc1f8f23956552aba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:44 +1100 Subject: linux-user: Remove REAL_HOST_PAGE_ALIGN from mmap.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have qemu_real_host_page_size() in a local variable. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-10-richard.henderson@linaro.org> --- linux-user/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 4d3c8717b9..53e5486cc8 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -585,7 +585,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, * the hosts real pagesize. Additional anonymous maps * will be created beyond EOF. */ - len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset); + len = ROUND_UP(sb.st_size - offset, host_page_size); } } -- cgit v1.2.3 From b36b2b1d3d41c02cc3a112df95478a4bd46d5f9a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:45 +1100 Subject: linux-user: Remove HOST_PAGE_ALIGN from mmap.c This removes a hidden use of qemu_host_page_size, using instead the existing host_page_size local within each function. 
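The transformation is mechanical; a before/after sketch of the pattern, taken from the target_mprotect hunk in the diff below:

    /* Before: hidden dependency on the qemu_host_page_size global. */
    host_last = HOST_PAGE_ALIGN(last) - 1;

    /* After: an explicit local with identical arithmetic. */
    int host_page_size = qemu_real_host_page_size();
    host_last = ROUND_UP(last, host_page_size) - 1;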
Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-11-richard.henderson@linaro.org> --- linux-user/mmap.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 53e5486cc8..d11f758d07 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -191,7 +191,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) last = start + len - 1; host_start = start & -host_page_size; - host_last = HOST_PAGE_ALIGN(last) - 1; + host_last = ROUND_UP(last, host_page_size) - 1; nranges = 0; mmap_lock(); @@ -389,8 +389,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) start &= -host_page_size; } start = ROUND_UP(start, align); - - size = HOST_PAGE_ALIGN(size); + size = ROUND_UP(size, host_page_size); if (reserved_va) { return mmap_find_vma_reserved(start, size, align); @@ -550,7 +549,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, */ if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { host_len = len + offset - host_offset; - host_len = HOST_PAGE_ALIGN(host_len); + host_len = ROUND_UP(host_len, host_page_size); start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE); if (start == (abi_ulong)-1) { errno = ENOMEM; @@ -595,7 +594,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, void *p; host_len = len + offset - host_offset; - host_len = HOST_PAGE_ALIGN(host_len); + host_len = ROUND_UP(host_len, host_page_size); host_prot = target_to_host_prot(target_prot); /* Note: we prefer to control the mapping address. */ @@ -625,7 +624,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, goto fail; } last = start + len - 1; - real_last = HOST_PAGE_ALIGN(last) - 1; + real_last = ROUND_UP(last, host_page_size) - 1; /* * Test if requested memory area fits target address space @@ -794,7 +793,7 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) last = start + len - 1; real_start = start & -host_page_size; - real_last = HOST_PAGE_ALIGN(last) - 1; + real_last = ROUND_UP(last, host_page_size) - 1; /* * If guest pages remain on the first or last host pages, -- cgit v1.2.3 From 5d2203691ed5d1657616c402d30b3143382fc25d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:46 +1100 Subject: migration: Remove qemu_host_page_size Replace with the maximum of the real host page size and the target page size. This is an exact replacement. Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-12-richard.henderson@linaro.org> --- migration/ram.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 45a00b45ed..83fd780fc2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2934,7 +2934,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) { RAMState **rsp = opaque; RAMBlock *block; - int ret; + int ret, max_hg_page_size; if (compress_threads_save_setup()) { return -1; @@ -2949,6 +2949,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f; + /* + * ??? Mirrors the previous value of qemu_host_page_size, + * but is this really what was intended for the migration? 
+ */ + max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE); + WITH_RCU_READ_LOCK_GUARD() { qemu_put_be64(f, ram_bytes_total_with_ignored() | RAM_SAVE_FLAG_MEM_SIZE); @@ -2957,8 +2963,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); - if (migrate_postcopy_ram() && block->page_size != - qemu_host_page_size) { + if (migrate_postcopy_ram() && + block->page_size != max_hg_page_size) { qemu_put_be64(f, block->page_size); } if (migrate_ignore_shared()) { @@ -3791,6 +3797,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) int ret = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = migration_incoming_postcopy_advised(); + int max_hg_page_size; assert(block); @@ -3808,9 +3815,16 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) return ret; } } + + /* + * ??? Mirrors the previous value of qemu_host_page_size, + * but is this really what was intended for the migration? + */ + max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE); + /* For postcopy we need to check hugepage sizes match */ if (postcopy_advised && migrate_postcopy_ram() && - block->page_size != qemu_host_page_size) { + block->page_size != max_hg_page_size) { uint64_t remote_page_size = qemu_get_be64(f); if (remote_page_size != block->page_size) { error_report("Mismatched RAM page size %s " -- cgit v1.2.3 From b61af9b0d192c23bf5bcf01f206d87b8518f216e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:47 +1100 Subject: hw/tpm: Remove HOST_PAGE_ALIGN from tpm_ppi_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes a hidden use of qemu_host_page_size, hoisting two uses of qemu_real_host_page_size to a local variable. Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller --- hw/tpm/tpm_ppi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c index 7f74e26ec6..f27ed6c35e 100644 --- a/hw/tpm/tpm_ppi.c +++ b/hw/tpm/tpm_ppi.c @@ -47,8 +47,10 @@ void tpm_ppi_reset(TPMPPI *tpmppi) void tpm_ppi_init(TPMPPI *tpmppi, MemoryRegion *m, hwaddr addr, Object *obj) { - tpmppi->buf = qemu_memalign(qemu_real_host_page_size(), - HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); + size_t host_page_size = qemu_real_host_page_size(); + + tpmppi->buf = qemu_memalign(host_page_size, + ROUND_UP(TPM_PPI_ADDR_SIZE, host_page_size)); memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", TPM_PPI_ADDR_SIZE, tpmppi->buf); vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); -- cgit v1.2.3 From 80c3aeef7f31e2bbac621521e72434e87fd0e64f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:48 +1100 Subject: softmmu/physmem: Remove qemu_host_page_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use qemu_real_host_page_size() instead. 
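The single use here gates the madvise path of ram_block_discard_range. The assignment below is from the diff that follows; the rationale in the comment is an assumption about the intent:

    /*
     * madvise() works at host-page granularity, so discarding with
     * MADV_DONTNEED/MADV_REMOVE is only useful when the RAMBlock is
     * backed by ordinary host pages rather than hugepages.
     */
    need_madvise = (rb->page_size == qemu_real_host_page_size());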
Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-14-richard.henderson@linaro.org> --- system/physmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system/physmem.c b/system/physmem.c index e3ebc19eef..3b08e064ff 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -3511,7 +3511,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) * fallocate works on hugepages and shmem * shared anonymous memory requires madvise REMOVE */ - need_madvise = (rb->page_size == qemu_host_page_size); + need_madvise = (rb->page_size == qemu_real_host_page_size()); need_fallocate = rb->fd != -1; if (need_fallocate) { /* For a file, this causes the area of the file to be zero'd -- cgit v1.2.3 From 9260bd40130e934fce5b5716977307129171fa7e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:49 +1100 Subject: softmmu/physmem: Remove HOST_PAGE_ALIGN Align allocation sizes to the maximum of host and target page sizes. Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-15-richard.henderson@linaro.org> --- system/physmem.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/system/physmem.c b/system/physmem.c index 3b08e064ff..3adda08ebf 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1680,7 +1680,8 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) assert(block); - newsize = HOST_PAGE_ALIGN(newsize); + newsize = TARGET_PAGE_ALIGN(newsize); + newsize = REAL_HOST_PAGE_ALIGN(newsize); if (block->used_length == newsize) { /* @@ -1916,7 +1917,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, return NULL; } - size = HOST_PAGE_ALIGN(size); + size = TARGET_PAGE_ALIGN(size); + size = REAL_HOST_PAGE_ALIGN(size); + file_size = get_file_size(fd); if (file_size > offset && file_size < (offset + size)) { error_setg(errp, "backing store size 0x%" PRIx64 @@ -2014,13 +2017,17 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, { RAMBlock *new_block; Error *local_err = NULL; + int align; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | RAM_NORESERVE)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); - size = HOST_PAGE_ALIGN(size); - max_size = HOST_PAGE_ALIGN(max_size); + align = qemu_real_host_page_size(); + align = MAX(align, TARGET_PAGE_SIZE); + size = ROUND_UP(size, align); + max_size = ROUND_UP(max_size, align); + new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->resized = resized; -- cgit v1.2.3 From 13c13397556afccc05f0f604cbc7d74df0980335 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:50 +1100 Subject: linux-user: Remove qemu_host_page_size from main MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use qemu_real_host_page_size() instead. 
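Concretely, reserved_va is an inclusive last-byte bound, hence the "+ 1" in the check below; a worked example with assumed values:

    int host_page_size = qemu_real_host_page_size();    /* e.g. 4096 */

    /* "-R 4G" yields reserved_va == 0xffffffff, and
     * (0xffffffff + 1) % 4096 == 0, so the reservation is accepted. */
    if ((reserved_va + 1) % host_page_size) {
        /* rejected: reservation is not a multiple of the host page size */
    }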
Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-16-richard.henderson@linaro.org> --- linux-user/main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index 74b2fbb393..e540acb84a 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -781,7 +781,7 @@ int main(int argc, char **argv, char **envp) } cpu_type = parse_cpu_option(cpu_model); - /* init tcg before creating CPUs and to get qemu_host_page_size */ + /* init tcg before creating CPUs */ { AccelState *accel = current_accel(); AccelClass *ac = ACCEL_GET_CLASS(accel); @@ -804,8 +804,10 @@ int main(int argc, char **argv, char **envp) */ max_reserved_va = MAX_RESERVED_VA(cpu); if (reserved_va != 0) { - if ((reserved_va + 1) % qemu_host_page_size) { - char *s = size_to_str(qemu_host_page_size); + int host_page_size = qemu_real_host_page_size(); + + if ((reserved_va + 1) % host_page_size) { + char *s = size_to_str(host_page_size); fprintf(stderr, "Reserved virtual address not aligned mod %s\n", s); g_free(s); exit(EXIT_FAILURE); @@ -902,7 +904,7 @@ int main(int argc, char **argv, char **envp) * If we're in a chroot with no /proc, fall back to 1 page. */ if (mmap_min_addr == 0) { - mmap_min_addr = qemu_host_page_size; + mmap_min_addr = qemu_real_host_page_size(); qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx (fallback)\n", mmap_min_addr); -- cgit v1.2.3 From d558c395a9bc3f15e010dc0fb786af9a400bccac Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:51 +1100 Subject: linux-user: Split out target_mmap__locked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All "goto fail" may be transformed to "return -1". Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-17-richard.henderson@linaro.org> --- linux-user/mmap.c | 62 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index d11f758d07..b4c3cc65aa 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -490,9 +490,9 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) } } -/* NOTE: all the constants are the HOST ones */ -abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, - int flags, int fd, off_t offset) +static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, + int target_prot, int flags, + int fd, off_t offset) { int host_page_size = qemu_real_host_page_size(); abi_ulong ret, last, real_start, real_last, retaddr, host_len; @@ -500,30 +500,27 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, int page_flags; off_t host_offset; - mmap_lock(); - trace_target_mmap(start, len, target_prot, flags, fd, offset); - if (!len) { errno = EINVAL; - goto fail; + return -1; } page_flags = validate_prot_to_pageflags(target_prot); if (!page_flags) { errno = EINVAL; - goto fail; + return -1; } /* Also check for overflows... 
*/ len = TARGET_PAGE_ALIGN(len); if (!len) { errno = ENOMEM; - goto fail; + return -1; } if (offset & ~TARGET_PAGE_MASK) { errno = EINVAL; - goto fail; + return -1; } /* @@ -553,7 +550,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE); if (start == (abi_ulong)-1) { errno = ENOMEM; - goto fail; + return -1; } } @@ -574,7 +571,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, struct stat sb; if (fstat(fd, &sb) == -1) { - goto fail; + return -1; } /* Are we trying to create a map beyond EOF?. */ @@ -601,7 +598,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, p = mmap(g2h_untagged(start), host_len, host_prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { - goto fail; + return -1; } /* update start so that it points to the file position at 'offset' */ host_start = (uintptr_t)p; @@ -610,7 +607,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, flags | MAP_FIXED, fd, host_offset); if (p == MAP_FAILED) { munmap(g2h_untagged(start), host_len); - goto fail; + return -1; } host_start += offset - host_offset; } @@ -621,7 +618,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, } else { if (start & ~TARGET_PAGE_MASK) { errno = EINVAL; - goto fail; + return -1; } last = start + len - 1; real_last = ROUND_UP(last, host_page_size) - 1; @@ -633,14 +630,14 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, */ if (last < start || !guest_range_valid_untagged(start, len)) { errno = ENOMEM; - goto fail; + return -1; } if (flags & MAP_FIXED_NOREPLACE) { /* Validate that the chosen range is empty. */ if (!page_check_range_empty(start, last)) { errno = EEXIST; - goto fail; + return -1; } /* @@ -671,17 +668,17 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, if ((flags & MAP_TYPE) == MAP_SHARED && (target_prot & PROT_WRITE)) { errno = EINVAL; - goto fail; + return -1; } retaddr = target_mmap(start, len, target_prot | PROT_WRITE, (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (retaddr == -1) { - goto fail; + return -1; } if (pread(fd, g2h_untagged(start), len, offset) == -1) { - goto fail; + return -1; } if (!(target_prot & PROT_WRITE)) { ret = target_mprotect(start, len, target_prot); @@ -696,14 +693,14 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, /* one single host page */ if (!mmap_frag(real_start, start, last, target_prot, flags, fd, offset)) { - goto fail; + return -1; } goto the_end1; } if (!mmap_frag(real_start, start, real_start + host_page_size - 1, target_prot, flags, fd, offset)) { - goto fail; + return -1; } real_start += host_page_size; } @@ -713,7 +710,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, if (!mmap_frag(real_page, real_page, last, target_prot, flags, fd, offset + real_page - start)) { - goto fail; + return -1; } real_last -= host_page_size; } @@ -739,7 +736,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, munmap(p, len1); errno = EEXIST; } - goto fail; + return -1; } passthrough_start = real_start; passthrough_last = real_last; @@ -773,11 +770,22 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, qemu_log_unlock(f); } } - mmap_unlock(); return start; -fail: +} + +/* NOTE: all the constants are the HOST ones */ +abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, + int flags, int fd, off_t 
offset) +{ + abi_long ret; + + trace_target_mmap(start, len, target_prot, flags, fd, offset); + mmap_lock(); + + ret = target_mmap__locked(start, len, target_prot, flags, fd, offset); + mmap_unlock(); - return -1; + + return ret; } static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) -- cgit v1.2.3 From e8cec51be073bb1abac3850a4c1a90f96a416af2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:52 +1100 Subject: linux-user: Move some mmap checks outside the lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Basic validation of operands does not require the lock. Hoist these checks from target_mmap__locked back into target_mmap. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-18-richard.henderson@linaro.org> --- linux-user/mmap.c | 107 +++++++++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index b4c3cc65aa..fbaea832c5 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -491,52 +491,14 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) } static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, - int target_prot, int flags, + int target_prot, int flags, int page_flags, int fd, off_t offset) { int host_page_size = qemu_real_host_page_size(); abi_ulong ret, last, real_start, real_last, retaddr, host_len; abi_ulong passthrough_start = -1, passthrough_last = 0; - int page_flags; off_t host_offset; - if (!len) { - errno = EINVAL; - return -1; - } - - page_flags = validate_prot_to_pageflags(target_prot); - if (!page_flags) { - errno = EINVAL; - return -1; - } - - /* Also check for overflows... */ - len = TARGET_PAGE_ALIGN(len); - if (!len) { - errno = ENOMEM; - return -1; - } - - if (offset & ~TARGET_PAGE_MASK) { - errno = EINVAL; - return -1; - } - - /* - * If we're mapping shared memory, ensure we generate code for parallel - * execution and flush old translations. This will work up to the level - * supported by the host -- anything that requires EXCP_ATOMIC will not - * be atomic with respect to an external process. - */ - if (flags & MAP_SHARED) { - CPUState *cpu = thread_cpu; - if (!(cpu->tcg_cflags & CF_PARALLEL)) { - cpu->tcg_cflags |= CF_PARALLEL; - tb_flush(cpu); - } - } - real_start = start & -host_page_size; host_offset = offset & -host_page_size; @@ -616,23 +578,9 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, passthrough_start = start; passthrough_last = last; } else { - if (start & ~TARGET_PAGE_MASK) { - errno = EINVAL; - return -1; - } last = start + len - 1; real_last = ROUND_UP(last, host_page_size) - 1; - /* - * Test if requested memory area fits target address space - * It can fail only on 64-bit host with 32-bit target. - * On any other target/host host mmap() handles this error correctly. - */ - if (last < start || !guest_range_valid_untagged(start, len)) { - errno = ENOMEM; - return -1; - } - if (flags & MAP_FIXED_NOREPLACE) { /* Validate that the chosen range is empty.
*/ if (!page_check_range_empty(start, last)) { @@ -778,13 +726,64 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, int flags, int fd, off_t offset) { abi_long ret; + int page_flags; trace_target_mmap(start, len, target_prot, flags, fd, offset); + + if (!len) { + errno = EINVAL; + return -1; + } + + page_flags = validate_prot_to_pageflags(target_prot); + if (!page_flags) { + errno = EINVAL; + return -1; + } + + /* Also check for overflows... */ + len = TARGET_PAGE_ALIGN(len); + if (!len || len != (size_t)len) { + errno = ENOMEM; + return -1; + } + + if (offset & ~TARGET_PAGE_MASK) { + errno = EINVAL; + return -1; + } + if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) { + if (start & ~TARGET_PAGE_MASK) { + errno = EINVAL; + return -1; + } + if (!guest_range_valid_untagged(start, len)) { + errno = ENOMEM; + return -1; + } + } + mmap_lock(); - ret = target_mmap__locked(start, len, target_prot, flags, fd, offset); + ret = target_mmap__locked(start, len, target_prot, flags, + page_flags, fd, offset); mmap_unlock(); + + /* + * If we're mapping shared memory, ensure we generate code for parallel + * execution and flush old translations. This will work up to the level + * supported by the host -- anything that requires EXCP_ATOMIC will not + * be atomic with respect to an external process. + */ + if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) { + CPUState *cpu = thread_cpu; + if (!(cpu->tcg_cflags & CF_PARALLEL)) { + cpu->tcg_cflags |= CF_PARALLEL; + tb_flush(cpu); + } + } + return ret; } -- cgit v1.2.3 From f0a362c476d5e866d91bdca8cad108d7f8e3897c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:53 +1100 Subject: linux-user: Fix sub-host-page mmap We cannot skip over the_end1 to the_end, because we fail to record the validity of the guest page with the interval tree. Remove "the_end" and rename "the_end1" to "the_end". Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-19-richard.henderson@linaro.org> --- linux-user/mmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index fbaea832c5..48fcdd4a32 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -643,7 +643,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, target_prot, flags, fd, offset)) { return -1; } - goto the_end1; + goto the_end; } if (!mmap_frag(real_start, start, real_start + host_page_size - 1, @@ -690,7 +690,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, passthrough_last = real_last; } } - the_end1: + the_end: if (flags & MAP_ANONYMOUS) { page_flags |= PAGE_ANON; } @@ -708,7 +708,6 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, } } shm_region_rm_complete(start, last); - the_end: trace_target_mmap_complete(start); if (qemu_loglevel_mask(CPU_LOG_PAGE)) { FILE *f = qemu_log_trylock(); -- cgit v1.2.3 From 6ecc25570f2ef691def1c9d1583c4a3586e56f3a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:54 +1100 Subject: linux-user: Split out mmap_end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a subroutine instead of a goto within target_mmap__locked. 
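The flag bookkeeping that moves into mmap_end splits the range three ways around the host-passthrough subrange; with illustrative numbers (assumed, 4K target pages):

    /*
     * start = 0x10000, last = 0x13fff, passthrough = 0x11000..0x11fff:
     *   0x10000..0x10fff -> page_flags
     *   0x11000..0x11fff -> page_flags | PAGE_PASSTHROUGH
     *   0x12000..0x13fff -> page_flags
     * With an empty passthrough range (passthrough_start >
     * passthrough_last), the whole range gets plain page_flags.
     */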
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-20-richard.henderson@linaro.org> --- linux-user/mmap.c | 71 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 48fcdd4a32..cc983bedbd 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -490,6 +490,43 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) } } +/* + * Record a successful mmap within the user-exec interval tree. + */ +static abi_long mmap_end(abi_ulong start, abi_ulong last, + abi_ulong passthrough_start, + abi_ulong passthrough_last, + int flags, int page_flags) +{ + if (flags & MAP_ANONYMOUS) { + page_flags |= PAGE_ANON; + } + page_flags |= PAGE_RESET; + if (passthrough_start > passthrough_last) { + page_set_flags(start, last, page_flags); + } else { + if (start < passthrough_start) { + page_set_flags(start, passthrough_start - 1, page_flags); + } + page_set_flags(passthrough_start, passthrough_last, + page_flags | PAGE_PASSTHROUGH); + if (passthrough_last < last) { + page_set_flags(passthrough_last + 1, last, page_flags); + } + } + shm_region_rm_complete(start, last); + trace_target_mmap_complete(start); + if (qemu_loglevel_mask(CPU_LOG_PAGE)) { + FILE *f = qemu_log_trylock(); + if (f) { + fprintf(f, "page layout changed following mmap\n"); + page_dump(f); + qemu_log_unlock(f); + } + } + return start; +} + static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, int target_prot, int flags, int page_flags, int fd, off_t offset) @@ -632,7 +669,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, ret = target_mprotect(start, len, target_prot); assert(ret == 0); } - goto the_end; + return mmap_end(start, last, -1, 0, flags, page_flags); } /* handle the start of the mapping */ @@ -643,7 +680,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, target_prot, flags, fd, offset)) { return -1; } - goto the_end; + return mmap_end(start, last, -1, 0, flags, page_flags); } if (!mmap_frag(real_start, start, real_start + host_page_size - 1, @@ -690,34 +727,8 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, passthrough_last = real_last; } } - the_end: - if (flags & MAP_ANONYMOUS) { - page_flags |= PAGE_ANON; - } - page_flags |= PAGE_RESET; - if (passthrough_start > passthrough_last) { - page_set_flags(start, last, page_flags); - } else { - if (start < passthrough_start) { - page_set_flags(start, passthrough_start - 1, page_flags); - } - page_set_flags(passthrough_start, passthrough_last, - page_flags | PAGE_PASSTHROUGH); - if (passthrough_last < last) { - page_set_flags(passthrough_last + 1, last, page_flags); - } - } - shm_region_rm_complete(start, last); - trace_target_mmap_complete(start); - if (qemu_loglevel_mask(CPU_LOG_PAGE)) { - FILE *f = qemu_log_trylock(); - if (f) { - fprintf(f, "page layout changed following mmap\n"); - page_dump(f); - qemu_log_unlock(f); - } - } - return start; + return mmap_end(start, last, passthrough_start, passthrough_last, + flags, page_flags); } /* NOTE: all the constants are the HOST ones */ -- cgit v1.2.3 From ad87d26e6bb13257409f412224c862fc54025e8b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:55 +1100 Subject: linux-user: Do early mmap placement only for reserved_va For reserved_va, place all non-fixed maps then proceed as for MAP_FIXED. 
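The resulting placement policy, sketched from the diff below:

    if (reserved_va && !(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        /*
         * The entire guest address space is already reserved: pick a
         * hole ourselves and then map into it as if MAP_FIXED.
         */
        start = mmap_find_vma(real_start, host_len,
                              MAX(host_page_size, TARGET_PAGE_SIZE));
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            return -1;
        }
        start += offset - host_offset;
        flags |= MAP_FIXED;
    }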
Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-21-richard.henderson@linaro.org> --- linux-user/mmap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index cc983bedbd..1bbfeb25b1 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -540,17 +540,19 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, host_offset = offset & -host_page_size; /* - * If the user is asking for the kernel to find a location, do that - * before we truncate the length for mapping files below. + * For reserved_va, we are in full control of the allocation. + * Find a suitable hole and convert to MAP_FIXED. */ - if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { + if (reserved_va && !(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { host_len = len + offset - host_offset; - host_len = ROUND_UP(host_len, host_page_size); - start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE); + start = mmap_find_vma(real_start, host_len, + MAX(host_page_size, TARGET_PAGE_SIZE)); if (start == (abi_ulong)-1) { errno = ENOMEM; return -1; } + start += offset - host_offset; + flags |= MAP_FIXED; } /* -- cgit v1.2.3 From 2952b642a555207748dd961fcbfdc48f198eebb6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 13 Feb 2024 10:20:27 -1000 Subject: linux-user: Split out do_munmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/mmap.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 1bbfeb25b1..8ebcca4444 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -267,6 +267,21 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) return ret; } +/* + * Perform munmap on behalf of the target, with host parameters. + * If reserved_va, we must replace the memory reservation. + */ +static int do_munmap(void *addr, size_t len) +{ + if (reserved_va) { + void *ptr = mmap(addr, len, PROT_NONE, + MAP_FIXED | MAP_ANONYMOUS + | MAP_PRIVATE | MAP_NORESERVE, -1, 0); + return ptr == addr ? 0 : -1; + } + return munmap(addr, len); +} + /* map an incomplete host page */ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, int prot, int flags, int fd, off_t offset) @@ -854,13 +869,7 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) real_len = real_last - real_start + 1; host_start = g2h_untagged(real_start); - if (reserved_va) { - void *ptr = mmap(host_start, real_len, PROT_NONE, - MAP_FIXED | MAP_ANONYMOUS - | MAP_PRIVATE | MAP_NORESERVE, -1, 0); - return ptr == host_start ? 0 : -1; - } - return munmap(host_start, real_len); + return do_munmap(host_start, real_len); } int target_munmap(abi_ulong start, abi_ulong len) -- cgit v1.2.3 From 3bfa271e46b506b2ceff41e3b258ad2ddeb25bc3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 13 Feb 2024 10:40:57 -1000 Subject: linux-user: Use do_munmap for target_mmap failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the cases for which the host mmap succeeds, but does not yield the desired address, use do_munmap to restore the reserved_va memory reservation. 
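Note why do_munmap rather than plain munmap(): under reserved_va, munmap() would punch a hole in the reservation that the host could later hand to an unrelated mapping. The helper from the previous patch instead re-establishes the PROT_NONE placeholder:

    mmap(addr, len, PROT_NONE,
         MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);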
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 8ebcca4444..cbcd31e941 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -326,7 +326,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, flags | MAP_ANONYMOUS, -1, 0); if (p != host_start) { if (p != MAP_FAILED) { - munmap(p, host_page_size); + do_munmap(p, host_page_size); errno = EEXIST; } return false; @@ -622,7 +622,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, p = mmap(g2h_untagged(start), len, host_prot, flags | MAP_FIXED, fd, host_offset); if (p == MAP_FAILED) { - munmap(g2h_untagged(start), host_len); + do_munmap(g2h_untagged(start), host_len); return -1; } host_start += offset - host_offset; @@ -735,7 +735,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, flags, fd, offset1); if (p != want_p) { if (p != MAP_FAILED) { - munmap(p, len1); + do_munmap(p, len1); errno = EEXIST; } return -1; -- cgit v1.2.3 From 68098de90e653adbb8bfc30f85aab9d12f111b80 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:56 +1100 Subject: linux-user: Split out mmap_h_eq_g Move the MAX_FIXED_NOREPLACE check for reserved_va earlier. Move the computation of host_prot earlier. Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-22-richard.henderson@linaro.org> --- linux-user/mmap.c | 68 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index cbcd31e941..d3556bcc14 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -542,6 +542,33 @@ static abi_long mmap_end(abi_ulong start, abi_ulong last, return start; } +/* + * Special case host page size == target page size, + * where there are no edge conditions. + */ +static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len, + int host_prot, int flags, int page_flags, + int fd, off_t offset) +{ + void *p, *want_p = g2h_untagged(start); + abi_ulong last; + + p = mmap(want_p, len, host_prot, flags, fd, offset); + if (p == MAP_FAILED) { + return -1; + } + /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */ + if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) { + do_munmap(p, len); + errno = EEXIST; + return -1; + } + + start = h2g(p); + last = start + len - 1; + return mmap_end(start, last, start, last, flags, page_flags); +} + static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, int target_prot, int flags, int page_flags, int fd, off_t offset) @@ -550,6 +577,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, abi_ulong ret, last, real_start, real_last, retaddr, host_len; abi_ulong passthrough_start = -1, passthrough_last = 0; off_t host_offset; + int host_prot; real_start = start & -host_page_size; host_offset = offset & -host_page_size; @@ -558,16 +586,33 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, * For reserved_va, we are in full control of the allocation. * Find a suitable hole and convert to MAP_FIXED. 
*/ - if (reserved_va && !(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { - host_len = len + offset - host_offset; - start = mmap_find_vma(real_start, host_len, - MAX(host_page_size, TARGET_PAGE_SIZE)); - if (start == (abi_ulong)-1) { - errno = ENOMEM; - return -1; + if (reserved_va) { + if (flags & MAP_FIXED_NOREPLACE) { + /* Validate that the chosen range is empty. */ + if (!page_check_range_empty(start, start + len - 1)) { + errno = EEXIST; + return -1; + } + flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; + } else if (!(flags & MAP_FIXED)) { + size_t real_len = len + offset - host_offset; + abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE); + + start = mmap_find_vma(real_start, real_len, align); + if (start == (abi_ulong)-1) { + errno = ENOMEM; + return -1; + } + start += offset - host_offset; + flags |= MAP_FIXED; } - start += offset - host_offset; - flags |= MAP_FIXED; + } + + host_prot = target_to_host_prot(target_prot); + + if (host_page_size == TARGET_PAGE_SIZE) { + return mmap_h_eq_g(start, len, host_prot, flags, + page_flags, fd, offset); } /* @@ -603,12 +648,10 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { uintptr_t host_start; - int host_prot; void *p; host_len = len + offset - host_offset; host_len = ROUND_UP(host_len, host_page_size); - host_prot = target_to_host_prot(target_prot); /* Note: we prefer to control the mapping address. */ p = mmap(g2h_untagged(start), host_len, host_prot, @@ -731,8 +774,7 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, len1 = real_last - real_start + 1; want_p = g2h_untagged(real_start); - p = mmap(want_p, len1, target_to_host_prot(target_prot), - flags, fd, offset1); + p = mmap(want_p, len1, host_prot, flags, fd, offset1); if (p != want_p) { if (p != MAP_FAILED) { do_munmap(p, len1); -- cgit v1.2.3 From 8080b2f80430e15c28c7e3abd45451fb55a6dd95 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:57 +1100 Subject: linux-user: Split out mmap_h_lt_g Work much harder to get alignment and mapping beyond the end of the file correct. Both of which are excercised by our test-mmap for alpha (8k pages) on any 4k page host. Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-23-richard.henderson@linaro.org> --- linux-user/mmap.c | 184 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 153 insertions(+), 31 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index d3556bcc14..ff8f9f7ed0 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -569,6 +569,156 @@ static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len, return mmap_end(start, last, start, last, flags, page_flags); } +/* + * Special case host page size < target page size. + * + * The two special cases are increased guest alignment, and mapping + * past the end of a file. + * + * When mapping files into a memory area larger than the file, + * accesses to pages beyond the file size will cause a SIGBUS. + * + * For example, if mmaping a file of 100 bytes on a host with 4K + * pages emulating a target with 8K pages, the target expects to + * be able to access the first 8K. But the host will trap us on + * any access beyond 4K. + * + * When emulating a target with a larger page-size than the hosts, + * we may need to truncate file maps at EOF and add extra anonymous + * pages up to the targets page boundary. 
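+ *
+ * (In practice this means the file portion of the mapping stops at
+ * EOF and anonymous zero-filled pages back the remainder of the
+ * guest page.)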
+ * + * This workaround only works for files that do not change. + * If the file is later extended (e.g. ftruncate), the SIGBUS + * vanishes and the proper behaviour is that changes within the + * anon page should be reflected in the file. + * + * However, this case is rather common with executable images, + * so the workaround is important for even trivial tests, whereas + * the mmap of of a file being extended is less common. + */ +static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot, + int mmap_flags, int page_flags, int fd, + off_t offset, int host_page_size) +{ + void *p, *want_p = g2h_untagged(start); + off_t fileend_adj = 0; + int flags = mmap_flags; + abi_ulong last, pass_last; + + if (!(flags & MAP_ANONYMOUS)) { + struct stat sb; + + if (fstat(fd, &sb) == -1) { + return -1; + } + if (offset >= sb.st_size) { + /* + * The entire map is beyond the end of the file. + * Transform it to an anonymous mapping. + */ + flags |= MAP_ANONYMOUS; + fd = -1; + offset = 0; + } else if (offset + len > sb.st_size) { + /* + * A portion of the map is beyond the end of the file. + * Truncate the file portion of the allocation. + */ + fileend_adj = offset + len - sb.st_size; + } + } + + if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) { + if (fileend_adj) { + p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0); + } else { + p = mmap(want_p, len, host_prot, flags, fd, offset); + } + if (p != want_p) { + if (p != MAP_FAILED) { + /* Host does not support MAP_FIXED_NOREPLACE: emulate. */ + do_munmap(p, len); + errno = EEXIST; + } + return -1; + } + + if (fileend_adj) { + void *t = mmap(p, len - fileend_adj, host_prot, + (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, + fd, offset); + + if (t == MAP_FAILED) { + int save_errno = errno; + + /* + * We failed a map over the top of the successful anonymous + * mapping above. The only failure mode is running out of VMAs, + * and there's nothing that we can do to detect that earlier. + * If we have replaced an existing mapping with MAP_FIXED, + * then we cannot properly recover. It's a coin toss whether + * it would be better to exit or continue here. + */ + if (!(flags & MAP_FIXED_NOREPLACE) && + !page_check_range_empty(start, start + len - 1)) { + qemu_log("QEMU target_mmap late failure: %s", + strerror(save_errno)); + } + + do_munmap(want_p, len); + errno = save_errno; + return -1; + } + } + } else { + size_t host_len, part_len; + + /* + * Take care to align the host memory. Perform a larger anonymous + * allocation and extract the aligned portion. Remap the file on + * top of that. 
+ */ + host_len = len + TARGET_PAGE_SIZE - host_page_size; + p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) { + return -1; + } + + part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1); + if (part_len) { + part_len = TARGET_PAGE_SIZE - part_len; + do_munmap(p, part_len); + p += part_len; + host_len -= part_len; + } + if (len < host_len) { + do_munmap(p + len, host_len - len); + } + + if (!(flags & MAP_ANONYMOUS)) { + void *t = mmap(p, len - fileend_adj, host_prot, + flags | MAP_FIXED, fd, offset); + + if (t == MAP_FAILED) { + int save_errno = errno; + do_munmap(p, len); + errno = save_errno; + return -1; + } + } + + start = h2g(p); + } + + last = start + len - 1; + if (fileend_adj) { + pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1; + } else { + pass_last = last; + } + return mmap_end(start, last, start, pass_last, mmap_flags, page_flags); +} + static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, int target_prot, int flags, int page_flags, int fd, off_t offset) @@ -613,37 +763,9 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, if (host_page_size == TARGET_PAGE_SIZE) { return mmap_h_eq_g(start, len, host_prot, flags, page_flags, fd, offset); - } - - /* - * When mapping files into a memory area larger than the file, accesses - * to pages beyond the file size will cause a SIGBUS. - * - * For example, if mmaping a file of 100 bytes on a host with 4K pages - * emulating a target with 8K pages, the target expects to be able to - * access the first 8K. But the host will trap us on any access beyond - * 4K. - * - * When emulating a target with a larger page-size than the hosts, we - * may need to truncate file maps at EOF and add extra anonymous pages - * up to the targets page boundary. - */ - if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) { - struct stat sb; - - if (fstat(fd, &sb) == -1) { - return -1; - } - - /* Are we trying to create a map beyond EOF?. */ - if (offset + len > sb.st_size) { - /* - * If so, truncate the file map at eof aligned with - * the hosts real pagesize. Additional anonymous maps - * will be created beyond EOF. - */ - len = ROUND_UP(sb.st_size - offset, host_page_size); - } + } else if (host_page_size < TARGET_PAGE_SIZE) { + return mmap_h_lt_g(start, len, host_prot, flags, + page_flags, fd, offset, host_page_size); } if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { -- cgit v1.2.3 From eb5027ac618f3df76c93fe617a969f82dbeab49b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:58 +1100 Subject: linux-user: Split out mmap_h_gt_g Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-24-richard.henderson@linaro.org> --- linux-user/mmap.c | 288 ++++++++++++++++++++++++++---------------------------- 1 file changed, 139 insertions(+), 149 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index ff8f9f7ed0..82f4026283 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -282,7 +282,16 @@ static int do_munmap(void *addr, size_t len) return munmap(addr, len); } -/* map an incomplete host page */ +/* + * Map an incomplete host page. + * + * Here be dragons. This case will not work if there is an existing + * overlapping host page, which is file mapped, and for which the mapping + * is beyond the end of the file. In that case, we will see SIGBUS when + * trying to write a portion of this page. 
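+ * (The faulting write is one QEMU performs itself while filling in
+ * the fragment, e.g. the pread of file data, not a guest access.)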
+ * + * FIXME: Work around this with a temporary signal handler and longjmp. + */ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, int prot, int flags, int fd, off_t offset) { @@ -719,19 +728,138 @@ static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot, return mmap_end(start, last, start, pass_last, mmap_flags, page_flags); } +/* + * Special case host page size > target page size. + * + * The two special cases are address and file offsets that are valid + * for the guest that cannot be directly represented by the host. + */ +static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len, + int target_prot, int host_prot, + int flags, int page_flags, int fd, + off_t offset, int host_page_size) +{ + void *p, *want_p = g2h_untagged(start); + off_t host_offset = offset & -host_page_size; + abi_ulong last, real_start, real_last; + bool misaligned_offset = false; + size_t host_len; + + if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { + /* + * Adjust the offset to something representable on the host. + */ + host_len = len + offset - host_offset; + p = mmap(want_p, host_len, host_prot, flags, fd, host_offset); + if (p == MAP_FAILED) { + return -1; + } + + /* Update start to the file position at offset. */ + p += offset - host_offset; + + start = h2g(p); + last = start + len - 1; + return mmap_end(start, last, start, last, flags, page_flags); + } + + if (!(flags & MAP_ANONYMOUS)) { + misaligned_offset = (start ^ offset) & (host_page_size - 1); + + /* + * The fallback for misalignment is a private mapping + read. + * This carries none of semantics required of MAP_SHARED. + */ + if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) { + errno = EINVAL; + return -1; + } + } + + last = start + len - 1; + real_start = start & -host_page_size; + real_last = ROUND_UP(last, host_page_size) - 1; + + /* + * Handle the start and end of the mapping. + */ + if (real_start < start) { + abi_ulong real_page_last = real_start + host_page_size - 1; + if (last <= real_page_last) { + /* Entire allocation a subset of one host page. */ + if (!mmap_frag(real_start, start, last, target_prot, + flags, fd, offset)) { + return -1; + } + return mmap_end(start, last, -1, 0, flags, page_flags); + } + + if (!mmap_frag(real_start, start, real_page_last, target_prot, + flags, fd, offset)) { + return -1; + } + real_start = real_page_last + 1; + } + + if (last < real_last) { + abi_ulong real_page_start = real_last - host_page_size + 1; + if (!mmap_frag(real_page_start, real_page_start, last, + target_prot, flags, fd, + offset + real_page_start - start)) { + return -1; + } + real_last = real_page_start - 1; + } + + if (real_start > real_last) { + return mmap_end(start, last, -1, 0, flags, page_flags); + } + + /* + * Handle the middle of the mapping. + */ + + host_len = real_last - real_start + 1; + want_p += real_start - start; + + if (flags & MAP_ANONYMOUS) { + p = mmap(want_p, host_len, host_prot, flags, -1, 0); + } else if (!misaligned_offset) { + p = mmap(want_p, host_len, host_prot, flags, fd, + offset + real_start - start); + } else { + p = mmap(want_p, host_len, host_prot | PROT_WRITE, + flags | MAP_ANONYMOUS, -1, 0); + } + if (p != want_p) { + if (p != MAP_FAILED) { + do_munmap(p, host_len); + errno = EEXIST; + } + return -1; + } + + if (misaligned_offset) { + /* TODO: The read could be short. 
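+ * A short read (e.g. racing with a concurrent truncate) is treated
+ * as a hard failure just below; a retry loop would be needed to
+ * tolerate it.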
*/ + if (pread(fd, p, host_len, offset + real_start - start) != host_len) { + do_munmap(p, host_len); + return -1; + } + if (!(host_prot & PROT_WRITE)) { + mprotect(p, host_len, host_prot); + } + } + + return mmap_end(start, last, -1, 0, flags, page_flags); +} + static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, int target_prot, int flags, int page_flags, int fd, off_t offset) { int host_page_size = qemu_real_host_page_size(); - abi_ulong ret, last, real_start, real_last, retaddr, host_len; - abi_ulong passthrough_start = -1, passthrough_last = 0; - off_t host_offset; int host_prot; - real_start = start & -host_page_size; - host_offset = offset & -host_page_size; - /* * For reserved_va, we are in full control of the allocation. * Find a suitable hole and convert to MAP_FIXED. @@ -745,6 +873,8 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, } flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; } else if (!(flags & MAP_FIXED)) { + abi_ulong real_start = start & -host_page_size; + off_t host_offset = offset & -host_page_size; size_t real_len = len + offset - host_offset; abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE); @@ -766,150 +896,10 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, } else if (host_page_size < TARGET_PAGE_SIZE) { return mmap_h_lt_g(start, len, host_prot, flags, page_flags, fd, offset, host_page_size); - } - - if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { - uintptr_t host_start; - void *p; - - host_len = len + offset - host_offset; - host_len = ROUND_UP(host_len, host_page_size); - - /* Note: we prefer to control the mapping address. */ - p = mmap(g2h_untagged(start), host_len, host_prot, - flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) { - return -1; - } - /* update start so that it points to the file position at 'offset' */ - host_start = (uintptr_t)p; - if (!(flags & MAP_ANONYMOUS)) { - p = mmap(g2h_untagged(start), len, host_prot, - flags | MAP_FIXED, fd, host_offset); - if (p == MAP_FAILED) { - do_munmap(g2h_untagged(start), host_len); - return -1; - } - host_start += offset - host_offset; - } - start = h2g(host_start); - last = start + len - 1; - passthrough_start = start; - passthrough_last = last; } else { - last = start + len - 1; - real_last = ROUND_UP(last, host_page_size) - 1; - - if (flags & MAP_FIXED_NOREPLACE) { - /* Validate that the chosen range is empty. */ - if (!page_check_range_empty(start, last)) { - errno = EEXIST; - return -1; - } - - /* - * With reserved_va, the entire address space is mmaped in the - * host to ensure it isn't accidentally used for something else. - * We have just checked that the guest address is not mapped - * within the guest, but need to replace the host reservation. - * - * Without reserved_va, despite the guest address check above, - * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite - * any host address mappings. 
- */ - if (reserved_va) { - flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; - } - } - - /* - * worst case: we cannot map the file because the offset is not - * aligned, so we read it - */ - if (!(flags & MAP_ANONYMOUS) && - (offset & (host_page_size - 1)) != (start & (host_page_size - 1))) { - /* - * msync() won't work here, so we return an error if write is - * possible while it is a shared mapping - */ - if ((flags & MAP_TYPE) == MAP_SHARED - && (target_prot & PROT_WRITE)) { - errno = EINVAL; - return -1; - } - retaddr = target_mmap(start, len, target_prot | PROT_WRITE, - (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) - | MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (retaddr == -1) { - return -1; - } - if (pread(fd, g2h_untagged(start), len, offset) == -1) { - return -1; - } - if (!(target_prot & PROT_WRITE)) { - ret = target_mprotect(start, len, target_prot); - assert(ret == 0); - } - return mmap_end(start, last, -1, 0, flags, page_flags); - } - - /* handle the start of the mapping */ - if (start > real_start) { - if (real_last == real_start + host_page_size - 1) { - /* one single host page */ - if (!mmap_frag(real_start, start, last, - target_prot, flags, fd, offset)) { - return -1; - } - return mmap_end(start, last, -1, 0, flags, page_flags); - } - if (!mmap_frag(real_start, start, - real_start + host_page_size - 1, - target_prot, flags, fd, offset)) { - return -1; - } - real_start += host_page_size; - } - /* handle the end of the mapping */ - if (last < real_last) { - abi_ulong real_page = real_last - host_page_size + 1; - if (!mmap_frag(real_page, real_page, last, - target_prot, flags, fd, - offset + real_page - start)) { - return -1; - } - real_last -= host_page_size; - } - - /* map the middle (easier) */ - if (real_start < real_last) { - void *p, *want_p; - off_t offset1; - size_t len1; - - if (flags & MAP_ANONYMOUS) { - offset1 = 0; - } else { - offset1 = offset + real_start - start; - } - len1 = real_last - real_start + 1; - want_p = g2h_untagged(real_start); - - p = mmap(want_p, len1, host_prot, flags, fd, offset1); - if (p != want_p) { - if (p != MAP_FAILED) { - do_munmap(p, len1); - errno = EEXIST; - } - return -1; - } - passthrough_start = real_start; - passthrough_last = real_last; - } + return mmap_h_gt_g(start, len, target_prot, host_prot, flags, + page_flags, fd, offset, host_page_size); } - return mmap_end(start, last, passthrough_start, passthrough_last, - flags, page_flags); } /* NOTE: all the constants are the HOST ones */ -- cgit v1.2.3 From e9206163d9d9cdd962abb00e40b22141a5e29684 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:57:59 +1100 Subject: tests/tcg: Remove run-test-mmap-* These tests are confused, because -p does not change the guest page size, but the host page size. 
Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-25-richard.henderson@linaro.org> --- tests/tcg/alpha/Makefile.target | 3 --- tests/tcg/arm/Makefile.target | 3 --- tests/tcg/hppa/Makefile.target | 3 --- tests/tcg/i386/Makefile.target | 3 --- tests/tcg/m68k/Makefile.target | 3 --- tests/tcg/multiarch/Makefile.target | 9 --------- tests/tcg/ppc/Makefile.target | 12 ------------ tests/tcg/sh4/Makefile.target | 3 --- tests/tcg/sparc64/Makefile.target | 6 ------ 9 files changed, 45 deletions(-) delete mode 100644 tests/tcg/ppc/Makefile.target delete mode 100644 tests/tcg/sparc64/Makefile.target diff --git a/tests/tcg/alpha/Makefile.target b/tests/tcg/alpha/Makefile.target index b94500a7d9..fdd7ddf64e 100644 --- a/tests/tcg/alpha/Makefile.target +++ b/tests/tcg/alpha/Makefile.target @@ -13,6 +13,3 @@ test-cmov: test-cond.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) run-test-cmov: test-cmov - -# On Alpha Linux only supports 8k pages -EXTRA_RUNS+=run-test-mmap-8192 diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target index 3473f4619e..0a1965fce7 100644 --- a/tests/tcg/arm/Makefile.target +++ b/tests/tcg/arm/Makefile.target @@ -79,6 +79,3 @@ sha512-vector: sha512.c ARM_TESTS += sha512-vector TESTS += $(ARM_TESTS) - -# On ARM Linux only supports 4k pages -EXTRA_RUNS+=run-test-mmap-4096 diff --git a/tests/tcg/hppa/Makefile.target b/tests/tcg/hppa/Makefile.target index cdd0d572a7..ea5ae2186d 100644 --- a/tests/tcg/hppa/Makefile.target +++ b/tests/tcg/hppa/Makefile.target @@ -2,9 +2,6 @@ # # HPPA specific tweaks - specifically masking out broken tests -# On parisc Linux supports 4K/16K/64K (but currently only 4k works) -EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-16384 run-test-mmap-65536 - # This triggers failures for hppa-linux about 1% of the time # HPPA is the odd target that can't use the sigtramp page; # it requires the full vdso with dwarf2 unwind info. diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target index 9906f9e116..bbe2c44b2a 100644 --- a/tests/tcg/i386/Makefile.target +++ b/tests/tcg/i386/Makefile.target @@ -71,9 +71,6 @@ endif I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) TESTS=$(MULTIARCH_TESTS) $(I386_TESTS) -# On i386 and x86_64 Linux only supports 4k pages (large pages are a different hack) -EXTRA_RUNS+=run-test-mmap-4096 - sha512-sse: CFLAGS=-msse4.1 -O3 sha512-sse: sha512.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) diff --git a/tests/tcg/m68k/Makefile.target b/tests/tcg/m68k/Makefile.target index 6ff214e60a..33f7b1b127 100644 --- a/tests/tcg/m68k/Makefile.target +++ b/tests/tcg/m68k/Makefile.target @@ -5,6 +5,3 @@ VPATH += $(SRC_PATH)/tests/tcg/m68k TESTS += trap denormal - -# On m68k Linux supports 4k and 8k pages (but 8k is currently broken) -EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192 diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index e10951a801..f11f3b084d 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -51,18 +51,9 @@ run-plugin-vma-pthread-with-%: vma-pthread $(call skip-test, $<, "flaky on CI?") endif -# We define the runner for test-mmap after the individual -# architectures have defined their supported pages sizes. If no -# additional page sizes are defined we only run the default test. 
- -# default case (host page size) run-test-mmap: test-mmap $(call run-test, test-mmap, $(QEMU) $<, $< (default)) -# additional page sizes (defined by each architecture adding to EXTRA_RUNS) -run-test-mmap-%: test-mmap - $(call run-test, test-mmap-$*, $(QEMU) -p $* $<, $< ($* byte pages)) - ifneq ($(GDB),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py diff --git a/tests/tcg/ppc/Makefile.target b/tests/tcg/ppc/Makefile.target deleted file mode 100644 index f5e08c7376..0000000000 --- a/tests/tcg/ppc/Makefile.target +++ /dev/null @@ -1,12 +0,0 @@ -# -*- Mode: makefile -*- -# -# PPC - included from tests/tcg/Makefile -# - -ifneq (,$(findstring 64,$(TARGET_NAME))) -# On PPC64 Linux can be configured with 4k (default) or 64k pages (currently broken) -EXTRA_RUNS+=run-test-mmap-4096 #run-test-mmap-65536 -else -# On PPC32 Linux supports 4K/16K/64K/256K (but currently only 4k works) -EXTRA_RUNS+=run-test-mmap-4096 #run-test-mmap-16384 run-test-mmap-65536 run-test-mmap-262144 -endif diff --git a/tests/tcg/sh4/Makefile.target b/tests/tcg/sh4/Makefile.target index 47c39a44b6..16eaa850a8 100644 --- a/tests/tcg/sh4/Makefile.target +++ b/tests/tcg/sh4/Makefile.target @@ -3,9 +3,6 @@ # SuperH specific tweaks # -# On sh Linux supports 4k, 8k, 16k and 64k pages (but only 4k currently works) -EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192 run-test-mmap-16384 run-test-mmap-65536 - # This triggers failures for sh4-linux about 10% of the time. # Random SIGSEGV at unpredictable guest address, cause unknown. run-signals: signals diff --git a/tests/tcg/sparc64/Makefile.target b/tests/tcg/sparc64/Makefile.target deleted file mode 100644 index 408dace783..0000000000 --- a/tests/tcg/sparc64/Makefile.target +++ /dev/null @@ -1,6 +0,0 @@ -# -*- Mode: makefile -*- -# -# sparc specific tweaks - -# On Sparc64 Linux support 8k pages -EXTRA_RUNS+=run-test-mmap-8192 -- cgit v1.2.3 From 6ada8619510002e7b0ad5925597232c750282592 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:00 +1100 Subject: tests/tcg: Extend file in linux-madvise.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When guest page size > host page size, this test can fail due to the SIGBUS protection hack. Avoid this by making sure that the file size is at least one guest page. Visible with alpha guest on x86_64 host. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-26-richard.henderson@linaro.org> --- tests/tcg/multiarch/linux/linux-madvise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/tcg/multiarch/linux/linux-madvise.c b/tests/tcg/multiarch/linux/linux-madvise.c index 29d0997e68..539fb3b772 100644 --- a/tests/tcg/multiarch/linux/linux-madvise.c +++ b/tests/tcg/multiarch/linux/linux-madvise.c @@ -42,6 +42,8 @@ static void test_file(void) assert(ret == 0); written = write(fd, &c, sizeof(c)); assert(written == sizeof(c)); + ret = ftruncate(fd, pagesize); + assert(ret == 0); page = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE, fd, 0); assert(page != MAP_FAILED); -- cgit v1.2.3 From 01e449809b0289a5535e935615e5e7f0a695f227 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:01 +1100 Subject: *-user: Deprecate and disable -p pagesize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This option controls the host page size. 
From the mis-usage in our own testsuite, this is easily confused with guest page size. The only thing that occurs when changing the host page size is that stuff breaks, because one cannot actually change the host page size. Therefore reject all but the no-op setting as part of the deprecation process. Reviewed-by: Warner Losh Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Acked-by: Helge Deller Message-Id: <20240102015808.132373-27-richard.henderson@linaro.org> --- bsd-user/main.c | 11 ++++++----- docs/about/deprecated.rst | 10 ++++++++++ docs/user/main.rst | 3 --- linux-user/main.c | 12 ++++++------ 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/bsd-user/main.c b/bsd-user/main.c index e5efb7b845..6ab3efd6c0 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -364,11 +364,12 @@ int main(int argc, char **argv) } else if (!strcmp(r, "L")) { interp_prefix = argv[optind++]; } else if (!strcmp(r, "p")) { - qemu_host_page_size = atoi(argv[optind++]); - if (qemu_host_page_size == 0 || - (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) { - fprintf(stderr, "page size must be a power of two\n"); - exit(1); + unsigned size, want = qemu_real_host_page_size(); + + r = argv[optind++]; + if (qemu_strtoui(r, NULL, 10, &size) || size != want) { + warn_report("Deprecated page size option cannot " + "change host page size (%u)", want); } } else if (!strcmp(r, "g")) { gdbstub = g_strdup(argv[optind++]); diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 36bd3e15ef..8565644da6 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -63,6 +63,16 @@ as short-form boolean values, and passed to plugins as ``arg_name=on``. However, short-form booleans are deprecated and full explicit ``arg_name=on`` form is preferred. +User-mode emulator command line arguments +----------------------------------------- + +``-p`` (since 9.0) +'''''''''''''''''' + +The ``-p`` option pretends to control the host page size. However, +it is not possible to change the host page size, and using the +option only causes failures. 
+ QEMU Machine Protocol (QMP) commands ------------------------------------ diff --git a/docs/user/main.rst b/docs/user/main.rst index 7e7ad07409..d5fbb78d3c 100644 --- a/docs/user/main.rst +++ b/docs/user/main.rst @@ -87,9 +87,6 @@ Debug options: Activate logging of the specified items (use '-d help' for a list of log items) -``-p pagesize`` - Act as if the host page size was 'pagesize' bytes - ``-g port`` Wait gdb connection to port diff --git a/linux-user/main.c b/linux-user/main.c index e540acb84a..bad03f06d3 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -332,11 +332,11 @@ static void handle_arg_ld_prefix(const char *arg) static void handle_arg_pagesize(const char *arg) { - qemu_host_page_size = atoi(arg); - if (qemu_host_page_size == 0 || - (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) { - fprintf(stderr, "page size must be a power of two\n"); - exit(EXIT_FAILURE); + unsigned size, want = qemu_real_host_page_size(); + + if (qemu_strtoui(arg, NULL, 10, &size) || size != want) { + warn_report("Deprecated page size option cannot " + "change host page size (%u)", want); } } @@ -496,7 +496,7 @@ static const struct qemu_argument arg_table[] = { {"D", "QEMU_LOG_FILENAME", true, handle_arg_log_filename, "logfile", "write logs to 'logfile' (default stderr)"}, {"p", "QEMU_PAGESIZE", true, handle_arg_pagesize, - "pagesize", "set the host page size to 'pagesize'"}, + "pagesize", "deprecated change to host page size"}, {"one-insn-per-tb", "QEMU_ONE_INSN_PER_TB", false, handle_arg_one_insn_per_tb, "", "run with one guest instruction per emulated TB"}, -- cgit v1.2.3 From 8c45039f9ecab8bc5b123e39bb0433763524e39d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:02 +1100 Subject: cpu: Remove page_size_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move qemu_host_page_{size,mask} and HOST_PAGE_ALIGN into bsd-user. It should be removed from bsd-user as well, but defer that cleanup. Reviewed-by: Warner Losh Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Tested-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-28-richard.henderson@linaro.org> --- accel/tcg/translate-all.c | 1 - bsd-user/main.c | 12 ++++++++++++ bsd-user/qemu.h | 7 +++++++ cpu-target.c | 16 ---------------- include/exec/cpu-common.h | 7 ------- include/hw/core/cpu.h | 2 -- system/vl.c | 1 - 7 files changed, 19 insertions(+), 27 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 1c695efe02..c1f57e894a 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -256,7 +256,6 @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) void page_init(void) { - page_size_init(); page_table_config_init(); } diff --git a/bsd-user/main.c b/bsd-user/main.c index 6ab3efd6c0..512d4ab69f 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -49,6 +49,13 @@ #include "host-os.h" #include "target_arch_cpu.h" + +/* + * TODO: Remove these and rely only on qemu_real_host_page_size(). 
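+ * bsd-user still relies on them, so their removal is deferred.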
+ */ +uintptr_t qemu_host_page_size; +intptr_t qemu_host_page_mask; + static bool opt_one_insn_per_tb; uintptr_t guest_base; bool have_guest_base; @@ -307,6 +314,9 @@ int main(int argc, char **argv) (void) envlist_setenv(envlist, *wrk); } + qemu_host_page_size = getpagesize(); + qemu_host_page_size = MAX(qemu_host_page_size, TARGET_PAGE_SIZE); + cpu_model = NULL; qemu_add_opts(&qemu_trace_opts); @@ -404,6 +414,8 @@ int main(int argc, char **argv) } } + qemu_host_page_mask = -qemu_host_page_size; + /* init debug */ { int mask = 0; diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index dc842fffa7..c05c512767 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -39,6 +39,13 @@ extern char **environ; #include "qemu/clang-tsa.h" #include "qemu-os.h" +/* + * TODO: Remove these and rely only on qemu_real_host_page_size(). + */ +extern uintptr_t qemu_host_page_size; +extern intptr_t qemu_host_page_mask; +#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size) + /* * This struct is used to hold certain information about the image. Basically, * it replicates in user space what would be certain task_struct fields in the diff --git a/cpu-target.c b/cpu-target.c index 86444cc2c6..4c0621bf33 100644 --- a/cpu-target.c +++ b/cpu-target.c @@ -45,9 +45,6 @@ #include "trace/trace-root.h" #include "qemu/accel.h" -uintptr_t qemu_host_page_size; -intptr_t qemu_host_page_mask; - #ifndef CONFIG_USER_ONLY static int cpu_common_post_load(void *opaque, int version_id) { @@ -474,16 +471,3 @@ const char *target_name(void) { return TARGET_NAME; } - -void page_size_init(void) -{ - /* NOTE: we can always suppose that qemu_host_page_size >= - TARGET_PAGE_SIZE */ - if (qemu_host_page_size == 0) { - qemu_host_page_size = qemu_real_host_page_size(); - } - if (qemu_host_page_size < TARGET_PAGE_SIZE) { - qemu_host_page_size = TARGET_PAGE_SIZE; - } - qemu_host_page_mask = -(intptr_t)qemu_host_page_size; -} diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 9ead1be100..6346df17ce 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -20,13 +20,6 @@ void cpu_exec_init_all(void); void cpu_exec_step_atomic(CPUState *cpu); -/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even - * when intptr_t is 32-bit and we are aligning a long long. 
- */ -extern uintptr_t qemu_host_page_size; -extern intptr_t qemu_host_page_mask; - -#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size) #define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size()) /* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index af1a29526d..d0e345419f 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -1179,8 +1179,6 @@ bool target_words_bigendian(void); const char *target_name(void); -void page_size_init(void); - #ifdef NEED_CPU_H #ifndef CONFIG_USER_ONLY diff --git a/system/vl.c b/system/vl.c index e480afd7a0..48aae6e053 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2118,7 +2118,6 @@ static void qemu_create_machine(QDict *qdict) } cpu_exec_init_all(); - page_size_init(); if (machine_class->hw_version) { qemu_set_hw_version(machine_class->hw_version); -- cgit v1.2.3 From 33402cea1f9bae189f80daa0a90fdebd1669ab62 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:03 +1100 Subject: accel/tcg: Disconnect TargetPageDataNode from page size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dynamically size the node for the runtime target page size. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-29-richard.henderson@linaro.org> --- accel/tcg/user-exec.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 69b7429e31..3cac3a78c4 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -864,7 +864,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, typedef struct TargetPageDataNode { struct rcu_head rcu; IntervalTreeNode itree; - char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned)); + char data[] __attribute__((aligned)); } TargetPageDataNode; static IntervalTreeRoot targetdata_root; @@ -902,7 +902,8 @@ void page_reset_target_data(target_ulong start, target_ulong last) n_last = MIN(last, n->last); p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; - memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE); + memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0, + p_len * TARGET_PAGE_DATA_SIZE); } } @@ -910,7 +911,7 @@ void *page_get_target_data(target_ulong address) { IntervalTreeNode *n; TargetPageDataNode *t; - target_ulong page, region; + target_ulong page, region, p_ofs; page = address & TARGET_PAGE_MASK; region = address & TBD_MASK; @@ -926,7 +927,8 @@ void *page_get_target_data(target_ulong address) mmap_lock(); n = interval_tree_iter_first(&targetdata_root, page, page); if (!n) { - t = g_new0(TargetPageDataNode, 1); + t = g_malloc0(sizeof(TargetPageDataNode) + + TPD_PAGES * TARGET_PAGE_DATA_SIZE); n = &t->itree; n->start = region; n->last = region | ~TBD_MASK; @@ -936,7 +938,8 @@ void *page_get_target_data(target_ulong address) } t = container_of(n, TargetPageDataNode, itree); - return t->data[(page - region) >> TARGET_PAGE_BITS]; + p_ofs = (page - region) >> TARGET_PAGE_BITS; + return t->data + p_ofs * TARGET_PAGE_DATA_SIZE; } #else void page_reset_target_data(target_ulong start, target_ulong last) { } -- cgit v1.2.3 From ff8a8bbc2ad198f879fc516abff0e9cf0c30f6cb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:04 +1100 Subject: linux-user: Allow TARGET_PAGE_BITS_VARY MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit If set, match the host and guest page sizes. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Acked-by: Helge Deller Message-Id: <20240102015808.132373-30-richard.henderson@linaro.org> --- linux-user/main.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index bad03f06d3..12bb839982 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -55,6 +55,7 @@ #include "loader.h" #include "user-mmap.h" #include "tcg/perf.h" +#include "exec/page-vary.h" #ifdef CONFIG_SEMIHOSTING #include "semihosting/semihost.h" @@ -680,6 +681,7 @@ int main(int argc, char **argv, char **envp) int i; int ret; int execfd; + int host_page_size; unsigned long max_reserved_va; bool preserve_argv0; @@ -791,6 +793,16 @@ int main(int argc, char **argv, char **envp) opt_one_insn_per_tb, &error_abort); ac->init_machine(NULL); } + + /* + * Finalize page size before creating CPUs. + * This will do nothing if !TARGET_PAGE_BITS_VARY. + * The most efficient setting is to match the host. + */ + host_page_size = qemu_real_host_page_size(); + set_preferred_target_page_bits(ctz32(host_page_size)); + finalize_target_page_bits(); + cpu = cpu_create(cpu_type); env = cpu_env(cpu); cpu_reset(cpu); @@ -804,8 +816,6 @@ int main(int argc, char **argv, char **envp) */ max_reserved_va = MAX_RESERVED_VA(cpu); if (reserved_va != 0) { - int host_page_size = qemu_real_host_page_size(); - if ((reserved_va + 1) % host_page_size) { char *s = size_to_str(host_page_size); fprintf(stderr, "Reserved virtual address not aligned mod %s\n", s); @@ -904,7 +914,7 @@ int main(int argc, char **argv, char **envp) * If we're in a chroot with no /proc, fall back to 1 page. */ if (mmap_min_addr == 0) { - mmap_min_addr = qemu_real_host_page_size(); + mmap_min_addr = host_page_size; qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx (fallback)\n", mmap_min_addr); -- cgit v1.2.3 From a575230f95cf6b05e5ba97f5f52e33b0878bc0aa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:05 +1100 Subject: target/arm: Enable TARGET_PAGE_BITS_VARY for AArch64 user-only Since aarch64 binaries are generally built for multiple page sizes, it is trivial to allow the page size to vary. 
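To make the page-size matching added to linux-user/main.c concrete: the preferred page bits are derived from the host page size and clamped to the target's minimum. A standalone sketch of that computation (illustrative only; the clamp of 12 mirrors TARGET_PAGE_BITS_MIN):

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        long psize = sysconf(_SC_PAGESIZE);
        /* Page sizes are powers of two, so ctz yields log2. */
        int bits = __builtin_ctzl((unsigned long)psize);

        if (bits < 12) {   /* respect the target's minimum page bits */
            bits = 12;
        }
        printf("host page %ld bytes -> preferred TARGET_PAGE_BITS %d\n",
               psize, bits);
        return 0;
    }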
Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Acked-by: Helge Deller Message-Id: <20240102015808.132373-31-richard.henderson@linaro.org> --- target/arm/cpu-param.h | 6 +++++- target/arm/cpu.c | 51 ++++++++++++++++++++++++++++---------------------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index f9b462a98f..da3243ab21 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -19,9 +19,13 @@ #endif #ifdef CONFIG_USER_ONLY -#define TARGET_PAGE_BITS 12 # ifdef TARGET_AARCH64 # define TARGET_TAGGED_ADDRESSES +/* Allow user-only to vary page size from 4k */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +# else +# define TARGET_PAGE_BITS 12 # endif #else /* diff --git a/target/arm/cpu.c b/target/arm/cpu.c index b2ea5d6513..f3ed79cef2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1809,7 +1809,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) ARMCPU *cpu = ARM_CPU(dev); ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); CPUARMState *env = &cpu->env; - int pagebits; Error *local_err = NULL; #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) @@ -2100,28 +2099,36 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) !cpu_isar_feature(aa32_vfp_simd, cpu) || !arm_feature(env, ARM_FEATURE_XSCALE)); - if (arm_feature(env, ARM_FEATURE_V7) && - !arm_feature(env, ARM_FEATURE_M) && - !arm_feature(env, ARM_FEATURE_PMSA)) { - /* v7VMSA drops support for the old ARMv5 tiny pages, so we - * can use 4K pages. - */ - pagebits = 12; - } else { - /* For CPUs which might have tiny 1K pages, or which have an - * MPU and might have small region sizes, stick with 1K pages. - */ - pagebits = 10; - } - if (!set_preferred_target_page_bits(pagebits)) { - /* This can only ever happen for hotplugging a CPU, or if - * the board code incorrectly creates a CPU which it has - * promised via minimum_page_size that it will not. - */ - error_setg(errp, "This CPU requires a smaller page size than the " - "system is using"); - return; +#ifndef CONFIG_USER_ONLY + { + int pagebits; + if (arm_feature(env, ARM_FEATURE_V7) && + !arm_feature(env, ARM_FEATURE_M) && + !arm_feature(env, ARM_FEATURE_PMSA)) { + /* + * v7VMSA drops support for the old ARMv5 tiny pages, + * so we can use 4K pages. + */ + pagebits = 12; + } else { + /* + * For CPUs which might have tiny 1K pages, or which have an + * MPU and might have small region sizes, stick with 1K pages. + */ + pagebits = 10; + } + if (!set_preferred_target_page_bits(pagebits)) { + /* + * This can only ever happen for hotplugging a CPU, or if + * the board code incorrectly creates a CPU which it has + * promised via minimum_page_size that it will not. + */ + error_setg(errp, "This CPU requires a smaller page size " + "than the system is using"); + return; + } } +#endif /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it. * We don't support setting cluster ID ([16..23]) (known as Aff2 -- cgit v1.2.3 From 78b79b2cb349114f1b3584b67f94433dd08ba05e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:06 +1100 Subject: linux-user: Bound mmap_min_addr by host page size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bizzarely, it is possible to set /proc/sys/vm/mmap_min_addr to a value below the host page size. Fix that. 
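A standalone rendering of the clamped probe (illustrative; error handling trimmed):

    #include <stdio.h>
    #include <unistd.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        unsigned long min_addr = 0, tmp;
        long psize = sysconf(_SC_PAGESIZE);
        FILE *fp = fopen("/proc/sys/vm/mmap_min_addr", "r");

        if (fp) {
            if (fscanf(fp, "%lu", &tmp) == 1 && tmp != 0) {
                /* Never trust a value below the host page size. */
                min_addr = MAX(tmp, (unsigned long)psize);
            }
            fclose(fp);
        }
        printf("effective mmap_min_addr=0x%lx\n", min_addr);
        return 0;
    }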
Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-32-richard.henderson@linaro.org> --- linux-user/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/main.c b/linux-user/main.c index 12bb839982..551acf1661 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -901,7 +901,7 @@ int main(int argc, char **argv, char **envp) if ((fp = fopen("/proc/sys/vm/mmap_min_addr", "r")) != NULL) { unsigned long tmp; if (fscanf(fp, "%lu", &tmp) == 1 && tmp != 0) { - mmap_min_addr = tmp; + mmap_min_addr = MAX(tmp, host_page_size); qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx\n", mmap_min_addr); } -- cgit v1.2.3 From 835e5fe9e2e1ccd259474a9eb5f5063497e4e4ab Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:07 +1100 Subject: target/ppc: Enable TARGET_PAGE_BITS_VARY for user-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since ppc binaries are generally built for multiple page sizes, it is trivial to allow the page size to vary. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-33-richard.henderson@linaro.org> --- target/ppc/cpu-param.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/target/ppc/cpu-param.h b/target/ppc/cpu-param.h index 0a0416e0a8..b7ad52de03 100644 --- a/target/ppc/cpu-param.h +++ b/target/ppc/cpu-param.h @@ -31,6 +31,13 @@ # define TARGET_PHYS_ADDR_SPACE_BITS 36 # define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -#define TARGET_PAGE_BITS 12 + +#ifdef CONFIG_USER_ONLY +/* Allow user-only to vary page size from 4k */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +#else +# define TARGET_PAGE_BITS 12 +#endif #endif -- cgit v1.2.3 From f2ffdfab7e5ae5217567508bb74c329e3b19dd44 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Jan 2024 12:58:08 +1100 Subject: target/alpha: Enable TARGET_PAGE_BITS_VARY for user-only Since alpha binaries are generally built for multiple page sizes, it is trivial to allow the page size to vary. Signed-off-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Ilya Leoshkevich Acked-by: Helge Deller Message-Id: <20240102015808.132373-34-richard.henderson@linaro.org> --- target/alpha/cpu-param.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/target/alpha/cpu-param.h b/target/alpha/cpu-param.h index 68c46f7998..c969cb016b 100644 --- a/target/alpha/cpu-param.h +++ b/target/alpha/cpu-param.h @@ -9,10 +9,22 @@ #define ALPHA_CPU_PARAM_H #define TARGET_LONG_BITS 64 -#define TARGET_PAGE_BITS 13 /* ??? EV4 has 34 phys addr bits, EV5 has 40, EV6 has 44. */ #define TARGET_PHYS_ADDR_SPACE_BITS 44 -#define TARGET_VIRT_ADDR_SPACE_BITS (30 + TARGET_PAGE_BITS) + +#ifdef CONFIG_USER_ONLY +/* + * Allow user-only to vary page size. Real hardware allows only 8k and 64k, + * but since any variance means guests cannot assume a fixed value, allow + * a 4k minimum to match x86 host, which can minimize emulation issues. 
+ */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +# define TARGET_VIRT_ADDR_SPACE_BITS 63 +#else +# define TARGET_PAGE_BITS 13 +# define TARGET_VIRT_ADDR_SPACE_BITS (30 + TARGET_PAGE_BITS) +#endif #endif -- cgit v1.2.3 From b816e1b5ba58a986b10cd830d6617f351979ab91 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 14 Feb 2024 04:18:01 +0000 Subject: linux-user: Remove pgb_dynamic alignment assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assertion was never correct, because the alignment is a composite of the image alignment and SHMLBA. Even if the image alignment didn't match the image address, an assertion would not be correct -- more appropriate would be an error message about an ill formed image. But the image cannot be held to SHMLBA under any circumstances. Fixes: ee94743034b ("linux-user: completely re-write init_guest_space") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2157 Signed-off-by: Richard Henderson Reported-by: Alexey Sheplyakov Reviewed-by: Philippe Mathieu-Daudé --- linux-user/elfload.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index cc2013c7b4..0c299a7c15 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3022,8 +3022,6 @@ static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr, uintptr_t brk, ret; PGBAddrs ga; - assert(QEMU_IS_ALIGNED(guest_loaddr, align)); - /* Try the identity map first. */ if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) { brk = (uintptr_t)sbrk(0); -- cgit v1.2.3 From ff202817dc2b0b3b42992fa7f1ce503f081068fe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Feb 2024 12:06:41 +0100 Subject: tcg/optimize: fix uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variables uext_opc and sext_opc are used without initialization if TCG_TARGET_extract_i{32,64}_valid returns false. The result, depending on the compiler, might be the generation of extract and sextract opcodes with invalid offset and count, or just random data in the TCG opcode stream. 
Fixes: ceb9ee06b71 ("tcg/optimize: Handle TCG_COND_TST{EQ,NE}", 2024-02-03) Cc: Richard Henderson Signed-off-by: Paolo Bonzini Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20240228110641.287205-1-pbonzini@redhat.com> Signed-off-by: Richard Henderson --- tcg/optimize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 79e701652b..752cc5c56b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2102,7 +2102,8 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc, uext_opc, sext_opc; + TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; + TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; -- cgit v1.2.3 From 4ef1f559f270c66b3ffc23f6c845ff3d008c6356 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 24 Feb 2024 02:29:41 +0000 Subject: linux-user/x86_64: Handle the vsyscall page in open_self_maps_{2,4} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the only case in which we expect to have no host memory backing for a guest memory page, because in general linux user processes cannot map any pages in the top half of the 64-bit address space. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2170 Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/syscall.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e384e14248..bc8c06522f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7994,6 +7994,10 @@ static void open_self_maps_4(const struct open_self_maps_data *d, path = "[heap]"; } else if (start == info->vdso) { path = "[vdso]"; +#ifdef TARGET_X86_64 + } else if (start == TARGET_VSYSCALL_PAGE) { + path = "[vsyscall]"; +#endif } /* Except null device (MAP_ANON), adjust offset for this fragment. */ @@ -8082,6 +8086,18 @@ static int open_self_maps_2(void *opaque, target_ulong guest_start, uintptr_t host_start = (uintptr_t)g2h_untagged(guest_start); uintptr_t host_last = (uintptr_t)g2h_untagged(guest_end - 1); +#ifdef TARGET_X86_64 + /* + * Because of the extremely high position of the page within the guest + * virtual address space, this is not backed by host memory at all. + * Therefore the loop below would fail. This is the only instance + * of not having host backing memory. + */ + if (guest_start == TARGET_VSYSCALL_PAGE) { + return open_self_maps_3(opaque, guest_start, guest_end, flags); + } +#endif + while (1) { IntervalTreeNode *n = interval_tree_iter_first(d->host_maps, host_start, host_start); -- cgit v1.2.3 From 01a721167a117403c83b947966be1a91ef4dcfba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 22 Feb 2024 11:32:21 -1000 Subject: linux-user/loongarch64: Remove TARGET_FORCE_SHMLBA The kernel abi was changed with commit d23b77953f5a4fbf94c05157b186aac2a247ae32 Author: Huacai Chen Date: Wed Jan 17 12:43:08 2024 +0800 LoongArch: Change SHMLBA from SZ_64K to PAGE_SIZE during the v6.8 cycle. 
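In effect, shmat() addresses for LoongArch guests now only need page alignment (16k on a default kernel) rather than 64k alignment. The SHM_RND rounding this feeds into is just a mask; a toy example with invented addresses:

    #include <stdio.h>

    /* Mirrors the rounding shmat(..., SHM_RND) applies with SHMLBA. */
    static unsigned long shm_round(unsigned long addr, unsigned long shmlba)
    {
        return addr & ~(shmlba - 1);
    }

    int main(void)
    {
        unsigned long addr = 0x74000;

        printf("0x%lx -> 0x%lx with 64k SHMLBA, 0x%lx with 16k\n",
               addr, shm_round(addr, 65536), shm_round(addr, 16384));
        return 0;
    }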
Reviewed-by: Song Gao Signed-off-by: Richard Henderson --- linux-user/loongarch64/target_syscall.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/linux-user/loongarch64/target_syscall.h b/linux-user/loongarch64/target_syscall.h index 8b5de52124..39f229bb9c 100644 --- a/linux-user/loongarch64/target_syscall.h +++ b/linux-user/loongarch64/target_syscall.h @@ -38,11 +38,4 @@ struct target_pt_regs { #define TARGET_MCL_FUTURE 2 #define TARGET_MCL_ONFAULT 4 -#define TARGET_FORCE_SHMLBA - -static inline abi_ulong target_shmlba(CPULoongArchState *env) -{ - return 64 * KiB; -} - #endif -- cgit v1.2.3 From 9f7c97324c273015812fe93b85d000335d6fa7c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 22 Feb 2024 15:46:43 -1000 Subject: linux-user: Add strace for shmat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/strace.c | 24 ++++++++++++++++++++++++ linux-user/strace.list | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index cf26e55264..8d13e55a5b 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -670,6 +670,26 @@ print_semctl(CPUArchState *cpu_env, const struct syscallname *name, } #endif +static void +print_shmat(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + static const struct flags shmat_flags[] = { + FLAG_GENERIC(SHM_RND), + FLAG_GENERIC(SHM_REMAP), + FLAG_GENERIC(SHM_RDONLY), + FLAG_GENERIC(SHM_EXEC), + FLAG_END + }; + + print_syscall_prologue(name); + print_raw_param(TARGET_ABI_FMT_ld, arg0, 0); + print_pointer(arg1, 0); + print_flags(shmat_flags, arg2, 1); + print_syscall_epilogue(name); +} + #ifdef TARGET_NR_ipc static void print_ipc(CPUArchState *cpu_env, const struct syscallname *name, @@ -683,6 +703,10 @@ print_ipc(CPUArchState *cpu_env, const struct syscallname *name, print_ipc_cmd(arg3); qemu_log(",0x" TARGET_ABI_FMT_lx ")", arg4); break; + case IPCOP_shmat: + print_shmat(cpu_env, &(const struct syscallname){ .name = "shmat" }, + arg1, arg4, arg2, 0, 0, 0); + break; default: qemu_log(("%s(" TARGET_ABI_FMT_ld "," diff --git a/linux-user/strace.list b/linux-user/strace.list index 6655d4f26d..dfd4237d14 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -1398,7 +1398,7 @@ { TARGET_NR_sgetmask, "sgetmask" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_shmat -{ TARGET_NR_shmat, "shmat" , NULL, NULL, print_syscall_ret_addr }, +{ TARGET_NR_shmat, "shmat" , NULL, print_shmat, print_syscall_ret_addr }, #endif #ifdef TARGET_NR_shmctl { TARGET_NR_shmctl, "shmctl" , NULL, NULL, NULL }, -- cgit v1.2.3 From 78bc8ed9a8f092f6666d7a949ad67dac33cc014d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 22 Feb 2024 13:24:38 -1000 Subject: linux-user: Rewrite target_shmat Handle combined host and guest alignment requirements. Handle host and guest page size differences. Handle SHM_EXEC. 
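The combined-alignment rule reduces to taking the larger of the two power-of-two constraints; a toy example with invented values:

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        unsigned long t_shmlba = 8192;    /* guest requirement */
        unsigned long h_shmlba = 16384;   /* host requirement */
        unsigned long m_shmlba = MAX(t_shmlba, h_shmlba);
        unsigned long addr = 0x32000;

        /* Both are powers of two, so satisfying the larger satisfies both. */
        if (addr & (m_shmlba - 1)) {
            printf("0x%lx must round down to 0x%lx\n",
                   addr, addr & ~(m_shmlba - 1));
        } else {
            printf("0x%lx satisfies both alignments\n", addr);
        }
        return 0;
    }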
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/115
Tested-by: Richard Purdie
Signed-off-by: Richard Henderson
---
 linux-user/mmap.c | 166 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 133 insertions(+), 33 deletions(-)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 82f4026283..4505fd7376 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -1236,69 +1236,161 @@ static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
 }
 #endif
 
+#if defined(__arm__) || defined(__mips__) || defined(__sparc__)
+#define HOST_FORCE_SHMLBA 1
+#else
+#define HOST_FORCE_SHMLBA 0
+#endif
+
 abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                        abi_ulong shmaddr, int shmflg)
 {
     CPUState *cpu = env_cpu(cpu_env);
-    abi_ulong raddr;
     struct shmid_ds shm_info;
     int ret;
-    abi_ulong shmlba;
+    int h_pagesize;
+    int t_shmlba, h_shmlba, m_shmlba;
+    size_t t_len, h_len, m_len;
 
     /* shmat pointers are always untagged */
 
-    /* find out the length of the shared memory segment */
+    /*
+     * Because we can't use host shmat() unless the address is sufficiently
+     * aligned for the host, we'll need to check both.
+     * TODO: Could be fixed with softmmu.
+     */
+    t_shmlba = target_shmlba(cpu_env);
+    h_pagesize = qemu_real_host_page_size();
+    h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize);
+    m_shmlba = MAX(t_shmlba, h_shmlba);
+
+    if (shmaddr) {
+        if (shmaddr & (m_shmlba - 1)) {
+            if (shmflg & SHM_RND) {
+                /*
+                 * The guest is allowing the kernel to round the address.
+                 * Assume that the guest is ok with us rounding to the
+                 * host required alignment too.  Anyway if we don't, we'll
+                 * get an error from the kernel.
+                 */
+                shmaddr &= ~(m_shmlba - 1);
+                if (shmaddr == 0 && (shmflg & SHM_REMAP)) {
+                    return -TARGET_EINVAL;
+                }
+            } else {
+                int require = TARGET_PAGE_SIZE;
+#ifdef TARGET_FORCE_SHMLBA
+                require = t_shmlba;
+#endif
+                /*
+                 * Include host required alignment, as otherwise we cannot
+                 * use host shmat at all.
+                 */
+                require = MAX(require, h_shmlba);
+                if (shmaddr & (require - 1)) {
+                    return -TARGET_EINVAL;
+                }
+            }
+        }
+    } else {
+        if (shmflg & SHM_REMAP) {
+            return -TARGET_EINVAL;
+        }
+    }
+    /* All rounding now manually concluded. */
+    shmflg &= ~SHM_RND;
+
+    /* Find out the length of the shared memory segment. */
     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
     if (is_error(ret)) {
         /* can't get length, bail out */
         return ret;
     }
+    t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz);
+    h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize);
+    m_len = MAX(t_len, h_len);
 
-    shmlba = target_shmlba(cpu_env);
-
-    if (shmaddr & (shmlba - 1)) {
-        if (shmflg & SHM_RND) {
-            shmaddr &= ~(shmlba - 1);
-        } else {
-            return -TARGET_EINVAL;
-        }
-    }
-    if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
+    if (!guest_range_valid_untagged(shmaddr, m_len)) {
         return -TARGET_EINVAL;
     }
 
     WITH_MMAP_LOCK_GUARD() {
-        void *host_raddr;
+        bool mapped = false;
+        void *want, *test;
         abi_ulong last;
 
-        if (shmaddr) {
-            host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
+        if (!shmaddr) {
+            shmaddr = mmap_find_vma(0, m_len, m_shmlba);
+            if (shmaddr == -1) {
+                return -TARGET_ENOMEM;
+            }
+            mapped = !reserved_va;
+        } else if (shmflg & SHM_REMAP) {
+            /*
+             * If host page size > target page size, the host shmat may map
+             * more memory than the guest expects.  Reject a mapping that
+             * would replace memory in the unexpected gap.
+             * TODO: Could be fixed with softmmu.
+             */
+            if (t_len < h_len &&
+                !page_check_range_empty(shmaddr + t_len,
+                                        shmaddr + h_len - 1)) {
+                return -TARGET_EINVAL;
+            }
         } else {
-            abi_ulong mmap_start;
+            if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) {
+                return -TARGET_EINVAL;
+            }
+        }
 
-            /* In order to use the host shmat, we need to honor host SHMLBA.  */
-            mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
-                                       MAX(SHMLBA, shmlba));
+        /* All placement is now complete. */
+        want = (void *)g2h_untagged(shmaddr);
 
-            if (mmap_start == -1) {
-                return -TARGET_ENOMEM;
+        /*
+         * Map anonymous pages across the entire range, then remap with
+         * the shared memory.  This is required for a number of corner
+         * cases for which host and guest page sizes differ.
+         */
+        if (h_len != t_len) {
+            int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE);
+            int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS
+                       | (reserved_va || (shmflg & SHM_REMAP)
+                          ? MAP_FIXED : MAP_FIXED_NOREPLACE);
+
+            test = mmap(want, m_len, mmap_p, mmap_f, -1, 0);
+            if (unlikely(test != want)) {
+                /* shmat returns EINVAL not EEXIST like mmap. */
+                ret = (test == MAP_FAILED && errno != EEXIST
+                       ? get_errno(-1) : -TARGET_EINVAL);
+                if (mapped) {
+                    do_munmap(want, m_len);
+                }
+                return ret;
             }
-            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
-                               shmflg | SHM_REMAP);
+            mapped = true;
         }
 
-        if (host_raddr == (void *)-1) {
-            return get_errno(-1);
+        if (reserved_va || mapped) {
+            shmflg |= SHM_REMAP;
+        }
+        test = shmat(shmid, want, shmflg);
+        if (test == MAP_FAILED) {
+            ret = get_errno(-1);
+            if (mapped) {
+                do_munmap(want, m_len);
+            }
+            return ret;
         }
-        raddr = h2g(host_raddr);
-        last = raddr + shm_info.shm_segsz - 1;
+        assert(test == want);
 
-        page_set_flags(raddr, last,
+        last = shmaddr + m_len - 1;
+        page_set_flags(shmaddr, last,
                        PAGE_VALID | PAGE_RESET | PAGE_READ |
-                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
+                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) |
+                       (shmflg & SHM_EXEC ? PAGE_EXEC : 0));
 
-        shm_region_rm_complete(raddr, last);
-        shm_region_add(raddr, last);
+        shm_region_rm_complete(shmaddr, last);
+        shm_region_add(shmaddr, last);
     }
 
     /*
@@ -1312,7 +1404,15 @@ abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
         tb_flush(cpu);
     }
 
-    return raddr;
+    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
+        FILE *f = qemu_log_trylock();
+        if (f) {
+            fprintf(f, "page layout changed following shmat\n");
+            page_dump(f);
+            qemu_log_unlock(f);
+        }
+    }
+    return shmaddr;
 }
 
 abi_long target_shmdt(abi_ulong shmaddr)
-- 
cgit v1.2.3


From 04dadd22aed00e5a2955ab078d7edd676812cf41 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Fri, 23 Feb 2024 12:31:40 +0100
Subject: tests/tcg: Check that shmat() does not break /proc/self/maps

Add a regression test for a recently fixed issue, where shmat()
desynced the guest and the host view of the address space and caused
open("/proc/self/maps") to SEGV.

Signed-off-by: Ilya Leoshkevich
Message-Id:
Signed-off-by: Richard Henderson
---
 tests/tcg/multiarch/linux/linux-shmat-maps.c | 55 ++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 tests/tcg/multiarch/linux/linux-shmat-maps.c

diff --git a/tests/tcg/multiarch/linux/linux-shmat-maps.c b/tests/tcg/multiarch/linux/linux-shmat-maps.c
new file mode 100644
index 0000000000..0ccf7a973a
--- /dev/null
+++ b/tests/tcg/multiarch/linux/linux-shmat-maps.c
@@ -0,0 +1,55 @@
+/*
+ * Test that shmat() does not break /proc/self/maps.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <unistd.h>
+
+int main(void)
+{
+    char buf[128];
+    int err, fd;
+    int shmid;
+    ssize_t n;
+    void *p;
+
+    shmid = shmget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
+    assert(shmid != -1);
+
+    /*
+     * The original bug required a non-NULL address, which skipped the
+     * mmap_find_vma step, which could result in a host mapping smaller
+     * than the target mapping.  Choose an address at random.
+     */
+    p = shmat(shmid, (void *)0x800000, SHM_RND);
+    if (p == (void *)-1) {
+        /*
+         * Because we are now running the testcase for all guests for which
+         * we have a cross-compiler, the above random address might conflict
+         * with the guest executable in some way.  Rather than stopping,
+         * continue with a system supplied address, which should never fail.
+         */
+        p = shmat(shmid, NULL, 0);
+        assert(p != (void *)-1);
+    }
+
+    fd = open("/proc/self/maps", O_RDONLY);
+    assert(fd != -1);
+    do {
+        n = read(fd, buf, sizeof(buf));
+        assert(n >= 0);
+    } while (n != 0);
+    close(fd);
+
+    err = shmdt(p);
+    assert(err == 0);
+    err = shmctl(shmid, IPC_RMID, NULL);
+    assert(err == 0);
+
+    return EXIT_SUCCESS;
+}
-- 
cgit v1.2.3
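
As a closing note on the series: the combined-alignment rounding that
target_shmat now performs can be exercised in isolation.  The sketch
below uses illustrative names (t_shmlba and h_shmlba mirror the patch;
the values in main are made up) and assumes both alignments are powers
of two, which holds for the architectures QEMU supports:

    #include <stdint.h>
    #include <stdio.h>

    /* Round an attach address down to the stricter of the guest and
       host SHMLBA, as SHM_RND now behaves in target_shmat. */
    static uintptr_t round_shmaddr(uintptr_t addr, uintptr_t t_shmlba,
                                   uintptr_t h_shmlba)
    {
        uintptr_t m_shmlba = t_shmlba > h_shmlba ? t_shmlba : h_shmlba;
        return addr & ~(m_shmlba - 1);
    }

    int main(void)
    {
        /* 64KiB guest SHMLBA vs 4KiB host pages: the stricter 64KiB
           unit wins, so 0x812345 rounds down to 0x810000 rather than
           the 0x812000 that 4KiB rounding alone would give. */
        printf("%#lx\n",
               (unsigned long)round_shmaddr(0x812345, 0x10000, 0x1000));
        return 0;
    }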