author     Peter Maydell <peter.maydell@linaro.org>   2024-03-05 09:45:22 +0000
committer  Peter Maydell <peter.maydell@linaro.org>   2024-03-05 09:45:22 +0000
commit     4eac9dfbd72d346505642fb45ac3141c7eb2c516 (patch)
tree       c69ae2cc7e1afa1f6e80eef5fee6b3da73cca10e
parent     52e7db443bd8d233acc3977bd150bdadb62db86c (diff)
parent     04dadd22aed00e5a2955ab078d7edd676812cf41 (diff)
Merge tag 'pull-tcg-20240301' of https://gitlab.com/rth7680/qemu into staging
linux-user: Rewrite elf coredump
tcg/aarch64: Apple does not align __int128_t in even registers
accel/tcg: Fixes for page tables in mmio memory
linux-user: Remove qemu_host_page_{size,mask}, HOST_PAGE_ALIGN
migration: Remove qemu_host_page_size
hw/tpm: Remove qemu_host_page_size
softmmu: Remove qemu_host_page_{size,mask}, HOST_PAGE_ALIGN
linux-user: Split and reorganize target_mmap.
*-user: Deprecate and disable -p pagesize
linux-user: Allow TARGET_PAGE_BITS_VARY
target/alpha: Enable TARGET_PAGE_BITS_VARY for user-only
target/arm: Enable TARGET_PAGE_BITS_VARY for AArch64 user-only
target/ppc: Enable TARGET_PAGE_BITS_VARY for user-only
linux-user: Remove pgb_dynamic alignment assertion
tcg/optimize: fix uninitialized variable
linux-user: Rewrite shmat
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmXiXxQdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/H3QgApu4OgadviJuOBenT
# yaGiq+iG4wTL5vVZFK8CgMtq59dJbgJSCooh7U8dn5hIhVuvOU7odUm6embt+4WZ
# 0fDZIjrRvdDMM3LdLFhfdZszMNg6w2ceN9dn5iLkW3wxjRBpTzZNbxhh2Sg308+Q
# oNd+MlYLijDvQP97+tlQ/PBtndLfV5FkpU74ZinWRgcpcT6oH9sP6TRlAVttefy7
# 3GsIXhDKGoDa/0Jpy86qE//3FUaVRqqcNlAIPXMf47ABQ2y2lZlwsfyty7s55sVW
# KgdXdH1GiCgxIonVg4bYvovnwKVH5xHlpsJY48jQtBXR/4exPBFBpeTc422E0Sed
# swpayg==
# =W3pb
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 01 Mar 2024 23:04:52 GMT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-tcg-20240301' of https://gitlab.com/rth7680/qemu: (60 commits)
tests/tcg: Check that shmat() does not break /proc/self/maps
linux-user: Rewrite target_shmat
linux-user: Add strace for shmat
linux-user/loongarch64: Remove TARGET_FORCE_SHMLBA
linux-user/x86_64: Handle the vsyscall page in open_self_maps_{2,4}
tcg/optimize: fix uninitialized variable
linux-user: Remove pgb_dynamic alignment assertion
target/alpha: Enable TARGET_PAGE_BITS_VARY for user-only
target/ppc: Enable TARGET_PAGE_BITS_VARY for user-only
linux-user: Bound mmap_min_addr by host page size
target/arm: Enable TARGET_PAGE_BITS_VARY for AArch64 user-only
linux-user: Allow TARGET_PAGE_BITS_VARY
accel/tcg: Disconnect TargetPageDataNode from page size
cpu: Remove page_size_init
*-user: Deprecate and disable -p pagesize
tests/tcg: Extend file in linux-madvise.c
tests/tcg: Remove run-test-mmap-*
linux-user: Split out mmap_h_gt_g
linux-user: Split out mmap_h_lt_g
linux-user: Split out mmap_h_eq_g
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
39 files changed, 1126 insertions, 1006 deletions
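A recurring mechanical change throughout the diff below is dropping the qemu_host_page_size / qemu_host_page_mask globals and HOST_PAGE_ALIGN() in favour of calling qemu_real_host_page_size() at the point of use and aligning with plain arithmetic (`addr & -page_size` to round down, ROUND_UP() to round up). The following standalone sketch (not QEMU code; ROUND_UP_POW2 and the variable names are illustrative stand-ins for QEMU's ROUND_UP() and its callers) shows the alignment identities the series relies on, assuming the page size is a power of two:

    /*
     * Minimal sketch of the page-alignment arithmetic used in the series:
     * query the real host page size where needed, round down with a
     * negative mask, round up with a ROUND_UP-style expression.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Round-up helper, equivalent in spirit to QEMU's ROUND_UP() macro. */
    #define ROUND_UP_POW2(x, sz)  (((x) + (sz) - 1) & -(uintptr_t)(sz))

    int main(void)
    {
        /* Host page size is a power of two, e.g. 4096 or 16384. */
        uintptr_t host_page_size = (uintptr_t)sysconf(_SC_PAGESIZE);
        uintptr_t addr = 0x12345;

        /* addr & -host_page_size: round down to a host page boundary,
         * replacing "addr & qemu_host_page_mask". */
        uintptr_t start = addr & -host_page_size;

        /* ROUND_UP(addr, host_page_size): round up to a host page boundary,
         * replacing HOST_PAGE_ALIGN(addr). */
        uintptr_t aligned = ROUND_UP_POW2(addr, host_page_size);

        assert(start % host_page_size == 0 && start <= addr);
        assert(aligned % host_page_size == 0 && aligned >= addr);

        printf("page=%zu down=0x%zx up=0x%zx\n",
               (size_t)host_page_size, (size_t)start, (size_t)aligned);
        return 0;
    }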
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 977576ca14..52239a441f 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -396,6 +396,14 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env) uint64_t cs_base; uint32_t flags, cflags; + /* + * By definition we've just finished a TB, so I/O is OK. + * Avoid the possibility of calling cpu_io_recompile() if + * a page table walk triggered by tb_lookup() calling + * probe_access_internal() happens to touch an MMIO device. + * The next TB, if we chain to it, will clear the flag again. + */ + cpu->neg.can_do_io = true; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); cflags = curr_cflags(cpu); diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 047cd2cc0a..6243bcb179 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2022,7 +2022,6 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, MemoryRegion *mr; hwaddr mr_offset; MemTxAttrs attrs; - uint64_t ret; tcg_debug_assert(size > 0 && size <= 8); @@ -2030,12 +2029,9 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); - ret = int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, - type, ra, mr, mr_offset); - bql_unlock(); - - return ret; + BQL_LOCK_GUARD(); + return int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, + type, ra, mr, mr_offset); } static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, @@ -2054,13 +2050,11 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); + BQL_LOCK_GUARD(); a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset); b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset + size - 8); - bql_unlock(); - return int128_make128(b, a); } @@ -2569,7 +2563,6 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, hwaddr mr_offset; MemoryRegion *mr; MemTxAttrs attrs; - uint64_t ret; tcg_debug_assert(size > 0 && size <= 8); @@ -2577,12 +2570,9 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); - ret = int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, - ra, mr, mr_offset); - bql_unlock(); - - return ret; + BQL_LOCK_GUARD(); + return int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, + ra, mr, mr_offset); } static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, @@ -2593,7 +2583,6 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, MemoryRegion *mr; hwaddr mr_offset; MemTxAttrs attrs; - uint64_t ret; tcg_debug_assert(size > 8 && size <= 16); @@ -2601,14 +2590,11 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - bql_lock(); + BQL_LOCK_GUARD(); int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8, mmu_idx, ra, mr, mr_offset); - ret = int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, - size - 8, mmu_idx, ra, mr, mr_offset + 8); - bql_unlock(); - - return ret; + return int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, + size - 8, mmu_idx, ra, mr, mr_offset + 8); } /* diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c 
index 1c695efe02..c1f57e894a 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -256,7 +256,6 @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) void page_init(void) { - page_size_init(); page_table_config_init(); } diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index 68b252cb8e..3cac3a78c4 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -651,16 +651,17 @@ void page_protect(tb_page_addr_t address) { PageFlagsNode *p; target_ulong start, last; + int host_page_size = qemu_real_host_page_size(); int prot; assert_memory_lock(); - if (qemu_host_page_size <= TARGET_PAGE_SIZE) { + if (host_page_size <= TARGET_PAGE_SIZE) { start = address & TARGET_PAGE_MASK; last = start + TARGET_PAGE_SIZE - 1; } else { - start = address & qemu_host_page_mask; - last = start + qemu_host_page_size - 1; + start = address & -host_page_size; + last = start + host_page_size - 1; } p = pageflags_find(start, last); @@ -671,7 +672,7 @@ void page_protect(tb_page_addr_t address) if (unlikely(p->itree.last < last)) { /* More than one protection region covers the one host page. */ - assert(TARGET_PAGE_SIZE < qemu_host_page_size); + assert(TARGET_PAGE_SIZE < host_page_size); while ((p = pageflags_next(p, start, last)) != NULL) { prot |= p->flags; } @@ -679,7 +680,7 @@ void page_protect(tb_page_addr_t address) if (prot & PAGE_WRITE) { pageflags_set_clear(start, last, 0, PAGE_WRITE); - mprotect(g2h_untagged(start), qemu_host_page_size, + mprotect(g2h_untagged(start), last - start + 1, prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE); } } @@ -725,18 +726,19 @@ int page_unprotect(target_ulong address, uintptr_t pc) } #endif } else { + int host_page_size = qemu_real_host_page_size(); target_ulong start, len, i; int prot; - if (qemu_host_page_size <= TARGET_PAGE_SIZE) { + if (host_page_size <= TARGET_PAGE_SIZE) { start = address & TARGET_PAGE_MASK; len = TARGET_PAGE_SIZE; prot = p->flags | PAGE_WRITE; pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc); } else { - start = address & qemu_host_page_mask; - len = qemu_host_page_size; + start = address & -host_page_size; + len = host_page_size; prot = 0; for (i = 0; i < len; i += TARGET_PAGE_SIZE) { @@ -862,7 +864,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, typedef struct TargetPageDataNode { struct rcu_head rcu; IntervalTreeNode itree; - char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned)); + char data[] __attribute__((aligned)); } TargetPageDataNode; static IntervalTreeRoot targetdata_root; @@ -900,7 +902,8 @@ void page_reset_target_data(target_ulong start, target_ulong last) n_last = MIN(last, n->last); p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; - memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE); + memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0, + p_len * TARGET_PAGE_DATA_SIZE); } } @@ -908,7 +911,7 @@ void *page_get_target_data(target_ulong address) { IntervalTreeNode *n; TargetPageDataNode *t; - target_ulong page, region; + target_ulong page, region, p_ofs; page = address & TARGET_PAGE_MASK; region = address & TBD_MASK; @@ -924,7 +927,8 @@ void *page_get_target_data(target_ulong address) mmap_lock(); n = interval_tree_iter_first(&targetdata_root, page, page); if (!n) { - t = g_new0(TargetPageDataNode, 1); + t = g_malloc0(sizeof(TargetPageDataNode) + + TPD_PAGES * TARGET_PAGE_DATA_SIZE); n = &t->itree; n->start = region; n->last = region | ~TBD_MASK; @@ 
-934,7 +938,8 @@ void *page_get_target_data(target_ulong address) } t = container_of(n, TargetPageDataNode, itree); - return t->data[(page - region) >> TARGET_PAGE_BITS]; + p_ofs = (page - region) >> TARGET_PAGE_BITS; + return t->data + p_ofs * TARGET_PAGE_DATA_SIZE; } #else void page_reset_target_data(target_ulong start, target_ulong last) { } diff --git a/bsd-user/main.c b/bsd-user/main.c index e5efb7b845..512d4ab69f 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -49,6 +49,13 @@ #include "host-os.h" #include "target_arch_cpu.h" + +/* + * TODO: Remove these and rely only on qemu_real_host_page_size(). + */ +uintptr_t qemu_host_page_size; +intptr_t qemu_host_page_mask; + static bool opt_one_insn_per_tb; uintptr_t guest_base; bool have_guest_base; @@ -307,6 +314,9 @@ int main(int argc, char **argv) (void) envlist_setenv(envlist, *wrk); } + qemu_host_page_size = getpagesize(); + qemu_host_page_size = MAX(qemu_host_page_size, TARGET_PAGE_SIZE); + cpu_model = NULL; qemu_add_opts(&qemu_trace_opts); @@ -364,11 +374,12 @@ int main(int argc, char **argv) } else if (!strcmp(r, "L")) { interp_prefix = argv[optind++]; } else if (!strcmp(r, "p")) { - qemu_host_page_size = atoi(argv[optind++]); - if (qemu_host_page_size == 0 || - (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) { - fprintf(stderr, "page size must be a power of two\n"); - exit(1); + unsigned size, want = qemu_real_host_page_size(); + + r = argv[optind++]; + if (qemu_strtoui(r, NULL, 10, &size) || size != want) { + warn_report("Deprecated page size option cannot " + "change host page size (%u)", want); } } else if (!strcmp(r, "g")) { gdbstub = g_strdup(argv[optind++]); @@ -403,6 +414,8 @@ int main(int argc, char **argv) } } + qemu_host_page_mask = -qemu_host_page_size; + /* init debug */ { int mask = 0; diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index dc842fffa7..c05c512767 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -40,6 +40,13 @@ extern char **environ; #include "qemu-os.h" /* + * TODO: Remove these and rely only on qemu_real_host_page_size(). + */ +extern uintptr_t qemu_host_page_size; +extern intptr_t qemu_host_page_mask; +#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size) + +/* * This struct is used to hold certain information about the image. Basically, * it replicates in user space what would be certain task_struct fields in the * kernel diff --git a/cpu-target.c b/cpu-target.c index 86444cc2c6..4c0621bf33 100644 --- a/cpu-target.c +++ b/cpu-target.c @@ -45,9 +45,6 @@ #include "trace/trace-root.h" #include "qemu/accel.h" -uintptr_t qemu_host_page_size; -intptr_t qemu_host_page_mask; - #ifndef CONFIG_USER_ONLY static int cpu_common_post_load(void *opaque, int version_id) { @@ -474,16 +471,3 @@ const char *target_name(void) { return TARGET_NAME; } - -void page_size_init(void) -{ - /* NOTE: we can always suppose that qemu_host_page_size >= - TARGET_PAGE_SIZE */ - if (qemu_host_page_size == 0) { - qemu_host_page_size = qemu_real_host_page_size(); - } - if (qemu_host_page_size < TARGET_PAGE_SIZE) { - qemu_host_page_size = TARGET_PAGE_SIZE; - } - qemu_host_page_mask = -(intptr_t)qemu_host_page_size; -} diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 36bd3e15ef..8565644da6 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -63,6 +63,16 @@ as short-form boolean values, and passed to plugins as ``arg_name=on``. However, short-form booleans are deprecated and full explicit ``arg_name=on`` form is preferred. 
+User-mode emulator command line arguments +----------------------------------------- + +``-p`` (since 9.0) +'''''''''''''''''' + +The ``-p`` option pretends to control the host page size. However, +it is not possible to change the host page size, and using the +option only causes failures. + QEMU Machine Protocol (QMP) commands ------------------------------------ diff --git a/docs/user/main.rst b/docs/user/main.rst index 7e7ad07409..d5fbb78d3c 100644 --- a/docs/user/main.rst +++ b/docs/user/main.rst @@ -87,9 +87,6 @@ Debug options: Activate logging of the specified items (use '-d help' for a list of log items) -``-p pagesize`` - Act as if the host page size was 'pagesize' bytes - ``-g port`` Wait gdb connection to port diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c index 7f74e26ec6..f27ed6c35e 100644 --- a/hw/tpm/tpm_ppi.c +++ b/hw/tpm/tpm_ppi.c @@ -47,8 +47,10 @@ void tpm_ppi_reset(TPMPPI *tpmppi) void tpm_ppi_init(TPMPPI *tpmppi, MemoryRegion *m, hwaddr addr, Object *obj) { - tpmppi->buf = qemu_memalign(qemu_real_host_page_size(), - HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); + size_t host_page_size = qemu_real_host_page_size(); + + tpmppi->buf = qemu_memalign(host_page_size, + ROUND_UP(TPM_PPI_ADDR_SIZE, host_page_size)); memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", TPM_PPI_ADDR_SIZE, tpmppi->buf); vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 9ead1be100..6346df17ce 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -20,13 +20,6 @@ void cpu_exec_init_all(void); void cpu_exec_step_atomic(CPUState *cpu); -/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even - * when intptr_t is 32-bit and we are aligning a long long. - */ -extern uintptr_t qemu_host_page_size; -extern intptr_t qemu_host_page_mask; - -#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size) #define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size()) /* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index af1a29526d..d0e345419f 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -1179,8 +1179,6 @@ bool target_words_bigendian(void); const char *target_name(void); -void page_size_init(void); - #ifdef NEED_CPU_H #ifndef CONFIG_USER_ONLY diff --git a/linux-user/elfload.c b/linux-user/elfload.c index b8eef893d0..0c299a7c15 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -460,6 +460,7 @@ enum { static bool init_guest_commpage(void) { ARMCPU *cpu = ARM_CPU(thread_cpu); + int host_page_size = qemu_real_host_page_size(); abi_ptr commpage; void *want; void *addr; @@ -472,10 +473,12 @@ static bool init_guest_commpage(void) return true; } - commpage = HI_COMMPAGE & -qemu_host_page_size; + commpage = HI_COMMPAGE & -host_page_size; want = g2h_untagged(commpage); - addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + (commpage < reserved_va ? MAP_FIXED : MAP_FIXED_NOREPLACE), + -1, 0); if (addr == MAP_FAILED) { perror("Allocating guest commpage"); @@ -488,12 +491,12 @@ static bool init_guest_commpage(void) /* Set kernel helper versions; rest of page is 0. 
*/ __put_user(5, (uint32_t *)g2h_untagged(0xffff0ffcu)); - if (mprotect(addr, qemu_host_page_size, PROT_READ)) { + if (mprotect(addr, host_page_size, PROT_READ)) { perror("Protecting guest commpage"); exit(EXIT_FAILURE); } - page_set_flags(commpage, commpage | ~qemu_host_page_mask, + page_set_flags(commpage, commpage | (host_page_size - 1), PAGE_READ | PAGE_EXEC | PAGE_VALID); return true; } @@ -1532,10 +1535,14 @@ static bool init_guest_commpage(void) 0x3a, 0x68, 0x3b, 0x00, /* trap 0 */ }; - void *want = g2h_untagged(LO_COMMPAGE & -qemu_host_page_size); - void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + int host_page_size = qemu_real_host_page_size(); + void *want, *addr; + want = g2h_untagged(LO_COMMPAGE & -host_page_size); + addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + (reserved_va ? MAP_FIXED : MAP_FIXED_NOREPLACE), + -1, 0); if (addr == MAP_FAILED) { perror("Allocating guest commpage"); exit(EXIT_FAILURE); @@ -1544,9 +1551,9 @@ static bool init_guest_commpage(void) return false; } - memcpy(addr, kuser_page, sizeof(kuser_page)); + memcpy(g2h_untagged(LO_COMMPAGE), kuser_page, sizeof(kuser_page)); - if (mprotect(addr, qemu_host_page_size, PROT_READ)) { + if (mprotect(addr, host_page_size, PROT_READ)) { perror("Protecting guest commpage"); exit(EXIT_FAILURE); } @@ -1970,16 +1977,20 @@ static inline void init_thread(struct target_pt_regs *regs, static bool init_guest_commpage(void) { - void *want = g2h_untagged(LO_COMMPAGE); - void *addr = mmap(want, qemu_host_page_size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + /* If reserved_va, then we have already mapped 0 page on the host. */ + if (!reserved_va) { + void *want, *addr; - if (addr == MAP_FAILED) { - perror("Allocating guest commpage"); - exit(EXIT_FAILURE); - } - if (addr != want) { - return false; + want = g2h_untagged(LO_COMMPAGE); + addr = mmap(want, TARGET_PAGE_SIZE, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); + if (addr == MAP_FAILED) { + perror("Allocating guest commpage"); + exit(EXIT_FAILURE); + } + if (addr != want) { + return false; + } } /* @@ -2679,13 +2690,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff)); NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); - if ((info->alignment & ~qemu_host_page_mask) != 0) { - /* Target doesn't support host page size alignment */ - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); - } else { - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(MAX(TARGET_PAGE_SIZE, - qemu_host_page_size))); - } + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info ? interp_info->load_addr : 0)); NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); NEW_AUX_ENT(AT_ENTRY, info->entry); @@ -2893,7 +2898,7 @@ static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr, /* Add any HI_COMMPAGE not covered by reserved_va. */ if (reserved_va < HI_COMMPAGE) { - ga->bounds[n][0] = HI_COMMPAGE & qemu_host_page_mask; + ga->bounds[n][0] = HI_COMMPAGE & qemu_real_host_page_mask(); ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1; n++; } @@ -3017,8 +3022,6 @@ static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr, uintptr_t brk, ret; PGBAddrs ga; - assert(QEMU_IS_ALIGNED(guest_loaddr, align)); - /* Try the identity map first. 
*/ if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) { brk = (uintptr_t)sbrk(0); @@ -3075,7 +3078,7 @@ void probe_guest_base(const char *image_name, abi_ulong guest_loaddr, abi_ulong guest_hiaddr) { /* In order to use host shmat, we must be able to honor SHMLBA. */ - uintptr_t align = MAX(SHMLBA, qemu_host_page_size); + uintptr_t align = MAX(SHMLBA, TARGET_PAGE_SIZE); /* Sanity check the guest binary. */ if (reserved_va) { @@ -3912,8 +3915,9 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ - target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); } #ifdef TARGET_MIPS info->interp_fp_abi = interp_info.fp_abi; @@ -3963,6 +3967,8 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) } #ifdef USE_ELF_CORE_DUMP +#include "exec/translate-all.h" + /* * Definitions to generate Intel SVR4-like core files. * These mostly have the same names as the SVR4 types with "target_elf_" @@ -4002,18 +4008,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) * Example for ARM target is provided in this file. */ -/* An ELF note in memory */ -struct memelfnote { - const char *name; - size_t namesz; - size_t namesz_rounded; - int type; - size_t datasz; - size_t datasz_rounded; - void *data; - size_t notesz; -}; - struct target_elf_siginfo { abi_int si_signo; /* signal number */ abi_int si_code; /* extra code */ @@ -4053,77 +4047,6 @@ struct target_elf_prpsinfo { char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -/* Here is the structure in which status of each thread is captured. */ -struct elf_thread_status { - QTAILQ_ENTRY(elf_thread_status) ets_link; - struct target_elf_prstatus prstatus; /* NT_PRSTATUS */ -#if 0 - elf_fpregset_t fpu; /* NT_PRFPREG */ - struct task_struct *thread; - elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */ -#endif - struct memelfnote notes[1]; - int num_notes; -}; - -struct elf_note_info { - struct memelfnote *notes; - struct target_elf_prstatus *prstatus; /* NT_PRSTATUS */ - struct target_elf_prpsinfo *psinfo; /* NT_PRPSINFO */ - - QTAILQ_HEAD(, elf_thread_status) thread_list; -#if 0 - /* - * Current version of ELF coredump doesn't support - * dumping fp regs etc. - */ - elf_fpregset_t *fpu; - elf_fpxregset_t *xfpu; - int thread_status_size; -#endif - int notes_size; - int numnote; -}; - -struct vm_area_struct { - target_ulong vma_start; /* start vaddr of memory region */ - target_ulong vma_end; /* end vaddr of memory region */ - abi_ulong vma_flags; /* protection etc. 
flags for the region */ - QTAILQ_ENTRY(vm_area_struct) vma_link; -}; - -struct mm_struct { - QTAILQ_HEAD(, vm_area_struct) mm_mmap; - int mm_count; /* number of mappings */ -}; - -static struct mm_struct *vma_init(void); -static void vma_delete(struct mm_struct *); -static int vma_add_mapping(struct mm_struct *, target_ulong, - target_ulong, abi_ulong); -static int vma_get_mapping_count(const struct mm_struct *); -static struct vm_area_struct *vma_first(const struct mm_struct *); -static struct vm_area_struct *vma_next(struct vm_area_struct *); -static abi_ulong vma_dump_size(const struct vm_area_struct *); -static int vma_walker(void *priv, target_ulong start, target_ulong end, - unsigned long flags); - -static void fill_elf_header(struct elfhdr *, int, uint16_t, uint32_t); -static void fill_note(struct memelfnote *, const char *, int, - unsigned int, void *); -static void fill_prstatus(struct target_elf_prstatus *, const TaskState *, int); -static int fill_psinfo(struct target_elf_prpsinfo *, const TaskState *); -static void fill_auxv_note(struct memelfnote *, const TaskState *); -static void fill_elf_note_phdr(struct elf_phdr *, int, off_t); -static size_t note_size(const struct memelfnote *); -static void free_note_info(struct elf_note_info *); -static int fill_note_info(struct elf_note_info *, long, const CPUArchState *); -static void fill_thread_info(struct elf_note_info *, const CPUArchState *); - -static int dump_write(int, const void *, size_t); -static int write_note(struct memelfnote *, int); -static int write_note_info(struct elf_note_info *, int); - #ifdef BSWAP_NEEDED static void bswap_prstatus(struct target_elf_prstatus *prstatus) { @@ -4166,145 +4089,66 @@ static inline void bswap_note(struct elf_note *en) { } #endif /* BSWAP_NEEDED */ /* - * Minimal support for linux memory regions. These are needed - * when we are finding out what memory exactly belongs to - * emulated process. No locks needed here, as long as - * thread that received the signal is stopped. - */ - -static struct mm_struct *vma_init(void) -{ - struct mm_struct *mm; - - if ((mm = g_malloc(sizeof (*mm))) == NULL) - return (NULL); - - mm->mm_count = 0; - QTAILQ_INIT(&mm->mm_mmap); - - return (mm); -} - -static void vma_delete(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - while ((vma = vma_first(mm)) != NULL) { - QTAILQ_REMOVE(&mm->mm_mmap, vma, vma_link); - g_free(vma); - } - g_free(mm); -} - -static int vma_add_mapping(struct mm_struct *mm, target_ulong start, - target_ulong end, abi_ulong flags) -{ - struct vm_area_struct *vma; - - if ((vma = g_malloc0(sizeof (*vma))) == NULL) - return (-1); - - vma->vma_start = start; - vma->vma_end = end; - vma->vma_flags = flags; - - QTAILQ_INSERT_TAIL(&mm->mm_mmap, vma, vma_link); - mm->mm_count++; - - return (0); -} - -static struct vm_area_struct *vma_first(const struct mm_struct *mm) -{ - return (QTAILQ_FIRST(&mm->mm_mmap)); -} - -static struct vm_area_struct *vma_next(struct vm_area_struct *vma) -{ - return (QTAILQ_NEXT(vma, vma_link)); -} - -static int vma_get_mapping_count(const struct mm_struct *mm) -{ - return (mm->mm_count); -} - -/* * Calculate file (dump) size of given memory region. */ -static abi_ulong vma_dump_size(const struct vm_area_struct *vma) +static size_t vma_dump_size(target_ulong start, target_ulong end, + unsigned long flags) { - /* if we cannot even read the first page, skip it */ - if (!access_ok_untagged(VERIFY_READ, vma->vma_start, TARGET_PAGE_SIZE)) - return (0); + /* The area must be readable. 
*/ + if (!(flags & PAGE_READ)) { + return 0; + } /* * Usually we don't dump executable pages as they contain * non-writable code that debugger can read directly from - * target library etc. However, thread stacks are marked - * also executable so we read in first page of given region - * and check whether it contains elf header. If there is - * no elf header, we dump it. + * target library etc. If there is no elf header, we dump it. */ - if (vma->vma_flags & PROT_EXEC) { - char page[TARGET_PAGE_SIZE]; - - if (copy_from_user(page, vma->vma_start, sizeof (page))) { - return 0; - } - if ((page[EI_MAG0] == ELFMAG0) && - (page[EI_MAG1] == ELFMAG1) && - (page[EI_MAG2] == ELFMAG2) && - (page[EI_MAG3] == ELFMAG3)) { - /* - * Mappings are possibly from ELF binary. Don't dump - * them. - */ - return (0); - } + if (!(flags & PAGE_WRITE_ORG) && + (flags & PAGE_EXEC) && + memcmp(g2h_untagged(start), ELFMAG, SELFMAG) == 0) { + return 0; } - return (vma->vma_end - vma->vma_start); + return end - start; } -static int vma_walker(void *priv, target_ulong start, target_ulong end, - unsigned long flags) +static size_t size_note(const char *name, size_t datasz) { - struct mm_struct *mm = (struct mm_struct *)priv; + size_t namesz = strlen(name) + 1; - vma_add_mapping(mm, start, end, flags); - return (0); + namesz = ROUND_UP(namesz, 4); + datasz = ROUND_UP(datasz, 4); + + return sizeof(struct elf_note) + namesz + datasz; } -static void fill_note(struct memelfnote *note, const char *name, int type, - unsigned int sz, void *data) +static void *fill_note(void **pptr, int type, const char *name, size_t datasz) { - unsigned int namesz; + void *ptr = *pptr; + struct elf_note *n = ptr; + size_t namesz = strlen(name) + 1; - namesz = strlen(name) + 1; - note->name = name; - note->namesz = namesz; - note->namesz_rounded = roundup(namesz, sizeof (int32_t)); - note->type = type; - note->datasz = sz; - note->datasz_rounded = roundup(sz, sizeof (int32_t)); + n->n_namesz = namesz; + n->n_descsz = datasz; + n->n_type = type; + bswap_note(n); - note->data = data; + ptr += sizeof(*n); + memcpy(ptr, name, namesz); - /* - * We calculate rounded up note size here as specified by - * ELF document. 
- */ - note->notesz = sizeof (struct elf_note) + - note->namesz_rounded + note->datasz_rounded; + namesz = ROUND_UP(namesz, 4); + datasz = ROUND_UP(datasz, 4); + + *pptr = ptr + namesz + datasz; + return ptr + namesz; } static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, uint32_t flags) { - (void) memset(elf, 0, sizeof(*elf)); + memcpy(elf->e_ident, ELFMAG, SELFMAG); - (void) memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; elf->e_ident[EI_DATA] = ELF_DATA; elf->e_ident[EI_VERSION] = EV_CURRENT; @@ -4322,95 +4166,79 @@ static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, bswap_ehdr(elf); } -static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) +static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset) { phdr->p_type = PT_NOTE; phdr->p_offset = offset; - phdr->p_vaddr = 0; - phdr->p_paddr = 0; phdr->p_filesz = sz; - phdr->p_memsz = 0; - phdr->p_flags = 0; - phdr->p_align = 0; bswap_phdr(phdr, 1); } -static size_t note_size(const struct memelfnote *note) +static void fill_prstatus_note(void *data, const TaskState *ts, + CPUState *cpu, int signr) { - return (note->notesz); -} + /* + * Because note memory is only aligned to 4, and target_elf_prstatus + * may well have higher alignment requirements, fill locally and + * memcpy to the destination afterward. + */ + struct target_elf_prstatus prstatus = { + .pr_info.si_signo = signr, + .pr_cursig = signr, + .pr_pid = ts->ts_tid, + .pr_ppid = getppid(), + .pr_pgrp = getpgrp(), + .pr_sid = getsid(0), + }; -static void fill_prstatus(struct target_elf_prstatus *prstatus, - const TaskState *ts, int signr) -{ - (void) memset(prstatus, 0, sizeof (*prstatus)); - prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; - prstatus->pr_pid = ts->ts_tid; - prstatus->pr_ppid = getppid(); - prstatus->pr_pgrp = getpgrp(); - prstatus->pr_sid = getsid(0); - - bswap_prstatus(prstatus); + elf_core_copy_regs(&prstatus.pr_reg, cpu_env(cpu)); + bswap_prstatus(&prstatus); + memcpy(data, &prstatus, sizeof(prstatus)); } -static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts) +static void fill_prpsinfo_note(void *data, const TaskState *ts) { + /* + * Because note memory is only aligned to 4, and target_elf_prpsinfo + * may well have higher alignment requirements, fill locally and + * memcpy to the destination afterward. 
+ */ + struct target_elf_prpsinfo psinfo; char *base_filename; - unsigned int i, len; - - (void) memset(psinfo, 0, sizeof (*psinfo)); + size_t len; len = ts->info->env_strings - ts->info->arg_strings; - if (len >= ELF_PRARGSZ) - len = ELF_PRARGSZ - 1; - if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_strings, len)) { - return -EFAULT; - } - for (i = 0; i < len; i++) - if (psinfo->pr_psargs[i] == 0) - psinfo->pr_psargs[i] = ' '; - psinfo->pr_psargs[len] = 0; - - psinfo->pr_pid = getpid(); - psinfo->pr_ppid = getppid(); - psinfo->pr_pgrp = getpgrp(); - psinfo->pr_sid = getsid(0); - psinfo->pr_uid = getuid(); - psinfo->pr_gid = getgid(); + len = MIN(len, ELF_PRARGSZ); + memcpy(&psinfo.pr_psargs, g2h_untagged(ts->info->arg_strings), len); + for (size_t i = 0; i < len; i++) { + if (psinfo.pr_psargs[i] == 0) { + psinfo.pr_psargs[i] = ' '; + } + } + + psinfo.pr_pid = getpid(); + psinfo.pr_ppid = getppid(); + psinfo.pr_pgrp = getpgrp(); + psinfo.pr_sid = getsid(0); + psinfo.pr_uid = getuid(); + psinfo.pr_gid = getgid(); base_filename = g_path_get_basename(ts->bprm->filename); /* * Using strncpy here is fine: at max-length, * this field is not NUL-terminated. */ - (void) strncpy(psinfo->pr_fname, base_filename, - sizeof(psinfo->pr_fname)); - + strncpy(psinfo.pr_fname, base_filename, sizeof(psinfo.pr_fname)); g_free(base_filename); - bswap_psinfo(psinfo); - return (0); + + bswap_psinfo(&psinfo); + memcpy(data, &psinfo, sizeof(psinfo)); } -static void fill_auxv_note(struct memelfnote *note, const TaskState *ts) +static void fill_auxv_note(void *data, const TaskState *ts) { - elf_addr_t auxv = (elf_addr_t)ts->info->saved_auxv; - elf_addr_t orig_auxv = auxv; - void *ptr; - int len = ts->info->auxv_len; - - /* - * Auxiliary vector is stored in target process stack. It contains - * {type, value} pairs that we need to dump into note. This is not - * strictly necessary but we do it here for sake of completeness. - */ - - /* read in whole auxv vector and copy it to memelfnote */ - ptr = lock_user(VERIFY_READ, orig_auxv, len, 0); - if (ptr != NULL) { - fill_note(note, "CORE", NT_AUXV, len, ptr); - unlock_user(ptr, auxv, len); - } + memcpy(data, g2h_untagged(ts->info->saved_auxv), ts->info->auxv_len); } /* @@ -4434,27 +4262,9 @@ static int dump_write(int fd, const void *ptr, size_t size) { const char *bufp = (const char *)ptr; ssize_t bytes_written, bytes_left; - struct rlimit dumpsize; - off_t pos; bytes_written = 0; - getrlimit(RLIMIT_CORE, &dumpsize); - if ((pos = lseek(fd, 0, SEEK_CUR))==-1) { - if (errno == ESPIPE) { /* not a seekable stream */ - bytes_left = size; - } else { - return pos; - } - } else { - if (dumpsize.rlim_cur <= pos) { - return -1; - } else if (dumpsize.rlim_cur == RLIM_INFINITY) { - bytes_left = size; - } else { - size_t limit_left=dumpsize.rlim_cur - pos; - bytes_left = limit_left >= size ? 
size : limit_left ; - } - } + bytes_left = size; /* * In normal conditions, single write(2) should do but @@ -4476,135 +4286,76 @@ static int dump_write(int fd, const void *ptr, size_t size) return (0); } -static int write_note(struct memelfnote *men, int fd) +static int wmr_page_unprotect_regions(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { - struct elf_note en; - - en.n_namesz = men->namesz; - en.n_type = men->type; - en.n_descsz = men->datasz; + if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) { + size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size()); - bswap_note(&en); - - if (dump_write(fd, &en, sizeof(en)) != 0) - return (-1); - if (dump_write(fd, men->name, men->namesz_rounded) != 0) - return (-1); - if (dump_write(fd, men->data, men->datasz_rounded) != 0) - return (-1); - - return (0); -} - -static void fill_thread_info(struct elf_note_info *info, const CPUArchState *env) -{ - CPUState *cpu = env_cpu((CPUArchState *)env); - TaskState *ts = (TaskState *)cpu->opaque; - struct elf_thread_status *ets; - - ets = g_malloc0(sizeof (*ets)); - ets->num_notes = 1; /* only prstatus is dumped */ - fill_prstatus(&ets->prstatus, ts, 0); - elf_core_copy_regs(&ets->prstatus.pr_reg, env); - fill_note(&ets->notes[0], "CORE", NT_PRSTATUS, sizeof (ets->prstatus), - &ets->prstatus); - - QTAILQ_INSERT_TAIL(&info->thread_list, ets, ets_link); - - info->notes_size += note_size(&ets->notes[0]); + while (1) { + page_unprotect(start, 0); + if (end - start <= step) { + break; + } + start += step; + } + } + return 0; } -static void init_note_info(struct elf_note_info *info) -{ - /* Initialize the elf_note_info structure so that it is at - * least safe to call free_note_info() on it. Must be - * called before calling fill_note_info(). - */ - memset(info, 0, sizeof (*info)); - QTAILQ_INIT(&info->thread_list); -} +typedef struct { + unsigned count; + size_t size; +} CountAndSizeRegions; -static int fill_note_info(struct elf_note_info *info, - long signr, const CPUArchState *env) +static int wmr_count_and_size_regions(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { -#define NUMNOTES 3 - CPUState *cpu = env_cpu((CPUArchState *)env); - TaskState *ts = (TaskState *)cpu->opaque; - int i; - - info->notes = g_new0(struct memelfnote, NUMNOTES); - if (info->notes == NULL) - return (-ENOMEM); - info->prstatus = g_malloc0(sizeof (*info->prstatus)); - if (info->prstatus == NULL) - return (-ENOMEM); - info->psinfo = g_malloc0(sizeof (*info->psinfo)); - if (info->prstatus == NULL) - return (-ENOMEM); - - /* - * First fill in status (and registers) of current thread - * including process info & aux vector. 
- */ - fill_prstatus(info->prstatus, ts, signr); - elf_core_copy_regs(&info->prstatus->pr_reg, env); - fill_note(&info->notes[0], "CORE", NT_PRSTATUS, - sizeof (*info->prstatus), info->prstatus); - fill_psinfo(info->psinfo, ts); - fill_note(&info->notes[1], "CORE", NT_PRPSINFO, - sizeof (*info->psinfo), info->psinfo); - fill_auxv_note(&info->notes[2], ts); - info->numnote = 3; - - info->notes_size = 0; - for (i = 0; i < info->numnote; i++) - info->notes_size += note_size(&info->notes[i]); - - /* read and fill status of all threads */ - WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) { - CPU_FOREACH(cpu) { - if (cpu == thread_cpu) { - continue; - } - fill_thread_info(info, cpu_env(cpu)); - } - } + CountAndSizeRegions *css = opaque; - return (0); + css->count++; + css->size += vma_dump_size(start, end, flags); + return 0; } -static void free_note_info(struct elf_note_info *info) +typedef struct { + struct elf_phdr *phdr; + off_t offset; +} FillRegionPhdr; + +static int wmr_fill_region_phdr(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { - struct elf_thread_status *ets; + FillRegionPhdr *d = opaque; + struct elf_phdr *phdr = d->phdr; - while (!QTAILQ_EMPTY(&info->thread_list)) { - ets = QTAILQ_FIRST(&info->thread_list); - QTAILQ_REMOVE(&info->thread_list, ets, ets_link); - g_free(ets); - } + phdr->p_type = PT_LOAD; + phdr->p_vaddr = start; + phdr->p_paddr = 0; + phdr->p_filesz = vma_dump_size(start, end, flags); + phdr->p_offset = d->offset; + d->offset += phdr->p_filesz; + phdr->p_memsz = end - start; + phdr->p_flags = (flags & PAGE_READ ? PF_R : 0) + | (flags & PAGE_WRITE_ORG ? PF_W : 0) + | (flags & PAGE_EXEC ? PF_X : 0); + phdr->p_align = ELF_EXEC_PAGESIZE; - g_free(info->prstatus); - g_free(info->psinfo); - g_free(info->notes); + bswap_phdr(phdr, 1); + d->phdr = phdr + 1; + return 0; } -static int write_note_info(struct elf_note_info *info, int fd) +static int wmr_write_region(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { - struct elf_thread_status *ets; - int i, error = 0; + int fd = *(int *)opaque; + size_t size = vma_dump_size(start, end, flags); - /* write prstatus, psinfo and auxv for current thread */ - for (i = 0; i < info->numnote; i++) - if ((error = write_note(&info->notes[i], fd)) != 0) - return (error); - - /* write prstatus for each thread */ - QTAILQ_FOREACH(ets, &info->thread_list, ets_link) { - if ((error = write_note(&ets->notes[0], fd)) != 0) - return (error); + if (!size) { + return 0; } - - return (0); + return dump_write(fd, g2h_untagged(start), size); } /* @@ -4654,151 +4405,125 @@ static int elf_core_dump(int signr, const CPUArchState *env) { const CPUState *cpu = env_cpu((CPUArchState *)env); const TaskState *ts = (const TaskState *)cpu->opaque; - struct vm_area_struct *vma = NULL; - g_autofree char *corefile = NULL; - struct elf_note_info info; - struct elfhdr elf; - struct elf_phdr phdr; struct rlimit dumpsize; - struct mm_struct *mm = NULL; - off_t offset = 0, data_offset = 0; - int segs = 0; + CountAndSizeRegions css; + off_t offset, note_offset, data_offset; + size_t note_size; + int cpus, ret; int fd = -1; - - init_note_info(&info); - - errno = 0; + CPUState *cpu_iter; if (prctl(PR_GET_DUMPABLE) == 0) { return 0; } - if (getrlimit(RLIMIT_CORE, &dumpsize) == 0 && dumpsize.rlim_cur == 0) { + if (getrlimit(RLIMIT_CORE, &dumpsize) < 0 || dumpsize.rlim_cur == 0) { return 0; } - corefile = core_dump_filename(ts); + cpu_list_lock(); + mmap_lock(); - if ((fd = open(corefile, O_WRONLY | O_CREAT, - 
S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) - return (-errno); + /* By unprotecting, we merge vmas that might be split. */ + walk_memory_regions(NULL, wmr_page_unprotect_regions); /* * Walk through target process memory mappings and - * set up structure containing this information. After - * this point vma_xxx functions can be used. + * set up structure containing this information. */ - if ((mm = vma_init()) == NULL) - goto out; + memset(&css, 0, sizeof(css)); + walk_memory_regions(&css, wmr_count_and_size_regions); - walk_memory_regions(mm, vma_walker); - segs = vma_get_mapping_count(mm); - - /* - * Construct valid coredump ELF header. We also - * add one more segment for notes. - */ - fill_elf_header(&elf, segs + 1, ELF_MACHINE, 0); - if (dump_write(fd, &elf, sizeof (elf)) != 0) - goto out; + cpus = 0; + CPU_FOREACH(cpu_iter) { + cpus++; + } - /* fill in the in-memory version of notes */ - if (fill_note_info(&info, signr, env) < 0) - goto out; + offset = sizeof(struct elfhdr); + offset += (css.count + 1) * sizeof(struct elf_phdr); + note_offset = offset; - offset += sizeof (elf); /* elf header */ - offset += (segs + 1) * sizeof (struct elf_phdr); /* program headers */ + offset += size_note("CORE", ts->info->auxv_len); + offset += size_note("CORE", sizeof(struct target_elf_prpsinfo)); + offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus; + note_size = offset - note_offset; + data_offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE); - /* write out notes program header */ - fill_elf_note_phdr(&phdr, info.notes_size, offset); + /* Do not dump if the corefile size exceeds the limit. */ + if (dumpsize.rlim_cur != RLIM_INFINITY + && dumpsize.rlim_cur < data_offset + css.size) { + errno = 0; + goto out; + } - offset += info.notes_size; - if (dump_write(fd, &phdr, sizeof (phdr)) != 0) + { + g_autofree char *corefile = core_dump_filename(ts); + fd = open(corefile, O_WRONLY | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + } + if (fd < 0) { goto out; + } /* - * ELF specification wants data to start at page boundary so - * we align it here. + * There is a fair amount of alignment padding within the notes + * as well as preceeding the process memory. Allocate a zeroed + * block to hold it all. Write all of the headers directly into + * this buffer and then write it out as a block. */ - data_offset = offset = roundup(offset, ELF_EXEC_PAGESIZE); + { + g_autofree void *header = g_malloc0(data_offset); + FillRegionPhdr frp; + void *hptr, *dptr; + + /* Create elf file header. */ + hptr = header; + fill_elf_header(hptr, css.count + 1, ELF_MACHINE, 0); + hptr += sizeof(struct elfhdr); + + /* Create elf program headers. */ + fill_elf_note_phdr(hptr, note_size, note_offset); + hptr += sizeof(struct elf_phdr); + + frp.phdr = hptr; + frp.offset = data_offset; + walk_memory_regions(&frp, wmr_fill_region_phdr); + hptr = frp.phdr; + + /* Create the notes. */ + dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len); + fill_auxv_note(dptr, ts); + + dptr = fill_note(&hptr, NT_PRPSINFO, "CORE", + sizeof(struct target_elf_prpsinfo)); + fill_prpsinfo_note(dptr, ts); + + CPU_FOREACH(cpu_iter) { + dptr = fill_note(&hptr, NT_PRSTATUS, "CORE", + sizeof(struct target_elf_prstatus)); + fill_prstatus_note(dptr, ts, cpu_iter, + cpu_iter == cpu ? signr : 0); + } - /* - * Write program headers for memory regions mapped in - * the target process. 
- */ - for (vma = vma_first(mm); vma != NULL; vma = vma_next(vma)) { - (void) memset(&phdr, 0, sizeof (phdr)); - - phdr.p_type = PT_LOAD; - phdr.p_offset = offset; - phdr.p_vaddr = vma->vma_start; - phdr.p_paddr = 0; - phdr.p_filesz = vma_dump_size(vma); - offset += phdr.p_filesz; - phdr.p_memsz = vma->vma_end - vma->vma_start; - phdr.p_flags = vma->vma_flags & PROT_READ ? PF_R : 0; - if (vma->vma_flags & PROT_WRITE) - phdr.p_flags |= PF_W; - if (vma->vma_flags & PROT_EXEC) - phdr.p_flags |= PF_X; - phdr.p_align = ELF_EXEC_PAGESIZE; - - bswap_phdr(&phdr, 1); - if (dump_write(fd, &phdr, sizeof(phdr)) != 0) { + if (dump_write(fd, header, data_offset) < 0) { goto out; } } /* - * Next we write notes just after program headers. No - * alignment needed here. + * Finally write process memory into the corefile as well. */ - if (write_note_info(&info, fd) < 0) + if (walk_memory_regions(&fd, wmr_write_region) < 0) { goto out; - - /* align data to page boundary */ - if (lseek(fd, data_offset, SEEK_SET) != data_offset) - goto out; - - /* - * Finally we can dump process memory into corefile as well. - */ - for (vma = vma_first(mm); vma != NULL; vma = vma_next(vma)) { - abi_ulong addr; - abi_ulong end; - - end = vma->vma_start + vma_dump_size(vma); - - for (addr = vma->vma_start; addr < end; - addr += TARGET_PAGE_SIZE) { - char page[TARGET_PAGE_SIZE]; - int error; - - /* - * Read in page from target process memory and - * write it to coredump file. - */ - error = copy_from_user(page, addr, sizeof (page)); - if (error != 0) { - (void) fprintf(stderr, "unable to dump " TARGET_ABI_FMT_lx "\n", - addr); - errno = -error; - goto out; - } - if (dump_write(fd, page, TARGET_PAGE_SIZE) < 0) - goto out; - } } + errno = 0; out: - free_note_info(&info); - if (mm != NULL) - vma_delete(mm); - (void) close(fd); - - if (errno != 0) - return (-errno); - return (0); + ret = -errno; + mmap_unlock(); + cpu_list_unlock(); + close(fd); + return ret; } #endif /* USE_ELF_CORE_DUMP */ diff --git a/linux-user/loongarch64/target_syscall.h b/linux-user/loongarch64/target_syscall.h index 8b5de52124..39f229bb9c 100644 --- a/linux-user/loongarch64/target_syscall.h +++ b/linux-user/loongarch64/target_syscall.h @@ -38,11 +38,4 @@ struct target_pt_regs { #define TARGET_MCL_FUTURE 2 #define TARGET_MCL_ONFAULT 4 -#define TARGET_FORCE_SHMLBA - -static inline abi_ulong target_shmlba(CPULoongArchState *env) -{ - return 64 * KiB; -} - #endif diff --git a/linux-user/main.c b/linux-user/main.c index 74b2fbb393..551acf1661 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -55,6 +55,7 @@ #include "loader.h" #include "user-mmap.h" #include "tcg/perf.h" +#include "exec/page-vary.h" #ifdef CONFIG_SEMIHOSTING #include "semihosting/semihost.h" @@ -332,11 +333,11 @@ static void handle_arg_ld_prefix(const char *arg) static void handle_arg_pagesize(const char *arg) { - qemu_host_page_size = atoi(arg); - if (qemu_host_page_size == 0 || - (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) { - fprintf(stderr, "page size must be a power of two\n"); - exit(EXIT_FAILURE); + unsigned size, want = qemu_real_host_page_size(); + + if (qemu_strtoui(arg, NULL, 10, &size) || size != want) { + warn_report("Deprecated page size option cannot " + "change host page size (%u)", want); } } @@ -496,7 +497,7 @@ static const struct qemu_argument arg_table[] = { {"D", "QEMU_LOG_FILENAME", true, handle_arg_log_filename, "logfile", "write logs to 'logfile' (default stderr)"}, {"p", "QEMU_PAGESIZE", true, handle_arg_pagesize, - "pagesize", "set the host page size to 
'pagesize'"}, + "pagesize", "deprecated change to host page size"}, {"one-insn-per-tb", "QEMU_ONE_INSN_PER_TB", false, handle_arg_one_insn_per_tb, "", "run with one guest instruction per emulated TB"}, @@ -680,6 +681,7 @@ int main(int argc, char **argv, char **envp) int i; int ret; int execfd; + int host_page_size; unsigned long max_reserved_va; bool preserve_argv0; @@ -781,7 +783,7 @@ int main(int argc, char **argv, char **envp) } cpu_type = parse_cpu_option(cpu_model); - /* init tcg before creating CPUs and to get qemu_host_page_size */ + /* init tcg before creating CPUs */ { AccelState *accel = current_accel(); AccelClass *ac = ACCEL_GET_CLASS(accel); @@ -791,6 +793,16 @@ int main(int argc, char **argv, char **envp) opt_one_insn_per_tb, &error_abort); ac->init_machine(NULL); } + + /* + * Finalize page size before creating CPUs. + * This will do nothing if !TARGET_PAGE_BITS_VARY. + * The most efficient setting is to match the host. + */ + host_page_size = qemu_real_host_page_size(); + set_preferred_target_page_bits(ctz32(host_page_size)); + finalize_target_page_bits(); + cpu = cpu_create(cpu_type); env = cpu_env(cpu); cpu_reset(cpu); @@ -804,8 +816,8 @@ int main(int argc, char **argv, char **envp) */ max_reserved_va = MAX_RESERVED_VA(cpu); if (reserved_va != 0) { - if ((reserved_va + 1) % qemu_host_page_size) { - char *s = size_to_str(qemu_host_page_size); + if ((reserved_va + 1) % host_page_size) { + char *s = size_to_str(host_page_size); fprintf(stderr, "Reserved virtual address not aligned mod %s\n", s); g_free(s); exit(EXIT_FAILURE); @@ -889,7 +901,7 @@ int main(int argc, char **argv, char **envp) if ((fp = fopen("/proc/sys/vm/mmap_min_addr", "r")) != NULL) { unsigned long tmp; if (fscanf(fp, "%lu", &tmp) == 1 && tmp != 0) { - mmap_min_addr = tmp; + mmap_min_addr = MAX(tmp, host_page_size); qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx\n", mmap_min_addr); } @@ -902,7 +914,7 @@ int main(int argc, char **argv, char **envp) * If we're in a chroot with no /proc, fall back to 1 page. */ if (mmap_min_addr == 0) { - mmap_min_addr = qemu_host_page_size; + mmap_min_addr = host_page_size; qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx (fallback)\n", mmap_min_addr); diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 96c9433e27..4505fd7376 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -165,6 +165,7 @@ static int target_to_host_prot(int prot) /* NOTE: all the constants are the HOST ones, but addresses are target. */ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) { + int host_page_size = qemu_real_host_page_size(); abi_ulong starts[3]; abi_ulong lens[3]; int prots[3]; @@ -189,13 +190,13 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) } last = start + len - 1; - host_start = start & qemu_host_page_mask; - host_last = HOST_PAGE_ALIGN(last) - 1; + host_start = start & -host_page_size; + host_last = ROUND_UP(last, host_page_size) - 1; nranges = 0; mmap_lock(); - if (host_last - host_start < qemu_host_page_size) { + if (host_last - host_start < host_page_size) { /* Single host page contains all guest pages: sum the prot. 
*/ prot1 = target_prot; for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) { @@ -205,7 +206,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) prot1 |= page_get_flags(a + 1); } starts[nranges] = host_start; - lens[nranges] = qemu_host_page_size; + lens[nranges] = host_page_size; prots[nranges] = prot1; nranges++; } else { @@ -218,10 +219,10 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) /* If the resulting sum differs, create a new range. */ if (prot1 != target_prot) { starts[nranges] = host_start; - lens[nranges] = qemu_host_page_size; + lens[nranges] = host_page_size; prots[nranges] = prot1; nranges++; - host_start += qemu_host_page_size; + host_start += host_page_size; } } @@ -233,9 +234,9 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) } /* If the resulting sum differs, create a new range. */ if (prot1 != target_prot) { - host_last -= qemu_host_page_size; + host_last -= host_page_size; starts[nranges] = host_last + 1; - lens[nranges] = qemu_host_page_size; + lens[nranges] = host_page_size; prots[nranges] = prot1; nranges++; } @@ -266,10 +267,35 @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot) return ret; } -/* map an incomplete host page */ +/* + * Perform munmap on behalf of the target, with host parameters. + * If reserved_va, we must replace the memory reservation. + */ +static int do_munmap(void *addr, size_t len) +{ + if (reserved_va) { + void *ptr = mmap(addr, len, PROT_NONE, + MAP_FIXED | MAP_ANONYMOUS + | MAP_PRIVATE | MAP_NORESERVE, -1, 0); + return ptr == addr ? 0 : -1; + } + return munmap(addr, len); +} + +/* + * Map an incomplete host page. + * + * Here be dragons. This case will not work if there is an existing + * overlapping host page, which is file mapped, and for which the mapping + * is beyond the end of the file. In that case, we will see SIGBUS when + * trying to write a portion of this page. + * + * FIXME: Work around this with a temporary signal handler and longjmp. + */ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, int prot, int flags, int fd, off_t offset) { + int host_page_size = qemu_real_host_page_size(); abi_ulong real_last; void *host_start; int prot_old, prot_new; @@ -286,7 +312,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, return false; } - real_last = real_start + qemu_host_page_size - 1; + real_last = real_start + host_page_size - 1; host_start = g2h_untagged(real_start); /* Get the protection of the target pages outside the mapping. */ @@ -304,12 +330,12 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, * outside of the fragment we need to map. Allocate a new host * page to cover, discarding whatever else may have been present. */ - void *p = mmap(host_start, qemu_host_page_size, + void *p = mmap(host_start, host_page_size, target_to_host_prot(prot), flags | MAP_ANONYMOUS, -1, 0); if (p != host_start) { if (p != MAP_FAILED) { - munmap(p, qemu_host_page_size); + do_munmap(p, host_page_size); errno = EEXIST; } return false; @@ -324,7 +350,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, /* Adjust protection to be able to write. */ if (!(host_prot_old & PROT_WRITE)) { host_prot_old |= PROT_WRITE; - mprotect(host_start, qemu_host_page_size, host_prot_old); + mprotect(host_start, host_page_size, host_prot_old); } /* Read or zero the new guest pages. 
*/ @@ -338,7 +364,7 @@ static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last, /* Put final protection */ if (host_prot_new != host_prot_old) { - mprotect(host_start, qemu_host_page_size, host_prot_new); + mprotect(host_start, host_page_size, host_prot_new); } return true; } @@ -373,21 +399,21 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size, */ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) { + int host_page_size = qemu_real_host_page_size(); void *ptr, *prev; abi_ulong addr; int wrapped, repeat; - align = MAX(align, qemu_host_page_size); + align = MAX(align, host_page_size); /* If 'start' == 0, then a default start address is used. */ if (start == 0) { start = mmap_next_start; } else { - start &= qemu_host_page_mask; + start &= -host_page_size; } start = ROUND_UP(start, align); - - size = HOST_PAGE_ALIGN(size); + size = ROUND_UP(size, host_page_size); if (reserved_va) { return mmap_find_vma_reserved(start, size, align); @@ -488,302 +514,463 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) } } -/* NOTE: all the constants are the HOST ones */ -abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, - int flags, int fd, off_t offset) +/* + * Record a successful mmap within the user-exec interval tree. + */ +static abi_long mmap_end(abi_ulong start, abi_ulong last, + abi_ulong passthrough_start, + abi_ulong passthrough_last, + int flags, int page_flags) { - abi_ulong ret, last, real_start, real_last, retaddr, host_len; - abi_ulong passthrough_start = -1, passthrough_last = 0; - int page_flags; - off_t host_offset; - - mmap_lock(); - trace_target_mmap(start, len, target_prot, flags, fd, offset); - - if (!len) { - errno = EINVAL; - goto fail; + if (flags & MAP_ANONYMOUS) { + page_flags |= PAGE_ANON; } - - page_flags = validate_prot_to_pageflags(target_prot); - if (!page_flags) { - errno = EINVAL; - goto fail; + page_flags |= PAGE_RESET; + if (passthrough_start > passthrough_last) { + page_set_flags(start, last, page_flags); + } else { + if (start < passthrough_start) { + page_set_flags(start, passthrough_start - 1, page_flags); + } + page_set_flags(passthrough_start, passthrough_last, + page_flags | PAGE_PASSTHROUGH); + if (passthrough_last < last) { + page_set_flags(passthrough_last + 1, last, page_flags); + } } - - /* Also check for overflows... */ - len = TARGET_PAGE_ALIGN(len); - if (!len) { - errno = ENOMEM; - goto fail; + shm_region_rm_complete(start, last); + trace_target_mmap_complete(start); + if (qemu_loglevel_mask(CPU_LOG_PAGE)) { + FILE *f = qemu_log_trylock(); + if (f) { + fprintf(f, "page layout changed following mmap\n"); + page_dump(f); + qemu_log_unlock(f); + } } + return start; +} - if (offset & ~TARGET_PAGE_MASK) { - errno = EINVAL; - goto fail; - } +/* + * Special case host page size == target page size, + * where there are no edge conditions. + */ +static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len, + int host_prot, int flags, int page_flags, + int fd, off_t offset) +{ + void *p, *want_p = g2h_untagged(start); + abi_ulong last; - /* - * If we're mapping shared memory, ensure we generate code for parallel - * execution and flush old translations. This will work up to the level - * supported by the host -- anything that requires EXCP_ATOMIC will not - * be atomic with respect to an external process. 
- */ - if (flags & MAP_SHARED) { - CPUState *cpu = thread_cpu; - if (!(cpu->tcg_cflags & CF_PARALLEL)) { - cpu->tcg_cflags |= CF_PARALLEL; - tb_flush(cpu); - } + p = mmap(want_p, len, host_prot, flags, fd, offset); + if (p == MAP_FAILED) { + return -1; + } + /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */ + if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) { + do_munmap(p, len); + errno = EEXIST; + return -1; } - real_start = start & qemu_host_page_mask; - host_offset = offset & qemu_host_page_mask; + start = h2g(p); + last = start + len - 1; + return mmap_end(start, last, start, last, flags, page_flags); +} - /* - * If the user is asking for the kernel to find a location, do that - * before we truncate the length for mapping files below. - */ - if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { - host_len = len + offset - host_offset; - host_len = HOST_PAGE_ALIGN(host_len); - start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE); - if (start == (abi_ulong)-1) { - errno = ENOMEM; - goto fail; - } - } +/* + * Special case host page size < target page size. + * + * The two special cases are increased guest alignment, and mapping + * past the end of a file. + * + * When mapping files into a memory area larger than the file, + * accesses to pages beyond the file size will cause a SIGBUS. + * + * For example, if mmaping a file of 100 bytes on a host with 4K + * pages emulating a target with 8K pages, the target expects to + * be able to access the first 8K. But the host will trap us on + * any access beyond 4K. + * + * When emulating a target with a larger page-size than the hosts, + * we may need to truncate file maps at EOF and add extra anonymous + * pages up to the targets page boundary. + * + * This workaround only works for files that do not change. + * If the file is later extended (e.g. ftruncate), the SIGBUS + * vanishes and the proper behaviour is that changes within the + * anon page should be reflected in the file. + * + * However, this case is rather common with executable images, + * so the workaround is important for even trivial tests, whereas + * the mmap of of a file being extended is less common. + */ +static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot, + int mmap_flags, int page_flags, int fd, + off_t offset, int host_page_size) +{ + void *p, *want_p = g2h_untagged(start); + off_t fileend_adj = 0; + int flags = mmap_flags; + abi_ulong last, pass_last; - /* - * When mapping files into a memory area larger than the file, accesses - * to pages beyond the file size will cause a SIGBUS. - * - * For example, if mmaping a file of 100 bytes on a host with 4K pages - * emulating a target with 8K pages, the target expects to be able to - * access the first 8K. But the host will trap us on any access beyond - * 4K. - * - * When emulating a target with a larger page-size than the hosts, we - * may need to truncate file maps at EOF and add extra anonymous pages - * up to the targets page boundary. - */ - if ((qemu_real_host_page_size() < qemu_host_page_size) && - !(flags & MAP_ANONYMOUS)) { + if (!(flags & MAP_ANONYMOUS)) { struct stat sb; if (fstat(fd, &sb) == -1) { - goto fail; + return -1; } - - /* Are we trying to create a map beyond EOF?. */ - if (offset + len > sb.st_size) { + if (offset >= sb.st_size) { /* - * If so, truncate the file map at eof aligned with - * the hosts real pagesize. Additional anonymous maps - * will be created beyond EOF. + * The entire map is beyond the end of the file. 
+ * Transform it to an anonymous mapping. */ - len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset); + flags |= MAP_ANONYMOUS; + fd = -1; + offset = 0; + } else if (offset + len > sb.st_size) { + /* + * A portion of the map is beyond the end of the file. + * Truncate the file portion of the allocation. + */ + fileend_adj = offset + len - sb.st_size; } } - if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { - uintptr_t host_start; - int host_prot; - void *p; + if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) { + if (fileend_adj) { + p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0); + } else { + p = mmap(want_p, len, host_prot, flags, fd, offset); + } + if (p != want_p) { + if (p != MAP_FAILED) { + /* Host does not support MAP_FIXED_NOREPLACE: emulate. */ + do_munmap(p, len); + errno = EEXIST; + } + return -1; + } - host_len = len + offset - host_offset; - host_len = HOST_PAGE_ALIGN(host_len); - host_prot = target_to_host_prot(target_prot); + if (fileend_adj) { + void *t = mmap(p, len - fileend_adj, host_prot, + (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, + fd, offset); + + if (t == MAP_FAILED) { + int save_errno = errno; + + /* + * We failed a map over the top of the successful anonymous + * mapping above. The only failure mode is running out of VMAs, + * and there's nothing that we can do to detect that earlier. + * If we have replaced an existing mapping with MAP_FIXED, + * then we cannot properly recover. It's a coin toss whether + * it would be better to exit or continue here. + */ + if (!(flags & MAP_FIXED_NOREPLACE) && + !page_check_range_empty(start, start + len - 1)) { + qemu_log("QEMU target_mmap late failure: %s", + strerror(save_errno)); + } + + do_munmap(want_p, len); + errno = save_errno; + return -1; + } + } + } else { + size_t host_len, part_len; /* - * Note: we prefer to control the mapping address. It is - * especially important if qemu_host_page_size > - * qemu_real_host_page_size. + * Take care to align the host memory. Perform a larger anonymous + * allocation and extract the aligned portion. Remap the file on + * top of that. 
*/ - p = mmap(g2h_untagged(start), host_len, host_prot, - flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0); + host_len = len + TARGET_PAGE_SIZE - host_page_size; + p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { - goto fail; + return -1; + } + + part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1); + if (part_len) { + part_len = TARGET_PAGE_SIZE - part_len; + do_munmap(p, part_len); + p += part_len; + host_len -= part_len; } - /* update start so that it points to the file position at 'offset' */ - host_start = (uintptr_t)p; + if (len < host_len) { + do_munmap(p + len, host_len - len); + } + if (!(flags & MAP_ANONYMOUS)) { - p = mmap(g2h_untagged(start), len, host_prot, - flags | MAP_FIXED, fd, host_offset); - if (p == MAP_FAILED) { - munmap(g2h_untagged(start), host_len); - goto fail; + void *t = mmap(p, len - fileend_adj, host_prot, + flags | MAP_FIXED, fd, offset); + + if (t == MAP_FAILED) { + int save_errno = errno; + do_munmap(p, len); + errno = save_errno; + return -1; } - host_start += offset - host_offset; } - start = h2g(host_start); - last = start + len - 1; - passthrough_start = start; - passthrough_last = last; + + start = h2g(p); + } + + last = start + len - 1; + if (fileend_adj) { + pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1; } else { - if (start & ~TARGET_PAGE_MASK) { - errno = EINVAL; - goto fail; + pass_last = last; + } + return mmap_end(start, last, start, pass_last, mmap_flags, page_flags); +} + +/* + * Special case host page size > target page size. + * + * The two special cases are address and file offsets that are valid + * for the guest that cannot be directly represented by the host. + */ +static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len, + int target_prot, int host_prot, + int flags, int page_flags, int fd, + off_t offset, int host_page_size) +{ + void *p, *want_p = g2h_untagged(start); + off_t host_offset = offset & -host_page_size; + abi_ulong last, real_start, real_last; + bool misaligned_offset = false; + size_t host_len; + + if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) { + /* + * Adjust the offset to something representable on the host. + */ + host_len = len + offset - host_offset; + p = mmap(want_p, host_len, host_prot, flags, fd, host_offset); + if (p == MAP_FAILED) { + return -1; } + + /* Update start to the file position at offset. */ + p += offset - host_offset; + + start = h2g(p); last = start + len - 1; - real_last = HOST_PAGE_ALIGN(last) - 1; + return mmap_end(start, last, start, last, flags, page_flags); + } + + if (!(flags & MAP_ANONYMOUS)) { + misaligned_offset = (start ^ offset) & (host_page_size - 1); /* - * Test if requested memory area fits target address space - * It can fail only on 64-bit host with 32-bit target. - * On any other target/host host mmap() handles this error correctly. + * The fallback for misalignment is a private mapping + read. + * This carries none of semantics required of MAP_SHARED. */ - if (last < start || !guest_range_valid_untagged(start, len)) { - errno = ENOMEM; - goto fail; + if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) { + errno = EINVAL; + return -1; } + } - if (flags & MAP_FIXED_NOREPLACE) { - /* Validate that the chosen range is empty. 
*/ - if (!page_check_range_empty(start, last)) { - errno = EEXIST; - goto fail; - } + last = start + len - 1; + real_start = start & -host_page_size; + real_last = ROUND_UP(last, host_page_size) - 1; - /* - * With reserved_va, the entire address space is mmaped in the - * host to ensure it isn't accidentally used for something else. - * We have just checked that the guest address is not mapped - * within the guest, but need to replace the host reservation. - * - * Without reserved_va, despite the guest address check above, - * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite - * any host address mappings. - */ - if (reserved_va) { - flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; + /* + * Handle the start and end of the mapping. + */ + if (real_start < start) { + abi_ulong real_page_last = real_start + host_page_size - 1; + if (last <= real_page_last) { + /* Entire allocation a subset of one host page. */ + if (!mmap_frag(real_start, start, last, target_prot, + flags, fd, offset)) { + return -1; } + return mmap_end(start, last, -1, 0, flags, page_flags); } - /* - * worst case: we cannot map the file because the offset is not - * aligned, so we read it - */ - if (!(flags & MAP_ANONYMOUS) && - (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) { - /* - * msync() won't work here, so we return an error if write is - * possible while it is a shared mapping - */ - if ((flags & MAP_TYPE) == MAP_SHARED - && (target_prot & PROT_WRITE)) { - errno = EINVAL; - goto fail; - } - retaddr = target_mmap(start, len, target_prot | PROT_WRITE, - (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) - | MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (retaddr == -1) { - goto fail; - } - if (pread(fd, g2h_untagged(start), len, offset) == -1) { - goto fail; - } - if (!(target_prot & PROT_WRITE)) { - ret = target_mprotect(start, len, target_prot); - assert(ret == 0); - } - goto the_end; + if (!mmap_frag(real_start, start, real_page_last, target_prot, + flags, fd, offset)) { + return -1; } + real_start = real_page_last + 1; + } - /* handle the start of the mapping */ - if (start > real_start) { - if (real_last == real_start + qemu_host_page_size - 1) { - /* one single host page */ - if (!mmap_frag(real_start, start, last, - target_prot, flags, fd, offset)) { - goto fail; - } - goto the_end1; - } - if (!mmap_frag(real_start, start, - real_start + qemu_host_page_size - 1, - target_prot, flags, fd, offset)) { - goto fail; - } - real_start += qemu_host_page_size; + if (last < real_last) { + abi_ulong real_page_start = real_last - host_page_size + 1; + if (!mmap_frag(real_page_start, real_page_start, last, + target_prot, flags, fd, + offset + real_page_start - start)) { + return -1; } - /* handle the end of the mapping */ - if (last < real_last) { - abi_ulong real_page = real_last - qemu_host_page_size + 1; - if (!mmap_frag(real_page, real_page, last, - target_prot, flags, fd, - offset + real_page - start)) { - goto fail; - } - real_last -= qemu_host_page_size; + real_last = real_page_start - 1; + } + + if (real_start > real_last) { + return mmap_end(start, last, -1, 0, flags, page_flags); + } + + /* + * Handle the middle of the mapping. 
+ */ + + host_len = real_last - real_start + 1; + want_p += real_start - start; + + if (flags & MAP_ANONYMOUS) { + p = mmap(want_p, host_len, host_prot, flags, -1, 0); + } else if (!misaligned_offset) { + p = mmap(want_p, host_len, host_prot, flags, fd, + offset + real_start - start); + } else { + p = mmap(want_p, host_len, host_prot | PROT_WRITE, + flags | MAP_ANONYMOUS, -1, 0); + } + if (p != want_p) { + if (p != MAP_FAILED) { + do_munmap(p, host_len); + errno = EEXIST; } + return -1; + } - /* map the middle (easier) */ - if (real_start < real_last) { - void *p, *want_p; - off_t offset1; - size_t len1; + if (misaligned_offset) { + /* TODO: The read could be short. */ + if (pread(fd, p, host_len, offset + real_start - start) != host_len) { + do_munmap(p, host_len); + return -1; + } + if (!(host_prot & PROT_WRITE)) { + mprotect(p, host_len, host_prot); + } + } - if (flags & MAP_ANONYMOUS) { - offset1 = 0; - } else { - offset1 = offset + real_start - start; + return mmap_end(start, last, -1, 0, flags, page_flags); +} + +static abi_long target_mmap__locked(abi_ulong start, abi_ulong len, + int target_prot, int flags, int page_flags, + int fd, off_t offset) +{ + int host_page_size = qemu_real_host_page_size(); + int host_prot; + + /* + * For reserved_va, we are in full control of the allocation. + * Find a suitable hole and convert to MAP_FIXED. + */ + if (reserved_va) { + if (flags & MAP_FIXED_NOREPLACE) { + /* Validate that the chosen range is empty. */ + if (!page_check_range_empty(start, start + len - 1)) { + errno = EEXIST; + return -1; } - len1 = real_last - real_start + 1; - want_p = g2h_untagged(real_start); - - p = mmap(want_p, len1, target_to_host_prot(target_prot), - flags, fd, offset1); - if (p != want_p) { - if (p != MAP_FAILED) { - munmap(p, len1); - errno = EEXIST; - } - goto fail; + flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED; + } else if (!(flags & MAP_FIXED)) { + abi_ulong real_start = start & -host_page_size; + off_t host_offset = offset & -host_page_size; + size_t real_len = len + offset - host_offset; + abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE); + + start = mmap_find_vma(real_start, real_len, align); + if (start == (abi_ulong)-1) { + errno = ENOMEM; + return -1; } - passthrough_start = real_start; - passthrough_last = real_last; + start += offset - host_offset; + flags |= MAP_FIXED; } } - the_end1: - if (flags & MAP_ANONYMOUS) { - page_flags |= PAGE_ANON; - } - page_flags |= PAGE_RESET; - if (passthrough_start > passthrough_last) { - page_set_flags(start, last, page_flags); + + host_prot = target_to_host_prot(target_prot); + + if (host_page_size == TARGET_PAGE_SIZE) { + return mmap_h_eq_g(start, len, host_prot, flags, + page_flags, fd, offset); + } else if (host_page_size < TARGET_PAGE_SIZE) { + return mmap_h_lt_g(start, len, host_prot, flags, + page_flags, fd, offset, host_page_size); } else { - if (start < passthrough_start) { - page_set_flags(start, passthrough_start - 1, page_flags); + return mmap_h_gt_g(start, len, target_prot, host_prot, flags, + page_flags, fd, offset, host_page_size); + } +} + +/* NOTE: all the constants are the HOST ones */ +abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, + int flags, int fd, off_t offset) +{ + abi_long ret; + int page_flags; + + trace_target_mmap(start, len, target_prot, flags, fd, offset); + + if (!len) { + errno = EINVAL; + return -1; + } + + page_flags = validate_prot_to_pageflags(target_prot); + if (!page_flags) { + errno = EINVAL; + return -1; + } + + /* Also check for overflows... 
*/ + len = TARGET_PAGE_ALIGN(len); + if (!len || len != (size_t)len) { + errno = ENOMEM; + return -1; + } + + if (offset & ~TARGET_PAGE_MASK) { + errno = EINVAL; + return -1; + } + if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) { + if (start & ~TARGET_PAGE_MASK) { + errno = EINVAL; + return -1; } - page_set_flags(passthrough_start, passthrough_last, - page_flags | PAGE_PASSTHROUGH); - if (passthrough_last < last) { - page_set_flags(passthrough_last + 1, last, page_flags); + if (!guest_range_valid_untagged(start, len)) { + errno = ENOMEM; + return -1; } } - shm_region_rm_complete(start, last); - the_end: - trace_target_mmap_complete(start); - if (qemu_loglevel_mask(CPU_LOG_PAGE)) { - FILE *f = qemu_log_trylock(); - if (f) { - fprintf(f, "page layout changed following mmap\n"); - page_dump(f); - qemu_log_unlock(f); + + mmap_lock(); + + ret = target_mmap__locked(start, len, target_prot, flags, + page_flags, fd, offset); + + mmap_unlock(); + + /* + * If we're mapping shared memory, ensure we generate code for parallel + * execution and flush old translations. This will work up to the level + * supported by the host -- anything that requires EXCP_ATOMIC will not + * be atomic with respect to an external process. + */ + if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) { + CPUState *cpu = thread_cpu; + if (!(cpu->tcg_cflags & CF_PARALLEL)) { + cpu->tcg_cflags |= CF_PARALLEL; + tb_flush(cpu); } } - mmap_unlock(); - return start; -fail: - mmap_unlock(); - return -1; + + return ret; } static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) { + int host_page_size = qemu_real_host_page_size(); abi_ulong real_start; abi_ulong real_last; abi_ulong real_len; @@ -793,8 +980,8 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) int prot; last = start + len - 1; - real_start = start & qemu_host_page_mask; - real_last = HOST_PAGE_ALIGN(last) - 1; + real_start = start & -host_page_size; + real_last = ROUND_UP(last, host_page_size) - 1; /* * If guest pages remain on the first or last host pages, @@ -802,7 +989,7 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) * The single page special case is required for the last page, * lest real_start overflow to zero. */ - if (real_last - real_start < qemu_host_page_size) { + if (real_last - real_start < host_page_size) { prot = 0; for (a = real_start; a < start; a += TARGET_PAGE_SIZE) { prot |= page_get_flags(a); @@ -818,14 +1005,14 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) prot |= page_get_flags(a); } if (prot != 0) { - real_start += qemu_host_page_size; + real_start += host_page_size; } for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) { prot |= page_get_flags(a + 1); } if (prot != 0) { - real_last -= qemu_host_page_size; + real_last -= host_page_size; } if (real_last < real_start) { @@ -836,13 +1023,7 @@ static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) real_len = real_last - real_start + 1; host_start = g2h_untagged(real_start); - if (reserved_va) { - void *ptr = mmap(host_start, real_len, PROT_NONE, - MAP_FIXED | MAP_ANONYMOUS - | MAP_PRIVATE | MAP_NORESERVE, -1, 0); - return ptr == host_start ? 
0 : -1; - } - return munmap(host_start, real_len); + return do_munmap(host_start, real_len); } int target_munmap(abi_ulong start, abi_ulong len) @@ -1055,69 +1236,161 @@ static inline abi_ulong target_shmlba(CPUArchState *cpu_env) } #endif +#if defined(__arm__) || defined(__mips__) || defined(__sparc__) +#define HOST_FORCE_SHMLBA 1 +#else +#define HOST_FORCE_SHMLBA 0 +#endif + abi_ulong target_shmat(CPUArchState *cpu_env, int shmid, abi_ulong shmaddr, int shmflg) { CPUState *cpu = env_cpu(cpu_env); - abi_ulong raddr; struct shmid_ds shm_info; int ret; - abi_ulong shmlba; + int h_pagesize; + int t_shmlba, h_shmlba, m_shmlba; + size_t t_len, h_len, m_len; /* shmat pointers are always untagged */ - /* find out the length of the shared memory segment */ + /* + * Because we can't use host shmat() unless the address is sufficiently + * aligned for the host, we'll need to check both. + * TODO: Could be fixed with softmmu. + */ + t_shmlba = target_shmlba(cpu_env); + h_pagesize = qemu_real_host_page_size(); + h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize); + m_shmlba = MAX(t_shmlba, h_shmlba); + + if (shmaddr) { + if (shmaddr & (m_shmlba - 1)) { + if (shmflg & SHM_RND) { + /* + * The guest is allowing the kernel to round the address. + * Assume that the guest is ok with us rounding to the + * host required alignment too. Anyway if we don't, we'll + * get an error from the kernel. + */ + shmaddr &= ~(m_shmlba - 1); + if (shmaddr == 0 && (shmflg & SHM_REMAP)) { + return -TARGET_EINVAL; + } + } else { + int require = TARGET_PAGE_SIZE; +#ifdef TARGET_FORCE_SHMLBA + require = t_shmlba; +#endif + /* + * Include host required alignment, as otherwise we cannot + * use host shmat at all. + */ + require = MAX(require, h_shmlba); + if (shmaddr & (require - 1)) { + return -TARGET_EINVAL; + } + } + } + } else { + if (shmflg & SHM_REMAP) { + return -TARGET_EINVAL; + } + } + /* All rounding now manually concluded. */ + shmflg &= ~SHM_RND; + + /* Find out the length of the shared memory segment. */ ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); if (is_error(ret)) { /* can't get length, bail out */ return ret; } + t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz); + h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize); + m_len = MAX(t_len, h_len); - shmlba = target_shmlba(cpu_env); - - if (shmaddr & (shmlba - 1)) { - if (shmflg & SHM_RND) { - shmaddr &= ~(shmlba - 1); - } else { - return -TARGET_EINVAL; - } - } - if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) { + if (!guest_range_valid_untagged(shmaddr, m_len)) { return -TARGET_EINVAL; } WITH_MMAP_LOCK_GUARD() { - void *host_raddr; + bool mapped = false; + void *want, *test; abi_ulong last; - if (shmaddr) { - host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg); + if (!shmaddr) { + shmaddr = mmap_find_vma(0, m_len, m_shmlba); + if (shmaddr == -1) { + return -TARGET_ENOMEM; + } + mapped = !reserved_va; + } else if (shmflg & SHM_REMAP) { + /* + * If host page size > target page size, the host shmat may map + * more memory than the guest expects. Reject a mapping that + * would replace memory in the unexpected gap. + * TODO: Could be fixed with softmmu. + */ + if (t_len < h_len && + !page_check_range_empty(shmaddr + t_len, + shmaddr + h_len - 1)) { + return -TARGET_EINVAL; + } } else { - abi_ulong mmap_start; + if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) { + return -TARGET_EINVAL; + } + } - /* In order to use the host shmat, we need to honor host SHMLBA. 
*/ - mmap_start = mmap_find_vma(0, shm_info.shm_segsz, - MAX(SHMLBA, shmlba)); + /* All placement is now complete. */ + want = (void *)g2h_untagged(shmaddr); - if (mmap_start == -1) { - return -TARGET_ENOMEM; + /* + * Map anonymous pages across the entire range, then remap with + * the shared memory. This is required for a number of corner + * cases for which host and guest page sizes differ. + */ + if (h_len != t_len) { + int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE); + int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS + | (reserved_va || (shmflg & SHM_REMAP) + ? MAP_FIXED : MAP_FIXED_NOREPLACE); + + test = mmap(want, m_len, mmap_p, mmap_f, -1, 0); + if (unlikely(test != want)) { + /* shmat returns EINVAL not EEXIST like mmap. */ + ret = (test == MAP_FAILED && errno != EEXIST + ? get_errno(-1) : -TARGET_EINVAL); + if (mapped) { + do_munmap(want, m_len); + } + return ret; } - host_raddr = shmat(shmid, g2h_untagged(mmap_start), - shmflg | SHM_REMAP); + mapped = true; } - if (host_raddr == (void *)-1) { - return get_errno(-1); + if (reserved_va || mapped) { + shmflg |= SHM_REMAP; + } + test = shmat(shmid, want, shmflg); + if (test == MAP_FAILED) { + ret = get_errno(-1); + if (mapped) { + do_munmap(want, m_len); + } + return ret; } - raddr = h2g(host_raddr); - last = raddr + shm_info.shm_segsz - 1; + assert(test == want); - page_set_flags(raddr, last, + last = shmaddr + m_len - 1; + page_set_flags(shmaddr, last, PAGE_VALID | PAGE_RESET | PAGE_READ | - (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE)); + (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) | + (shmflg & SHM_EXEC ? PAGE_EXEC : 0)); - shm_region_rm_complete(raddr, last); - shm_region_add(raddr, last); + shm_region_rm_complete(shmaddr, last); + shm_region_add(shmaddr, last); } /* @@ -1131,7 +1404,15 @@ abi_ulong target_shmat(CPUArchState *cpu_env, int shmid, tb_flush(cpu); } - return raddr; + if (qemu_loglevel_mask(CPU_LOG_PAGE)) { + FILE *f = qemu_log_trylock(); + if (f) { + fprintf(f, "page layout changed following shmat\n"); + page_dump(f); + qemu_log_unlock(f); + } + } + return shmaddr; } abi_long target_shmdt(abi_ulong shmaddr) diff --git a/linux-user/strace.c b/linux-user/strace.c index cf26e55264..8d13e55a5b 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -670,6 +670,26 @@ print_semctl(CPUArchState *cpu_env, const struct syscallname *name, } #endif +static void +print_shmat(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + static const struct flags shmat_flags[] = { + FLAG_GENERIC(SHM_RND), + FLAG_GENERIC(SHM_REMAP), + FLAG_GENERIC(SHM_RDONLY), + FLAG_GENERIC(SHM_EXEC), + FLAG_END + }; + + print_syscall_prologue(name); + print_raw_param(TARGET_ABI_FMT_ld, arg0, 0); + print_pointer(arg1, 0); + print_flags(shmat_flags, arg2, 1); + print_syscall_epilogue(name); +} + #ifdef TARGET_NR_ipc static void print_ipc(CPUArchState *cpu_env, const struct syscallname *name, @@ -683,6 +703,10 @@ print_ipc(CPUArchState *cpu_env, const struct syscallname *name, print_ipc_cmd(arg3); qemu_log(",0x" TARGET_ABI_FMT_lx ")", arg4); break; + case IPCOP_shmat: + print_shmat(cpu_env, &(const struct syscallname){ .name = "shmat" }, + arg1, arg4, arg2, 0, 0, 0); + break; default: qemu_log(("%s(" TARGET_ABI_FMT_ld "," diff --git a/linux-user/strace.list b/linux-user/strace.list index 6655d4f26d..dfd4237d14 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -1398,7 +1398,7 @@ { TARGET_NR_sgetmask, "sgetmask" , NULL, NULL, NULL }, 
#endif #ifdef TARGET_NR_shmat -{ TARGET_NR_shmat, "shmat" , NULL, NULL, print_syscall_ret_addr }, +{ TARGET_NR_shmat, "shmat" , NULL, print_shmat, print_syscall_ret_addr }, #endif #ifdef TARGET_NR_shmctl { TARGET_NR_shmctl, "shmctl" , NULL, NULL, NULL }, diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e384e14248..bc8c06522f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7994,6 +7994,10 @@ static void open_self_maps_4(const struct open_self_maps_data *d, path = "[heap]"; } else if (start == info->vdso) { path = "[vdso]"; +#ifdef TARGET_X86_64 + } else if (start == TARGET_VSYSCALL_PAGE) { + path = "[vsyscall]"; +#endif } /* Except null device (MAP_ANON), adjust offset for this fragment. */ @@ -8082,6 +8086,18 @@ static int open_self_maps_2(void *opaque, target_ulong guest_start, uintptr_t host_start = (uintptr_t)g2h_untagged(guest_start); uintptr_t host_last = (uintptr_t)g2h_untagged(guest_end - 1); +#ifdef TARGET_X86_64 + /* + * Because of the extremely high position of the page within the guest + * virtual address space, this is not backed by host memory at all. + * Therefore the loop below would fail. This is the only instance + * of not having host backing memory. + */ + if (guest_start == TARGET_VSYSCALL_PAGE) { + return open_self_maps_3(opaque, guest_start, guest_end, flags); + } +#endif + while (1) { IntervalTreeNode *n = interval_tree_iter_first(d->host_maps, host_start, host_start); diff --git a/migration/ram.c b/migration/ram.c index 45a00b45ed..83fd780fc2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2934,7 +2934,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) { RAMState **rsp = opaque; RAMBlock *block; - int ret; + int ret, max_hg_page_size; if (compress_threads_save_setup()) { return -1; @@ -2949,6 +2949,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f; + /* + * ??? Mirrors the previous value of qemu_host_page_size, + * but is this really what was intended for the migration? + */ + max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE); + WITH_RCU_READ_LOCK_GUARD() { qemu_put_be64(f, ram_bytes_total_with_ignored() | RAM_SAVE_FLAG_MEM_SIZE); @@ -2957,8 +2963,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); - if (migrate_postcopy_ram() && block->page_size != - qemu_host_page_size) { + if (migrate_postcopy_ram() && + block->page_size != max_hg_page_size) { qemu_put_be64(f, block->page_size); } if (migrate_ignore_shared()) { @@ -3791,6 +3797,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) int ret = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = migration_incoming_postcopy_advised(); + int max_hg_page_size; assert(block); @@ -3808,9 +3815,16 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) return ret; } } + + /* + * ??? Mirrors the previous value of qemu_host_page_size, + * but is this really what was intended for the migration? 
+ */ + max_hg_page_size = MAX(qemu_real_host_page_size(), TARGET_PAGE_SIZE); + /* For postcopy we need to check hugepage sizes match */ if (postcopy_advised && migrate_postcopy_ram() && - block->page_size != qemu_host_page_size) { + block->page_size != max_hg_page_size) { uint64_t remote_page_size = qemu_get_be64(f); if (remote_page_size != block->page_size) { error_report("Mismatched RAM page size %s " diff --git a/system/physmem.c b/system/physmem.c index e3ebc19eef..3adda08ebf 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1680,7 +1680,8 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) assert(block); - newsize = HOST_PAGE_ALIGN(newsize); + newsize = TARGET_PAGE_ALIGN(newsize); + newsize = REAL_HOST_PAGE_ALIGN(newsize); if (block->used_length == newsize) { /* @@ -1916,7 +1917,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, return NULL; } - size = HOST_PAGE_ALIGN(size); + size = TARGET_PAGE_ALIGN(size); + size = REAL_HOST_PAGE_ALIGN(size); + file_size = get_file_size(fd); if (file_size > offset && file_size < (offset + size)) { error_setg(errp, "backing store size 0x%" PRIx64 @@ -2014,13 +2017,17 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, { RAMBlock *new_block; Error *local_err = NULL; + int align; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | RAM_NORESERVE)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); - size = HOST_PAGE_ALIGN(size); - max_size = HOST_PAGE_ALIGN(max_size); + align = qemu_real_host_page_size(); + align = MAX(align, TARGET_PAGE_SIZE); + size = ROUND_UP(size, align); + max_size = ROUND_UP(max_size, align); + new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->resized = resized; @@ -3511,7 +3518,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) * fallocate works on hugepages and shmem * shared anonymous memory requires madvise REMOVE */ - need_madvise = (rb->page_size == qemu_host_page_size); + need_madvise = (rb->page_size == qemu_real_host_page_size()); need_fallocate = rb->fd != -1; if (need_fallocate) { /* For a file, this causes the area of the file to be zero'd diff --git a/system/vl.c b/system/vl.c index e480afd7a0..48aae6e053 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2118,7 +2118,6 @@ static void qemu_create_machine(QDict *qdict) } cpu_exec_init_all(); - page_size_init(); if (machine_class->hw_version) { qemu_set_hw_version(machine_class->hw_version); diff --git a/target/alpha/cpu-param.h b/target/alpha/cpu-param.h index 68c46f7998..c969cb016b 100644 --- a/target/alpha/cpu-param.h +++ b/target/alpha/cpu-param.h @@ -9,10 +9,22 @@ #define ALPHA_CPU_PARAM_H #define TARGET_LONG_BITS 64 -#define TARGET_PAGE_BITS 13 /* ??? EV4 has 34 phys addr bits, EV5 has 40, EV6 has 44. */ #define TARGET_PHYS_ADDR_SPACE_BITS 44 -#define TARGET_VIRT_ADDR_SPACE_BITS (30 + TARGET_PAGE_BITS) + +#ifdef CONFIG_USER_ONLY +/* + * Allow user-only to vary page size. Real hardware allows only 8k and 64k, + * but since any variance means guests cannot assume a fixed value, allow + * a 4k minimum to match x86 host, which can minimize emulation issues. 
+ */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +# define TARGET_VIRT_ADDR_SPACE_BITS 63 +#else +# define TARGET_PAGE_BITS 13 +# define TARGET_VIRT_ADDR_SPACE_BITS (30 + TARGET_PAGE_BITS) +#endif #endif diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index f9b462a98f..da3243ab21 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -19,9 +19,13 @@ #endif #ifdef CONFIG_USER_ONLY -#define TARGET_PAGE_BITS 12 # ifdef TARGET_AARCH64 # define TARGET_TAGGED_ADDRESSES +/* Allow user-only to vary page size from 4k */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +# else +# define TARGET_PAGE_BITS 12 # endif #else /* diff --git a/target/arm/cpu.c b/target/arm/cpu.c index b2ea5d6513..f3ed79cef2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1809,7 +1809,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) ARMCPU *cpu = ARM_CPU(dev); ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); CPUARMState *env = &cpu->env; - int pagebits; Error *local_err = NULL; #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) @@ -2100,28 +2099,36 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) !cpu_isar_feature(aa32_vfp_simd, cpu) || !arm_feature(env, ARM_FEATURE_XSCALE)); - if (arm_feature(env, ARM_FEATURE_V7) && - !arm_feature(env, ARM_FEATURE_M) && - !arm_feature(env, ARM_FEATURE_PMSA)) { - /* v7VMSA drops support for the old ARMv5 tiny pages, so we - * can use 4K pages. - */ - pagebits = 12; - } else { - /* For CPUs which might have tiny 1K pages, or which have an - * MPU and might have small region sizes, stick with 1K pages. - */ - pagebits = 10; - } - if (!set_preferred_target_page_bits(pagebits)) { - /* This can only ever happen for hotplugging a CPU, or if - * the board code incorrectly creates a CPU which it has - * promised via minimum_page_size that it will not. - */ - error_setg(errp, "This CPU requires a smaller page size than the " - "system is using"); - return; +#ifndef CONFIG_USER_ONLY + { + int pagebits; + if (arm_feature(env, ARM_FEATURE_V7) && + !arm_feature(env, ARM_FEATURE_M) && + !arm_feature(env, ARM_FEATURE_PMSA)) { + /* + * v7VMSA drops support for the old ARMv5 tiny pages, + * so we can use 4K pages. + */ + pagebits = 12; + } else { + /* + * For CPUs which might have tiny 1K pages, or which have an + * MPU and might have small region sizes, stick with 1K pages. + */ + pagebits = 10; + } + if (!set_preferred_target_page_bits(pagebits)) { + /* + * This can only ever happen for hotplugging a CPU, or if + * the board code incorrectly creates a CPU which it has + * promised via minimum_page_size that it will not. + */ + error_setg(errp, "This CPU requires a smaller page size " + "than the system is using"); + return; + } } +#endif /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it. 
* We don't support setting cluster ID ([16..23]) (known as Aff2 diff --git a/target/ppc/cpu-param.h b/target/ppc/cpu-param.h index 0a0416e0a8..b7ad52de03 100644 --- a/target/ppc/cpu-param.h +++ b/target/ppc/cpu-param.h @@ -31,6 +31,13 @@ # define TARGET_PHYS_ADDR_SPACE_BITS 36 # define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -#define TARGET_PAGE_BITS 12 + +#ifdef CONFIG_USER_ONLY +/* Allow user-only to vary page size from 4k */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +#else +# define TARGET_PAGE_BITS 12 +#endif #endif diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index ef5ebe91bd..85d5746e47 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -55,7 +55,11 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL -#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#ifdef CONFIG_DARWIN +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#else +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#endif #define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL #define have_lse (cpuinfo & CPUINFO_LSE) diff --git a/tcg/optimize.c b/tcg/optimize.c index 79e701652b..752cc5c56b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -2102,7 +2102,8 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc, uext_opc, sext_opc; + TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; + TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; diff --git a/tests/tcg/alpha/Makefile.target b/tests/tcg/alpha/Makefile.target index b94500a7d9..fdd7ddf64e 100644 --- a/tests/tcg/alpha/Makefile.target +++ b/tests/tcg/alpha/Makefile.target @@ -13,6 +13,3 @@ test-cmov: test-cond.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) run-test-cmov: test-cmov - -# On Alpha Linux only supports 8k pages -EXTRA_RUNS+=run-test-mmap-8192 diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target index 3473f4619e..0a1965fce7 100644 --- a/tests/tcg/arm/Makefile.target +++ b/tests/tcg/arm/Makefile.target @@ -79,6 +79,3 @@ sha512-vector: sha512.c ARM_TESTS += sha512-vector TESTS += $(ARM_TESTS) - -# On ARM Linux only supports 4k pages -EXTRA_RUNS+=run-test-mmap-4096 diff --git a/tests/tcg/hppa/Makefile.target b/tests/tcg/hppa/Makefile.target index cdd0d572a7..ea5ae2186d 100644 --- a/tests/tcg/hppa/Makefile.target +++ b/tests/tcg/hppa/Makefile.target @@ -2,9 +2,6 @@ # # HPPA specific tweaks - specifically masking out broken tests -# On parisc Linux supports 4K/16K/64K (but currently only 4k works) -EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-16384 run-test-mmap-65536 - # This triggers failures for hppa-linux about 1% of the time # HPPA is the odd target that can't use the sigtramp page; # it requires the full vdso with dwarf2 unwind info. 
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target index 9906f9e116..bbe2c44b2a 100644 --- a/tests/tcg/i386/Makefile.target +++ b/tests/tcg/i386/Makefile.target @@ -71,9 +71,6 @@ endif I386_TESTS:=$(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) TESTS=$(MULTIARCH_TESTS) $(I386_TESTS) -# On i386 and x86_64 Linux only supports 4k pages (large pages are a different hack) -EXTRA_RUNS+=run-test-mmap-4096 - sha512-sse: CFLAGS=-msse4.1 -O3 sha512-sse: sha512.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) diff --git a/tests/tcg/m68k/Makefile.target b/tests/tcg/m68k/Makefile.target index 6ff214e60a..33f7b1b127 100644 --- a/tests/tcg/m68k/Makefile.target +++ b/tests/tcg/m68k/Makefile.target @@ -5,6 +5,3 @@ VPATH += $(SRC_PATH)/tests/tcg/m68k TESTS += trap denormal - -# On m68k Linux supports 4k and 8k pages (but 8k is currently broken) -EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192 diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index e10951a801..f11f3b084d 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -51,18 +51,9 @@ run-plugin-vma-pthread-with-%: vma-pthread $(call skip-test, $<, "flaky on CI?") endif -# We define the runner for test-mmap after the individual -# architectures have defined their supported pages sizes. If no -# additional page sizes are defined we only run the default test. - -# default case (host page size) run-test-mmap: test-mmap $(call run-test, test-mmap, $(QEMU) $<, $< (default)) -# additional page sizes (defined by each architecture adding to EXTRA_RUNS) -run-test-mmap-%: test-mmap - $(call run-test, test-mmap-$*, $(QEMU) -p $* $<, $< ($* byte pages)) - ifneq ($(GDB),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py diff --git a/tests/tcg/multiarch/linux/linux-madvise.c b/tests/tcg/multiarch/linux/linux-madvise.c index 29d0997e68..539fb3b772 100644 --- a/tests/tcg/multiarch/linux/linux-madvise.c +++ b/tests/tcg/multiarch/linux/linux-madvise.c @@ -42,6 +42,8 @@ static void test_file(void) assert(ret == 0); written = write(fd, &c, sizeof(c)); assert(written == sizeof(c)); + ret = ftruncate(fd, pagesize); + assert(ret == 0); page = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE, fd, 0); assert(page != MAP_FAILED); diff --git a/tests/tcg/multiarch/linux/linux-shmat-maps.c b/tests/tcg/multiarch/linux/linux-shmat-maps.c new file mode 100644 index 0000000000..0ccf7a973a --- /dev/null +++ b/tests/tcg/multiarch/linux/linux-shmat-maps.c @@ -0,0 +1,55 @@ +/* + * Test that shmat() does not break /proc/self/maps. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include <assert.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <unistd.h> + +int main(void) +{ + char buf[128]; + int err, fd; + int shmid; + ssize_t n; + void *p; + + shmid = shmget(IPC_PRIVATE, 1, IPC_CREAT | 0600); + assert(shmid != -1); + + /* + * The original bug required a non-NULL address, which skipped the + * mmap_find_vma step, which could result in a host mapping smaller + * than the target mapping. Choose an address at random. + */ + p = shmat(shmid, (void *)0x800000, SHM_RND); + if (p == (void *)-1) { + /* + * Because we are now running the testcase for all guests for which + * we have a cross-compiler, the above random address might conflict + * with the guest executable in some way. Rather than stopping, + * continue with a system supplied address, which should never fail. 
+ */ + p = shmat(shmid, NULL, 0); + assert(p != (void *)-1); + } + + fd = open("/proc/self/maps", O_RDONLY); + assert(fd != -1); + do { + n = read(fd, buf, sizeof(buf)); + assert(n >= 0); + } while (n != 0); + close(fd); + + err = shmdt(p); + assert(err == 0); + err = shmctl(shmid, IPC_RMID, NULL); + assert(err == 0); + + return EXIT_SUCCESS; +} diff --git a/tests/tcg/ppc/Makefile.target b/tests/tcg/ppc/Makefile.target deleted file mode 100644 index f5e08c7376..0000000000 --- a/tests/tcg/ppc/Makefile.target +++ /dev/null @@ -1,12 +0,0 @@ -# -*- Mode: makefile -*- -# -# PPC - included from tests/tcg/Makefile -# - -ifneq (,$(findstring 64,$(TARGET_NAME))) -# On PPC64 Linux can be configured with 4k (default) or 64k pages (currently broken) -EXTRA_RUNS+=run-test-mmap-4096 #run-test-mmap-65536 -else -# On PPC32 Linux supports 4K/16K/64K/256K (but currently only 4k works) -EXTRA_RUNS+=run-test-mmap-4096 #run-test-mmap-16384 run-test-mmap-65536 run-test-mmap-262144 -endif diff --git a/tests/tcg/sh4/Makefile.target b/tests/tcg/sh4/Makefile.target index 47c39a44b6..16eaa850a8 100644 --- a/tests/tcg/sh4/Makefile.target +++ b/tests/tcg/sh4/Makefile.target @@ -3,9 +3,6 @@ # SuperH specific tweaks # -# On sh Linux supports 4k, 8k, 16k and 64k pages (but only 4k currently works) -EXTRA_RUNS+=run-test-mmap-4096 # run-test-mmap-8192 run-test-mmap-16384 run-test-mmap-65536 - # This triggers failures for sh4-linux about 10% of the time. # Random SIGSEGV at unpredictable guest address, cause unknown. run-signals: signals diff --git a/tests/tcg/sparc64/Makefile.target b/tests/tcg/sparc64/Makefile.target deleted file mode 100644 index 408dace783..0000000000 --- a/tests/tcg/sparc64/Makefile.target +++ /dev/null @@ -1,6 +0,0 @@ -# -*- Mode: makefile -*- -# -# sparc specific tweaks - -# On Sparc64 Linux support 8k pages -EXTRA_RUNS+=run-test-mmap-8192 |
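The mmap_frag() and mmap_h_lt_g() comments in the linux-user/mmap.c hunks above describe the host behaviour that motivates much of the new code: when a file is mapped over a range larger than the file itself, bytes past EOF inside the last partially backed host page read as zeroes, but touching a host page that lies wholly beyond end-of-file raises SIGBUS. The following standalone host-side program is an illustrative sketch only, not part of this series; the temp-file template and the handler name are arbitrary choices made for the demo.

/*
 * Illustrative host-side demo (not part of this series): map a range
 * larger than the backing file.  Bytes past EOF inside the last backed
 * page read as zero; a page wholly beyond EOF faults with SIGBUS.
 */
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static sigjmp_buf jb;

static void bus_handler(int sig)
{
    (void)sig;
    siglongjmp(jb, 1);
}

int main(void)
{
    long pagesize = sysconf(_SC_PAGESIZE);
    char tmpl[] = "/tmp/page-demo-XXXXXX";
    int fd = mkstemp(tmpl);
    volatile char *p;

    if (fd < 0 || write(fd, "hello", 5) != 5) {
        perror("setup");
        return EXIT_FAILURE;
    }
    unlink(tmpl);

    /* Map two host pages backed by a five-byte file. */
    p = mmap(NULL, 2 * pagesize, PROT_READ, MAP_PRIVATE, fd, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return EXIT_FAILURE;
    }

    signal(SIGBUS, bus_handler);

    /* Still inside the first (partially backed) page: zero fill, no fault. */
    printf("byte at offset %ld reads as %d\n", pagesize - 1, p[pagesize - 1]);

    /* First byte of the page wholly beyond EOF: the host raises SIGBUS. */
    if (sigsetjmp(jb, 1) == 0) {
        printf("byte at offset %ld reads as %d\n", pagesize, p[pagesize]);
    } else {
        printf("SIGBUS touching the page beyond EOF, as expected\n");
    }
    return EXIT_SUCCESS;
}

On a typical 4K-page host the first read prints 0 and the second one traps; that is exactly the gap that mmap_h_lt_g() papers over with extra anonymous pages when the target page size is larger than the host's.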