diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2022-10-26 10:53:48 -0400 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2022-10-26 10:53:49 -0400 |
commit | 344744e148e6e865f5a57e745b02a87e5ea534ad (patch) | |
tree | 2c9a136a0a13c907cbeb17e1adb05989bb8888e4 | |
parent | 08a5d04606292b3cf6f5756bf2a095654a290626 (diff) | |
parent | e38c24cb580735883769558801d9e2f2ba9f04c1 (diff) |
Merge tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu into staging
dump queue
Hi
The "dump" queue, with:
- [PATCH v3/v4 0/9] dump: Cleanup and consolidation
- [PATCH v4 0/4] dump: add 32-bit guest Windows support
# -----BEGIN PGP SIGNATURE-----
#
# iQJQBAABCAA6FiEEh6m9kz+HxgbSdvYt2ujhCXWWnOUFAmNY9gMcHG1hcmNhbmRy
# ZS5sdXJlYXVAcmVkaGF0LmNvbQAKCRDa6OEJdZac5ZUtD/kByfamsq/8hnS6N/ok
# xs9kXO+HZA1A1Kng19RjYWbTka1LpEAf6y6tPtV27l5rWJZxCgqFp3Q2VKQyzAxl
# Bcf4gvEhUDJI87jHrZ8WBJ0JvPL8pKNjPn4JUPOQO+6kX8A/3XTwAyvH/T3uxlTo
# I+4HLwY0EkJ6NU6Cokud5Uo36Zj7JghKrBxTDrd3NC0qSy8xOoIsB5Pbp2PVKuX2
# F5Zfll3F+NUDsj9zmMR6agP4PBUJUB680TtvMpMZXb2BXumKDLngthCLRtGrgsDh
# ChjYr6xkRS9qlXn0PWIYsUyDucDuRFfqTz/Pa9OcGhQuQfIfQiGOM2IFQUE3UcuN
# OphJEFi44za3E7xEZziAGIFmro+k8zX2fjgN3+mApxpBjUAF/uzoW1VzIIdx65Gh
# H/IguECFu7AwMxPucRUI7PkwexgIcqpufeTRqep2nCFsAwS6bS+obzrAzIMd9kj1
# ApLhj36lkub0Tn77B8bkf1TYJnpBcYbGZpmPCILtOxpBZGlXm++KD1DKAYt6rbnR
# 8rQugZNRzEB92aSRTkLJ6QKsqudnbR9ssGbOdEJP+v1fgVtFzYbgygx5QMezGkRw
# vRLWrNbDLog+uYpI2Kb30ItU7+bsDrads9n/gqiGvTP887T3alCtRdIq+Fb28oor
# tSBhBMqMOtccMy3k+EoXBXX5gw==
# =BUEY
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 26 Oct 2022 04:55:31 EDT
# gpg: using RSA key 87A9BD933F87C606D276F62DDAE8E10975969CE5
# gpg: issuer "marcandre.lureau@redhat.com"
# gpg: Good signature from "Marc-André Lureau <marcandre.lureau@redhat.com>" [full]
# gpg: aka "Marc-André Lureau <marcandre.lureau@gmail.com>" [full]
# Primary key fingerprint: 87A9 BD93 3F87 C606 D276 F62D DAE8 E109 7596 9CE5
* tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu:
dump/win_dump: limit number of processed PRCBs
s390x: pv: Add dump support
s390x: Add KVM PV dump interface
include/elf.h: add s390x note types
s390x: Introduce PV query interface
s390x: Add protected dump cap
dump: Add architecture section and section string table support
dump: Reintroduce memory_offset and section_offset
dump: Reorder struct DumpState
dump: Write ELF section headers right after ELF header
dump: Use a buffer for ELF section data and headers
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | dump/dump.c | 288 | ||||
-rw-r--r-- | dump/win_dump.c | 7 | ||||
-rw-r--r-- | hw/s390x/pv.c | 112 | ||||
-rw-r--r-- | hw/s390x/s390-virtio-ccw.c | 6 | ||||
-rw-r--r-- | include/elf.h | 2 | ||||
-rw-r--r-- | include/hw/s390x/pv.h | 19 | ||||
-rw-r--r-- | include/sysemu/dump-arch.h | 3 | ||||
-rw-r--r-- | include/sysemu/dump.h | 26 | ||||
-rw-r--r-- | target/s390x/arch_dump.c | 262 | ||||
-rw-r--r-- | target/s390x/kvm/kvm.c | 7 | ||||
-rw-r--r-- | target/s390x/kvm/kvm_s390x.h | 1 | ||||
-rw-r--r-- | target/s390x/kvm/meson.build | 2 | ||||
-rw-r--r-- | target/s390x/kvm/stubs.c | 12 |
13 files changed, 649 insertions, 98 deletions
diff --git a/dump/dump.c b/dump/dump.c index 236559b03a..df117c847f 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -103,6 +103,7 @@ static int dump_cleanup(DumpState *s) memory_mapping_list_free(&s->list); close(s->fd); g_free(s->guest_note); + g_array_unref(s->string_table_buf); s->guest_note = NULL; if (s->resume) { if (s->detached) { @@ -152,11 +153,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); elf_header->e_phnum = cpu_to_dump16(s, phnum); - if (s->shdr_num) { - elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); - elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); - elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); - } + elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); + elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); + elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); } static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) @@ -180,11 +180,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); elf_header->e_phnum = cpu_to_dump16(s, phnum); - if (s->shdr_num) { - elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); - elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); - elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); - } + elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); + elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); + elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); } static void write_elf_header(DumpState *s, Error **errp) @@ -195,6 +194,8 @@ static void write_elf_header(DumpState *s, Error **errp) void *header_ptr; int ret; + /* The NULL header and the shstrtab are always defined */ + assert(s->shdr_num >= 2); if (dump_is_64bit(s)) { prepare_elf64_header(s, &elf64_header); header_size = sizeof(elf64_header); @@ -380,30 +381,136 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) } } -static void write_elf_section(DumpState *s, int type, Error **errp) +static void prepare_elf_section_hdr_zero(DumpState *s) { - Elf32_Shdr shdr32; - Elf64_Shdr shdr64; + if (dump_is_64bit(s)) { + Elf64_Shdr *shdr64 = s->elf_section_hdrs; + + shdr64->sh_info = cpu_to_dump32(s, s->phdr_num); + } else { + Elf32_Shdr *shdr32 = s->elf_section_hdrs; + + shdr32->sh_info = cpu_to_dump32(s, s->phdr_num); + } +} + +static void prepare_elf_section_hdr_string(DumpState *s, void *buff) +{ + uint64_t index = s->string_table_buf->len; + const char strtab[] = ".shstrtab"; + Elf32_Shdr shdr32 = {}; + Elf64_Shdr shdr64 = {}; int shdr_size; void *shdr; - int ret; - if (type == 0) { - shdr_size = sizeof(Elf32_Shdr); - memset(&shdr32, 0, shdr_size); - shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); - shdr = &shdr32; - } else { + g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab)); + if (dump_is_64bit(s)) { shdr_size = sizeof(Elf64_Shdr); - memset(&shdr64, 0, shdr_size); - shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr64.sh_type = SHT_STRTAB; + shdr64.sh_offset = s->section_offset + s->elf_section_data_size; + shdr64.sh_name = index; + shdr64.sh_size = s->string_table_buf->len; shdr = &shdr64; + } else { + shdr_size = sizeof(Elf32_Shdr); + shdr32.sh_type = SHT_STRTAB; + shdr32.sh_offset = s->section_offset + s->elf_section_data_size; + shdr32.sh_name = index; + shdr32.sh_size = s->string_table_buf->len; + shdr = &shdr32; + } + memcpy(buff, shdr, shdr_size); +} + +static bool prepare_elf_section_hdrs(DumpState *s, Error **errp) +{ + size_t len, sizeof_shdr; + void *buff_hdr; + + /* + * Section ordering: + * - HDR zero + * - Arch section hdrs + * - String table hdr + */ + sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + len = sizeof_shdr * s->shdr_num; + s->elf_section_hdrs = g_malloc0(len); + buff_hdr = s->elf_section_hdrs; + + /* + * The first section header is ALWAYS a special initial section + * header. + * + * The header should be 0 with one exception being that if + * phdr_num is PN_XNUM then the sh_info field contains the real + * number of segment entries. + * + * As we zero allocate the buffer we will only need to modify + * sh_info for the PN_XNUM case. + */ + if (s->phdr_num >= PN_XNUM) { + prepare_elf_section_hdr_zero(s); } + buff_hdr += sizeof_shdr; + + /* Add architecture defined section headers */ + if (s->dump_info.arch_sections_write_hdr_fn + && s->shdr_num > 2) { + buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr); + + if (s->shdr_num >= SHN_LORESERVE) { + error_setg_errno(errp, EINVAL, + "dump: too many architecture defined sections"); + return false; + } + } + + /* + * String table is the last section since strings are added via + * arch_sections_write_hdr(). + */ + prepare_elf_section_hdr_string(s, buff_hdr); + return true; +} - ret = fd_write_vmcore(shdr, shdr_size, s); +static void write_elf_section_headers(DumpState *s, Error **errp) +{ + size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + int ret; + + if (!prepare_elf_section_hdrs(s, errp)) { + return; + } + + ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); if (ret < 0) { - error_setg_errno(errp, -ret, - "dump: failed to write section header table"); + error_setg_errno(errp, -ret, "dump: failed to write section headers"); + } + + g_free(s->elf_section_hdrs); +} + +static void write_elf_sections(DumpState *s, Error **errp) +{ + int ret; + + if (s->elf_section_data_size) { + /* Write architecture section data */ + ret = fd_write_vmcore(s->elf_section_data, + s->elf_section_data_size, s); + if (ret < 0) { + error_setg_errno(errp, -ret, + "dump: failed to write architecture section data"); + return; + } + } + + /* Write string table */ + ret = fd_write_vmcore(s->string_table_buf->data, + s->string_table_buf->len, s); + if (ret < 0) { + error_setg_errno(errp, -ret, "dump: failed to write string table data"); } } @@ -554,6 +661,8 @@ static void dump_begin(DumpState *s, Error **errp) * -------------- * | elf header | * -------------- + * | sctn_hdr | + * -------------- * | PT_NOTE | * -------------- * | PT_LOAD | @@ -562,8 +671,6 @@ static void dump_begin(DumpState *s, Error **errp) * -------------- * | PT_LOAD | * -------------- - * | sec_hdr | - * -------------- * | elf note | * -------------- * | memory | @@ -579,6 +686,12 @@ static void dump_begin(DumpState *s, Error **errp) return; } + /* write section headers to vmcore */ + write_elf_section_headers(s, errp); + if (*errp) { + return; + } + /* write PT_NOTE to vmcore */ write_elf_phdr_note(s, errp); if (*errp) { @@ -591,21 +704,13 @@ static void dump_begin(DumpState *s, Error **errp) return; } - /* write section to vmcore */ - if (s->shdr_num) { - write_elf_section(s, 1, errp); - if (*errp) { - return; - } - } - /* write notes to vmcore */ write_elf_notes(s, errp); } -static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, - int64_t filter_area_start, - int64_t filter_area_length) +int64_t dump_filtered_memblock_size(GuestPhysBlock *block, + int64_t filter_area_start, + int64_t filter_area_length) { int64_t size, left, right; @@ -623,9 +728,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, return size; } -static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, - int64_t filter_area_start, - int64_t filter_area_length) +int64_t dump_filtered_memblock_start(GuestPhysBlock *block, + int64_t filter_area_start, + int64_t filter_area_length) { if (filter_area_length) { /* return -1 if the block is not within filter area */ @@ -665,6 +770,31 @@ static void dump_iterate(DumpState *s, Error **errp) } } +static void dump_end(DumpState *s, Error **errp) +{ + int rc; + ERRP_GUARD(); + + if (s->elf_section_data_size) { + s->elf_section_data = g_malloc0(s->elf_section_data_size); + } + + /* Adds the architecture defined section data to s->elf_section_data */ + if (s->dump_info.arch_sections_write_fn && + s->elf_section_data_size) { + rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data); + if (rc) { + error_setg_errno(errp, rc, + "dump: failed to get arch section data"); + g_free(s->elf_section_data); + return; + } + } + + /* write sections to vmcore */ + write_elf_sections(s, errp); +} + static void create_vmcore(DumpState *s, Error **errp) { ERRP_GUARD(); @@ -674,7 +804,14 @@ static void create_vmcore(DumpState *s, Error **errp) return; } + /* Iterate over memory and dump it to file */ dump_iterate(s, errp); + if (*errp) { + return; + } + + /* Write the section data */ + dump_end(s, errp); } static int write_start_flat_header(int fd) @@ -1684,6 +1821,14 @@ static void dump_init(DumpState *s, int fd, bool has_format, s->filter_area_begin = begin; s->filter_area_length = length; + /* First index is 0, it's the special null name */ + s->string_table_buf = g_array_new(FALSE, TRUE, 1); + /* + * Allocate the null name, due to the clearing option set to true + * it will be 0. + */ + g_array_set_size(s->string_table_buf, 1); + memory_mapping_list_init(&s->list); guest_phys_blocks_init(&s->guest_phys_blocks); @@ -1820,38 +1965,53 @@ static void dump_init(DumpState *s, int fd, bool has_format, } /* - * calculate phdr_num + * The first section header is always a special one in which most + * fields are 0. The section header string table is also always + * set. + */ + s->shdr_num = 2; + + /* + * Adds the number of architecture sections to shdr_num and sets + * elf_section_data_size so we know the offsets and sizes of all + * parts. + */ + if (s->dump_info.arch_sections_add_fn) { + s->dump_info.arch_sections_add_fn(s); + } + + /* + * calculate shdr_num so we know the offsets and sizes of all + * parts. + * Calculate phdr_num * - * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow + * The absolute maximum amount of phdrs is UINT32_MAX - 1 as + * sh_info is 32 bit. There's special handling once we go over + * UINT16_MAX - 1 but that is handled in the ehdr and section + * code. */ - s->phdr_num = 1; /* PT_NOTE */ - if (s->list.num < UINT16_MAX - 2) { - s->shdr_num = 0; + s->phdr_num = 1; /* Reserve PT_NOTE */ + if (s->list.num <= UINT32_MAX - 1) { s->phdr_num += s->list.num; } else { - /* sh_info of section 0 holds the real number of phdrs */ - s->shdr_num = 1; - - /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ - if (s->list.num <= UINT32_MAX - 1) { - s->phdr_num += s->list.num; - } else { - s->phdr_num = UINT32_MAX; - } + s->phdr_num = UINT32_MAX; } + /* + * Now that the number of section and program headers is known we + * can calculate the offsets of the headers and data. + */ if (dump_is_64bit(s)) { - s->phdr_offset = sizeof(Elf64_Ehdr); - s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; - s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; - s->memory_offset = s->note_offset + s->note_size; + s->shdr_offset = sizeof(Elf64_Ehdr); + s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; + s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; } else { - - s->phdr_offset = sizeof(Elf32_Ehdr); - s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; - s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; - s->memory_offset = s->note_offset + s->note_size; + s->shdr_offset = sizeof(Elf32_Ehdr); + s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; + s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; } + s->memory_offset = s->note_offset + s->note_size; + s->section_offset = s->memory_offset + s->total_size; return; diff --git a/dump/win_dump.c b/dump/win_dump.c index fd91350fbb..f20b6051b6 100644 --- a/dump/win_dump.c +++ b/dump/win_dump.c @@ -273,6 +273,13 @@ static void patch_and_save_context(WinDumpHeader *h, bool x64, uint64_t Context; WinContext ctx; + if (i >= WIN_DUMP_FIELD(NumberProcessors)) { + warn_report("win-dump: number of QEMU CPUs is bigger than" + " NumberProcessors (%u) in guest Windows", + WIN_DUMP_FIELD(NumberProcessors)); + return; + } + if (cpu_read_ptr(x64, first_cpu, KiProcessorBlock + i * win_dump_ptr_size(x64), &Prcb)) { diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c index 401b63d6cb..728ba24547 100644 --- a/hw/s390x/pv.c +++ b/hw/s390x/pv.c @@ -20,6 +20,11 @@ #include "exec/confidential-guest-support.h" #include "hw/s390x/ipl.h" #include "hw/s390x/pv.h" +#include "target/s390x/kvm/kvm_s390x.h" + +static bool info_valid; +static struct kvm_s390_pv_info_vm info_vm; +static struct kvm_s390_pv_info_dump info_dump; static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) { @@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) } \ } +int s390_pv_query_info(void) +{ + struct kvm_s390_pv_info info = { + .header.id = KVM_PV_INFO_VM, + .header.len_max = sizeof(info.header) + sizeof(info.vm), + }; + int rc; + + /* Info API's first user is dump so they are bundled */ + if (!kvm_s390_get_protected_dump()) { + return 0; + } + + rc = s390_pv_cmd(KVM_PV_INFO, &info); + if (rc) { + error_report("KVM PV INFO cmd %x failed: %s", + info.header.id, strerror(-rc)); + return rc; + } + memcpy(&info_vm, &info.vm, sizeof(info.vm)); + + info.header.id = KVM_PV_INFO_DUMP; + info.header.len_max = sizeof(info.header) + sizeof(info.dump); + rc = s390_pv_cmd(KVM_PV_INFO, &info); + if (rc) { + error_report("KVM PV INFO cmd %x failed: %s", + info.header.id, strerror(-rc)); + return rc; + } + + memcpy(&info_dump, &info.dump, sizeof(info.dump)); + info_valid = true; + + return rc; +} + int s390_pv_vm_enable(void) { return s390_pv_cmd(KVM_PV_ENABLE, NULL); @@ -114,6 +155,77 @@ void s390_pv_inject_reset_error(CPUState *cs) env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; } +uint64_t kvm_s390_pv_dmp_get_size_cpu(void) +{ + return info_dump.dump_cpu_buffer_len; +} + +uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) +{ + return info_dump.dump_config_finalize_len; +} + +uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) +{ + return info_dump.dump_config_mem_buffer_per_1m; +} + +bool kvm_s390_pv_info_basic_valid(void) +{ + return info_valid; +} + +static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr, + uint64_t len) +{ + struct kvm_s390_pv_dmp dmp = { + .subcmd = subcmd, + .buff_addr = uaddr, + .buff_len = len, + .gaddr = gaddr, + }; + int ret; + + ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp); + if (ret) { + error_report("KVM DUMP command %ld failed", subcmd); + } + return ret; +} + +int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) +{ + struct kvm_s390_pv_dmp dmp = { + .subcmd = KVM_PV_DUMP_CPU, + .buff_addr = (uint64_t)buff, + .gaddr = 0, + .buff_len = info_dump.dump_cpu_buffer_len, + }; + struct kvm_pv_cmd pv = { + .cmd = KVM_PV_DUMP, + .data = (uint64_t)&dmp, + }; + + return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv); +} + +int kvm_s390_dump_init(void) +{ + return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0); +} + +int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest) +{ + return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest, + gaddr, len); +} + +int kvm_s390_dump_completion_data(void *buff) +{ + return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0, + info_dump.dump_config_finalize_len); +} + #define TYPE_S390_PV_GUEST "s390-pv-guest" OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 03855c7231..1cc20d8717 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -366,6 +366,12 @@ static int s390_machine_protect(S390CcwMachineState *ms) ms->pv = true; + /* Will return 0 if API is not available since it's not vital */ + rc = s390_pv_query_info(); + if (rc) { + goto out_err; + } + /* Set SE header and unpack */ rc = s390_ipl_prepare_pv_header(); if (rc) { diff --git a/include/elf.h b/include/elf.h index 3d6b9062c0..8bf1e72720 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1650,6 +1650,8 @@ typedef struct elf64_shdr { #define NT_TASKSTRUCT 4 #define NT_AUXV 6 #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ +#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */ diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h index 1f1f545bfc..9360aa1091 100644 --- a/include/hw/s390x/pv.h +++ b/include/hw/s390x/pv.h @@ -38,6 +38,7 @@ static inline bool s390_is_pv(void) return ccw->pv; } +int s390_pv_query_info(void); int s390_pv_vm_enable(void); void s390_pv_vm_disable(void); int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); @@ -46,8 +47,17 @@ void s390_pv_prep_reset(void); int s390_pv_verify(void); void s390_pv_unshare(void); void s390_pv_inject_reset_error(CPUState *cs); +uint64_t kvm_s390_pv_dmp_get_size_cpu(void); +uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); +uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); +bool kvm_s390_pv_info_basic_valid(void); +int kvm_s390_dump_init(void); +int kvm_s390_dump_cpu(S390CPU *cpu, void *buff); +int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest); +int kvm_s390_dump_completion_data(void *buff); #else /* CONFIG_KVM */ static inline bool s390_is_pv(void) { return false; } +static inline int s390_pv_query_info(void) { return 0; } static inline int s390_pv_vm_enable(void) { return 0; } static inline void s390_pv_vm_disable(void) {} static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } @@ -56,6 +66,15 @@ static inline void s390_pv_prep_reset(void) {} static inline int s390_pv_verify(void) { return 0; } static inline void s390_pv_unshare(void) {} static inline void s390_pv_inject_reset_error(CPUState *cs) {}; +static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } +static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } +static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } +static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } +static inline int kvm_s390_dump_init(void) { return 0; } +static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; } +static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, + void *dest) { return 0; } +static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } #endif /* CONFIG_KVM */ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h index e25b02e990..59bbc9be38 100644 --- a/include/sysemu/dump-arch.h +++ b/include/sysemu/dump-arch.h @@ -21,6 +21,9 @@ typedef struct ArchDumpInfo { uint32_t page_size; /* The target's page size. If it's variable and * unknown, then this should be the maximum. */ uint64_t phys_base; /* The target's physmem base. */ + void (*arch_sections_add_fn)(DumpState *s); + uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); + int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); } ArchDumpInfo; struct GuestPhysBlockList; /* memory_mapping.h */ diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h index b62513d87d..4ffed0b659 100644 --- a/include/sysemu/dump.h +++ b/include/sysemu/dump.h @@ -154,15 +154,8 @@ typedef struct DumpState { GuestPhysBlockList guest_phys_blocks; ArchDumpInfo dump_info; MemoryMappingList list; - uint32_t phdr_num; - uint32_t shdr_num; bool resume; bool detached; - ssize_t note_size; - hwaddr shdr_offset; - hwaddr phdr_offset; - hwaddr section_offset; - hwaddr note_offset; hwaddr memory_offset; int fd; @@ -177,6 +170,20 @@ typedef struct DumpState { int64_t filter_area_begin; /* Start address of partial guest memory area */ int64_t filter_area_length; /* Length of partial guest memory area */ + /* Elf dump related data */ + uint32_t phdr_num; + uint32_t shdr_num; + ssize_t note_size; + hwaddr shdr_offset; + hwaddr phdr_offset; + hwaddr section_offset; + hwaddr note_offset; + + void *elf_section_hdrs; /* Pointer to section header buffer */ + void *elf_section_data; /* Pointer to section data buffer */ + uint64_t elf_section_data_size; /* Size of section data */ + GArray *string_table_buf; /* String table data buffer */ + uint8_t *note_buf; /* buffer for notes */ size_t note_buf_offset; /* the writing place in note_buf */ uint32_t nr_cpus; /* number of guest's cpu */ @@ -208,4 +215,9 @@ typedef struct DumpState { uint16_t cpu_to_dump16(DumpState *s, uint16_t val); uint32_t cpu_to_dump32(DumpState *s, uint32_t val); uint64_t cpu_to_dump64(DumpState *s, uint64_t val); + +int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start, + int64_t filter_area_length); +int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start, + int64_t filter_area_length); #endif diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c index f60a14920d..a2329141e8 100644 --- a/target/s390x/arch_dump.c +++ b/target/s390x/arch_dump.c @@ -12,11 +12,13 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "cpu.h" #include "s390x-internal.h" #include "elf.h" #include "sysemu/dump.h" - +#include "hw/s390x/pv.h" +#include "kvm/kvm_s390x.h" struct S390xUserRegsStruct { uint64_t psw[2]; @@ -76,9 +78,16 @@ typedef struct noteStruct { uint64_t todcmp; uint32_t todpreg; uint64_t ctrs[16]; + uint8_t dynamic[1]; /* + * Would be a flexible array member, if + * that was legal inside a union. Real + * size comes from PV info interface. + */ } contents; } QEMU_PACKED Note; +static bool pv_dump_initialized; + static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id) { int i; @@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id) note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa)); } +static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id) +{ + note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA); + if (!pv_dump_initialized) { + return; + } + kvm_s390_dump_cpu(cpu, ¬e->contents.dynamic); +} typedef struct NoteFuncDescStruct { int contents_size; + uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */ void (*note_contents_func)(Note *note, S390CPU *cpu, int id); + bool pvonly; } NoteFuncDesc; static const NoteFuncDesc note_core[] = { - {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus}, - {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset}, - { 0, NULL} + {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false}, + {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false}, + { 0, NULL, NULL, false} }; static const NoteFuncDesc note_linux[] = { - {sizeof_field(Note, contents.prefix), s390x_write_elf64_prefix}, - {sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs}, - {sizeof_field(Note, contents.timer), s390x_write_elf64_timer}, - {sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp}, - {sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg}, - {sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo}, - {sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi}, - {sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb}, - { 0, NULL} + {sizeof_field(Note, contents.prefix), NULL, s390x_write_elf64_prefix, false}, + {sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs, false}, + {sizeof_field(Note, contents.timer), NULL, s390x_write_elf64_timer, false}, + {sizeof_field(Note, contents.todcmp), NULL, s390x_write_elf64_todcmp, false}, + {sizeof_field(Note, contents.todpreg), NULL, s390x_write_elf64_todpreg, false}, + {sizeof_field(Note, contents.vregslo), NULL, s390x_write_elf64_vregslo, false}, + {sizeof_field(Note, contents.vregshi), NULL, s390x_write_elf64_vregshi, false}, + {sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb, false}, + {0, kvm_s390_pv_dmp_get_size_cpu, s390x_write_elf64_pv, true}, + { 0, NULL, NULL, false} }; static int s390x_write_elf64_notes(const char *note_name, @@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name, DumpState *s, const NoteFuncDesc *funcs) { - Note note; + Note note, *notep; const NoteFuncDesc *nf; - int note_size; + int note_size, content_size; int ret = -1; assert(strlen(note_name) < sizeof(note.name)); for (nf = funcs; nf->note_contents_func; nf++) { - memset(¬e, 0, sizeof(note)); - note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); - note.hdr.n_descsz = cpu_to_be32(nf->contents_size); - g_strlcpy(note.name, note_name, sizeof(note.name)); - (*nf->note_contents_func)(¬e, cpu, id); + notep = ¬e; + if (nf->pvonly && !s390_is_pv()) { + continue; + } + + content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size; + note_size = sizeof(note) - sizeof(notep->contents) + content_size; + + /* Notes with dynamic sizes need to allocate a note */ + if (nf->note_size_func) { + notep = g_malloc(note_size); + } + + memset(notep, 0, sizeof(note)); - note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; - ret = f(¬e, note_size, s); + /* Setup note header data */ + notep->hdr.n_descsz = cpu_to_be32(content_size); + notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); + g_strlcpy(notep->name, note_name, sizeof(notep->name)); + + /* Get contents and write them out */ + (*nf->note_contents_func)(notep, cpu, id); + ret = f(notep, note_size, s); + + if (nf->note_size_func) { + g_free(notep); + } if (ret < 0) { return -1; @@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); } +/* PV dump section size functions */ +static uint64_t get_mem_state_size_from_len(uint64_t len) +{ + return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state(); +} + +static uint64_t get_size_mem_state(DumpState *s) +{ + return get_mem_state_size_from_len(s->total_size); +} + +static uint64_t get_size_completion_data(DumpState *s) +{ + return kvm_s390_pv_dmp_get_size_completion_data(); +} + +/* PV dump section data functions*/ +static int get_data_completion(DumpState *s, uint8_t *buff) +{ + int rc; + + if (!pv_dump_initialized) { + return 0; + } + rc = kvm_s390_dump_completion_data(buff); + if (!rc) { + pv_dump_initialized = false; + } + return rc; +} + +static int get_mem_state(DumpState *s, uint8_t *buff) +{ + int64_t memblock_size, memblock_start; + GuestPhysBlock *block; + uint64_t off; + int rc; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { + memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, + s->filter_area_length); + if (memblock_start == -1) { + continue; + } + + memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, + s->filter_area_length); + + off = get_mem_state_size_from_len(block->target_start); + + rc = kvm_s390_dump_mem_state(block->target_start, + get_mem_state_size_from_len(memblock_size), + buff + off); + if (rc) { + return rc; + } + } + + return 0; +} + +static struct sections { + uint64_t (*sections_size_func)(DumpState *s); + int (*sections_contents_func)(DumpState *s, uint8_t *buff); + char sctn_str[12]; +} sections[] = { + { get_size_mem_state, get_mem_state, "pv_mem_meta"}, + { get_size_completion_data, get_data_completion, "pv_compl"}, + {NULL , NULL, ""} +}; + +static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff) +{ + Elf64_Shdr *shdr = (void *)buff; + struct sections *sctn = sections; + uint64_t off = s->section_offset; + + if (!pv_dump_initialized) { + return 0; + } + + for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) { + memset(shdr, 0, sizeof(*shdr)); + shdr->sh_type = SHT_PROGBITS; + shdr->sh_offset = off; + shdr->sh_size = sctn->sections_size_func(s); + shdr->sh_name = s->string_table_buf->len; + g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str)); + } + + return (uintptr_t)shdr - (uintptr_t)buff; +} + + +/* Add arch specific number of sections and their respective sizes */ +static void arch_sections_add(DumpState *s) +{ + struct sections *sctn = sections; + + /* + * We only do a PV dump if we are running a PV guest, KVM supports + * the dump API and we got valid dump length information. + */ + if (!s390_is_pv() || !kvm_s390_get_protected_dump() || + !kvm_s390_pv_info_basic_valid()) { + return; + } + + /* + * Start the UV dump process by doing the initialize dump call via + * KVM as the proxy. + */ + if (!kvm_s390_dump_init()) { + pv_dump_initialized = true; + } else { + /* + * Dump init failed, maybe the guest owner disabled dumping. + * We'll continue the non-PV dump process since this is no + * reason to crash qemu. + */ + return; + } + + for (; sctn->sections_size_func; sctn++) { + s->shdr_num += 1; + s->elf_section_data_size += sctn->sections_size_func(s); + } +} + +/* + * After the PV dump has been initialized, the CPU data has been + * fetched and memory has been dumped, we need to grab the tweak data + * and the completion data. + */ +static int arch_sections_write(DumpState *s, uint8_t *buff) +{ + struct sections *sctn = sections; + int rc; + + if (!pv_dump_initialized) { + return -EINVAL; + } + + for (; sctn->sections_size_func; sctn++) { + rc = sctn->sections_contents_func(s, buff); + buff += sctn->sections_size_func(s); + if (rc) { + return rc; + } + } + return 0; +} + int cpu_get_dump_info(ArchDumpInfo *info, const struct GuestPhysBlockList *guest_phys_blocks) { info->d_machine = EM_S390; info->d_endian = ELFDATA2MSB; info->d_class = ELFCLASS64; - + /* + * This is evaluated for each dump so we can freely switch + * between PV and non-PV. + */ + if (s390_is_pv() && kvm_s390_get_protected_dump() && + kvm_s390_pv_info_basic_valid()) { + info->arch_sections_add_fn = *arch_sections_add; + info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; + info->arch_sections_write_fn = *arch_sections_write; + } else { + info->arch_sections_add_fn = NULL; + info->arch_sections_write_hdr_fn = NULL; + info->arch_sections_write_fn = NULL; + } return 0; } @@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) { int name_size = 8; /* "LINUX" or "CORE" + pad */ size_t elf_note_size = 0; - int note_head_size; + int note_head_size, content_size; const NoteFuncDesc *nf; assert(class == ELFCLASS64); @@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) note_head_size = sizeof(Elf64_Nhdr); for (nf = note_core; nf->note_contents_func; nf++) { - elf_note_size = elf_note_size + note_head_size + name_size + - nf->contents_size; + elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size; } for (nf = note_linux; nf->note_contents_func; nf++) { + if (nf->pvonly && !s390_is_pv()) { + continue; + } + content_size = nf->contents_size ? nf->contents_size : nf->note_size_func(); elf_note_size = elf_note_size + note_head_size + name_size + - nf->contents_size; + content_size; } return (elf_note_size) * nr_cpus; diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index da8c47f57b..3ac7ec9acf 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -158,6 +158,7 @@ static int cap_hpage_1m; static int cap_vcpu_resets; static int cap_protected; static int cap_zpci_op; +static int cap_protected_dump; static bool mem_op_storage_key_support; @@ -364,6 +365,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); + cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP); kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); @@ -2045,6 +2047,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); } +int kvm_s390_get_protected_dump(void) +{ + return cap_protected_dump; +} + int kvm_s390_get_ri(void) { return cap_ri; diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h index aaae8570de..f9785564d0 100644 --- a/target/s390x/kvm/kvm_s390x.h +++ b/target/s390x/kvm/kvm_s390x.h @@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); int kvm_s390_get_hpage_1m(void); +int kvm_s390_get_protected_dump(void); int kvm_s390_get_ri(void); int kvm_s390_get_zpci_op(void); int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build index d1356356b1..aef52b6686 100644 --- a/target/s390x/kvm/meson.build +++ b/target/s390x/kvm/meson.build @@ -1,6 +1,8 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 'kvm.c' +), if_false: files( + 'stubs.c' )) # Newer kernels on s390 check for an S390_PGSTE program header and diff --git a/target/s390x/kvm/stubs.c b/target/s390x/kvm/stubs.c new file mode 100644 index 0000000000..5fd63b9a7e --- /dev/null +++ b/target/s390x/kvm/stubs.c @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" + +#include "kvm_s390x.h" + +int kvm_s390_get_protected_dump(void) +{ + return false; +} |