From 623e250abdca2c29830793e3ac81a9e405f33216 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Mon, 16 Jun 2014 11:03:19 -0500 Subject: linux-user: Correct AUXV Cache Line Sizes for PowerPC Set the AT_ICACHEBSIZE and AT_DCACHEBSIZE entries of the AUXV to match the CPU model's cache line sizes. This fixes memory clobbering problems on more recent Book 3s implementations; memset(p, 0, N) will use the dcbz instruction when N is sufficiently large and many of the newer server CPUs have cache lines sizes of 128 bytes. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- linux-user/elfload.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 1248eda272..64d23fa647 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -774,8 +774,9 @@ static uint32_t get_elf_hwcap(void) #define DLINFO_ARCH_ITEMS 5 #define ARCH_DLINFO \ do { \ - NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20); \ - NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20); \ + PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); \ + NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \ + NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \ NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ /* \ * Now handle glibc compatibility. \ -- cgit v1.2.3 From b2f1355020b1b48c479426b50f37baccb3b1fe84 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Mon, 16 Jun 2014 11:03:20 -0500 Subject: target-ppc: Add DFP to Emulated Instructions Flag Decimal Floating Point is emulated, so add it the mask. This will fix the erroneous message: Warning: Disabling some instructions which are not emulated by TCG (0x0, 0x4) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 74407ee209..08ae527cb6 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -2012,7 +2012,7 @@ enum { PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \ PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \ PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \ - PPC2_ALTIVEC_207 | PPC2_ISA207S) + PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP) }; /*****************************************************************************/ -- cgit v1.2.3 From 0e019746d7c27899c832562a72d736667d7ce1b7 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Mon, 16 Jun 2014 11:03:21 -0500 Subject: linux-user: Identify Addition Hardware Capabilities for PowerPC Add VSX, DFP and ISA 2.06 to the bits identified in the AT_HWCAP entry of the AUXV. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- linux-user/elfload.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 64d23fa647..9a41882dcf 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -749,6 +749,8 @@ static uint32_t get_elf_hwcap(void) Altivec/FP/SPE support. Anything else is just a bonus. */ #define GET_FEATURE(flag, feature) \ do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0) +#define GET_FEATURE2(flag, feature) \ + do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0) GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64); GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU); GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC); @@ -757,7 +759,13 @@ static uint32_t get_elf_hwcap(void) GET_FEATURE(PPC_SPE_DOUBLE, QEMU_PPC_FEATURE_HAS_EFP_DOUBLE); GET_FEATURE(PPC_BOOKE, QEMU_PPC_FEATURE_BOOKE); GET_FEATURE(PPC_405_MAC, QEMU_PPC_FEATURE_HAS_4xxMAC); + GET_FEATURE2(PPC2_DFP, QEMU_PPC_FEATURE_HAS_DFP); + GET_FEATURE2(PPC2_VSX, QEMU_PPC_FEATURE_HAS_VSX); + GET_FEATURE2((PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | + PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206), + QEMU_PPC_FEATURE_ARCH_2_06); #undef GET_FEATURE +#undef GET_FEATURE2 return features; } -- cgit v1.2.3 From a60438ddd68d45bc378b324fe7ecc61452d4f131 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Mon, 16 Jun 2014 11:03:22 -0500 Subject: linux-user: Support HWCAP2 in PowerPC Set bits in the AT_HWCAP2 entry of the AUXV. Specifically, detect and set bits for bctar, ISEL and ISA 2.07. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- linux-user/elfload.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 9a41882dcf..60777fecf6 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -736,6 +736,14 @@ enum { QEMU_PPC_FEATURE_TRUE_LE = 0x00000002, QEMU_PPC_FEATURE_PPC_LE = 0x00000001, + + /* Feature definitions in AT_HWCAP2. */ + QEMU_PPC_FEATURE2_ARCH_2_07 = 0x80000000, /* ISA 2.07 */ + QEMU_PPC_FEATURE2_HAS_HTM = 0x40000000, /* Hardware Transactional Memory */ + QEMU_PPC_FEATURE2_HAS_DSCR = 0x20000000, /* Data Stream Control Register */ + QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */ + QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */ + QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */ }; #define ELF_HWCAP get_elf_hwcap() @@ -770,6 +778,29 @@ static uint32_t get_elf_hwcap(void) return features; } +#define ELF_HWCAP2 get_elf_hwcap2() + +static uint32_t get_elf_hwcap2(void) +{ + PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); + uint32_t features = 0; + +#define GET_FEATURE(flag, feature) \ + do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0) +#define GET_FEATURE2(flag, feature) \ + do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0) + + GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL); + GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR); + GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | + PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07); + +#undef GET_FEATURE +#undef GET_FEATURE2 + + return features; +} + /* * The requirements here are: * - keep the final alignment of sp (sp & 0xf) -- cgit v1.2.3 From cc84c0f3571c75ced90a9ba9dcbb208464a1d997 Mon Sep 17 00:00:00 2001 From: Avik Sil Date: Tue, 10 Jun 2014 13:26:44 +0530 Subject: spapr: Add "qemu, boot-menu" property to /chosen This is required to enable boot menu display during booting Signed-off-by: Avik Sil Signed-off-by: Nikunj A Dadhania Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 82f183f173..f4d464a94a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -442,6 +442,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, if (boot_device) { _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); } + if (boot_menu) { + _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu))); + } _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width))); _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height))); _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth))); -- cgit v1.2.3 From 294d1292893867894992e810a01cfcfa451f1885 Mon Sep 17 00:00:00 2001 From: Sorav Bansal Date: Tue, 17 Jun 2014 11:24:02 +0530 Subject: target-ppc: fixed translation of mcrxr instruction Fixed bug in gen_mcrxr() in target-ppc/translate.c: The XER[SO], XER[OV], and XER[CA] flags are stored in the least significant bit (bit 0) of their respective registers. They need to be shifted left (by their respective offsets) to generate the final XER value. The old translation code for the 'mcrxr' instruction was assuming that the flags are stored in bit 2, and was shifting them right (incorrectly) Signed-off-by: Sorav Bansal Reviewed-by: Tom Musta Tested-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/translate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 48017219a4..c5d73d5aa0 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -4123,8 +4123,9 @@ static void gen_mcrxr(DisasContext *ctx) tcg_gen_trunc_tl_i32(t0, cpu_so); tcg_gen_trunc_tl_i32(t1, cpu_ov); tcg_gen_trunc_tl_i32(dst, cpu_ca); - tcg_gen_shri_i32(t0, t0, 2); - tcg_gen_shri_i32(t1, t1, 1); + tcg_gen_shli_i32(t0, t0, 3); + tcg_gen_shli_i32(t1, t1, 2); + tcg_gen_shli_i32(dst, dst, 1); tcg_gen_or_i32(dst, dst, t0); tcg_gen_or_i32(dst, dst, t1); tcg_temp_free_i32(t0); -- cgit v1.2.3 From b3cad3abf6df07db165091343b912c2a5de63c26 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 23 Jun 2014 15:23:08 +0200 Subject: PPC: Add support for Apple gdb in gdbstub The Apple gdbstub protocol is different from the normal gdbstub protocol used on PowerPC. Add support for the different variant, so that we can use Apple's gdb to debug guest code. Keep in mind that the switch is a compile time option. We can't detect during runtime whether a gdb connecting to us is an upstream gdb or an Apple gdb. Signed-off-by: Alexander Graf --- target-ppc/cpu-qom.h | 2 + target-ppc/gdbstub.c | 137 ++++++++++++++++++++++++++++++++++++++++++++ target-ppc/translate_init.c | 8 +++ 3 files changed, 147 insertions(+) diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h index 13c7031265..f1f0a52998 100644 --- a/target-ppc/cpu-qom.h +++ b/target-ppc/cpu-qom.h @@ -119,7 +119,9 @@ void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf, int flags); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); +int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg); int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg); int ppc64_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, void *opaque); int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c index 381a3c7e31..694d303e19 100644 --- a/target-ppc/gdbstub.c +++ b/target-ppc/gdbstub.c @@ -21,6 +21,31 @@ #include "qemu-common.h" #include "exec/gdbstub.h" +static int ppc_gdb_register_len_apple(int n) +{ + switch (n) { + case 0 ... 31: + /* gprs */ + return 8; + case 32 ... 63: + /* fprs */ + return 8; + case 64 ... 95: + return 16; + case 64+32: /* nip */ + case 65+32: /* msr */ + case 67+32: /* lr */ + case 68+32: /* ctr */ + case 69+32: /* xer */ + case 70+32: /* fpscr */ + return 8; + case 66+32: /* cr */ + return 4; + default: + return 0; + } +} + static int ppc_gdb_register_len(int n) { switch (n) { @@ -132,6 +157,65 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return r; } +int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + int r = ppc_gdb_register_len_apple(n); + + if (!r) { + return r; + } + + if (n < 32) { + /* gprs */ + gdb_get_reg64(mem_buf, env->gpr[n]); + } else if (n < 64) { + /* fprs */ + stfq_p(mem_buf, env->fpr[n-32]); + } else if (n < 96) { + /* Altivec */ + stq_p(mem_buf, n - 64); + stq_p(mem_buf + 8, 0); + } else { + switch (n) { + case 64 + 32: + gdb_get_reg64(mem_buf, env->nip); + break; + case 65 + 32: + gdb_get_reg64(mem_buf, env->msr); + break; + case 66 + 32: + { + uint32_t cr = 0; + int i; + for (i = 0; i < 8; i++) { + cr |= env->crf[i] << (32 - ((i + 1) * 4)); + } + gdb_get_reg32(mem_buf, cr); + break; + } + case 67 + 32: + gdb_get_reg64(mem_buf, env->lr); + break; + case 68 + 32: + gdb_get_reg64(mem_buf, env->ctr); + break; + case 69 + 32: + gdb_get_reg64(mem_buf, env->xer); + break; + case 70 + 32: + gdb_get_reg64(mem_buf, env->fpscr); + break; + } + } + if (msr_le) { + /* If cpu is in LE mode, convert memory contents to LE. */ + ppc_gdb_swap_register(mem_buf, n, r); + } + return r; +} + int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -185,3 +269,56 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) } return r; } +int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + int r = ppc_gdb_register_len_apple(n); + + if (!r) { + return r; + } + if (msr_le) { + /* If cpu is in LE mode, convert memory contents to LE. */ + ppc_gdb_swap_register(mem_buf, n, r); + } + if (n < 32) { + /* gprs */ + env->gpr[n] = ldq_p(mem_buf); + } else if (n < 64) { + /* fprs */ + env->fpr[n-32] = ldfq_p(mem_buf); + } else { + switch (n) { + case 64 + 32: + env->nip = ldq_p(mem_buf); + break; + case 65 + 32: + ppc_store_msr(env, ldq_p(mem_buf)); + break; + case 66 + 32: + { + uint32_t cr = ldl_p(mem_buf); + int i; + for (i = 0; i < 8; i++) { + env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF; + } + break; + } + case 67 + 32: + env->lr = ldq_p(mem_buf); + break; + case 68 + 32: + env->ctr = ldq_p(mem_buf); + break; + case 69 + 32: + env->xer = ldq_p(mem_buf); + break; + case 70 + 32: + /* fpscr */ + store_fpscr(env, ldq_p(mem_buf), 0xffffffff); + break; + } + } + return r; +} diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 85581c9537..594f7ac8d1 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -34,6 +34,7 @@ //#define PPC_DUMP_CPU //#define PPC_DEBUG_SPR //#define PPC_DUMP_SPR_ACCESSES +/* #define USE_APPLE_GDB */ /* For user-mode emulation, we don't emulate any IRQ controller */ #if defined(CONFIG_USER_ONLY) @@ -9667,6 +9668,13 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) #endif cc->gdb_num_core_regs = 71; + +#ifdef USE_APPLE_GDB + cc->gdb_read_register = ppc_cpu_gdb_read_register_apple; + cc->gdb_write_register = ppc_cpu_gdb_write_register_apple; + cc->gdb_num_core_regs = 71 + 32; +#endif + #if defined(TARGET_PPC64) cc->gdb_core_xml_file = "power64-core.xml"; #else -- cgit v1.2.3 From 3a3b8502e6f0c8d30865c5f36d2c3ae4114000b5 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 23 Jun 2014 23:26:32 +1000 Subject: spapr: Fix RTAS token numbers At the moment spapr_rtas_register() allocates a new token number for every new RTAS callback so numbers are not fixed and depend on the number of supported RTAS handlers and the exact order of spapr_rtas_register() calls. These tokens are copied into the device tree and remain the same during the guest lifetime. When we start another guest to receive a migration, it calls spapr_rtas_register() as well. If the number of RTAS handlers or their order is different in QEMU on source and destination sides, the "/rtas" node in the device tree will differ. Since migration overwrites the device tree (as it overwrites the entire RAM), the actual RTAS config on the destination side gets broken. This defines global contant values for every RTAS token which QEMU is using today. This changes spapr_rtas_register() to accept a token number instead of allocating one. This changes all users of spapr_rtas_register(). This changes XICS-KVM not to cache tokens registered with KVM as they constant now. This makes TOKEN_BASE global as RTAS_XXX use TOKEN_BASE as a base. TOKEN_MAX is moved and renamed too and its value is changed to the last token + 1. Boundary checks for token values are adjusted. This reserves token numbers for "os-term" handlers and PCI hotplug which we are working on. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/intc/xics.c | 8 +++--- hw/intc/xics_kvm.c | 22 ++++++----------- hw/nvram/spapr_nvram.c | 4 +-- hw/ppc/spapr_events.c | 3 ++- hw/ppc/spapr_pci.c | 18 +++++++++----- hw/ppc/spapr_rtas.c | 67 +++++++++++++++++++++++++------------------------- hw/ppc/spapr_vio.c | 5 ++-- include/hw/ppc/spapr.h | 41 +++++++++++++++++++++++++++++- 8 files changed, 104 insertions(+), 64 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 76dd6f5708..493a2a47c5 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -866,10 +866,10 @@ static void xics_realize(DeviceState *dev, Error **errp) } /* Registration of global state belongs into realize */ - spapr_rtas_register("ibm,set-xive", rtas_set_xive); - spapr_rtas_register("ibm,get-xive", rtas_get_xive); - spapr_rtas_register("ibm,int-off", rtas_int_off); - spapr_rtas_register("ibm,int-on", rtas_int_on); + spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); + spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive); + spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off); + spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on); spapr_register_hypercall(H_CPPR, h_cppr); spapr_register_hypercall(H_IPI, h_ipi); diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 09476ae34d..4704c98214 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -38,10 +38,6 @@ typedef struct KVMXICSState { XICSState parent_obj; - uint32_t set_xive_token; - uint32_t get_xive_token; - uint32_t int_off_token; - uint32_t int_on_token; int kernel_xics_fd; } KVMXICSState; @@ -392,32 +388,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) goto fail; } - icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy); - icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy); - icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy); - icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy); + spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy); + spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy); + spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy); + spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy); - rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token, - "ibm,set-xive"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive"); goto fail; } - rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token, - "ibm,get-xive"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive"); goto fail; } - rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on"); goto fail; } - rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off"); goto fail; diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index af49c4667d..6a72ef4814 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -153,8 +153,8 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev) return -1; } - spapr_rtas_register("nvram-fetch", rtas_nvram_fetch); - spapr_rtas_register("nvram-store", rtas_nvram_store); + spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch); + spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store); return 0; } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 16fa49e886..5ce96a7196 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -317,5 +317,6 @@ void spapr_events_init(sPAPREnvironment *spapr) spapr->epow_irq = spapr_allocate_msi(0); spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); - spapr_rtas_register("check-exception", check_exception); + spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", + check_exception); } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 988f8cb858..8607c88fd2 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -909,14 +909,20 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, void spapr_pci_rtas_init(void) { - spapr_rtas_register("read-pci-config", rtas_read_pci_config); - spapr_rtas_register("write-pci-config", rtas_write_pci_config); - spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config); - spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config); + spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config", + rtas_read_pci_config); + spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config", + rtas_write_pci_config); + spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config", + rtas_ibm_read_pci_config); + spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config", + rtas_ibm_write_pci_config); if (msi_supported) { - spapr_rtas_register("ibm,query-interrupt-source-number", + spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER, + "ibm,query-interrupt-source-number", rtas_ibm_query_interrupt_source_number); - spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi); + spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi", + rtas_ibm_change_msi); } } diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 8d08539baa..4e87d02805 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -36,9 +36,6 @@ #include -#define TOKEN_BASE 0x2000 -#define TOKEN_MAX 0x100 - static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -271,17 +268,14 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, static struct rtas_call { const char *name; spapr_rtas_fn fn; -} rtas_table[TOKEN_MAX]; - -static struct rtas_call *rtas_next = rtas_table; +} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - if ((token >= TOKEN_BASE) - && ((token - TOKEN_BASE) < TOKEN_MAX)) { - struct rtas_call *call = rtas_table + (token - TOKEN_BASE); + if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) { + struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE); if (call->fn) { call->fn(cpu, spapr, token, nargs, args, nret, rets); @@ -303,23 +297,22 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -int spapr_rtas_register(const char *name, spapr_rtas_fn fn) +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) { - int i; - - for (i = 0; i < (rtas_next - rtas_table); i++) { - if (strcmp(name, rtas_table[i].name) == 0) { - fprintf(stderr, "RTAS call \"%s\" registered twice\n", name); - exit(1); - } + if (!((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX))) { + fprintf(stderr, "RTAS invalid token 0x%x\n", token); + exit(1); } - assert(rtas_next < (rtas_table + TOKEN_MAX)); - - rtas_next->name = name; - rtas_next->fn = fn; + token -= RTAS_TOKEN_BASE; + if (rtas_table[token].name) { + fprintf(stderr, "RTAS call \"%s\" is registered already as 0x%x\n", + rtas_table[token].name, token); + exit(1); + } - return (rtas_next++ - rtas_table) + TOKEN_BASE; + rtas_table[token].name = name; + rtas_table[token].fn = fn; } int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, @@ -359,7 +352,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, return ret; } - for (i = 0; i < TOKEN_MAX; i++) { + for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { struct rtas_call *call = &rtas_table[i]; if (!call->name) { @@ -367,7 +360,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, } ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name, - i + TOKEN_BASE); + i + RTAS_TOKEN_BASE); if (ret < 0) { fprintf(stderr, "Couldn't add rtas token for %s: %s\n", call->name, fdt_strerror(ret)); @@ -380,18 +373,24 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, static void core_rtas_register_types(void) { - spapr_rtas_register("display-character", rtas_display_character); - spapr_rtas_register("get-time-of-day", rtas_get_time_of_day); - spapr_rtas_register("set-time-of-day", rtas_set_time_of_day); - spapr_rtas_register("power-off", rtas_power_off); - spapr_rtas_register("system-reboot", rtas_system_reboot); - spapr_rtas_register("query-cpu-stopped-state", + spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", + rtas_display_character); + spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day", + rtas_get_time_of_day); + spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day", + rtas_set_time_of_day); + spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off); + spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot", + rtas_system_reboot); + spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state", rtas_query_cpu_stopped_state); - spapr_rtas_register("start-cpu", rtas_start_cpu); - spapr_rtas_register("stop-self", rtas_stop_self); - spapr_rtas_register("ibm,get-system-parameter", + spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu); + spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self); + spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER, + "ibm,get-system-parameter", rtas_ibm_get_system_parameter); - spapr_rtas_register("ibm,set-system-parameter", + spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER, + "ibm,set-system-parameter", rtas_ibm_set_system_parameter); } diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 04e16ae04d..a195fd1565 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -517,8 +517,9 @@ VIOsPAPRBus *spapr_vio_bus_init(void) spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq); /* RTAS calls */ - spapr_rtas_register("ibm,set-tce-bypass", rtas_set_tce_bypass); - spapr_rtas_register("quiesce", rtas_quiesce); + spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass", + rtas_set_tce_bypass); + spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce); return bus; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 08c301f38d..91923de5b5 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -358,6 +358,45 @@ static inline int spapr_allocate_lsi(int hint) #define RTAS_OUT_NOT_SUPPORTED -3 #define RTAS_OUT_NOT_AUTHORIZED -9002 +/* RTAS tokens */ +#define RTAS_TOKEN_BASE 0x2000 + +#define RTAS_DISPLAY_CHARACTER (RTAS_TOKEN_BASE + 0x00) +#define RTAS_GET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x01) +#define RTAS_SET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x02) +#define RTAS_POWER_OFF (RTAS_TOKEN_BASE + 0x03) +#define RTAS_SYSTEM_REBOOT (RTAS_TOKEN_BASE + 0x04) +#define RTAS_QUERY_CPU_STOPPED_STATE (RTAS_TOKEN_BASE + 0x05) +#define RTAS_START_CPU (RTAS_TOKEN_BASE + 0x06) +#define RTAS_STOP_SELF (RTAS_TOKEN_BASE + 0x07) +#define RTAS_IBM_GET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x08) +#define RTAS_IBM_SET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x09) +#define RTAS_IBM_SET_XIVE (RTAS_TOKEN_BASE + 0x0A) +#define RTAS_IBM_GET_XIVE (RTAS_TOKEN_BASE + 0x0B) +#define RTAS_IBM_INT_OFF (RTAS_TOKEN_BASE + 0x0C) +#define RTAS_IBM_INT_ON (RTAS_TOKEN_BASE + 0x0D) +#define RTAS_CHECK_EXCEPTION (RTAS_TOKEN_BASE + 0x0E) +#define RTAS_EVENT_SCAN (RTAS_TOKEN_BASE + 0x0F) +#define RTAS_IBM_SET_TCE_BYPASS (RTAS_TOKEN_BASE + 0x10) +#define RTAS_QUIESCE (RTAS_TOKEN_BASE + 0x11) +#define RTAS_NVRAM_FETCH (RTAS_TOKEN_BASE + 0x12) +#define RTAS_NVRAM_STORE (RTAS_TOKEN_BASE + 0x13) +#define RTAS_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x14) +#define RTAS_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x15) +#define RTAS_IBM_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x16) +#define RTAS_IBM_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x17) +#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER (RTAS_TOKEN_BASE + 0x18) +#define RTAS_IBM_CHANGE_MSI (RTAS_TOKEN_BASE + 0x19) +#define RTAS_SET_INDICATOR (RTAS_TOKEN_BASE + 0x1A) +#define RTAS_SET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1B) +#define RTAS_GET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1C) +#define RTAS_GET_SENSOR_STATE (RTAS_TOKEN_BASE + 0x1D) +#define RTAS_IBM_CONFIGURE_CONNECTOR (RTAS_TOKEN_BASE + 0x1E) +#define RTAS_IBM_OS_TERM (RTAS_TOKEN_BASE + 0x1F) +#define RTAS_IBM_EXTENDED_OS_TERM (RTAS_TOKEN_BASE + 0x20) + +#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x21) + static inline uint64_t ppc64_phys_to_real(uint64_t addr) { return addr & ~0xF000000000000000ULL; @@ -377,7 +416,7 @@ typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); -int spapr_rtas_register(const char *name, spapr_rtas_fn fn); +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn); target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); -- cgit v1.2.3 From 9bb62a0702dec4e103a3bb820b2a642e75995a56 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 10 Jun 2014 15:39:21 +1000 Subject: spapr_iommu: Make in-kernel TCE table optional POWER KVM supports an KVM_CAP_SPAPR_TCE capability which allows allocating TCE tables in the host kernel memory and handle H_PUT_TCE requests targeted to specific LIOBN (logical bus number) right in the host without switching to QEMU. At the moment this is used for emulated devices only and the handler only puts TCE to the table. If the in-kernel H_PUT_TCE handler finds a LIOBN and corresponding table, it will put a TCE to the table and complete hypercall execution. The user space will not be notified. Upcoming VFIO support is going to use the same sPAPRTCETable device class so KVM_CAP_SPAPR_TCE is going to be used as well. That means that TCE tables for VFIO are going to be allocated in the host as well. However VFIO operates with real IOMMU tables and simple copying of a TCE to the real hardware TCE table will not work as guest physical to host physical address translation is requited. So until the host kernel gets VFIO support for H_PUT_TCE, we better not to register VFIO's TCE in the host. This adds a place holder for KVM_CAP_SPAPR_TCE_VFIO capability. It is not in upstream yet and being discussed so now it is always false which means that in-kernel VFIO acceleration is not supported. This adds a bool @vfio_accel flag to the sPAPRTCETable device telling that sPAPRTCETable should not try allocating TCE table in the host kernel for VFIO. The flag is false now as at the moment there is no VFIO. This adds an vfio_accel parameter to spapr_tce_new_table(), the semantic is the same. Since there is only emulated PCI and VIO now, the flag is set to false. Upcoming VFIO support will set it to true. This is a preparation patch so no change in behaviour is expected Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr_iommu.c | 7 +++++-- hw/ppc/spapr_pci.c | 2 +- hw/ppc/spapr_vio.c | 2 +- include/hw/ppc/spapr.h | 4 +++- target-ppc/kvm.c | 7 +++++-- target-ppc/kvm_ppc.h | 6 ++++-- 6 files changed, 19 insertions(+), 9 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 9e49ec4a5c..698ae60953 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -118,7 +118,8 @@ static int spapr_tce_table_realize(DeviceState *dev) tcet->table = kvmppc_create_spapr_tce(tcet->liobn, tcet->nb_table << tcet->page_shift, - &tcet->fd); + &tcet->fd, + tcet->vfio_accel); } if (!tcet->table) { @@ -142,7 +143,8 @@ static int spapr_tce_table_realize(DeviceState *dev) sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, - uint32_t nb_table) + uint32_t nb_table, + bool vfio_accel) { sPAPRTCETable *tcet; @@ -161,6 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, tcet->bus_offset = bus_offset; tcet->page_shift = page_shift; tcet->nb_table = nb_table; + tcet->vfio_accel = vfio_accel; object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 8607c88fd2..d1e3e0ff10 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -658,7 +658,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, 0, SPAPR_TCE_PAGE_SHIFT, - 0x40000000 >> SPAPR_TCE_PAGE_SHIFT); + 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false); if (!tcet) { error_setg(errp, "Unable to create TCE table for %s", sphb->dtbusname); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index a195fd1565..8b765c65e8 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) 0, SPAPR_TCE_PAGE_SHIFT, pc->rtce_window_size >> - SPAPR_TCE_PAGE_SHIFT); + SPAPR_TCE_PAGE_SHIFT, false); address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id); } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 91923de5b5..f71d7dec90 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -446,6 +446,7 @@ struct sPAPRTCETable { uint32_t page_shift; uint64_t *table; bool bypass; + bool vfio_accel; int fd; MemoryRegion iommu; QLIST_ENTRY(sPAPRTCETable) list; @@ -457,7 +458,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, - uint32_t nb_table); + uint32_t nb_table, + bool vfio_accel); MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet); void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass); int spapr_dma_dt(void *fdt, int node_off, const char *propname, diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 561f8ccf2f..2d87108d8b 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -63,6 +63,7 @@ static int cap_ppc_smt; static int cap_ppc_rma; static int cap_spapr_tce; static int cap_spapr_multitce; +static int cap_spapr_vfio; static int cap_hior; static int cap_one_reg; static int cap_epr; @@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s) cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); + cap_spapr_vfio = false; cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); @@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void) return cap_spapr_multitce; } -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, + bool vfio_accel) { struct kvm_create_spapr_tce args = { .liobn = liobn, @@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) * destroying the table, which the upper layers -will- do */ *pfd = -1; - if (!cap_spapr_tce) { + if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) { return NULL; } diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 412cc7f3c1..1118122d89 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); #ifndef CONFIG_USER_ONLY off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem); bool kvmppc_spapr_use_multitce(void); -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd); +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, + bool vfio_accel); int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); int kvmppc_reset_htab(int shift_hint); uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); @@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void) } static inline void *kvmppc_create_spapr_tce(uint32_t liobn, - uint32_t window_size, int *fd) + uint32_t window_size, int *fd, + bool vfio_accel) { return NULL; } -- cgit v1.2.3 From 6d8be4c3434783a59ae29f7ea6a792b56c812349 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 10 Jun 2014 15:39:22 +1000 Subject: vfio: Add vfio_container_ioctl() While most operations with VFIO IOMMU driver are generic and used inside vfio.c, there are still some operations which only specific VFIO IOMMU drivers implement. The first example of it will be reading a DMA window start from the host. This adds a helper which passes an ioctl request to the container's fd. The helper will check if @req is known. For this, stub is added. This return -1 on any requests for now. Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Acked-by: Alex Williamson Signed-off-by: Alexander Graf --- hw/misc/vfio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/hw/misc/vfio.h | 9 +++++++++ 2 files changed, 51 insertions(+) create mode 100644 include/hw/misc/vfio.h diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 7437c2e3c3..bdd6e3386e 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -39,6 +39,7 @@ #include "qemu/range.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" +#include "hw/misc/vfio.h" /* #define DEBUG_VFIO */ #ifdef DEBUG_VFIO @@ -4318,3 +4319,44 @@ static void register_vfio_pci_dev_type(void) } type_init(register_vfio_pci_dev_type) + +static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param) +{ + VFIOGroup *group; + VFIOContainer *container; + int ret = -1; + + group = vfio_get_group(groupid, as); + if (!group) { + error_report("vfio: group %d not registered", groupid); + return ret; + } + + container = group->container; + if (group->container) { + ret = ioctl(container->fd, req, param); + if (ret < 0) { + error_report("vfio: failed to ioctl container: ret=%d, %s", + ret, strerror(errno)); + } + } + + vfio_put_group(group); + + return ret; +} + +int vfio_container_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param) +{ + /* We allow only certain ioctls to the container */ + switch (req) { + default: + /* Return an error on unknown requests */ + error_report("vfio: unsupported ioctl %X", req); + return -1; + } + + return vfio_container_do_ioctl(as, groupid, req, param); +} diff --git a/include/hw/misc/vfio.h b/include/hw/misc/vfio.h new file mode 100644 index 0000000000..0b26cd8e11 --- /dev/null +++ b/include/hw/misc/vfio.h @@ -0,0 +1,9 @@ +#ifndef VFIO_API_H +#define VFIO_API_H + +#include "qemu/typedefs.h" + +extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param); + +#endif -- cgit v1.2.3 From 9fc34ada7e80a7c561f64eb124b2b814a8feca68 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 10 Jun 2014 15:39:23 +1000 Subject: spapr_pci_vfio: Add spapr-pci-vfio-host-bridge to support vfio The patch adds a spapr-pci-vfio-host-bridge device type which is a PCI Host Bridge with VFIO support. The new device inherits from the spapr-pci-host-bridge device and adds an "iommu" property which is an IOMMU id. This ID represents a minimal entity for which IOMMU isolation can be guaranteed. In SPAPR architecture IOMMU group is called a Partitionable Endpoint (PE). Current implementation supports one IOMMU id per QEMU VFIO PHB. Since SPAPR allows multiple PHB for no extra cost, this does not seem to be a problem. This limitation may change in the future though. Example of use: Configure and Add 3 functions of a multifunctional device to QEMU: (the NEC PCI USB card is used as an example here): -device spapr-pci-vfio-host-bridge,id=USB,iommu=4,index=7 \ -device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true -device vfio-pci,host=4:0:1.1,addr=1.1,bus=USB -device vfio-pci,host=4:0:1.2,addr=1.2,bus=USB where: * index=7 is a QEMU PHB index (used as source for MMIO/MSI/IO windows offset); * iommu=4 is an IOMMU id which can be found in sysfs: [aik@vpl2 ~]$ cd /sys/bus/pci/devices/0004:00:00.0/ [aik@vpl2 0004:00:00.0]$ ls -l iommu_group lrwxrwxrwx 1 root root 0 Jun 5 12:49 iommu_group -> ../../../kernel/iommu_groups/4 Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/Makefile.objs | 3 ++ hw/ppc/spapr_pci_vfio.c | 102 ++++++++++++++++++++++++++++++++++++++++++++ include/hw/pci-host/spapr.h | 11 +++++ 3 files changed, 116 insertions(+) create mode 100644 hw/ppc/spapr_pci_vfio.c diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index ea747f0a20..edd44d03e7 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o +ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) +obj-y += spapr_pci_vfio.o +endif # PowerPC 4xx boards obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o obj-y += ppc4xx_pci.o diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c new file mode 100644 index 0000000000..d3bddf2887 --- /dev/null +++ b/hw/ppc/spapr_pci_vfio.c @@ -0,0 +1,102 @@ +/* + * QEMU sPAPR PCI host for VFIO + * + * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "linux/vfio.h" +#include "hw/misc/vfio.h" + +static Property spapr_phb_vfio_properties[] = { + DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) +{ + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); + struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) }; + int ret; + sPAPRTCETable *tcet; + uint32_t liobn = svphb->phb.dma_liobn; + + if (svphb->iommugroupid == -1) { + error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); + return; + } + + ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + VFIO_CHECK_EXTENSION, + (void *) VFIO_SPAPR_TCE_IOMMU); + if (ret != 1) { + error_setg_errno(errp, -ret, + "spapr-vfio: SPAPR extension is not supported"); + return; + } + + ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, -ret, + "spapr-vfio: get info from container failed"); + return; + } + + tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start, + SPAPR_TCE_PAGE_SHIFT, + info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT, + true); + if (!tcet) { + error_setg(errp, "spapr-vfio: failed to create VFIO TCE table"); + return; + } + + /* Register default 32bit DMA window */ + memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset, + spapr_tce_get_iommu(tcet)); +} + +static void spapr_phb_vfio_reset(DeviceState *qdev) +{ + /* Do nothing */ +} + +static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); + + dc->props = spapr_phb_vfio_properties; + dc->reset = spapr_phb_vfio_reset; + spc->finish_realize = spapr_phb_vfio_finish_realize; +} + +static const TypeInfo spapr_phb_vfio_info = { + .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE, + .parent = TYPE_SPAPR_PCI_HOST_BRIDGE, + .instance_size = sizeof(sPAPRPHBVFIOState), + .class_init = spapr_phb_vfio_class_init, + .class_size = sizeof(sPAPRPHBClass), +}; + +static void spapr_pci_vfio_register_types(void) +{ + type_register_static(&spapr_phb_vfio_info); +} + +type_init(spapr_pci_vfio_register_types) diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 0934518bbd..6808e967bd 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -30,10 +30,14 @@ #define SPAPR_MSIX_MAX_DEVS 32 #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" +#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" #define SPAPR_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) +#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \ + OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) + #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \ OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE) #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \ @@ -41,6 +45,7 @@ typedef struct sPAPRPHBClass sPAPRPHBClass; typedef struct sPAPRPHBState sPAPRPHBState; +typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; struct sPAPRPHBClass { PCIHostBridgeClass parent_class; @@ -76,6 +81,12 @@ struct sPAPRPHBState { QLIST_ENTRY(sPAPRPHBState) list; }; +struct sPAPRPHBVFIOState { + sPAPRPHBState phb; + + int32_t iommugroupid; +}; + #define SPAPR_PCI_BASE_BUID 0x800000020000000ULL #define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -- cgit v1.2.3 From 591812634c84471fc68feb259ebebb82d0d5513a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 10 Jun 2014 15:39:24 +1000 Subject: vfio: Enable for SPAPR This turns the sPAPR support on and enables VFIO container use in the kernel. This extends vfio_connect_container to support VFIO_SPAPR_TCE_IOMMU type in the host kernel. This registers a memory listener which sPAPR IOMMU will notify when executing H_PUT_TCE/etc DMA calls. The listener then will notify the host kernel about DMA map/unmap operation via VFIO_IOMMU_MAP_DMA/ VFIO_IOMMU_UNMAP_DMA ioctls. This executes VFIO_IOMMU_ENABLE ioctl to make sure that the IOMMU is free of mappings and can be exclusively given to the user. At the moment SPAPR is the only platform requiring this call to be implemented. Note that the host kernel function implementing VFIO_IOMMU_DISABLE is called automatically when container's fd is closed so there is no need to call it explicitly from QEMU. We may need to call VFIO_IOMMU_DISABLE explicitly in the future for some sort of dynamic reconfiguration (PCI hotplug or dynamic IOMMU group management). Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Signed-off-by: Alexander Graf --- hw/misc/vfio.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index bdd6e3386e..7b279c4f05 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -3650,6 +3650,39 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) container->iommu_data.type1.initialized = true; + } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) { + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); + if (ret) { + error_report("vfio: failed to set group container: %m"); + ret = -errno; + goto free_container_exit; + } + + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); + if (ret) { + error_report("vfio: failed to set iommu for container: %m"); + ret = -errno; + goto free_container_exit; + } + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_report("vfio: failed to enable container: %m"); + ret = -errno; + goto free_container_exit; + } + + container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.release = vfio_listener_release; + + memory_listener_register(&container->iommu_data.type1.listener, + container->space->as); + } else { error_report("vfio: No available IOMMU models"); ret = -EINVAL; @@ -4352,6 +4385,9 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, { /* We allow only certain ioctls to the container */ switch (req) { + case VFIO_CHECK_EXTENSION: + case VFIO_IOMMU_SPAPR_TCE_GET_INFO: + break; default: /* Return an error on unknown requests */ error_report("vfio: unsupported ioctl %X", req); -- cgit v1.2.3 From b247812e4afda505a5ab48dd9ad05bd51bb55bee Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 24 Jun 2014 00:05:09 +0100 Subject: target-ppc: Remove unused IMM and d extract helpers Remove the definition of the IMM and d extract helpers; these seem to have been added as part of the initial PPC support in 2003 but never actually used. Signed-off-by: Peter Maydell Signed-off-by: Alexander Graf --- target-ppc/translate.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index c5d73d5aa0..b501655850 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -426,7 +426,6 @@ static inline uint32_t SPR(uint32_t opcode) return ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5); } /*** Get constants ***/ -EXTRACT_HELPER(IMM, 12, 8); /* 16 bits signed immediate value */ EXTRACT_SHELPER(SIMM, 0, 16); /* 16 bits unsigned immediate value */ @@ -459,8 +458,6 @@ EXTRACT_HELPER(FPFLM, 17, 8); EXTRACT_HELPER(FPW, 16, 1); /*** Jump target decoding ***/ -/* Displacement */ -EXTRACT_SHELPER(d, 0, 16); /* Immediate address */ static inline target_ulong LI(uint32_t opcode) { -- cgit v1.2.3 From c99b6f879a9b29b89a9c88237119d3801b086051 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 24 Jun 2014 00:05:10 +0100 Subject: target-ppc: Remove unused gen_qemu_ld8s() The gen_qemu_ld8s() function is unused; remove it. Signed-off-by: Peter Maydell Signed-off-by: Alexander Graf --- target-ppc/translate.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index b501655850..b23933f7bd 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -2662,11 +2662,6 @@ static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2) tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx); } -static inline void gen_qemu_ld8s(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - tcg_gen_qemu_ld8s(arg1, arg2, ctx->mem_idx); -} - static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2) { TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask; -- cgit v1.2.3 From a0bb2a5fa095c88819eb6d8e08b1630c8d23c261 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Tue, 24 Jun 2014 00:03:48 +0200 Subject: mac99: Add motherboard devices before PCI cards Change the order of creating devices for New World Mac emulation so that devices on the motherboard are added first and PCI cards (VGA and NIC) come later. As a side effect, this also causes OpenBIOS to map the motherboard devices into the MMIO space to the same addresses as on real hardware and allow clients that hardcode these addresses (e.g. MorphOS) to find and use them until OpenBIOS is tought to map devices to specific addresses. (On real hardware the graphics and network cards are really on separate buses but we don't model that yet.) This brings the memory map closer to what is found on PowerMac3,1. Signed-off-by: BALATON Zoltan Signed-off-by: Alexander Graf --- hw/ppc/mac_newworld.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index e493dc1b4b..89d3cadf19 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -373,18 +373,11 @@ static void ppc_core99_init(MachineState *machine) machine_arch = ARCH_MAC99; } /* init basic PC hardware */ - pci_vga_init(pci_bus); - escc_mem = escc_init(0, pic[0x25], pic[0x24], serial_hds[0], serial_hds[1], ESCC_CLOCK, 4); memory_region_init_alias(escc_bar, NULL, "escc-bar", escc_mem, 0, memory_region_size(escc_mem)); - for(i = 0; i < nb_nics; i++) - pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); - - ide_drive_get(hd, MAX_IDE_BUS); - macio = pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO); dev = DEVICE(macio); qdev_connect_gpio_out(dev, 0, pic[0x19]); /* CUDA */ @@ -395,6 +388,8 @@ static void ppc_core99_init(MachineState *machine) macio_init(macio, pic_mem, escc_bar); /* We only emulate 2 out of 3 IDE controllers for now */ + ide_drive_get(hd, MAX_IDE_BUS); + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), "ide[0]")); macio_ide_init_drives(macio_ide, hd); @@ -420,8 +415,15 @@ static void ppc_core99_init(MachineState *machine) } } - if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) + pci_vga_init(pci_bus); + + if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) { graphic_depth = 15; + } + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); + } /* The NewWorld NVRAM is not located in the MacIO device */ dev = qdev_create(NULL, TYPE_MACIO_NVRAM); -- cgit v1.2.3 From 1be88255a71f92f79e027b12609ca36999a21d7c Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Mon, 23 Jun 2014 21:10:59 +0200 Subject: uninorth: Fix PCI hole size Fix PCI hole size to match that what is found on real hardware. (OpenBIOS already uses the correct length.) Signed-off-by: BALATON Zoltan Signed-off-by: Alexander Graf --- hw/pci-host/uninorth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c index e72fe2a70b..21f805f314 100644 --- a/hw/pci-host/uninorth.c +++ b/hw/pci-host/uninorth.c @@ -230,7 +230,7 @@ PCIBus *pci_pmac_init(qemu_irq *pic, d = UNI_NORTH_PCI_HOST_BRIDGE(dev); memory_region_init(&d->pci_mmio, OBJECT(d), "pci-mmio", 0x100000000ULL); memory_region_init_alias(&d->pci_hole, OBJECT(d), "pci-hole", &d->pci_mmio, - 0x80000000ULL, 0x70000000ULL); + 0x80000000ULL, 0x10000000ULL); memory_region_add_subregion(address_space_mem, 0x80000000ULL, &d->pci_hole); -- cgit v1.2.3 From f6c3ebcc3b117311389bac58cffc4ad7c3016e1c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 25 Jun 2014 17:41:50 +1000 Subject: target-ppc: Add support for POWER8 pvr 0x4D0000 At the moment QEMU knows about one version of POWER8 CPU with PVR 0x4B.0000. This CPU class is defined as "POWER8". The linux kernel names it as "POWER8E" which is different from the name QEMU uses. Now we get another version of POWER8 which is architecturally equivalent to POWER8E but has different PVR 0x4D.0000 so QEMU fails to find a PPC CPU class on these machines. The linux kernel names these CPUs as "POWER8". This renames the existing "POWER8" to "POWER8E" to be more precise and stay in sync with the linux kernel. This adds a new "POWER8" family which calls POWER8E class init function and defines own PVR mask (used to match a CPU class) and desc (used to create dynamic version-less CPU class). This does not change CPU class fw_name attribute as the host POWER8 firmware keeps using "PowerPC,POWER8" on both POWER8 and POWER8E. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- target-ppc/cpu-models.c | 3 +++ target-ppc/cpu-models.h | 7 +++++-- target-ppc/translate_init.c | 20 ++++++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c index 97a81d8660..9a91af9dbf 100644 --- a/target-ppc/cpu-models.c +++ b/target-ppc/cpu-models.c @@ -1138,6 +1138,8 @@ "POWER7 v2.3") POWERPC_DEF("POWER7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7P, "POWER7+ v2.1") + POWERPC_DEF("POWER8E_v1.0", CPU_POWERPC_POWER8E_v10, POWER8E, + "POWER8E v1.0") POWERPC_DEF("POWER8_v1.0", CPU_POWERPC_POWER8_v10, POWER8, "POWER8 v1.0") POWERPC_DEF("970", CPU_POWERPC_970, 970, @@ -1386,6 +1388,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "POWER5gs", "POWER5+" }, { "POWER7", "POWER7_v2.3" }, { "POWER7+", "POWER7+_v2.1" }, + { "POWER8E", "POWER8E_v1.0" }, { "POWER8", "POWER8_v1.0" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h index db75896012..c39d03a504 100644 --- a/target-ppc/cpu-models.h +++ b/target-ppc/cpu-models.h @@ -559,9 +559,12 @@ enum { CPU_POWERPC_POWER7P_BASE = 0x004A0000, CPU_POWERPC_POWER7P_MASK = 0xFFFF0000, CPU_POWERPC_POWER7P_v21 = 0x004A0201, - CPU_POWERPC_POWER8_BASE = 0x004B0000, + CPU_POWERPC_POWER8E_BASE = 0x004B0000, + CPU_POWERPC_POWER8E_MASK = 0xFFFF0000, + CPU_POWERPC_POWER8E_v10 = 0x004B0100, + CPU_POWERPC_POWER8_BASE = 0x004D0000, CPU_POWERPC_POWER8_MASK = 0xFFFF0000, - CPU_POWERPC_POWER8_v10 = 0x004B0100, + CPU_POWERPC_POWER8_v10 = 0x004D0100, CPU_POWERPC_970 = 0x00390202, CPU_POWERPC_970FX_v10 = 0x00391100, CPU_POWERPC_970FX_v20 = 0x003C0200, diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 594f7ac8d1..a3bb336e16 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8189,16 +8189,16 @@ static void init_proc_POWER8(CPUPPCState *env) init_proc_book3s_64(env, BOOK3S_CPU_POWER8); } -POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER8E)(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); dc->fw_name = "PowerPC,POWER8"; - dc->desc = "POWER8"; + dc->desc = "POWER8E"; dc->props = powerpc_servercpu_properties; - pcc->pvr = CPU_POWERPC_POWER8_BASE; - pcc->pvr_mask = CPU_POWERPC_POWER8_MASK; + pcc->pvr = CPU_POWERPC_POWER8E_BASE; + pcc->pvr_mask = CPU_POWERPC_POWER8E_MASK; pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06; pcc->init_proc = init_proc_POWER8; pcc->check_pow = check_pow_nocheck; @@ -8252,6 +8252,18 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; } + +POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + ppc_POWER8E_cpu_family_class_init(oc, data); + + dc->desc = "POWER8"; + pcc->pvr = CPU_POWERPC_POWER8_BASE; + pcc->pvr_mask = CPU_POWERPC_POWER8_MASK; +} #endif /* defined (TARGET_PPC64) */ -- cgit v1.2.3 From 6ca1502e3694cdb79f7143f700e3dc60a5d73539 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 25 Jun 2014 14:10:24 +1000 Subject: spapr: Fix code design style (s/SPAPRMachine/sPAPRMachineState) Every single sPAPR QOM object has small first "s". Most (not all yet) QOM objects have "State" suffix. This replaces SPAPRMachine with sPAPRMachineState to conform with QEMU code style and removes redundant empty line. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f4d464a94a..cfaa63c424 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -85,16 +85,16 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) +typedef struct sPAPRMachineState sPAPRMachineState; -typedef struct SPAPRMachine SPAPRMachine; #define TYPE_SPAPR_MACHINE "spapr-machine" #define SPAPR_MACHINE(obj) \ - OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE) + OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) /** - * SPAPRMachine: + * sPAPRMachineState: */ -struct SPAPRMachine { +struct sPAPRMachineState { /*< private >*/ MachineState parent_obj; @@ -102,7 +102,6 @@ struct SPAPRMachine { char *kvm_type; }; - sPAPREnvironment *spapr; int spapr_allocate_irq(int hint, bool lsi) @@ -1622,14 +1621,14 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, static char *spapr_get_kvm_type(Object *obj, Error **errp) { - SPAPRMachine *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *sm = SPAPR_MACHINE(obj); return g_strdup(sm->kvm_type); } static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) { - SPAPRMachine *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *sm = SPAPR_MACHINE(obj); g_free(sm->kvm_type); sm->kvm_type = g_strdup(value); @@ -1663,7 +1662,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) static const TypeInfo spapr_machine_info = { .name = TYPE_SPAPR_MACHINE, .parent = TYPE_MACHINE, - .instance_size = sizeof(SPAPRMachine), + .instance_size = sizeof(sPAPRMachineState), .instance_init = spapr_machine_initfn, .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { -- cgit v1.2.3 From 6026db4501f773caaa2895cde7f93022960c7169 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 25 Jun 2014 14:08:45 +1000 Subject: spapr: Define a 2.1 pseries machine This adds a v2.1 machine to support backward compatibility for newer macines in the case if they ever be implemented. This adds a "pseries-2.1" machine as a child of the "pseries" machine and only changes visible machine name. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index cfaa63c424..8d14f6b993 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1671,9 +1671,25 @@ static const TypeInfo spapr_machine_info = { }, }; +static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->name = "pseries-2.1"; + mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1"; + mc->is_default = 0; +} + +static const TypeInfo spapr_machine_2_1_info = { + .name = TYPE_SPAPR_MACHINE "2.1", + .parent = TYPE_SPAPR_MACHINE, + .class_init = spapr_machine_2_1_class_init, +}; + static void spapr_machine_register_types(void) { type_register_static(&spapr_machine_info); + type_register_static(&spapr_machine_2_1_info); } type_init(spapr_machine_register_types) -- cgit v1.2.3 From ce3fa1eca2c60673fede2222defb4bd13a3b5b1e Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Wed, 25 Jun 2014 13:54:29 +1000 Subject: spapr: Add rtas_st_buffer utility function Add a function to write lengh + data into a buffer as required for the emulation of the RTAS ibm,get-system-parameter call. If the destination is smaller than the source, the write is truncated and success is returned. This matches the behaviour of pHyp. This will be used in following patches. Signed-off-by: Sam Bobroff Signed-off-by: Alexander Graf --- include/hw/ppc/spapr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index f71d7dec90..25ca87f244 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -412,6 +412,19 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val) stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val); } + +static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len, + uint8_t *buffer, uint16_t buffer_len) +{ + if (phys_len < 2) { + return; + } + stw_be_phys(&address_space_memory, + ppc64_phys_to_real(phys), buffer_len); + cpu_physical_memory_write(ppc64_phys_to_real(phys + 2), + buffer, MIN(buffer_len, phys_len - 2)); +} + typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, -- cgit v1.2.3 From 3052d9519000739ec25224ed1ca7498b7ff66d77 Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Wed, 25 Jun 2014 13:54:30 +1000 Subject: spapr: Fix RTAS sysparm DIAGNOSTICS_RUN_MODE This allows the ibm,get-system-parameter RTAS call to succeed for the DIAGNOSTICS_RUN_MODE system parameter. The problem can be seen with "ppc64_cpu --run-mode" from the powerpc-utils package which fails before this patch with "Machine does not support diagnostic run mode". This is corrected by using the rtas_st_buffer() function to write to the buffer. The RTAS constants are also moved out into a header file, some new constants added and the surrounding code slightly simplified. Signed-off-by: Sam Bobroff [agraf: remove some commentary] Signed-off-by: Alexander Graf --- hw/ppc/spapr_rtas.c | 18 +++++++++--------- include/hw/ppc/spapr.h | 11 +++++++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 4e87d02805..ac9a8601ea 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -222,8 +222,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, env->msr = 0; } -#define DIAGNOSTICS_RUN_MODE 42 - static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, @@ -233,16 +231,18 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, target_ulong parameter = rtas_ld(args, 0); target_ulong buffer = rtas_ld(args, 1); target_ulong length = rtas_ld(args, 2); - target_ulong ret = RTAS_OUT_NOT_SUPPORTED; + target_ulong ret = RTAS_OUT_SUCCESS; switch (parameter) { - case DIAGNOSTICS_RUN_MODE: - if (length == 1) { - rtas_st(buffer, 0, 0); - ret = RTAS_OUT_SUCCESS; - } + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: { + uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED; + + rtas_st_buffer(buffer, length, ¶m_val, sizeof(param_val)); break; } + default: + ret = RTAS_OUT_NOT_SUPPORTED; + } rtas_st(rets, 0, ret); } @@ -257,7 +257,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, target_ulong ret = RTAS_OUT_NOT_SUPPORTED; switch (parameter) { - case DIAGNOSTICS_RUN_MODE: + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: ret = RTAS_OUT_NOT_AUTHORIZED; break; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 25ca87f244..d043771616 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -397,6 +397,17 @@ static inline int spapr_allocate_lsi(int hint) #define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x21) +/* RTAS ibm,get-system-parameter token values */ +#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42 + +/* Possible values for the platform-processor-diagnostics-run-mode parameter + * of the RTAS ibm,get-system-parameter call. + */ +#define DIAGNOSTICS_RUN_MODE_DISABLED 0 +#define DIAGNOSTICS_RUN_MODE_STAGGERED 1 +#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2 +#define DIAGNOSTICS_RUN_MODE_PERIODIC 3 + static inline uint64_t ppc64_phys_to_real(uint64_t addr) { return addr & ~0xF000000000000000ULL; -- cgit v1.2.3 From b907d7b0fdc8b2bbb93a37ac4b5c68a44f22e8ac Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Wed, 25 Jun 2014 13:54:31 +1000 Subject: spapr: Add RTAS sysparm UUID Add support for the UUID parameter to the emulated RTAS call ibm,get-system-parameter. Return the guest's UUID as the value for the RTAS UUID system parameter, or null (a zero length result) if it is not set. Signed-off-by: Sam Bobroff Signed-off-by: Alexander Graf --- hw/ppc/spapr_rtas.c | 4 ++++ include/hw/ppc/spapr.h | 1 + 2 files changed, 5 insertions(+) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index ac9a8601ea..dba4e2b20b 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -240,6 +240,9 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, rtas_st_buffer(buffer, length, ¶m_val, sizeof(param_val)); break; } + case RTAS_SYSPARM_UUID: + rtas_st_buffer(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0)); + break; default: ret = RTAS_OUT_NOT_SUPPORTED; } @@ -258,6 +261,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, switch (parameter) { case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: + case RTAS_SYSPARM_UUID: ret = RTAS_OUT_NOT_AUTHORIZED; break; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index d043771616..46b7a71734 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -399,6 +399,7 @@ static inline int spapr_allocate_lsi(int hint) /* RTAS ibm,get-system-parameter token values */ #define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42 +#define RTAS_SYSPARM_UUID 48 /* Possible values for the platform-processor-diagnostics-run-mode parameter * of the RTAS ibm,get-system-parameter call. -- cgit v1.2.3 From 3b50d8974b979bbaa091978e70d83de939593d1f Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Wed, 25 Jun 2014 13:54:32 +1000 Subject: spapr: Add RTAS sysparm SPLPAR Characteristics Add support for the SPLPAR Characteristics parameter to the emulated RTAS call ibm,get-system-parameter. The support provides just enough information to allow "cat /proc/powerpc/lparcfg" to succeed without generating a kernel error message. Without this patch the above command will produce the following kernel message: arch/powerpc/platforms/pseries/lparcfg.c \ parse_system_parameter_string Error calling get-system-parameter \ (0xfffffffd) Signed-off-by: Sam Bobroff Signed-off-by: Alexander Graf --- hw/ppc/spapr_rtas.c | 8 ++++++++ include/hw/ppc/spapr.h | 1 + 2 files changed, 9 insertions(+) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index dba4e2b20b..9ba1ba69f9 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -234,6 +234,13 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, target_ulong ret = RTAS_OUT_SUCCESS; switch (parameter) { + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: { + char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d", + max_cpus, smp_cpus); + rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val)); + g_free(param_val); + break; + } case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: { uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED; @@ -260,6 +267,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, target_ulong ret = RTAS_OUT_NOT_SUPPORTED; switch (parameter) { + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: case RTAS_SYSPARM_UUID: ret = RTAS_OUT_NOT_AUTHORIZED; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 46b7a71734..3b6ccd395a 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -398,6 +398,7 @@ static inline int spapr_allocate_lsi(int hint) #define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x21) /* RTAS ibm,get-system-parameter token values */ +#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20 #define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42 #define RTAS_SYSPARM_UUID 48 -- cgit v1.2.3 From 4af88944d013330910826af10aaa2ef9a2919fde Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:12 +1000 Subject: xics: Add flags for interrupts The existing interrupt allocation scheme in SPAPR assumes that interrupts are allocated at the start time, continously and the config will not change. However, there are cases when this is not going to work such as: 1. migration - we will have to have an ability to choose interrupt numbers for devices in the command line and this will create gaps in interrupt space. 2. PCI hotplug - interrupts from unplugged device need to be returned back to interrupt pool, otherwise we will quickly run out of interrupts. This replaces a separate lslsi[] array with a byte in the ICSIRQState struct and defines "LSI" and "MSI" flags. Neither of these flags set signals that the descriptor is not allocated and not in use. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/intc/xics.c | 26 ++++++++++++++++++-------- hw/intc/xics_kvm.c | 5 ++--- include/hw/ppc/xics.h | 6 +++++- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 493a2a47c5..5220d4f363 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val) { ICSState *ics = (ICSState *)opaque; - if (ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { set_irq_lsi(ics, srcno, val); } else { set_irq_msi(ics, srcno, val); @@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server, trace_xics_ics_write_xive(nr, srcno, server, priority); - if (ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { write_xive_lsi(ics, srcno); } else { write_xive_msi(ics, srcno); @@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics) for (i = 0; i < ics->nr_irqs; i++) { /* FIXME: filter by server#? */ - if (ics->islsi[i]) { + if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { resend_lsi(ics, i); } else { resend_msi(ics, i); @@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr) trace_xics_ics_eoi(nr); - if (ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { irq->status &= ~XICS_STATUS_SENT; } } @@ -563,13 +563,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id) static const VMStateDescription vmstate_ics_irq = { .name = "ics/irq", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(server, ICSIRQState), VMSTATE_UINT8(priority, ICSIRQState), VMSTATE_UINT8(saved_priority, ICSIRQState), VMSTATE_UINT8(status, ICSIRQState), + VMSTATE_UINT8(flags, ICSIRQState), VMSTATE_END_OF_LIST() }, }; @@ -606,7 +607,6 @@ static void ics_realize(DeviceState *dev, Error **errp) return; } ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); - ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool)); ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs); } @@ -643,11 +643,21 @@ qemu_irq xics_get_qirq(XICSState *icp, int irq) return icp->ics->qirqs[irq - icp->ics->offset]; } +static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) +{ + assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK)); + + ics->irqs[srcno].flags |= + lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI; +} + void xics_set_irq_type(XICSState *icp, int irq, bool lsi) { - assert(ics_valid_irq(icp->ics, irq)); + ICSState *ics = icp->ics; + + assert(ics_valid_irq(ics, irq)); - icp->ics->islsi[irq - icp->ics->offset] = lsi; + ics_set_irq_type(ics, irq - ics->offset, lsi); } /* diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 4704c98214..5461454b30 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -220,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id) state |= KVM_XICS_MASKED; } - if (ics->islsi[i]) { + if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { state |= KVM_XICS_LEVEL_SENSITIVE; if (irq->status & XICS_STATUS_ASSERTED) { state |= KVM_XICS_PENDING; @@ -249,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val) int rc; args.irq = srcno + ics->offset; - if (!ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) { if (!val) { return; } @@ -286,7 +286,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp) return; } ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); - ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool)); ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs); } diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 85e4c8a3dd..2891599e38 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -136,7 +136,6 @@ struct ICSState { uint32_t nr_irqs; uint32_t offset; qemu_irq *qirqs; - bool *islsi; ICSIRQState *irqs; XICSState *icp; }; @@ -150,6 +149,11 @@ struct ICSIRQState { #define XICS_STATUS_REJECTED 0x4 #define XICS_STATUS_MASKED_PENDING 0x8 uint8_t status; +/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */ +#define XICS_FLAGS_IRQ_LSI 0x1 +#define XICS_FLAGS_IRQ_MSI 0x2 +#define XICS_FLAGS_IRQ_MASK 0x3 + uint8_t flags; }; #define XICS_IRQS 1024 -- cgit v1.2.3 From 641c349352cd3846ad164357d5a831e748d13536 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:13 +1000 Subject: xics: Add xics_find_source() PAPR allows having multiple interrupt sources such as PHB. This adds a source lookup function and makes use of it. Since at the moment QEMU only supports a single source, no change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/intc/xics.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 5220d4f363..c02feaf583 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -633,14 +633,32 @@ static const TypeInfo ics_info = { /* * Exported functions */ +static int xics_find_source(XICSState *icp, int irq) +{ + int sources = 1; + int src; + + /* FIXME: implement multiple sources */ + for (src = 0; src < sources; ++src) { + ICSState *ics = &icp->ics[src]; + if (ics_valid_irq(ics, irq)) { + return src; + } + } + + return -1; +} qemu_irq xics_get_qirq(XICSState *icp, int irq) { - if (!ics_valid_irq(icp->ics, irq)) { - return NULL; + int src = xics_find_source(icp, irq); + + if (src >= 0) { + ICSState *ics = &icp->ics[src]; + return ics->qirqs[irq - ics->offset]; } - return icp->ics->qirqs[irq - icp->ics->offset]; + return NULL; } static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) @@ -653,10 +671,12 @@ static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) void xics_set_irq_type(XICSState *icp, int irq, bool lsi) { - ICSState *ics = icp->ics; + int src = xics_find_source(icp, irq); + ICSState *ics; - assert(ics_valid_irq(ics, irq)); + assert(src >= 0); + ics = &icp->ics[src]; ics_set_irq_type(ics, irq - ics->offset, lsi); } -- cgit v1.2.3 From a7e519a8cf12c9f08a28339743b648dde38cd9d3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:14 +1000 Subject: xics: Disable flags reset on xics reset Since islsi[] array has been merged into the ICSState struct, we must not reset flags as they tell if the interrupt is in use. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/intc/xics.c | 7 +++++++ hw/intc/xics_kvm.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index c02feaf583..634101ac23 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev) { ICSState *ics = ICS(dev); int i; + uint8_t flags[ics->nr_irqs]; + + for (i = 0; i < ics->nr_irqs; i++) { + flags[i] = ics->irqs[i].flags; + } memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs); + for (i = 0; i < ics->nr_irqs; i++) { ics->irqs[i].priority = 0xff; ics->irqs[i].saved_priority = 0xff; + ics->irqs[i].flags = flags[i]; } } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 5461454b30..20b19e9d4f 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -267,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev) { ICSState *ics = ICS(dev); int i; + uint8_t flags[ics->nr_irqs]; + + for (i = 0; i < ics->nr_irqs; i++) { + flags[i] = ics->irqs[i].flags; + } memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs); + for (i = 0; i < ics->nr_irqs; i++) { ics->irqs[i].priority = 0xff; ics->irqs[i].saved_priority = 0xff; + ics->irqs[i].flags = flags[i]; } ics_set_kvm_state(ics, 1); -- cgit v1.2.3 From bee763dbfb8cfceea112131970da07f215f293a6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:15 +1000 Subject: spapr: Move interrupt allocator to xics The current allocator returns IRQ numbers from a pool and does not support IRQs reuse in any form as it did not keep track of what it previously returned, it only keeps the last returned IRQ. Some use cases such as PCI hot(un)plug may require IRQ release and reallocation. This moves an allocator from SPAPR to XICS. This switches IRQ users to use new API. This uses LSI/MSI flags to know if interrupt is allocated. The interrupt release function will be posted as a separate patch. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/intc/xics.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr.c | 67 -------------------------------------- hw/ppc/spapr_events.c | 2 +- hw/ppc/spapr_pci.c | 6 ++-- hw/ppc/spapr_vio.c | 2 +- include/hw/ppc/spapr.h | 10 ------ include/hw/ppc/xics.h | 2 ++ trace-events | 4 +++ 8 files changed, 99 insertions(+), 82 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 634101ac23..6cd980a735 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -687,6 +687,94 @@ void xics_set_irq_type(XICSState *icp, int irq, bool lsi) ics_set_irq_type(ics, irq - ics->offset, lsi); } +#define ICS_IRQ_FREE(ics, srcno) \ + (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) + +static int ics_find_free_block(ICSState *ics, int num, int alignnum) +{ + int first, i; + + for (first = 0; first < ics->nr_irqs; first += alignnum) { + if (num > (ics->nr_irqs - first)) { + return -1; + } + for (i = first; i < first + num; ++i) { + if (!ICS_IRQ_FREE(ics, i)) { + break; + } + } + if (i == (first + num)) { + return first; + } + } + + return -1; +} + +int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi) +{ + ICSState *ics = &icp->ics[src]; + int irq; + + if (irq_hint) { + assert(src == xics_find_source(icp, irq_hint)); + if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { + trace_xics_alloc_failed_hint(src, irq_hint); + return -1; + } + irq = irq_hint; + } else { + irq = ics_find_free_block(ics, 1, 1); + if (irq < 0) { + trace_xics_alloc_failed_no_left(src); + return -1; + } + irq += ics->offset; + } + + ics_set_irq_type(ics, irq - ics->offset, lsi); + trace_xics_alloc(src, irq); + + return irq; +} + +/* + * Allocate block of consequtive IRQs, returns a number of the first. + * If align==true, aligns the first IRQ number to num. + */ +int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align) +{ + int i, first = -1; + ICSState *ics = &icp->ics[src]; + + assert(src == 0); + /* + * MSIMesage::data is used for storing VIRQ so + * it has to be aligned to num to support multiple + * MSI vectors. MSI-X is not affected by this. + * The hint is used for the first IRQ, the rest should + * be allocated continuously. + */ + if (align) { + assert((num == 1) || (num == 2) || (num == 4) || + (num == 8) || (num == 16) || (num == 32)); + first = ics_find_free_block(ics, num, num); + } else { + first = ics_find_free_block(ics, num, 1); + } + + if (first >= 0) { + for (i = first; i < first + num; ++i) { + ics_set_irq_type(ics, i, lsi); + } + } + first += ics->offset; + + trace_xics_alloc_block(src, first, num, lsi, align); + + return first; +} + /* * Guest interfaces */ diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8d14f6b993..ea9bda9a68 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -104,73 +104,6 @@ struct sPAPRMachineState { sPAPREnvironment *spapr; -int spapr_allocate_irq(int hint, bool lsi) -{ - int irq; - - if (hint) { - irq = hint; - if (hint >= spapr->next_irq) { - spapr->next_irq = hint + 1; - } - /* FIXME: we should probably check for collisions somehow */ - } else { - irq = spapr->next_irq++; - } - - /* Configure irq type */ - if (!xics_get_qirq(spapr->icp, irq)) { - return 0; - } - - xics_set_irq_type(spapr->icp, irq, lsi); - - return irq; -} - -/* - * Allocate block of consequtive IRQs, returns a number of the first. - * If msi==true, aligns the first IRQ number to num. - */ -int spapr_allocate_irq_block(int num, bool lsi, bool msi) -{ - int first = -1; - int i, hint = 0; - - /* - * MSIMesage::data is used for storing VIRQ so - * it has to be aligned to num to support multiple - * MSI vectors. MSI-X is not affected by this. - * The hint is used for the first IRQ, the rest should - * be allocated continuously. - */ - if (msi) { - assert((num == 1) || (num == 2) || (num == 4) || - (num == 8) || (num == 16) || (num == 32)); - hint = (spapr->next_irq + num - 1) & ~(num - 1); - } - - for (i = 0; i < num; ++i) { - int irq; - - irq = spapr_allocate_irq(hint, lsi); - if (!irq) { - return -1; - } - - if (0 == i) { - first = irq; - hint = 0; - } - - /* If the above doesn't create a consecutive block then that's - * an internal bug */ - assert(irq == (first + i)); - } - - return first; -} - static XICSState *try_create_xics(const char *type, int nr_servers, int nr_irqs) { diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 5ce96a7196..1b6157dec4 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -314,7 +314,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, void spapr_events_init(sPAPREnvironment *spapr) { - spapr->epow_irq = spapr_allocate_msi(0); + spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false); spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d1e3e0ff10..d115fca31e 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -360,8 +360,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, /* There is no cached config, allocate MSIs */ if (!phb->msi_table[ndev].nvec) { - irq = spapr_allocate_irq_block(req_num, false, - ret_intr_type == RTAS_TYPE_MSI); + irq = xics_alloc_block(spapr->icp, 0, req_num, false, + ret_intr_type == RTAS_TYPE_MSI); if (irq < 0) { error_report("Cannot allocate MSIs for device#%d", ndev); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -634,7 +634,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) for (i = 0; i < PCI_NUM_PINS; i++) { uint32_t irq; - irq = spapr_allocate_lsi(0); + irq = xics_alloc_block(spapr->icp, 0, 1, true, false); if (!irq) { error_setg(errp, "spapr_allocate_lsi failed"); return; diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8b765c65e8..dc9e46a7b1 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -449,7 +449,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) dev->qdev.id = id; } - dev->irq = spapr_allocate_msi(dev->irq); + dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false); if (!dev->irq) { return -1; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 3b6ccd395a..6b2f21c50a 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -339,16 +339,6 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, int spapr_allocate_irq(int hint, bool lsi); int spapr_allocate_irq_block(int num, bool lsi, bool msi); -static inline int spapr_allocate_msi(int hint) -{ - return spapr_allocate_irq(hint, false); -} - -static inline int spapr_allocate_lsi(int hint) -{ - return spapr_allocate_irq(hint, true); -} - /* RTAS return codes */ #define RTAS_OUT_SUCCESS 0 #define RTAS_OUT_NO_ERRORS_FOUND 1 diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 2891599e38..30b14419a4 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -160,6 +160,8 @@ struct ICSIRQState { qemu_irq xics_get_qirq(XICSState *icp, int irq); void xics_set_irq_type(XICSState *icp, int irq, bool lsi); +int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi); +int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align); void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); diff --git a/trace-events b/trace-events index ba01ad52cf..2a46e6c737 100644 --- a/trace-events +++ b/trace-events @@ -1188,6 +1188,10 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" xics_ics_eoi(int nr) "ics_eoi: irq %#x" +xics_alloc(int src, int irq) "source#%d, irq %d" +xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use" +xics_alloc_failed_no_left(int src) "source#%d, no irq left" +xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d" # hw/ppc/spapr.c spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes" -- cgit v1.2.3 From ba0e5bf8de38a33ea2e09b40e0493d30b62fcbf0 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:17 +1000 Subject: spapr: Remove @next_irq This removes @next_irq from sPAPREnvironment which was used in old IRQ allocator as XICS is now responsible for IRQs and keeps track of allocated IRQs. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ea9bda9a68..a8ba916970 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -891,7 +891,7 @@ static const VMStateDescription vmstate_spapr = { .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32(next_irq, sPAPREnvironment), + VMSTATE_UNUSED(4), /* used to be @next_irq */ /* RTC offset */ VMSTATE_UINT64(rtc_offset, sPAPREnvironment), @@ -1295,7 +1295,6 @@ static void ppc_spapr_init(MachineState *machine) /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads, XICS_IRQS); - spapr->next_irq = XICS_IRQ_BASE; /* init CPUs */ if (cpu_model == NULL) { -- cgit v1.2.3 From 51bba713fe1120ba7b0542216278f10e48349589 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:18 +1000 Subject: xics: Implement xics_ics_free() This implements interrupt release function so IRQs can be returned back to the pool for reuse in cases such as PCI hot plug. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/intc/xics.c | 27 +++++++++++++++++++++++++++ include/hw/ppc/xics.h | 1 + trace-events | 2 ++ 3 files changed, 30 insertions(+) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 6cd980a735..0fd2a84c7b 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -775,6 +775,33 @@ int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align) return first; } +static void ics_free(ICSState *ics, int srcno, int num) +{ + int i; + + for (i = srcno; i < srcno + num; ++i) { + if (ICS_IRQ_FREE(ics, i)) { + trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset); + } + memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); + } +} + +void xics_free(XICSState *icp, int irq, int num) +{ + int src = xics_find_source(icp, irq); + + if (src >= 0) { + ICSState *ics = &icp->ics[src]; + + /* FIXME: implement multiple sources */ + assert(src == 0); + + trace_xics_ics_free(ics - icp->ics, irq, num); + ics_free(ics, irq - ics->offset, num); + } +} + /* * Guest interfaces */ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 30b14419a4..a214dd7f28 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -162,6 +162,7 @@ qemu_irq xics_get_qirq(XICSState *icp, int irq); void xics_set_irq_type(XICSState *icp, int irq, bool lsi); int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi); int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align); +void xics_free(XICSState *icp, int irq, int num); void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); diff --git a/trace-events b/trace-events index 2a46e6c737..51c08a79d9 100644 --- a/trace-events +++ b/trace-events @@ -1192,6 +1192,8 @@ xics_alloc(int src, int irq) "source#%d, irq %d" xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use" xics_alloc_failed_no_left(int src) "source#%d, no irq left" xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d" +xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" +xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free" # hw/ppc/spapr.c spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes" -- cgit v1.2.3 From f32935ea2284fe833fdf4a0d6b2fe423fc08a1b2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:19 +1000 Subject: vmstate: Add preallocation for migrating arrays (VMS_ALLOC flag) There are few helpers already to support array migration. However they all require the destination side to preallocate arrays before migration which is not always possible due to unknown array size as it might be some sort of dynamic state. One of the examples is an array of MSIX-enabled devices in SPAPR PHB - this array may vary from 0 to 65536 entries and its size depends on guest's ability to enable MSIX or do PCI hotplug. This adds new VMSTATE_VARRAY_STRUCT_ALLOC macro which is pretty similar to VMSTATE_STRUCT_VARRAY_POINTER_INT32 but it can alloc memory for migratign array on the destination side. This defines VMS_ALLOC flag for a field. This changes vmstate_base_addr() to do the allocation when receiving migration. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Juan Quintela [agraf: drop g_malloc_n usage] Signed-off-by: Alexander Graf --- include/migration/vmstate.h | 11 +++++++++++ vmstate.c | 13 ++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 71a8a95641..9a001bd28f 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -101,6 +101,7 @@ enum VMStateFlags { VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/ VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/ VMS_MUST_EXIST = 0x1000, /* Field must exist in input */ + VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */ }; typedef struct { @@ -429,6 +430,16 @@ extern const VMStateInfo vmstate_info_bitmap; .offset = offsetof(_state, _field), \ } +#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + #define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \ .name = (stringify(_field)), \ .version_id = (_version), \ diff --git a/vmstate.c b/vmstate.c index c9965205df..ef2f87bdad 100644 --- a/vmstate.c +++ b/vmstate.c @@ -43,11 +43,18 @@ static int vmstate_size(void *opaque, VMStateField *field) return size; } -static void *vmstate_base_addr(void *opaque, VMStateField *field) +static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc) { void *base_addr = opaque + field->offset; if (field->flags & VMS_POINTER) { + if (alloc && (field->flags & VMS_ALLOC)) { + int n_elems = vmstate_n_elems(opaque, field); + if (n_elems) { + gsize size = n_elems * field->size; + *((void **)base_addr + field->start) = g_malloc(size); + } + } base_addr = *(void **)base_addr + field->start; } @@ -81,7 +88,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = vmstate_base_addr(opaque, field); + void *base_addr = vmstate_base_addr(opaque, field, true); int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); @@ -135,7 +142,7 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = vmstate_base_addr(opaque, field); + void *base_addr = vmstate_base_addr(opaque, field, false); int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); -- cgit v1.2.3 From 9a321e92343891e30f6fe8bfaad40454ae358bfb Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 30 May 2014 19:34:20 +1000 Subject: spapr_pci: Use XICS interrupt allocator and do not cache interrupts in PHB Currently SPAPR PHB keeps track of all allocated MSI (here and below MSI stands for both MSI and MSIX) interrupt because XICS used to be unable to reuse interrupts. This is a problem for dynamic MSI reconfiguration which happens when guest reloads a driver or performs PCI hotplug. Another problem is that the existing implementation can enable MSI on 32 devices maximum (SPAPR_MSIX_MAX_DEVS=32) and there is no good reason for that. This makes use of new XICS ability to reuse interrupts. This reorganizes MSI information storage in sPAPRPHBState. Instead of static array of 32 descriptors (one per a PCI function), this patch adds a GHashTable when @config_addr is a key and (first_irq, num) pair is a value. GHashTable can dynamically grow and shrink so the initial limit of 32 devices is gone. This changes migration stream as @msi_table was a static array while new @msi_devs is a dynamic hash table. This adds temporary array which is used for migration, it is populated in "spapr_pci"::pre_save() callback and expanded into the hash table in post_load() callback. Since the destination side does not know the number of MSI-enabled devices in advance and cannot pre-allocate the temporary array to receive migration state, this makes use of new VMSTATE_STRUCT_VARRAY_ALLOC macro which allocates the array automatically. This resets the MSI configuration space when interrupts are released by the ibm,change-msi RTAS call. This fixed traces to be more informative. This changes vmstate_spapr_pci_msi name from "...lsi" to "...msi" which was incorrect by accident. As the internal representation changed, thus bumps migration version number. Signed-off-by: Alexey Kardashevskiy [agraf: drop g_malloc_n usage] Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 195 +++++++++++++++++++++++++------------------- include/hw/pci-host/spapr.h | 21 +++-- include/hw/ppc/spapr.h | 1 - trace-events | 5 +- 4 files changed, 129 insertions(+), 93 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d115fca31e..9ed39a93b7 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -219,37 +219,13 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_write_pci_config(spapr, 0, addr, size, val, rets); } -/* - * Find an entry with config_addr or returns the empty one if not found AND - * alloc_new is set. - * At the moment the msi_table entries are never released so there is - * no point to look till the end of the list if we need to find the free entry. - */ -static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr, - bool alloc_new) -{ - int i; - - for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) { - if (!phb->msi_table[i].nvec) { - break; - } - if (phb->msi_table[i].config_addr == config_addr) { - return i; - } - } - if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) { - trace_spapr_pci_msi("Allocating new MSI config", i, config_addr); - return i; - } - - return -1; -} - /* * Set MSI/MSIX message data. * This is required for msi_notify()/msix_notify() which * will write at the addresses via spapr_msi_write(). + * + * If hwaddr == 0, all entries will have .data == first_irq i.e. + * table will be reset. */ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, unsigned first_irq, unsigned req_num) @@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, return; } - for (i = 0; i < req_num; ++i, ++msg.data) { + for (i = 0; i < req_num; ++i) { msix_set_message(pdev, i, msg); trace_spapr_pci_msi_setup(pdev->name, i, msg.address); + if (addr) { + ++msg.data; + } } } @@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */ unsigned int seq_num = rtas_ld(args, 5); unsigned int ret_intr_type; - int ndev, irq, max_irqs = 0; + unsigned int irq, max_irqs = 0, num = 0; sPAPRPHBState *phb = NULL; PCIDevice *pdev = NULL; + bool msix = false; + spapr_pci_msi *msi; + int *config_addr_key; switch (func) { case RTAS_CHANGE_MSI_FN: @@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, /* Releasing MSIs */ if (!req_num) { - ndev = spapr_msicfg_find(phb, config_addr, false); - if (ndev < 0) { - trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr); + msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + if (!msi) { + trace_spapr_pci_msi("Releasing wrong config", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); return; } - trace_spapr_pci_msi("Released MSIs", ndev, config_addr); + + xics_free(spapr->icp, msi->first_irq, msi->num); + spapr_msi_setmsg(pdev, 0, msix, 0, num); + g_hash_table_remove(phb->msi, &config_addr); + + trace_spapr_pci_msi("Released MSIs", config_addr); rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, 0); return; @@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, /* Enabling MSI */ - /* Find a device number in the map to add or reuse the existing one */ - ndev = spapr_msicfg_find(phb, config_addr, true); - if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) { - error_report("No free entry for a new MSI device"); - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); - return; - } - trace_spapr_pci_msi("Configuring MSI", ndev, config_addr); - /* Check if the device supports as many IRQs as requested */ if (ret_intr_type == RTAS_TYPE_MSI) { max_irqs = msi_nr_vectors_allocated(pdev); @@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, max_irqs = pdev->msix_entries_nr; } if (!max_irqs) { - error_report("Requested interrupt type %d is not enabled for device#%d", - ret_intr_type, ndev); + error_report("Requested interrupt type %d is not enabled for device %x", + ret_intr_type, config_addr); rtas_st(rets, 0, -1); /* Hardware error */ return; } /* Correct the number if the guest asked for too many */ if (req_num > max_irqs) { + trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs); req_num = max_irqs; + irq = 0; /* to avoid misleading trace */ + goto out; } - /* Check if there is an old config and MSI number has not changed */ - if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) { - /* Unexpected behaviour */ - error_report("Cannot reuse MSI config for device#%d", ndev); + /* Allocate MSIs */ + irq = xics_alloc_block(spapr->icp, 0, req_num, false, + ret_intr_type == RTAS_TYPE_MSI); + if (!irq) { + error_report("Cannot allocate MSIs for device %x", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); return; } - /* There is no cached config, allocate MSIs */ - if (!phb->msi_table[ndev].nvec) { - irq = xics_alloc_block(spapr->icp, 0, req_num, false, - ret_intr_type == RTAS_TYPE_MSI); - if (irq < 0) { - error_report("Cannot allocate MSIs for device#%d", ndev); - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); - return; - } - phb->msi_table[ndev].irq = irq; - phb->msi_table[ndev].nvec = req_num; - phb->msi_table[ndev].config_addr = config_addr; - } - /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX, - phb->msi_table[ndev].irq, req_num); + irq, req_num); + /* Add MSI device to cache */ + msi = g_new(spapr_pci_msi, 1); + msi->first_irq = irq; + msi->num = req_num; + config_addr_key = g_new(int, 1); + *config_addr_key = config_addr; + g_hash_table_insert(phb->msi, config_addr_key, msi); + +out: rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, req_num); rtas_st(rets, 2, ++seq_num); rtas_st(rets, 3, ret_intr_type); - trace_spapr_pci_rtas_ibm_change_msi(func, req_num); + trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq); } static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, @@ -395,25 +372,28 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, uint32_t config_addr = rtas_ld(args, 0); uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); - int ndev; sPAPRPHBState *phb = NULL; + PCIDevice *pdev = NULL; + spapr_pci_msi *msi; - /* Fins sPAPRPHBState */ + /* Find sPAPRPHBState */ phb = find_phb(spapr, buid); - if (!phb) { + if (phb) { + pdev = find_dev(spapr, buid, config_addr); + } + if (!phb || !pdev) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } /* Find device descriptor and start IRQ */ - ndev = spapr_msicfg_find(phb, config_addr, false); - if (ndev < 0) { - trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr); + msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) { + trace_spapr_pci_msi("Failed to return vector", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); return; } - - intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num; + intr_src_num = msi->first_irq + ioa_intr_num; trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num, intr_src_num); @@ -649,6 +629,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } info->finish_realize(sphb, errp); + + sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); } static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) @@ -712,22 +694,69 @@ static const VMStateDescription vmstate_spapr_pci_lsi = { }; static const VMStateDescription vmstate_spapr_pci_msi = { - .name = "spapr_pci/lsi", + .name = "spapr_pci/msi", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(config_addr, struct spapr_pci_msi), - VMSTATE_UINT32(irq, struct spapr_pci_msi), - VMSTATE_UINT32(nvec, struct spapr_pci_msi), - + .fields = (VMStateField []) { + VMSTATE_UINT32(key, spapr_pci_msi_mig), + VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig), + VMSTATE_UINT32(value.num, spapr_pci_msi_mig), VMSTATE_END_OF_LIST() }, }; +static void spapr_pci_pre_save(void *opaque) +{ + sPAPRPHBState *sphb = opaque; + GHashTableIter iter; + gpointer key, value; + int i; + + if (sphb->msi_devs) { + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + } + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { + return; + } + sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig)); + + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + } +} + +static int spapr_pci_post_load(void *opaque, int version_id) +{ + sPAPRPHBState *sphb = opaque; + gpointer key, value; + int i; + + for (i = 0; i < sphb->msi_devs_num; ++i) { + key = g_memdup(&sphb->msi_devs[i].key, + sizeof(sphb->msi_devs[i].key)); + value = g_memdup(&sphb->msi_devs[i].value, + sizeof(sphb->msi_devs[i].value)); + g_hash_table_insert(sphb->msi, key, value); + } + if (sphb->msi_devs) { + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + } + sphb->msi_devs_num = 0; + + return 0; +} + static const VMStateDescription vmstate_spapr_pci = { .name = "spapr_pci", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .pre_save = spapr_pci_pre_save, + .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState), VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState), @@ -737,9 +766,9 @@ static const VMStateDescription vmstate_spapr_pci = { VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState), VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0, vmstate_spapr_pci_lsi, struct spapr_pci_lsi), - VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0, - vmstate_spapr_pci_msi, struct spapr_pci_msi), - + VMSTATE_INT32(msi_devs_num, sPAPRPHBState), + VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0, + vmstate_spapr_pci_msi, spapr_pci_msi_mig), VMSTATE_END_OF_LIST() }, }; diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 6808e967bd..32f0aa7d10 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -27,8 +27,6 @@ #include "hw/pci/pci_host.h" #include "hw/ppc/xics.h" -#define SPAPR_MSIX_MAX_DEVS 32 - #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" #define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" @@ -53,6 +51,16 @@ struct sPAPRPHBClass { void (*finish_realize)(sPAPRPHBState *sphb, Error **errp); }; +typedef struct spapr_pci_msi { + uint32_t first_irq; + uint32_t num; +} spapr_pci_msi; + +typedef struct spapr_pci_msi_mig { + uint32_t key; + spapr_pci_msi value; +} spapr_pci_msi_mig; + struct sPAPRPHBState { PCIHostState parent_obj; @@ -72,11 +80,10 @@ struct sPAPRPHBState { uint32_t irq; } lsi_table[PCI_NUM_PINS]; - struct spapr_pci_msi { - uint32_t config_addr; - uint32_t irq; - uint32_t nvec; - } msi_table[SPAPR_MSIX_MAX_DEVS]; + GHashTable *msi; + /* Temporary cache for migration purposes */ + int32_t msi_devs_num; + spapr_pci_msi_mig *msi_devs; QLIST_ENTRY(sPAPRPHBState) list; }; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 6b2f21c50a..bbba51a703 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -27,7 +27,6 @@ typedef struct sPAPREnvironment { long rtas_size; void *fdt_skel; target_ulong entry_point; - uint32_t next_irq; uint64_t rtc_offset; struct PPCTimebase tb; bool has_graphics; diff --git a/trace-events b/trace-events index 51c08a79d9..d071b97dd7 100644 --- a/trace-events +++ b/trace-events @@ -1169,12 +1169,13 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride, qxl_render_update_area_done(void *cookie) "%p" # hw/ppc/spapr_pci.c -spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)" +spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)" spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64 -spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u" +spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u" spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u" spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u" spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u" +spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u" # hw/intc/xics.c xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x" -- cgit v1.2.3 From 79c0ff2cae1f24cb7e041ac2dbdcc329d2a86ba2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 4 Jun 2014 13:48:18 +0200 Subject: PPC: e500: Only create dt entries for existing serial ports When the user specifies -nodefaults he can tell us that he doesn't want any serial ports spawned by default. While we do honor that wish, we still create device tree entries for those non-existent devices. Make device tree generation depend on whether the device is actually available. Signed-off-by: Alexander Graf --- hw/ppc/e500.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index a973c18d88..bb2e75fe1b 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -316,10 +316,15 @@ static int ppce500_load_device_tree(MachineState *machine, * device it finds in the dt as serial output device. And we generate * devices in reverse order to the dt. */ - dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, - soc, mpic, "serial1", 1, false); - dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, - soc, mpic, "serial0", 0, true); + if (serial_hds[1]) { + dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, + soc, mpic, "serial1", 1, false); + } + + if (serial_hds[0]) { + dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, + soc, mpic, "serial0", 0, true); + } snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc, MPC8544_UTIL_OFFSET); -- cgit v1.2.3