diff options
Diffstat (limited to 'hw/ppc')
-rw-r--r-- | hw/ppc/e500.c | 147 | ||||
-rw-r--r-- | hw/ppc/mac_newworld.c | 5 | ||||
-rw-r--r-- | hw/ppc/mac_oldworld.c | 5 | ||||
-rw-r--r-- | hw/ppc/ppc.c | 79 | ||||
-rw-r--r-- | hw/ppc/spapr.c | 234 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 233 | ||||
-rw-r--r-- | hw/ppc/spapr_iommu.c | 181 | ||||
-rw-r--r-- | hw/ppc/spapr_pci.c | 129 | ||||
-rw-r--r-- | hw/ppc/spapr_vio.c | 6 |
9 files changed, 833 insertions, 186 deletions
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 223bab9eea..a973c18d88 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -39,7 +39,6 @@ #define EPAPR_MAGIC (0x45504150) #define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" -#define UIMAGE_LOAD_BASE 0 #define DTC_LOAD_PAD 0x1800000 #define DTC_PAD_MASK 0xFFFFF #define DTB_MAX_SIZE (8 * 1024 * 1024) @@ -128,6 +127,8 @@ static int ppce500_load_device_tree(MachineState *machine, hwaddr addr, hwaddr initrd_base, hwaddr initrd_size, + hwaddr kernel_base, + hwaddr kernel_size, bool dry_run) { CPUPPCState *env = first_cpu->env_ptr; @@ -204,6 +205,13 @@ static int ppce500_load_device_tree(MachineState *machine, if (ret < 0) { fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); } + + } + + if (kernel_base != -1ULL) { + qemu_fdt_setprop_cells(fdt, "/chosen", "qemu,boot-kernel", + kernel_base >> 32, kernel_base, + kernel_size >> 32, kernel_size); } ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", @@ -392,20 +400,25 @@ typedef struct DeviceTreeParams { hwaddr addr; hwaddr initrd_base; hwaddr initrd_size; + hwaddr kernel_base; + hwaddr kernel_size; } DeviceTreeParams; static void ppce500_reset_device_tree(void *opaque) { DeviceTreeParams *p = opaque; ppce500_load_device_tree(p->machine, &p->params, p->addr, p->initrd_base, - p->initrd_size, false); + p->initrd_size, p->kernel_base, p->kernel_size, + false); } static int ppce500_prep_device_tree(MachineState *machine, PPCE500Params *params, hwaddr addr, hwaddr initrd_base, - hwaddr initrd_size) + hwaddr initrd_size, + hwaddr kernel_base, + hwaddr kernel_size) { DeviceTreeParams *p = g_new(DeviceTreeParams, 1); p->machine = machine; @@ -413,12 +426,15 @@ static int ppce500_prep_device_tree(MachineState *machine, p->addr = addr; p->initrd_base = initrd_base; p->initrd_size = initrd_size; + p->kernel_base = kernel_base; + p->kernel_size = kernel_size; qemu_register_reset(ppce500_reset_device_tree, p); /* Issue the device tree loader once, so that we get the size of the blob */ return ppce500_load_device_tree(machine, params, addr, initrd_base, - initrd_size, true); + initrd_size, kernel_base, kernel_size, + true); } /* Create -kernel TLB entries for BookE. */ @@ -603,17 +619,22 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) MemoryRegion *ram = g_new(MemoryRegion, 1); PCIBus *pci_bus; CPUPPCState *env = NULL; - uint64_t elf_entry; - uint64_t elf_lowaddr; - hwaddr entry=0; - hwaddr loadaddr=UIMAGE_LOAD_BASE; - target_long kernel_size=0; - target_ulong dt_base = 0; - target_ulong initrd_base = 0; - target_long initrd_size = 0; - target_ulong cur_base = 0; + uint64_t loadaddr; + hwaddr kernel_base = -1LL; + int kernel_size = 0; + hwaddr dt_base = 0; + hwaddr initrd_base = 0; + int initrd_size = 0; + hwaddr cur_base = 0; + char *filename; + hwaddr bios_entry = 0; + target_long bios_size; + struct boot_info *boot_info; + int dt_size; int i; - unsigned int pci_irq_nrs[4] = {1, 2, 3, 4}; + /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and + * 4 respectively */ + unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4}; qemu_irq **irqs, *mpic; DeviceState *dev; CPUPPCState *firstenv = NULL; @@ -713,12 +734,13 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) /* PCI */ dev = qdev_create(NULL, "e500-pcihost"); qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot); + qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); - sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]); - sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]); - sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]); - sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]); + for (i = 0; i < PCI_NUM_PINS; i++) { + sysbus_connect_irq(s, i, mpic[pci_irq_nrs[i]]); + } + memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET, sysbus_mmio_get_region(s, 0)); @@ -738,29 +760,24 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) /* Register spinning region */ sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL); + if (cur_base < (32 * 1024 * 1024)) { + /* u-boot occupies memory up to 32MB, so load blobs above */ + cur_base = (32 * 1024 * 1024); + } + /* Load kernel. */ if (machine->kernel_filename) { - kernel_size = load_uimage(machine->kernel_filename, &entry, - &loadaddr, NULL); - if (kernel_size < 0) { - kernel_size = load_elf(machine->kernel_filename, NULL, NULL, - &elf_entry, &elf_lowaddr, NULL, 1, - ELF_MACHINE, 0); - entry = elf_entry; - loadaddr = elf_lowaddr; - } - /* XXX try again as binary */ + kernel_base = cur_base; + kernel_size = load_image_targphys(machine->kernel_filename, + cur_base, + ram_size - cur_base); if (kernel_size < 0) { fprintf(stderr, "qemu: could not load kernel '%s'\n", machine->kernel_filename); exit(1); } - cur_base = loadaddr + kernel_size; - - /* Reserve space for dtb */ - dt_base = (cur_base + DTC_LOAD_PAD) & ~DTC_PAD_MASK; - cur_base += DTB_MAX_SIZE; + cur_base += kernel_size; } /* Load initrd. */ @@ -778,24 +795,60 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) cur_base = initrd_base + initrd_size; } - /* If we're loading a kernel directly, we must load the device tree too. */ - if (machine->kernel_filename) { - struct boot_info *boot_info; - int dt_size; - - dt_size = ppce500_prep_device_tree(machine, params, dt_base, - initrd_base, initrd_size); - if (dt_size < 0) { - fprintf(stderr, "couldn't load device tree\n"); + /* + * Smart firmware defaults ahead! + * + * We follow the following table to select which payload we execute. + * + * -kernel | -bios | payload + * ---------+-------+--------- + * N | Y | u-boot + * N | N | u-boot + * Y | Y | u-boot + * Y | N | kernel + * + * This ensures backwards compatibility with how we used to expose + * -kernel to users but allows them to run through u-boot as well. + */ + if (bios_name == NULL) { + if (machine->kernel_filename) { + bios_name = machine->kernel_filename; + } else { + bios_name = "u-boot.e500"; + } + } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + + bios_size = load_elf(filename, NULL, NULL, &bios_entry, &loadaddr, NULL, + 1, ELF_MACHINE, 0); + if (bios_size < 0) { + /* + * Hrm. No ELF image? Try a uImage, maybe someone is giving us an + * ePAPR compliant kernel + */ + kernel_size = load_uimage(filename, &bios_entry, &loadaddr, NULL); + if (kernel_size < 0) { + fprintf(stderr, "qemu: could not load firmware '%s'\n", filename); exit(1); } - assert(dt_size < DTB_MAX_SIZE); + } - boot_info = env->load_info; - boot_info->entry = entry; - boot_info->dt_base = dt_base; - boot_info->dt_size = dt_size; + /* Reserve space for dtb */ + dt_base = (loadaddr + bios_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK; + + dt_size = ppce500_prep_device_tree(machine, params, dt_base, + initrd_base, initrd_size, + kernel_base, kernel_size); + if (dt_size < 0) { + fprintf(stderr, "couldn't load device tree\n"); + exit(1); } + assert(dt_size < DTB_MAX_SIZE); + + boot_info = env->load_info; + boot_info->entry = bios_entry; + boot_info->dt_base = dt_base; + boot_info->dt_size = dt_size; if (kvm_enabled()) { kvmppc_init(); diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 4bdaa8d398..e493dc1b4b 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -72,6 +72,8 @@ #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 #define TBFREQ (100UL * 1000UL * 1000UL) +#define CLOCKFREQ (266UL * 1000UL * 1000UL) +#define BUSFREQ (100UL * 1000UL * 1000UL) /* debug UniNorth */ //#define DEBUG_UNIN @@ -467,7 +469,8 @@ static void ppc_core99_init(MachineState *machine) fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ); } /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, 266000000); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 77598e44cc..4b5e905fc2 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -46,6 +46,8 @@ #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 #define TBFREQ 16600000UL +#define CLOCKFREQ 266000000UL +#define BUSFREQ 66000000UL static int fw_cfg_boot_set(void *opaque, const char *boot_device) { @@ -337,7 +339,8 @@ static void ppc_heathrow_init(MachineState *machine) fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ); } /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, 266000000); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 71df471746..bec82cd7a9 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -29,9 +29,11 @@ #include "sysemu/cpus.h" #include "hw/timer/m48t59.h" #include "qemu/log.h" +#include "qemu/error-report.h" #include "hw/loader.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" +#include "trace.h" //#define PPC_DEBUG_IRQ //#define PPC_DEBUG_TB @@ -49,6 +51,8 @@ # define LOG_TB(...) do { } while (0) #endif +#define NSEC_PER_SEC 1000000000LL + static void cpu_ppc_tb_stop (CPUPPCState *env); static void cpu_ppc_tb_start (CPUPPCState *env); @@ -829,6 +833,81 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq) cpu_ppc_store_purr(cpu, 0x0000000000000000ULL); } +static void timebase_pre_save(void *opaque) +{ + PPCTimebase *tb = opaque; + uint64_t ticks = cpu_get_real_ticks(); + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + if (!first_ppc_cpu->env.tb_env) { + error_report("No timebase object"); + return; + } + + tb->time_of_the_day_ns = get_clock_realtime(); + /* + * tb_offset is only expected to be changed by migration so + * there is no need to update it from KVM here + */ + tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset; +} + +static int timebase_post_load(void *opaque, int version_id) +{ + PPCTimebase *tb_remote = opaque; + CPUState *cpu; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + int64_t tb_off_adj, tb_off, ns_diff; + int64_t migration_duration_ns, migration_duration_tb, guest_tb, host_ns; + unsigned long freq; + + if (!first_ppc_cpu->env.tb_env) { + error_report("No timebase object"); + return -1; + } + + freq = first_ppc_cpu->env.tb_env->tb_freq; + /* + * Calculate timebase on the destination side of migration. + * The destination timebase must be not less than the source timebase. + * We try to adjust timebase by downtime if host clocks are not + * too much out of sync (1 second for now). + */ + host_ns = get_clock_realtime(); + ns_diff = MAX(0, host_ns - tb_remote->time_of_the_day_ns); + migration_duration_ns = MIN(NSEC_PER_SEC, ns_diff); + migration_duration_tb = muldiv64(migration_duration_ns, freq, NSEC_PER_SEC); + guest_tb = tb_remote->guest_timebase + MIN(0, migration_duration_tb); + + tb_off_adj = guest_tb - cpu_get_real_ticks(); + + tb_off = first_ppc_cpu->env.tb_env->tb_offset; + trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_off_adj - tb_off, + (tb_off_adj - tb_off) / freq); + + /* Set new offset to all CPUs */ + CPU_FOREACH(cpu) { + PowerPCCPU *pcpu = POWERPC_CPU(cpu); + pcpu->env.tb_env->tb_offset = tb_off_adj; + } + + return 0; +} + +const VMStateDescription vmstate_ppc_timebase = { + .name = "timebase", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = timebase_pre_save, + .post_load = timebase_post_load, + .fields = (VMStateField []) { + VMSTATE_UINT64(guest_timebase, PPCTimebase), + VMSTATE_INT64(time_of_the_day_ns, PPCTimebase), + VMSTATE_END_OF_LIST() + }, +}; + /* Set up (once) timebase frequency (in Hz) */ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index adac5ffbfb..e06321cf15 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -34,6 +34,7 @@ #include "sysemu/kvm.h" #include "kvm_ppc.h" #include "mmu-hash64.h" +#include "qom/cpu.h" #include "hw/boards.h" #include "hw/ppc/ppc.h" @@ -53,6 +54,7 @@ #include "hw/usb.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "trace.h" #include <libfdt.h> @@ -78,13 +80,28 @@ #define TIMEBASE_FREQ 512000000ULL #define MAX_CPUS 256 -#define XICS_IRQS 1024 #define PHANDLE_XICP 0x00001111 #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) + +typedef struct SPAPRMachine SPAPRMachine; #define TYPE_SPAPR_MACHINE "spapr-machine" +#define SPAPR_MACHINE(obj) \ + OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE) + +/** + * SPAPRMachine: + */ +struct SPAPRMachine { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + char *kvm_type; +}; + sPAPREnvironment *spapr; @@ -202,34 +219,79 @@ static XICSState *xics_system_init(int nr_servers, int nr_irqs) return icp; } +static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, + int smt_threads) +{ + int i, ret = 0; + uint32_t servers_prop[smt_threads]; + uint32_t gservers_prop[smt_threads * 2]; + int index = ppc_get_vcpu_dt_id(cpu); + + if (cpu->cpu_version) { + ret = fdt_setprop(fdt, offset, "cpu-version", + &cpu->cpu_version, sizeof(cpu->cpu_version)); + if (ret < 0) { + return ret; + } + } + + /* Build interrupt servers and gservers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(index + i); + /* Hack, direct the group queues back to cpu 0 */ + gservers_prop[i*2] = cpu_to_be32(index + i); + gservers_prop[i*2 + 1] = 0; + } + ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)); + if (ret < 0) { + return ret; + } + ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", + gservers_prop, sizeof(gservers_prop)); + + return ret; +} + static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) { - int ret = 0, offset; - CPUState *cpu; + int ret = 0, offset, cpus_offset; + CPUState *cs; char cpu_model[32]; int smt = kvmppc_smt_threads(); uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; - CPU_FOREACH(cpu) { - DeviceClass *dc = DEVICE_GET_CLASS(cpu); - int index = ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int index = ppc_get_vcpu_dt_id(cpu); uint32_t associativity[] = {cpu_to_be32(0x5), cpu_to_be32(0x0), cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(cpu->numa_node), + cpu_to_be32(cs->numa_node), cpu_to_be32(index)}; if ((index % smt) != 0) { continue; } - snprintf(cpu_model, 32, "/cpus/%s@%x", dc->fw_name, - index); + snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index); - offset = fdt_path_offset(fdt, cpu_model); + cpus_offset = fdt_path_offset(fdt, "/cpus"); + if (cpus_offset < 0) { + cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "cpus"); + if (cpus_offset < 0) { + return cpus_offset; + } + } + offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model); if (offset < 0) { - return offset; + offset = fdt_add_subnode(fdt, cpus_offset, cpu_model); + if (offset < 0) { + return offset; + } } if (nb_numa_nodes > 1) { @@ -245,6 +307,12 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) if (ret < 0) { return ret; } + + ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, + ppc_get_compat_smt_threads(cpu)); + if (ret < 0) { + return ret; + } } return ret; } @@ -293,6 +361,10 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, } \ } while (0) +static void add_str(GString *s, const gchar *s1) +{ + g_string_append_len(s, s1, strlen(s1) + 1); +} static void *spapr_create_fdt_skel(hwaddr initrd_base, hwaddr initrd_size, @@ -306,13 +378,26 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, CPUState *cs; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); - char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" - "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode"; - char qemu_hypertas_prop[] = "hcall-memop1"; + GString *hypertas = g_string_sized_new(256); + GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; - int i, smt = kvmppc_smt_threads(); + int smt = kvmppc_smt_threads(); unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; + QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); + unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; + uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + + add_str(hypertas, "hcall-pft"); + add_str(hypertas, "hcall-term"); + add_str(hypertas, "hcall-dabr"); + add_str(hypertas, "hcall-interrupt"); + add_str(hypertas, "hcall-tce"); + add_str(hypertas, "hcall-vio"); + add_str(hypertas, "hcall-splpar"); + add_str(hypertas, "hcall-bulk"); + add_str(hypertas, "hcall-set-mode"); + add_str(qemu_hypertas, "hcall-memop1"); fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create(fdt, FDT_MAX_SIZE))); @@ -375,8 +460,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, DeviceClass *dc = DEVICE_GET_CLASS(cs); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); int index = ppc_get_vcpu_dt_id(cpu); - uint32_t servers_prop[smp_threads]; - uint32_t gservers_prop[smp_threads * 2]; char *nodename; uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), 0xffffffff, 0xffffffff}; @@ -425,18 +508,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_string(fdt, "status", "okay"))); _FDT((fdt_property(fdt, "64-bit", NULL, 0))); - /* Build interrupt servers and gservers properties */ - for (i = 0; i < smp_threads; i++) { - servers_prop[i] = cpu_to_be32(index + i); - /* Hack, direct the group queues back to cpu 0 */ - gservers_prop[i*2] = cpu_to_be32(index + i); - gservers_prop[i*2 + 1] = 0; - } - _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s", - servers_prop, sizeof(servers_prop)))); - _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s", - gservers_prop, sizeof(gservers_prop)))); - if (env->spr_cb[SPR_PURR].oea_read) { _FDT((fdt_property(fdt, "ibm,purr", NULL, 0))); } @@ -470,6 +541,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, page_sizes_prop, page_sizes_prop_size))); } + _FDT((fdt_property_cell(fdt, "ibm,chip-id", + cs->cpu_index / cpus_per_socket))); + _FDT((fdt_end_node(fdt))); } @@ -478,10 +552,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, /* RTAS */ _FDT((fdt_begin_node(fdt, "rtas"))); - _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop, - sizeof(hypertas_prop)))); - _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop, - sizeof(qemu_hypertas_prop)))); + if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { + add_str(hypertas, "hcall-multi-tce"); + } + _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str, + hypertas->len))); + g_string_free(hypertas, TRUE); + _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str, + qemu_hypertas->len))); + g_string_free(qemu_hypertas, TRUE); _FDT((fdt_property(fdt, "ibm,associativity-reference-points", refpoints, sizeof(refpoints)))); @@ -521,12 +600,68 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, /* event-sources */ spapr_events_fdt_skel(fdt, epow_irq); + /* /hypervisor node */ + if (kvm_enabled()) { + uint8_t hypercall[16]; + + /* indicate KVM hypercall interface */ + _FDT((fdt_begin_node(fdt, "hypervisor"))); + _FDT((fdt_property_string(fdt, "compatible", "linux,kvm"))); + if (kvmppc_has_cap_fixup_hcalls()) { + /* + * Older KVM versions with older guest kernels were broken with the + * magic page, don't allow the guest to map it. + */ + kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall)); + _FDT((fdt_property(fdt, "hcall-instructions", hypercall, + sizeof(hypercall)))); + } + _FDT((fdt_end_node(fdt))); + } + _FDT((fdt_end_node(fdt))); /* close root node */ _FDT((fdt_finish(fdt))); return fdt; } +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size) +{ + void *fdt, *fdt_skel; + sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; + + size -= sizeof(hdr); + + /* Create sceleton */ + fdt_skel = g_malloc0(size); + _FDT((fdt_create(fdt_skel, size))); + _FDT((fdt_begin_node(fdt_skel, ""))); + _FDT((fdt_end_node(fdt_skel))); + _FDT((fdt_finish(fdt_skel))); + fdt = g_malloc0(size); + _FDT((fdt_open_into(fdt_skel, fdt, size))); + g_free(fdt_skel); + + /* Fix skeleton up */ + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + if (fdt_totalsize(fdt) + sizeof(hdr) > size) { + trace_spapr_cas_failed(size); + return -1; + } + + cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); + cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); + trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); + g_free(fdt); + + return 0; +} + static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) { uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0), @@ -817,14 +952,14 @@ static int spapr_vga_init(PCIBus *pci_bus) static const VMStateDescription vmstate_spapr = { .name = "spapr", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(next_irq, sPAPREnvironment), /* RTC offset */ VMSTATE_UINT64(rtc_offset, sPAPREnvironment), - + VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2), VMSTATE_END_OF_LIST() }, }; @@ -1251,6 +1386,12 @@ static void ppc_spapr_init(MachineState *machine) kvmppc_set_papr(cpu); } + if (cpu->max_compat) { + if (ppc_set_compat(cpu, cpu->max_compat) < 0) { + exit(1); + } + } + xics_cpu_setup(spapr->icp, cpu); qemu_register_reset(spapr_cpu_reset, cpu); @@ -1475,6 +1616,27 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, return NULL; } +static char *spapr_get_kvm_type(Object *obj, Error **errp) +{ + SPAPRMachine *sm = SPAPR_MACHINE(obj); + + return g_strdup(sm->kvm_type); +} + +static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) +{ + SPAPRMachine *sm = SPAPR_MACHINE(obj); + + g_free(sm->kvm_type); + sm->kvm_type = g_strdup(value); +} + +static void spapr_machine_initfn(Object *obj) +{ + object_property_add_str(obj, "kvm-type", + spapr_get_kvm_type, spapr_set_kvm_type, NULL); +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1497,6 +1659,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) static const TypeInfo spapr_machine_info = { .name = TYPE_SPAPR_MACHINE, .parent = TYPE_MACHINE, + .instance_size = sizeof(SPAPRMachine), + .instance_init = spapr_machine_initfn, .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 0bae0535e8..795207799d 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -3,6 +3,9 @@ #include "helper_regs.h" #include "hw/ppc/spapr.h" #include "mmu-hash64.h" +#include "cpu-models.h" +#include "trace.h" +#include "kvm_ppc.h" struct SPRSyncState { CPUState *cs; @@ -709,47 +712,218 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } +static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + CPUState *cs; + + if (value1) { + return H_P3; + } + if (value2) { + return H_P4; + } + + switch (mflags) { + case H_SET_MODE_ENDIAN_BIG: + CPU_FOREACH(cs) { + set_spr(cs, SPR_LPCR, 0, LPCR_ILE); + } + return H_SUCCESS; + + case H_SET_MODE_ENDIAN_LITTLE: + CPU_FOREACH(cs) { + set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE); + } + return H_SUCCESS; + } + + return H_UNSUPPORTED_FLAG; +} + +static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + CPUState *cs; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + target_ulong prefix; + + if (!(pcc->insns_flags2 & PPC2_ISA207S)) { + return H_P2; + } + if (value1) { + return H_P3; + } + if (value2) { + return H_P4; + } + + switch (mflags) { + case H_SET_MODE_ADDR_TRANS_NONE: + prefix = 0; + break; + case H_SET_MODE_ADDR_TRANS_0001_8000: + prefix = 0x18000; + break; + case H_SET_MODE_ADDR_TRANS_C000_0000_0000_4000: + prefix = 0xC000000000004000; + break; + default: + return H_UNSUPPORTED_FLAG; + } + + CPU_FOREACH(cs) { + CPUPPCState *env = &POWERPC_CPU(cpu)->env; + + set_spr(cs, SPR_LPCR, mflags << LPCR_AIL_SHIFT, LPCR_AIL); + env->excp_prefix = prefix; + } + + return H_SUCCESS; +} + static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { - CPUState *cs; - target_ulong mflags = args[0]; target_ulong resource = args[1]; - target_ulong value1 = args[2]; - target_ulong value2 = args[3]; target_ulong ret = H_P2; - if (resource == H_SET_MODE_RESOURCE_LE) { - if (value1) { - ret = H_P3; - goto out; - } - if (value2) { - ret = H_P4; - goto out; - } - switch (mflags) { - case H_SET_MODE_ENDIAN_BIG: - CPU_FOREACH(cs) { - set_spr(cs, SPR_LPCR, 0, LPCR_ILE); - } - ret = H_SUCCESS; - break; + switch (resource) { + case H_SET_MODE_RESOURCE_LE: + ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]); + break; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0], + args[2], args[3]); + break; + } + + return ret; +} + +typedef struct { + PowerPCCPU *cpu; + uint32_t cpu_version; + int ret; +} SetCompatState; - case H_SET_MODE_ENDIAN_LITTLE: - CPU_FOREACH(cs) { - set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE); +static void do_set_compat(void *arg) +{ + SetCompatState *s = arg; + + cpu_synchronize_state(CPU(s->cpu)); + s->ret = ppc_set_compat(s->cpu, s->cpu_version); +} + +#define get_compat_level(cpuver) ( \ + ((cpuver) == CPU_POWERPC_LOGICAL_2_05) ? 2050 : \ + ((cpuver) == CPU_POWERPC_LOGICAL_2_06) ? 2060 : \ + ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \ + ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) + +static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, + sPAPREnvironment *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong list = args[0]; + PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_); + CPUState *cs; + bool cpu_match = false; + unsigned old_cpu_version = cpu_->cpu_version; + unsigned compat_lvl = 0, cpu_version = 0; + unsigned max_lvl = get_compat_level(cpu_->max_compat); + int counter; + + /* Parse PVR list */ + for (counter = 0; counter < 512; ++counter) { + uint32_t pvr, pvr_mask; + + pvr_mask = rtas_ld(list, 0); + list += 4; + pvr = rtas_ld(list, 0); + list += 4; + + trace_spapr_cas_pvr_try(pvr); + if (!max_lvl && + ((cpu_->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask))) { + cpu_match = true; + cpu_version = 0; + } else if (pvr == cpu_->cpu_version) { + cpu_match = true; + cpu_version = cpu_->cpu_version; + } else if (!cpu_match) { + /* If it is a logical PVR, try to determine the highest level */ + unsigned lvl = get_compat_level(pvr); + if (lvl) { + bool is205 = (pcc_->pcr_mask & PCR_COMPAT_2_05) && + (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05)); + bool is206 = (pcc_->pcr_mask & PCR_COMPAT_2_06) && + ((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) || + (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS))); + + if (is205 || is206) { + if (!max_lvl) { + /* User did not set the level, choose the highest */ + if (compat_lvl <= lvl) { + compat_lvl = lvl; + cpu_version = pvr; + } + } else if (max_lvl >= lvl) { + /* User chose the level, don't set higher than this */ + compat_lvl = lvl; + cpu_version = pvr; + } + } } - ret = H_SUCCESS; + } + /* Terminator record */ + if (~pvr_mask & pvr) { break; + } + } - default: - ret = H_UNSUPPORTED_FLAG; + /* For the future use: here @list points to the first capability */ + + /* Parsing finished */ + trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match, + cpu_version, pcc_->pcr_mask); + + /* Update CPUs */ + if (old_cpu_version != cpu_version) { + CPU_FOREACH(cs) { + SetCompatState s = { + .cpu = POWERPC_CPU(cs), + .cpu_version = cpu_version, + .ret = 0 + }; + + run_on_cpu(cs, do_set_compat, &s); + + if (s.ret < 0) { + fprintf(stderr, "Unable to set compatibility mode\n"); + return H_HARDWARE; + } } } -out: - return ret; + if (!cpu_version) { + return H_SUCCESS; + } + + if (!list) { + return H_SUCCESS; + } + + if (spapr_h_cas_compose_response(args[1], args[2])) { + qemu_system_reset_request(); + } + + return H_SUCCESS; } static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; @@ -831,6 +1005,9 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas); spapr_register_hypercall(H_SET_MODE, h_set_mode); + + /* ibm,client-architecture-support support */ + spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); } type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 2887ad4827..9e49ec4a5c 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -35,6 +35,9 @@ enum sPAPRTCEAccess { SPAPR_TCE_RW = 3, }; +#define IOMMU_PAGE_SIZE(shift) (1ULL << (shift)) +#define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1)) + static QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables; static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) @@ -70,12 +73,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) if (tcet->bypass) { ret.perm = IOMMU_RW; - } else if (addr < tcet->window_size) { + } else if ((addr >> tcet->page_shift) < tcet->nb_table) { /* Check if we are in bound */ - tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT]; - ret.iova = addr & ~SPAPR_TCE_PAGE_MASK; - ret.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK; - ret.addr_mask = SPAPR_TCE_PAGE_MASK; + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + tce = tcet->table[addr >> tcet->page_shift]; + ret.iova = addr & page_mask; + ret.translated_addr = tce & page_mask; + ret.addr_mask = ~page_mask; ret.perm = tce; } trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm, @@ -84,24 +89,14 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) return ret; } -static int spapr_tce_table_pre_load(void *opaque) -{ - sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque); - - tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT; - - return 0; -} - static const VMStateDescription vmstate_spapr_tce_table = { .name = "spapr_iommu", - .version_id = 1, - .minimum_version_id = 1, - .pre_load = spapr_tce_table_pre_load, - .fields = (VMStateField[]) { + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField []) { /* Sanity check */ VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable), - VMSTATE_UINT32_EQUAL(window_size, sPAPRTCETable), + VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable), /* IOMMU state */ VMSTATE_BOOL(bypass, sPAPRTCETable), @@ -121,28 +116,33 @@ static int spapr_tce_table_realize(DeviceState *dev) if (kvm_enabled()) { tcet->table = kvmppc_create_spapr_tce(tcet->liobn, - tcet->window_size, + tcet->nb_table << + tcet->page_shift, &tcet->fd); } if (!tcet->table) { - size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT) - * sizeof(uint64_t); + size_t table_size = tcet->nb_table * sizeof(uint64_t); tcet->table = g_malloc0(table_size); } - tcet->nb_table = tcet->window_size >> SPAPR_TCE_PAGE_SHIFT; trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd); memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops, - "iommu-spapr", UINT64_MAX); + "iommu-spapr", ram_size); QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list); + vmstate_register(DEVICE(tcet), tcet->liobn, &vmstate_spapr_tce_table, + tcet); + return 0; } -sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t window_size) +sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, + uint64_t bus_offset, + uint32_t page_shift, + uint32_t nb_table) { sPAPRTCETable *tcet; @@ -152,17 +152,19 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, size_t wi return NULL; } - if (!window_size) { + if (!nb_table) { return NULL; } tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE)); tcet->liobn = liobn; - tcet->window_size = window_size; + tcet->bus_offset = bus_offset; + tcet->page_shift = page_shift; + tcet->nb_table = nb_table; object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL); - qdev_init_nofail(DEVICE(tcet)); + object_property_set_bool(OBJECT(tcet), true, "realized", NULL); return tcet; } @@ -175,7 +177,7 @@ static void spapr_tce_table_finalize(Object *obj) if (!kvm_enabled() || (kvmppc_remove_spapr_tce(tcet->table, tcet->fd, - tcet->window_size) != 0)) { + tcet->nb_table) != 0)) { g_free(tcet->table); } } @@ -193,8 +195,7 @@ void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass) static void spapr_tce_reset(DeviceState *dev) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); - size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT) - * sizeof(uint64_t); + size_t table_size = tcet->nb_table * sizeof(uint64_t); tcet->bypass = false; memset(tcet->table, 0, table_size); @@ -204,25 +205,110 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, target_ulong tce) { IOMMUTLBEntry entry; + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; - if (ioba >= tcet->window_size) { + if (index >= tcet->nb_table) { hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x" TARGET_FMT_lx "\n", ioba); return H_PARAMETER; } - tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT] = tce; + tcet->table[index] = tce; entry.target_as = &address_space_memory, - entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK; - entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK; - entry.addr_mask = SPAPR_TCE_PAGE_MASK; + entry.iova = ioba & page_mask; + entry.translated_addr = tce & page_mask; + entry.addr_mask = ~page_mask; entry.perm = tce; memory_region_notify_iommu(&tcet->iommu, entry); return H_SUCCESS; } +static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, + sPAPREnvironment *spapr, + target_ulong opcode, target_ulong *args) +{ + int i; + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong ioba1 = ioba; + target_ulong tce_list = args[2]; + target_ulong npages = args[3]; + target_ulong ret = H_PARAMETER; + sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); + CPUState *cs = CPU(cpu); + hwaddr page_mask, page_size; + + if (!tcet) { + return H_PARAMETER; + } + + if ((npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) { + return H_PARAMETER; + } + + page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + page_size = IOMMU_PAGE_SIZE(tcet->page_shift); + ioba &= page_mask; + + for (i = 0; i < npages; ++i, ioba += page_size) { + target_ulong off = (tce_list & ~SPAPR_TCE_RW) + + i * sizeof(target_ulong); + target_ulong tce = ldq_phys(cs->as, off); + + ret = put_tce_emu(tcet, ioba, tce); + if (ret) { + break; + } + } + + /* Trace last successful or the first problematic entry */ + i = i ? (i - 1) : 0; + trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, + ldq_phys(cs->as, + tce_list + i * sizeof(target_ulong)), + ret); + + return ret; +} + +static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, + target_ulong opcode, target_ulong *args) +{ + int i; + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce_value = args[2]; + target_ulong npages = args[3]; + target_ulong ret = H_PARAMETER; + sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); + hwaddr page_mask, page_size; + + if (!tcet) { + return H_PARAMETER; + } + + if (npages > tcet->nb_table) { + return H_PARAMETER; + } + + page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + page_size = IOMMU_PAGE_SIZE(tcet->page_shift); + ioba &= page_mask; + + for (i = 0; i < npages; ++i, ioba += page_size) { + ret = put_tce_emu(tcet, ioba, tce_value); + if (ret) { + break; + } + } + trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret); + + return ret; +} + static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { @@ -232,9 +318,11 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong ret = H_PARAMETER; sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1); - if (tcet) { + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + ioba &= page_mask; + ret = put_tce_emu(tcet, ioba, tce); } trace_spapr_iommu_put(liobn, ioba, tce, ret); @@ -245,13 +333,15 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, target_ulong *tce) { - if (ioba >= tcet->window_size) { + unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; + + if (index >= tcet->nb_table) { hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x" TARGET_FMT_lx "\n", ioba); return H_PARAMETER; } - *tce = tcet->table[ioba >> SPAPR_TCE_PAGE_SHIFT]; + *tce = tcet->table[index]; return H_SUCCESS; } @@ -265,9 +355,11 @@ static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong ret = H_PARAMETER; sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); - ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1); - if (tcet) { + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + ioba &= page_mask; + ret = get_tce_emu(tcet, ioba, &tce); if (!ret) { args[0] = tce; @@ -316,13 +408,12 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, } return spapr_dma_dt(fdt, node_off, propname, - tcet->liobn, 0, tcet->window_size); + tcet->liobn, 0, tcet->nb_table << tcet->page_shift); } static void spapr_tce_table_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->vmsd = &vmstate_spapr_tce_table; dc->init = spapr_tce_table_realize; dc->reset = spapr_tce_reset; @@ -331,6 +422,8 @@ static void spapr_tce_table_class_init(ObjectClass *klass, void *data) /* hcall-tce */ spapr_register_hypercall(H_PUT_TCE, h_put_tce); spapr_register_hypercall(H_GET_TCE, h_get_tce); + spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect); + spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce); } static TypeInfo spapr_tce_table_info = { diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a4a51d4990..988f8cb858 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -280,7 +280,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */ unsigned int seq_num = rtas_ld(args, 5); unsigned int ret_intr_type; - int ndev, irq; + int ndev, irq, max_irqs = 0; sPAPRPHBState *phb = NULL; PCIDevice *pdev = NULL; @@ -333,6 +333,23 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, } trace_spapr_pci_msi("Configuring MSI", ndev, config_addr); + /* Check if the device supports as many IRQs as requested */ + if (ret_intr_type == RTAS_TYPE_MSI) { + max_irqs = msi_nr_vectors_allocated(pdev); + } else if (ret_intr_type == RTAS_TYPE_MSIX) { + max_irqs = pdev->msix_entries_nr; + } + if (!max_irqs) { + error_report("Requested interrupt type %d is not enabled for device#%d", + ret_intr_type, ndev); + rtas_st(rets, 0, -1); /* Hardware error */ + return; + } + /* Correct the number if the guest asked for too many */ + if (req_num > max_irqs) { + req_num = max_irqs; + } + /* Check if there is an old config and MSI number has not changed */ if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) { /* Unexpected behaviour */ @@ -511,6 +528,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) SysBusDevice *s = SYS_BUS_DEVICE(dev); sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); PCIHostState *phb = PCI_HOST_BRIDGE(s); + sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s); char *namebuf; int i; PCIBus *bus; @@ -575,23 +593,14 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, &sphb->memwindow); - /* On ppc, we only have MMIO no specific IO space from the CPU - * perspective. In theory we ought to be able to embed the PCI IO - * memory region direction in the system memory space. However, - * if any of the IO BAR subregions use the old_portio mechanism, - * that won't be processed properly unless accessed from the - * system io address space. This hack to bounce things via - * system_io works around the problem until all the users of - * old_portion are updated */ + /* Initialize IO regions */ sprintf(namebuf, "%s.io", sphb->dtbusname); memory_region_init(&sphb->iospace, OBJECT(sphb), namebuf, SPAPR_PCI_IO_WIN_SIZE); - /* FIXME: fix to support multiple PHBs */ - memory_region_add_subregion(get_system_io(), 0, &sphb->iospace); sprintf(namebuf, "%s.io-alias", sphb->dtbusname); memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf, - get_system_io(), 0, SPAPR_PCI_IO_WIN_SIZE); + &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE); memory_region_add_subregion(get_system_memory(), sphb->io_win_addr, &sphb->iowindow); @@ -601,16 +610,18 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); phb->bus = bus; - sphb->dma_window_start = 0; - sphb->dma_window_size = 0x40000000; - sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn, - sphb->dma_window_size); - if (!sphb->tcet) { - error_setg(errp, "Unable to create TCE table for %s", - sphb->dtbusname); - return; - } - address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet), + /* + * Initialize PHB address space. + * By default there will be at least one subregion for default + * 32bit DMA window. + * Later the guest might want to create another DMA window + * which will become another memory subregion. + */ + sprintf(namebuf, "%s.iommu-root", sphb->dtbusname); + + memory_region_init(&sphb->iommu_root, OBJECT(sphb), + namebuf, UINT64_MAX); + address_space_init(&sphb->iommu_as, &sphb->iommu_root, sphb->dtbusname); pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb); @@ -631,15 +642,49 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sphb->lsi_table[i].irq = irq; } + + if (!info->finish_realize) { + error_setg(errp, "finish_realize not defined"); + return; + } + + info->finish_realize(sphb, errp); } -static void spapr_phb_reset(DeviceState *qdev) +static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) { - SysBusDevice *s = SYS_BUS_DEVICE(qdev); - sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); + sPAPRTCETable *tcet; + tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, + 0, + SPAPR_TCE_PAGE_SHIFT, + 0x40000000 >> SPAPR_TCE_PAGE_SHIFT); + if (!tcet) { + error_setg(errp, "Unable to create TCE table for %s", + sphb->dtbusname); + return ; + } + + /* Register default 32bit DMA window */ + memory_region_add_subregion(&sphb->iommu_root, 0, + spapr_tce_get_iommu(tcet)); +} + +static int spapr_phb_children_reset(Object *child, void *opaque) +{ + DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE); + + if (dev) { + device_reset(dev); + } + + return 0; +} + +static void spapr_phb_reset(DeviceState *qdev) +{ /* Reset the IOMMU state */ - device_reset(DEVICE(sphb->tcet)); + object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL); } static Property spapr_phb_properties[] = { @@ -711,6 +756,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) { PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); hc->root_bus_path = spapr_phb_root_bus_path; dc->realize = spapr_phb_realize; @@ -719,6 +765,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_spapr_pci; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->cannot_instantiate_with_device_add_yet = false; + spc->finish_realize = spapr_phb_finish_realize; } static const TypeInfo spapr_phb_info = { @@ -726,6 +773,7 @@ static const TypeInfo spapr_phb_info = { .parent = TYPE_PCI_HOST_BRIDGE, .instance_size = sizeof(sPAPRPHBState), .class_init = spapr_phb_class_init, + .class_size = sizeof(sPAPRPHBClass), }; PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) @@ -750,6 +798,29 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) #define b_fff(x) b_x((x), 8, 3) /* function number */ #define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ +typedef struct sPAPRTCEDT { + void *fdt; + int node_off; +} sPAPRTCEDT; + +static int spapr_phb_children_dt(Object *child, void *opaque) +{ + sPAPRTCEDT *p = opaque; + sPAPRTCETable *tcet; + + tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (!tcet) { + return 0; + } + + spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window", + tcet->liobn, tcet->bus_offset, + tcet->nb_table << tcet->page_shift); + /* Stop after the first window */ + + return 1; +} + int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) @@ -805,6 +876,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges))); _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); + _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS)); /* Build the interrupt-map, this must matches what is done * in pci_spapr_map_irq @@ -829,9 +901,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map, sizeof(interrupt_map))); - spapr_dma_dt(fdt, bus_off, "ibm,dma-window", - phb->dma_liobn, phb->dma_window_start, - phb->dma_window_size); + object_child_foreach(OBJECT(phb), spapr_phb_children_dt, + &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off })); return 0; } diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index bce8d7f4e0..04e16ae04d 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -456,7 +456,11 @@ static int spapr_vio_busdev_init(DeviceState *qdev) if (pc->rtce_window_size) { uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg; - dev->tcet = spapr_tce_new_table(qdev, liobn, pc->rtce_window_size); + dev->tcet = spapr_tce_new_table(qdev, liobn, + 0, + SPAPR_TCE_PAGE_SHIFT, + pc->rtce_window_size >> + SPAPR_TCE_PAGE_SHIFT); address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id); } |