diff options
author | Blue Swirl <blauwirbel@gmail.com> | 2011-11-01 20:57:01 +0000 |
---|---|---|
committer | Blue Swirl <blauwirbel@gmail.com> | 2011-11-01 20:57:01 +0000 |
commit | e927dab1fda0eee41fa5fe51ae98293f66a86db1 (patch) | |
tree | f4e2c0f47ef3f0e4821d019bb796a1a1c0c225a8 /hw | |
parent | 2ff6458116546ced7ce00cc39423ee30b1477e67 (diff) | |
parent | 3384f95c59e5db381cf3e605c8acec71baf0e6b8 (diff) |
Merge branch 'ppc-next' of git://repo.or.cz/qemu/agraf
* 'ppc-next' of git://repo.or.cz/qemu/agraf: (24 commits)
pseries: Add partial support for PCI
ppc: Alter CPU state to mask out TCG unimplemented instructions as appropriate
pseries: Allow writes to KVM accelerated TCE table
KVM: PPC: Override host vmx/vsx/dfp only when information known
ppc: Fix up usermode only builds
pseries: Correct vmx/dfp handling in both KVM and TCG cases
PPC: Fail configure when libfdt is not available
ppc: Avoid decrementer related kvm exits
PPC: Disable non-440 CPUs for ppcemb target
PPC: Bump qemu-system-ppc to 64-bit physical address space
pseries: Under kvm use guest cpu = host cpu by default
ppc: Add cpu defs for POWER7 revisions 2.1 and 2.3
ppc: First cut implementation of -cpu host
ppc: Remove broken partial PVR matching
pseries: Update SLOF firmware image
pseries: Add device tree properties for VMX/VSX and DFP under kvm
ppc: Generalize the kvmppc_get_clockfreq() function
Set an invalid-bits mask for each SPE instructions
pseries: Update SLOF firmware image
pseries: Use Book3S-HV TCE acceleration capabilities
...
Diffstat (limited to 'hw')
-rw-r--r-- | hw/ppc.c | 6 | ||||
-rw-r--r-- | hw/ppce500_pci.c | 82 | ||||
-rw-r--r-- | hw/spapr.c | 135 | ||||
-rw-r--r-- | hw/spapr.h | 2 | ||||
-rw-r--r-- | hw/spapr_pci.c | 508 | ||||
-rw-r--r-- | hw/spapr_pci.h | 61 | ||||
-rw-r--r-- | hw/spapr_vio.c | 8 | ||||
-rw-r--r-- | hw/spapr_vio.h | 1 |
8 files changed, 762 insertions, 41 deletions
@@ -662,6 +662,12 @@ static void __cpu_ppc_store_decr (CPUState *env, uint64_t *nextp, LOG_TB("%s: %08" PRIx32 " => %08" PRIx32 "\n", __func__, decr, value); + + if (kvm_enabled()) { + /* KVM handles decrementer exceptions, we don't need our own timer */ + return; + } + now = qemu_get_clock_ns(vm_clock); next = now + muldiv64(value, get_ticks_per_sec(), tb_env->decr_freq); if (is_excp) { diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 2db365d0b6..960a5d0c60 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -89,6 +89,7 @@ static uint32_t pci_reg_read4(void *opaque, target_phys_addr_t addr) PPCE500PCIState *pci = opaque; unsigned long win; uint32_t value = 0; + int idx; win = addr & 0xfe0; @@ -97,24 +98,44 @@ static uint32_t pci_reg_read4(void *opaque, target_phys_addr_t addr) case PPCE500_PCI_OW2: case PPCE500_PCI_OW3: case PPCE500_PCI_OW4: + idx = (addr >> 5) & 0x7; switch (addr & 0xC) { - case PCI_POTAR: value = pci->pob[(addr >> 5) & 0x7].potar; break; - case PCI_POTEAR: value = pci->pob[(addr >> 5) & 0x7].potear; break; - case PCI_POWBAR: value = pci->pob[(addr >> 5) & 0x7].powbar; break; - case PCI_POWAR: value = pci->pob[(addr >> 5) & 0x7].powar; break; - default: break; + case PCI_POTAR: + value = pci->pob[idx].potar; + break; + case PCI_POTEAR: + value = pci->pob[idx].potear; + break; + case PCI_POWBAR: + value = pci->pob[idx].powbar; + break; + case PCI_POWAR: + value = pci->pob[idx].powar; + break; + default: + break; } break; case PPCE500_PCI_IW3: case PPCE500_PCI_IW2: case PPCE500_PCI_IW1: + idx = ((addr >> 5) & 0x3) - 1; switch (addr & 0xC) { - case PCI_PITAR: value = pci->pib[(addr >> 5) & 0x3].pitar; break; - case PCI_PIWBAR: value = pci->pib[(addr >> 5) & 0x3].piwbar; break; - case PCI_PIWBEAR: value = pci->pib[(addr >> 5) & 0x3].piwbear; break; - case PCI_PIWAR: value = pci->pib[(addr >> 5) & 0x3].piwar; break; - default: break; + case PCI_PITAR: + value = pci->pib[idx].pitar; + break; + case PCI_PIWBAR: + value = pci->pib[idx].piwbar; + break; + case PCI_PIWBEAR: + value = pci->pib[idx].piwbear; + break; + case PCI_PIWAR: + value = pci->pib[idx].piwar; + break; + default: + break; }; break; @@ -142,6 +163,7 @@ static void pci_reg_write4(void *opaque, target_phys_addr_t addr, { PPCE500PCIState *pci = opaque; unsigned long win; + int idx; win = addr & 0xfe0; @@ -153,24 +175,44 @@ static void pci_reg_write4(void *opaque, target_phys_addr_t addr, case PPCE500_PCI_OW2: case PPCE500_PCI_OW3: case PPCE500_PCI_OW4: + idx = (addr >> 5) & 0x7; switch (addr & 0xC) { - case PCI_POTAR: pci->pob[(addr >> 5) & 0x7].potar = value; break; - case PCI_POTEAR: pci->pob[(addr >> 5) & 0x7].potear = value; break; - case PCI_POWBAR: pci->pob[(addr >> 5) & 0x7].powbar = value; break; - case PCI_POWAR: pci->pob[(addr >> 5) & 0x7].powar = value; break; - default: break; + case PCI_POTAR: + pci->pob[idx].potar = value; + break; + case PCI_POTEAR: + pci->pob[idx].potear = value; + break; + case PCI_POWBAR: + pci->pob[idx].powbar = value; + break; + case PCI_POWAR: + pci->pob[idx].powar = value; + break; + default: + break; }; break; case PPCE500_PCI_IW3: case PPCE500_PCI_IW2: case PPCE500_PCI_IW1: + idx = ((addr >> 5) & 0x3) - 1; switch (addr & 0xC) { - case PCI_PITAR: pci->pib[(addr >> 5) & 0x3].pitar = value; break; - case PCI_PIWBAR: pci->pib[(addr >> 5) & 0x3].piwbar = value; break; - case PCI_PIWBEAR: pci->pib[(addr >> 5) & 0x3].piwbear = value; break; - case PCI_PIWAR: pci->pib[(addr >> 5) & 0x3].piwar = value; break; - default: break; + case PCI_PITAR: + pci->pib[idx].pitar = value; + break; + case PCI_PIWBAR: + pci->pib[idx].piwbar = value; + break; + case PCI_PIWBEAR: + pci->pib[idx].piwbear = value; + break; + case PCI_PIWAR: + pci->pib[idx].piwar = value; + break; + default: + break; }; break; diff --git a/hw/spapr.c b/hw/spapr.c index 63e5d336ea..bdaa938b6b 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -29,6 +29,9 @@ #include "elf.h" #include "net.h" #include "blockdev.h" +#include "cpus.h" +#include "kvm.h" +#include "kvm_ppc.h" #include "hw/boards.h" #include "hw/ppc.h" @@ -36,10 +39,12 @@ #include "hw/spapr.h" #include "hw/spapr_vio.h" +#include "hw/spapr_pci.h" #include "hw/xics.h" #include "kvm.h" #include "kvm_ppc.h" +#include "pci.h" #include "exec-memory.h" @@ -59,6 +64,11 @@ #define MAX_CPUS 256 #define XICS_IRQS 1024 +#define SPAPR_PCI_BUID 0x800000020000001ULL +#define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000) +#define SPAPR_PCI_MEM_WIN_SIZE 0x20000000 +#define SPAPR_PCI_IO_WIN_ADDR (0x10000000000ULL + 0x80000000) + #define PHANDLE_XICP 0x00001111 sPAPREnvironment *spapr; @@ -88,6 +98,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num) } static void *spapr_create_fdt_skel(const char *cpu_model, + target_phys_addr_t rma_size, target_phys_addr_t initrd_base, target_phys_addr_t initrd_size, const char *boot_device, @@ -96,7 +107,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model, { void *fdt; CPUState *env; - uint64_t mem_reg_property[] = { 0, cpu_to_be64(ram_size) }; + uint64_t mem_reg_property_rma[] = { 0, cpu_to_be64(rma_size) }; + uint64_t mem_reg_property_nonrma[] = { cpu_to_be64(rma_size), + cpu_to_be64(ram_size - rma_size) }; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)}; @@ -105,6 +118,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model, uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; int i; char *modelname; + int smt = kvmppc_smt_threads(); #define _FDT(exp) \ do { \ @@ -139,17 +153,35 @@ static void *spapr_create_fdt_skel(const char *cpu_model, &end_prop, sizeof(end_prop)))); _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); + /* + * Because we don't always invoke any firmware, we can't rely on + * that to do BAR allocation. Long term, we should probably do + * that ourselves, but for now, this setting (plus advertising the + * current BARs as 0) causes sufficiently recent kernels to to the + * BAR assignment themselves */ + _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0))); + _FDT((fdt_end_node(fdt))); - /* memory node */ + /* memory node(s) */ _FDT((fdt_begin_node(fdt, "memory@0"))); _FDT((fdt_property_string(fdt, "device_type", "memory"))); - _FDT((fdt_property(fdt, "reg", - mem_reg_property, sizeof(mem_reg_property)))); - + _FDT((fdt_property(fdt, "reg", mem_reg_property_rma, + sizeof(mem_reg_property_rma)))); _FDT((fdt_end_node(fdt))); + if (ram_size > rma_size) { + char mem_name[32]; + + sprintf(mem_name, "memory@%" PRIx64, (uint64_t)rma_size); + _FDT((fdt_begin_node(fdt, mem_name))); + _FDT((fdt_property_string(fdt, "device_type", "memory"))); + _FDT((fdt_property(fdt, "reg", mem_reg_property_nonrma, + sizeof(mem_reg_property_nonrma)))); + _FDT((fdt_end_node(fdt))); + } + /* cpus */ _FDT((fdt_begin_node(fdt, "cpus"))); @@ -164,13 +196,18 @@ static void *spapr_create_fdt_skel(const char *cpu_model, for (env = first_cpu; env != NULL; env = env->next_cpu) { int index = env->cpu_index; - uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */ + uint32_t servers_prop[smp_threads]; + uint32_t gservers_prop[smp_threads * 2]; char *nodename; uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), 0xffffffff, 0xffffffff}; uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; + if ((index % smt) != 0) { + continue; + } + if (asprintf(&nodename, "%s@%x", modelname, index) < 0) { fprintf(stderr, "Allocation failure\n"); exit(1); @@ -195,15 +232,41 @@ static void *spapr_create_fdt_skel(const char *cpu_model, pft_size_prop, sizeof(pft_size_prop)))); _FDT((fdt_property_string(fdt, "status", "okay"))); _FDT((fdt_property(fdt, "64-bit", NULL, 0))); - _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", index))); + + /* Build interrupt servers and gservers properties */ + for (i = 0; i < smp_threads; i++) { + servers_prop[i] = cpu_to_be32(index + i); + /* Hack, direct the group queues back to cpu 0 */ + gservers_prop[i*2] = cpu_to_be32(index + i); + gservers_prop[i*2 + 1] = 0; + } + _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)))); _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s", - gserver_prop, sizeof(gserver_prop)))); + gservers_prop, sizeof(gservers_prop)))); if (env->mmu_model & POWERPC_MMU_1TSEG) { _FDT((fdt_property(fdt, "ibm,processor-segment-sizes", segs, sizeof(segs)))); } + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; + + _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_property_cell(fdt, "ibm,dfp", 1))); + } + _FDT((fdt_end_node(fdt))); } @@ -260,6 +323,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, { int ret; void *fdt; + sPAPRPHBState *phb; fdt = g_malloc(FDT_MAX_SIZE); @@ -272,6 +336,15 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, exit(1); } + QLIST_FOREACH(phb, &spapr->phbs, list) { + ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt); + } + + if (ret < 0) { + fprintf(stderr, "couldn't setup PCI devices in fdt\n"); + exit(1); + } + /* RTAS */ ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size); if (ret < 0) { @@ -328,6 +401,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, int i; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); + target_phys_addr_t rma_alloc_size, rma_size; uint32_t initrd_base; long kernel_size, initrd_size, fw_size; long pteg_shift = 17; @@ -336,15 +410,28 @@ static void ppc_spapr_init(ram_addr_t ram_size, spapr = g_malloc(sizeof(*spapr)); cpu_ppc_hypercall = emulate_spapr_hypercall; - /* We place the device tree just below either the top of RAM, or - * 2GB, so that it can be processed with 32-bit code if - * necessary */ - spapr->fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE; + /* Allocate RMA if necessary */ + rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem); + + if (rma_alloc_size == -1) { + hw_error("qemu: Unable to create RMA\n"); + exit(1); + } + if (rma_alloc_size && (rma_alloc_size < ram_size)) { + rma_size = rma_alloc_size; + } else { + rma_size = ram_size; + } + + /* We place the device tree just below either the top of the RMA, + * or just below 2GB, whichever is lowere, so that it can be + * processed with 32-bit real mode code if necessary */ + spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE; spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE; /* init CPUs */ if (cpu_model == NULL) { - cpu_model = "POWER7"; + cpu_model = kvm_enabled() ? "host" : "POWER7"; } for (i = 0; i < smp_cpus; i++) { env = cpu_init(cpu_model); @@ -364,8 +451,13 @@ static void ppc_spapr_init(ram_addr_t ram_size, /* allocate RAM */ spapr->ram_limit = ram_size; - memory_region_init_ram(ram, NULL, "ppc_spapr.ram", spapr->ram_limit); - memory_region_add_subregion(sysmem, 0, ram); + if (spapr->ram_limit > rma_alloc_size) { + ram_addr_t nonrma_base = rma_alloc_size; + ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size; + + memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size); + memory_region_add_subregion(sysmem, nonrma_base, ram); + } /* allocate hash page table. For now we always make this 16mb, * later we should probably make it scale to the size of guest @@ -411,6 +503,12 @@ static void ppc_spapr_init(ram_addr_t ram_size, } } + /* Set up PCI */ + spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID, + SPAPR_PCI_MEM_WIN_ADDR, + SPAPR_PCI_MEM_WIN_SIZE, + SPAPR_PCI_IO_WIN_ADDR); + for (i = 0; i < nb_nics; i++) { NICInfo *nd = &nd_table[i]; @@ -421,10 +519,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, if (strcmp(nd->model, "ibmveth") == 0) { spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd); } else { - fprintf(stderr, "pSeries (sPAPR) platform does not support " - "NIC model '%s' (only ibmveth is supported)\n", - nd->model); - exit(1); + pci_nic_init_nofail(&nd_table[i], nd->model, NULL); } } @@ -489,7 +584,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, } /* Prepare the device tree */ - spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, + spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size, initrd_base, initrd_size, boot_device, kernel_cmdline, pteg_shift + 7); diff --git a/hw/spapr.h b/hw/spapr.h index 6657c336f6..df88f6abad 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -4,10 +4,12 @@ #include "hw/xics.h" struct VIOsPAPRBus; +struct sPAPRPHBState; struct icp_state; typedef struct sPAPREnvironment { struct VIOsPAPRBus *vio_bus; + QLIST_HEAD(, sPAPRPHBState) phbs; struct icp_state *icp; target_phys_addr_t ram_limit; diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c new file mode 100644 index 0000000000..2a5e6374c3 --- /dev/null +++ b/hw/spapr_pci.c @@ -0,0 +1,508 @@ +/* + * QEMU sPAPR PCI host originated from Uninorth PCI host + * + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation. + * Copyright (C) 2011 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw.h" +#include "pci.h" +#include "pci_host.h" +#include "hw/spapr.h" +#include "hw/spapr_pci.h" +#include "exec-memory.h" +#include <libfdt.h> + +#include "hw/pci_internals.h" + +static const uint32_t bars[] = { + PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1, + PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3, + PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5 + /*, PCI_ROM_ADDRESS*/ +}; + +static PCIDevice *find_dev(sPAPREnvironment *spapr, + uint64_t buid, uint32_t config_addr) +{ + DeviceState *qdev; + int devfn = (config_addr >> 8) & 0xFF; + sPAPRPHBState *phb; + + QLIST_FOREACH(phb, &spapr->phbs, list) { + if (phb->buid != buid) { + continue; + } + + QLIST_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) { + PCIDevice *dev = (PCIDevice *)qdev; + if (dev->devfn == devfn) { + return dev; + } + } + } + + return NULL; +} + +static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t val, size, addr; + uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0)); + + if (!dev) { + rtas_st(rets, 0, -1); + return; + } + size = rtas_ld(args, 3); + addr = rtas_ld(args, 0) & 0xFF; + val = pci_default_read_config(dev, addr, size); + rtas_st(rets, 0, 0); + rtas_st(rets, 1, val); +} + +static void rtas_read_pci_config(sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t val, size, addr; + PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0)); + + if (!dev) { + rtas_st(rets, 0, -1); + return; + } + size = rtas_ld(args, 1); + addr = rtas_ld(args, 0) & 0xFF; + val = pci_default_read_config(dev, addr, size); + rtas_st(rets, 0, 0); + rtas_st(rets, 1, val); +} + +static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t val, size, addr; + uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0)); + + if (!dev) { + rtas_st(rets, 0, -1); + return; + } + val = rtas_ld(args, 4); + size = rtas_ld(args, 3); + addr = rtas_ld(args, 0) & 0xFF; + pci_default_write_config(dev, addr, val, size); + rtas_st(rets, 0, 0); +} + +static void rtas_write_pci_config(sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t val, size, addr; + PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0)); + + if (!dev) { + rtas_st(rets, 0, -1); + return; + } + val = rtas_ld(args, 2); + size = rtas_ld(args, 1); + addr = rtas_ld(args, 0) & 0xFF; + pci_default_write_config(dev, addr, val, size); + rtas_st(rets, 0, 0); +} + +static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num) +{ + /* + * Here we need to convert pci_dev + irq_num to some unique value + * which is less than number of IRQs on the specific bus (now it + * is 16). At the moment irq_num == device_id (number of the + * slot?) + * FIXME: we should swizzle in fn and irq_num + */ + return (pci_dev->devfn >> 3) % SPAPR_PCI_NUM_LSI; +} + +static void pci_spapr_set_irq(void *opaque, int irq_num, int level) +{ + /* + * Here we use the number returned by pci_spapr_map_irq to find a + * corresponding qemu_irq. + */ + sPAPRPHBState *phb = opaque; + + qemu_set_irq(phb->lsi_table[irq_num].qirq, level); +} + +static int spapr_phb_init(SysBusDevice *s) +{ + sPAPRPHBState *phb = FROM_SYSBUS(sPAPRPHBState, s); + int i; + + /* Initialize the LSI table */ + for (i = 0; i < SPAPR_PCI_NUM_LSI; i++) { + qemu_irq qirq; + uint32_t num; + + qirq = spapr_allocate_irq(0, &num); + if (!qirq) { + return -1; + } + + phb->lsi_table[i].dt_irq = num; + phb->lsi_table[i].qirq = qirq; + } + + return 0; +} + +static int spapr_main_pci_host_init(PCIDevice *d) +{ + return 0; +} + +static PCIDeviceInfo spapr_main_pci_host_info = { + .qdev.name = "spapr-pci-host-bridge", + .qdev.size = sizeof(PCIDevice), + .init = spapr_main_pci_host_init, +}; + +static void spapr_register_devices(void) +{ + sysbus_register_dev("spapr-pci-host-bridge", sizeof(sPAPRPHBState), + spapr_phb_init); + pci_qdev_register(&spapr_main_pci_host_info); +} + +device_init(spapr_register_devices) + +static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + switch (size) { + case 1: + return cpu_inb(addr); + case 2: + return cpu_inw(addr); + case 4: + return cpu_inl(addr); + } + assert(0); +} + +static void spapr_io_write(void *opaque, target_phys_addr_t addr, + uint64_t data, unsigned size) +{ + switch (size) { + case 1: + cpu_outb(addr, data); + return; + case 2: + cpu_outw(addr, data); + return; + case 4: + cpu_outl(addr, data); + return; + } + assert(0); +} + +static MemoryRegionOps spapr_io_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + .read = spapr_io_read, + .write = spapr_io_write +}; + +void spapr_create_phb(sPAPREnvironment *spapr, + const char *busname, uint64_t buid, + uint64_t mem_win_addr, uint64_t mem_win_size, + uint64_t io_win_addr) +{ + DeviceState *dev; + SysBusDevice *s; + sPAPRPHBState *phb; + PCIBus *bus; + char namebuf[strlen(busname)+11]; + + dev = qdev_create(NULL, "spapr-pci-host-bridge"); + qdev_init_nofail(dev); + s = sysbus_from_qdev(dev); + phb = FROM_SYSBUS(sPAPRPHBState, s); + + phb->mem_win_addr = mem_win_addr; + + sprintf(namebuf, "%s-mem", busname); + memory_region_init(&phb->memspace, namebuf, INT64_MAX); + + sprintf(namebuf, "%s-memwindow", busname); + memory_region_init_alias(&phb->memwindow, namebuf, &phb->memspace, + SPAPR_PCI_MEM_WIN_BUS_OFFSET, mem_win_size); + memory_region_add_subregion(get_system_memory(), mem_win_addr, + &phb->memwindow); + + phb->io_win_addr = io_win_addr; + + /* On ppc, we only have MMIO no specific IO space from the CPU + * perspective. In theory we ought to be able to embed the PCI IO + * memory region direction in the system memory space. However, + * if any of the IO BAR subregions use the old_portio mechanism, + * that won't be processed properly unless accessed from the + * system io address space. This hack to bounce things via + * system_io works around the problem until all the users of + * old_portion are updated */ + sprintf(namebuf, "%s-io", busname); + memory_region_init(&phb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE); + /* FIXME: fix to support multiple PHBs */ + memory_region_add_subregion(get_system_io(), 0, &phb->iospace); + + sprintf(namebuf, "%s-iowindow", busname); + memory_region_init_io(&phb->iowindow, &spapr_io_ops, phb, + namebuf, SPAPR_PCI_IO_WIN_SIZE); + memory_region_add_subregion(get_system_memory(), io_win_addr, + &phb->iowindow); + + phb->host_state.bus = bus = pci_register_bus(&phb->busdev.qdev, busname, + pci_spapr_set_irq, + pci_spapr_map_irq, + phb, + &phb->memspace, &phb->iospace, + PCI_DEVFN(0, 0), + SPAPR_PCI_NUM_LSI); + + spapr_rtas_register("read-pci-config", rtas_read_pci_config); + spapr_rtas_register("write-pci-config", rtas_write_pci_config); + spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config); + spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config); + + QLIST_INSERT_HEAD(&spapr->phbs, phb, list); + + /* pci_bus_set_mem_base(bus, mem_va_start - SPAPR_PCI_MEM_BAR_START); */ +} + +/* Macros to operate with address in OF binding to PCI */ +#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) +#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ +#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ +#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ +#define b_ss(x) b_x((x), 24, 2) /* the space code */ +#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ +#define b_ddddd(x) b_x((x), 11, 5) /* device number */ +#define b_fff(x) b_x((x), 8, 3) /* function number */ +#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ + +static uint32_t regtype_to_ss(uint8_t type) +{ + if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + return 3; + } + if (type == PCI_BASE_ADDRESS_SPACE_IO) { + return 1; + } + return 2; +} + +int spapr_populate_pci_devices(sPAPRPHBState *phb, + uint32_t xics_phandle, + void *fdt) +{ + PCIBus *bus = phb->host_state.bus; + int bus_off, node_off = 0, devid, fn, i, n, devices; + DeviceState *qdev; + char nodename[256]; + struct { + uint32_t hi; + uint64_t addr; + uint64_t size; + } __attribute__((packed)) reg[PCI_NUM_REGIONS + 1], + assigned_addresses[PCI_NUM_REGIONS]; + uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; + struct { + uint32_t hi; + uint64_t child; + uint64_t parent; + uint64_t size; + } __attribute__((packed)) ranges[] = { + { + cpu_to_be32(b_ss(1)), cpu_to_be64(0), + cpu_to_be64(phb->io_win_addr), + cpu_to_be64(memory_region_size(&phb->iospace)), + }, + { + cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET), + cpu_to_be64(phb->mem_win_addr), + cpu_to_be64(memory_region_size(&phb->memwindow)), + }, + }; + uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 }; + uint32_t interrupt_map_mask[] = { + cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0}; + uint32_t interrupt_map[bus->nirq][7]; + + /* Start populating the FDT */ + sprintf(nodename, "pci@%" PRIx64, phb->buid); + bus_off = fdt_add_subnode(fdt, 0, nodename); + if (bus_off < 0) { + return bus_off; + } + +#define _FDT(exp) \ + do { \ + int ret = (exp); \ + if (ret < 0) { \ + return ret; \ + } \ + } while (0) + + /* Write PHB properties */ + _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); + _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB")); + _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3)); + _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2)); + _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1)); + _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0)); + _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range))); + _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges))); + _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); + _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask", + &interrupt_map_mask, sizeof(interrupt_map_mask))); + + /* Populate PCI devices and allocate IRQs */ + devices = 0; + QLIST_FOREACH(qdev, &bus->qbus.children, sibling) { + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev); + int irq_index = pci_spapr_map_irq(dev, 0); + uint32_t *irqmap = interrupt_map[devices]; + uint8_t *config = dev->config; + + devid = dev->devfn >> 3; + fn = dev->devfn & 7; + + sprintf(nodename, "pci@%u,%u", devid, fn); + + /* Allocate interrupt from the map */ + if (devid > bus->nirq) { + printf("Unexpected behaviour in spapr_populate_pci_devices," + "wrong devid %u\n", devid); + exit(-1); + } + irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn)); + irqmap[1] = 0; + irqmap[2] = 0; + irqmap[3] = 0; + irqmap[4] = cpu_to_be32(xics_phandle); + irqmap[5] = cpu_to_be32(phb->lsi_table[irq_index].dt_irq); + irqmap[6] = cpu_to_be32(0x8); + + /* Add node to FDT */ + node_off = fdt_add_subnode(fdt, bus_off, nodename); + if (node_off < 0) { + return node_off; + } + + _FDT(fdt_setprop_cell(fdt, node_off, "vendor-id", + pci_get_word(&config[PCI_VENDOR_ID]))); + _FDT(fdt_setprop_cell(fdt, node_off, "device-id", + pci_get_word(&config[PCI_DEVICE_ID]))); + _FDT(fdt_setprop_cell(fdt, node_off, "revision-id", + pci_get_byte(&config[PCI_REVISION_ID]))); + _FDT(fdt_setprop_cell(fdt, node_off, "class-code", + pci_get_long(&config[PCI_CLASS_REVISION]) >> 8)); + _FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id", + pci_get_word(&config[PCI_SUBSYSTEM_ID]))); + _FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id", + pci_get_word(&config[PCI_SUBSYSTEM_VENDOR_ID]))); + + /* Config space region comes first */ + reg[0].hi = cpu_to_be32( + b_n(0) | + b_p(0) | + b_t(0) | + b_ss(0/*config*/) | + b_bbbbbbbb(0) | + b_ddddd(devid) | + b_fff(fn)); + reg[0].addr = 0; + reg[0].size = 0; + + n = 0; + for (i = 0; i < PCI_NUM_REGIONS; ++i) { + if (0 == dev->io_regions[i].size) { + continue; + } + + reg[n+1].hi = cpu_to_be32( + b_n(0) | + b_p(0) | + b_t(0) | + b_ss(regtype_to_ss(dev->io_regions[i].type)) | + b_bbbbbbbb(0) | + b_ddddd(devid) | + b_fff(fn) | + b_rrrrrrrr(bars[i])); + reg[n+1].addr = 0; + reg[n+1].size = cpu_to_be64(dev->io_regions[i].size); + + assigned_addresses[n].hi = cpu_to_be32( + b_n(1) | + b_p(0) | + b_t(0) | + b_ss(regtype_to_ss(dev->io_regions[i].type)) | + b_bbbbbbbb(0) | + b_ddddd(devid) | + b_fff(fn) | + b_rrrrrrrr(bars[i])); + + /* + * Writing zeroes to assigned_addresses causes the guest kernel to + * reassign BARs + */ + assigned_addresses[n].addr = cpu_to_be64(dev->io_regions[i].addr); + assigned_addresses[n].size = reg[n+1].size; + + ++n; + } + _FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1))); + _FDT(fdt_setprop(fdt, node_off, "assigned-addresses", + assigned_addresses, + sizeof(assigned_addresses[0])*(n))); + _FDT(fdt_setprop_cell(fdt, node_off, "interrupts", + pci_get_byte(&config[PCI_INTERRUPT_PIN]))); + + ++devices; + } + + /* Write interrupt map */ + _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map, + devices * sizeof(interrupt_map[0]))); + + return 0; +} diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h new file mode 100644 index 0000000000..213340c915 --- /dev/null +++ b/hw/spapr_pci.h @@ -0,0 +1,61 @@ +/* + * QEMU SPAPR PCI BUS definitions + * + * Copyright (c) 2011 Alexey Kardashevskiy <aik@au1.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ +#if !defined(__HW_SPAPR_H__) +#error Please include spapr.h before this file! +#endif + +#if !defined(__HW_SPAPR_PCI_H__) +#define __HW_SPAPR_PCI_H__ + +#include "hw/pci_host.h" +#include "hw/xics.h" + +#define SPAPR_PCI_NUM_LSI 16 + +typedef struct sPAPRPHBState { + SysBusDevice busdev; + PCIHostState host_state; + + uint64_t buid; + + MemoryRegion memspace, iospace; + target_phys_addr_t mem_win_addr, io_win_addr; + MemoryRegion memwindow, iowindow; + + struct { + uint32_t dt_irq; + qemu_irq qirq; + } lsi_table[SPAPR_PCI_NUM_LSI]; + + QLIST_ENTRY(sPAPRPHBState) list; +} sPAPRPHBState; + +#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL +#define SPAPR_PCI_IO_WIN_SIZE 0x10000 + +void spapr_create_phb(sPAPREnvironment *spapr, + const char *busname, uint64_t buid, + uint64_t mem_win_addr, uint64_t mem_win_size, + uint64_t io_win_addr); + +int spapr_populate_pci_devices(sPAPRPHBState *phb, + uint32_t xics_phandle, + void *fdt); + +#endif /* __HW_SPAPR_PCI_H__ */ diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c index 977603f81e..25cfc9d912 100644 --- a/hw/spapr_vio.c +++ b/hw/spapr_vio.c @@ -165,7 +165,13 @@ static void rtce_init(VIOsPAPRDevice *dev) * sizeof(VIOsPAPR_RTCE); if (size) { - dev->rtce_table = g_malloc0(size); + dev->rtce_table = kvmppc_create_spapr_tce(dev->reg, + dev->rtce_window_size, + &dev->kvmtce_fd); + + if (!dev->rtce_table) { + dev->rtce_table = g_malloc0(size); + } } } diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h index 4fe5f742c2..a325a5f4b3 100644 --- a/hw/spapr_vio.h +++ b/hw/spapr_vio.h @@ -57,6 +57,7 @@ typedef struct VIOsPAPRDevice { target_ulong signal_state; uint32_t rtce_window_size; VIOsPAPR_RTCE *rtce_table; + int kvmtce_fd; VIOsPAPR_CRQ crq; } VIOsPAPRDevice; |