aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorBlue Swirl <blauwirbel@gmail.com>2011-11-01 20:57:01 +0000
committerBlue Swirl <blauwirbel@gmail.com>2011-11-01 20:57:01 +0000
commite927dab1fda0eee41fa5fe51ae98293f66a86db1 (patch)
treef4e2c0f47ef3f0e4821d019bb796a1a1c0c225a8 /hw
parent2ff6458116546ced7ce00cc39423ee30b1477e67 (diff)
parent3384f95c59e5db381cf3e605c8acec71baf0e6b8 (diff)
Merge branch 'ppc-next' of git://repo.or.cz/qemu/agraf
* 'ppc-next' of git://repo.or.cz/qemu/agraf: (24 commits) pseries: Add partial support for PCI ppc: Alter CPU state to mask out TCG unimplemented instructions as appropriate pseries: Allow writes to KVM accelerated TCE table KVM: PPC: Override host vmx/vsx/dfp only when information known ppc: Fix up usermode only builds pseries: Correct vmx/dfp handling in both KVM and TCG cases PPC: Fail configure when libfdt is not available ppc: Avoid decrementer related kvm exits PPC: Disable non-440 CPUs for ppcemb target PPC: Bump qemu-system-ppc to 64-bit physical address space pseries: Under kvm use guest cpu = host cpu by default ppc: Add cpu defs for POWER7 revisions 2.1 and 2.3 ppc: First cut implementation of -cpu host ppc: Remove broken partial PVR matching pseries: Update SLOF firmware image pseries: Add device tree properties for VMX/VSX and DFP under kvm ppc: Generalize the kvmppc_get_clockfreq() function Set an invalid-bits mask for each SPE instructions pseries: Update SLOF firmware image pseries: Use Book3S-HV TCE acceleration capabilities ...
Diffstat (limited to 'hw')
-rw-r--r--hw/ppc.c6
-rw-r--r--hw/ppce500_pci.c82
-rw-r--r--hw/spapr.c135
-rw-r--r--hw/spapr.h2
-rw-r--r--hw/spapr_pci.c508
-rw-r--r--hw/spapr_pci.h61
-rw-r--r--hw/spapr_vio.c8
-rw-r--r--hw/spapr_vio.h1
8 files changed, 762 insertions, 41 deletions
diff --git a/hw/ppc.c b/hw/ppc.c
index 25b59dddaa..d29af0bb35 100644
--- a/hw/ppc.c
+++ b/hw/ppc.c
@@ -662,6 +662,12 @@ static void __cpu_ppc_store_decr (CPUState *env, uint64_t *nextp,
LOG_TB("%s: %08" PRIx32 " => %08" PRIx32 "\n", __func__,
decr, value);
+
+ if (kvm_enabled()) {
+ /* KVM handles decrementer exceptions, we don't need our own timer */
+ return;
+ }
+
now = qemu_get_clock_ns(vm_clock);
next = now + muldiv64(value, get_ticks_per_sec(), tb_env->decr_freq);
if (is_excp) {
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 2db365d0b6..960a5d0c60 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -89,6 +89,7 @@ static uint32_t pci_reg_read4(void *opaque, target_phys_addr_t addr)
PPCE500PCIState *pci = opaque;
unsigned long win;
uint32_t value = 0;
+ int idx;
win = addr & 0xfe0;
@@ -97,24 +98,44 @@ static uint32_t pci_reg_read4(void *opaque, target_phys_addr_t addr)
case PPCE500_PCI_OW2:
case PPCE500_PCI_OW3:
case PPCE500_PCI_OW4:
+ idx = (addr >> 5) & 0x7;
switch (addr & 0xC) {
- case PCI_POTAR: value = pci->pob[(addr >> 5) & 0x7].potar; break;
- case PCI_POTEAR: value = pci->pob[(addr >> 5) & 0x7].potear; break;
- case PCI_POWBAR: value = pci->pob[(addr >> 5) & 0x7].powbar; break;
- case PCI_POWAR: value = pci->pob[(addr >> 5) & 0x7].powar; break;
- default: break;
+ case PCI_POTAR:
+ value = pci->pob[idx].potar;
+ break;
+ case PCI_POTEAR:
+ value = pci->pob[idx].potear;
+ break;
+ case PCI_POWBAR:
+ value = pci->pob[idx].powbar;
+ break;
+ case PCI_POWAR:
+ value = pci->pob[idx].powar;
+ break;
+ default:
+ break;
}
break;
case PPCE500_PCI_IW3:
case PPCE500_PCI_IW2:
case PPCE500_PCI_IW1:
+ idx = ((addr >> 5) & 0x3) - 1;
switch (addr & 0xC) {
- case PCI_PITAR: value = pci->pib[(addr >> 5) & 0x3].pitar; break;
- case PCI_PIWBAR: value = pci->pib[(addr >> 5) & 0x3].piwbar; break;
- case PCI_PIWBEAR: value = pci->pib[(addr >> 5) & 0x3].piwbear; break;
- case PCI_PIWAR: value = pci->pib[(addr >> 5) & 0x3].piwar; break;
- default: break;
+ case PCI_PITAR:
+ value = pci->pib[idx].pitar;
+ break;
+ case PCI_PIWBAR:
+ value = pci->pib[idx].piwbar;
+ break;
+ case PCI_PIWBEAR:
+ value = pci->pib[idx].piwbear;
+ break;
+ case PCI_PIWAR:
+ value = pci->pib[idx].piwar;
+ break;
+ default:
+ break;
};
break;
@@ -142,6 +163,7 @@ static void pci_reg_write4(void *opaque, target_phys_addr_t addr,
{
PPCE500PCIState *pci = opaque;
unsigned long win;
+ int idx;
win = addr & 0xfe0;
@@ -153,24 +175,44 @@ static void pci_reg_write4(void *opaque, target_phys_addr_t addr,
case PPCE500_PCI_OW2:
case PPCE500_PCI_OW3:
case PPCE500_PCI_OW4:
+ idx = (addr >> 5) & 0x7;
switch (addr & 0xC) {
- case PCI_POTAR: pci->pob[(addr >> 5) & 0x7].potar = value; break;
- case PCI_POTEAR: pci->pob[(addr >> 5) & 0x7].potear = value; break;
- case PCI_POWBAR: pci->pob[(addr >> 5) & 0x7].powbar = value; break;
- case PCI_POWAR: pci->pob[(addr >> 5) & 0x7].powar = value; break;
- default: break;
+ case PCI_POTAR:
+ pci->pob[idx].potar = value;
+ break;
+ case PCI_POTEAR:
+ pci->pob[idx].potear = value;
+ break;
+ case PCI_POWBAR:
+ pci->pob[idx].powbar = value;
+ break;
+ case PCI_POWAR:
+ pci->pob[idx].powar = value;
+ break;
+ default:
+ break;
};
break;
case PPCE500_PCI_IW3:
case PPCE500_PCI_IW2:
case PPCE500_PCI_IW1:
+ idx = ((addr >> 5) & 0x3) - 1;
switch (addr & 0xC) {
- case PCI_PITAR: pci->pib[(addr >> 5) & 0x3].pitar = value; break;
- case PCI_PIWBAR: pci->pib[(addr >> 5) & 0x3].piwbar = value; break;
- case PCI_PIWBEAR: pci->pib[(addr >> 5) & 0x3].piwbear = value; break;
- case PCI_PIWAR: pci->pib[(addr >> 5) & 0x3].piwar = value; break;
- default: break;
+ case PCI_PITAR:
+ pci->pib[idx].pitar = value;
+ break;
+ case PCI_PIWBAR:
+ pci->pib[idx].piwbar = value;
+ break;
+ case PCI_PIWBEAR:
+ pci->pib[idx].piwbear = value;
+ break;
+ case PCI_PIWAR:
+ pci->pib[idx].piwar = value;
+ break;
+ default:
+ break;
};
break;
diff --git a/hw/spapr.c b/hw/spapr.c
index 63e5d336ea..bdaa938b6b 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -29,6 +29,9 @@
#include "elf.h"
#include "net.h"
#include "blockdev.h"
+#include "cpus.h"
+#include "kvm.h"
+#include "kvm_ppc.h"
#include "hw/boards.h"
#include "hw/ppc.h"
@@ -36,10 +39,12 @@
#include "hw/spapr.h"
#include "hw/spapr_vio.h"
+#include "hw/spapr_pci.h"
#include "hw/xics.h"
#include "kvm.h"
#include "kvm_ppc.h"
+#include "pci.h"
#include "exec-memory.h"
@@ -59,6 +64,11 @@
#define MAX_CPUS 256
#define XICS_IRQS 1024
+#define SPAPR_PCI_BUID 0x800000020000001ULL
+#define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000)
+#define SPAPR_PCI_MEM_WIN_SIZE 0x20000000
+#define SPAPR_PCI_IO_WIN_ADDR (0x10000000000ULL + 0x80000000)
+
#define PHANDLE_XICP 0x00001111
sPAPREnvironment *spapr;
@@ -88,6 +98,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num)
}
static void *spapr_create_fdt_skel(const char *cpu_model,
+ target_phys_addr_t rma_size,
target_phys_addr_t initrd_base,
target_phys_addr_t initrd_size,
const char *boot_device,
@@ -96,7 +107,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
{
void *fdt;
CPUState *env;
- uint64_t mem_reg_property[] = { 0, cpu_to_be64(ram_size) };
+ uint64_t mem_reg_property_rma[] = { 0, cpu_to_be64(rma_size) };
+ uint64_t mem_reg_property_nonrma[] = { cpu_to_be64(rma_size),
+ cpu_to_be64(ram_size - rma_size) };
uint32_t start_prop = cpu_to_be32(initrd_base);
uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
@@ -105,6 +118,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
int i;
char *modelname;
+ int smt = kvmppc_smt_threads();
#define _FDT(exp) \
do { \
@@ -139,17 +153,35 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
&end_prop, sizeof(end_prop))));
_FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
+ /*
+ * Because we don't always invoke any firmware, we can't rely on
+ * that to do BAR allocation. Long term, we should probably do
+ * that ourselves, but for now, this setting (plus advertising the
+ * current BARs as 0) causes sufficiently recent kernels to do the
+ * BAR assignment themselves */
+ _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0)));
+
_FDT((fdt_end_node(fdt)));
- /* memory node */
+ /* memory node(s) */
_FDT((fdt_begin_node(fdt, "memory@0")));
_FDT((fdt_property_string(fdt, "device_type", "memory")));
- _FDT((fdt_property(fdt, "reg",
- mem_reg_property, sizeof(mem_reg_property))));
-
+ _FDT((fdt_property(fdt, "reg", mem_reg_property_rma,
+ sizeof(mem_reg_property_rma))));
_FDT((fdt_end_node(fdt)));
+ if (ram_size > rma_size) {
+ char mem_name[32];
+
+ sprintf(mem_name, "memory@%" PRIx64, (uint64_t)rma_size);
+ _FDT((fdt_begin_node(fdt, mem_name)));
+ _FDT((fdt_property_string(fdt, "device_type", "memory")));
+ _FDT((fdt_property(fdt, "reg", mem_reg_property_nonrma,
+ sizeof(mem_reg_property_nonrma))));
+ _FDT((fdt_end_node(fdt)));
+ }
+
/* cpus */
_FDT((fdt_begin_node(fdt, "cpus")));
@@ -164,13 +196,18 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
for (env = first_cpu; env != NULL; env = env->next_cpu) {
int index = env->cpu_index;
- uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */
+ uint32_t servers_prop[smp_threads];
+ uint32_t gservers_prop[smp_threads * 2];
char *nodename;
uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
0xffffffff, 0xffffffff};
uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+ if ((index % smt) != 0) {
+ continue;
+ }
+
if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
fprintf(stderr, "Allocation failure\n");
exit(1);
@@ -195,15 +232,41 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
pft_size_prop, sizeof(pft_size_prop))));
_FDT((fdt_property_string(fdt, "status", "okay")));
_FDT((fdt_property(fdt, "64-bit", NULL, 0)));
- _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", index)));
+
+ /* Build interrupt servers and gservers properties */
+ for (i = 0; i < smp_threads; i++) {
+ servers_prop[i] = cpu_to_be32(index + i);
+ /* Hack, direct the group queues back to cpu 0 */
+ gservers_prop[i*2] = cpu_to_be32(index + i);
+ gservers_prop[i*2 + 1] = 0;
+ }
+ _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+ servers_prop, sizeof(servers_prop))));
_FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
- gserver_prop, sizeof(gserver_prop))));
+ gservers_prop, sizeof(gservers_prop))));
if (env->mmu_model & POWERPC_MMU_1TSEG) {
_FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
segs, sizeof(segs))));
}
+ /* Advertise VMX/VSX (vector extensions) if available
+ * 0 / no property == no vector extensions
+ * 1 == VMX / Altivec available
+ * 2 == VSX available */
+ if (env->insns_flags & PPC_ALTIVEC) {
+ uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+ _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
+ }
+
+ /* Advertise DFP (Decimal Floating Point) if available
+ * 0 / no property == no DFP
+ * 1 == DFP available */
+ if (env->insns_flags2 & PPC2_DFP) {
+ _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
+ }
+
_FDT((fdt_end_node(fdt)));
}
@@ -260,6 +323,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
{
int ret;
void *fdt;
+ sPAPRPHBState *phb;
fdt = g_malloc(FDT_MAX_SIZE);
@@ -272,6 +336,15 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
exit(1);
}
+ QLIST_FOREACH(phb, &spapr->phbs, list) {
+ ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
+ }
+
+ if (ret < 0) {
+ fprintf(stderr, "couldn't setup PCI devices in fdt\n");
+ exit(1);
+ }
+
/* RTAS */
ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
if (ret < 0) {
@@ -328,6 +401,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
int i;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
+ target_phys_addr_t rma_alloc_size, rma_size;
uint32_t initrd_base;
long kernel_size, initrd_size, fw_size;
long pteg_shift = 17;
@@ -336,15 +410,28 @@ static void ppc_spapr_init(ram_addr_t ram_size,
spapr = g_malloc(sizeof(*spapr));
cpu_ppc_hypercall = emulate_spapr_hypercall;
- /* We place the device tree just below either the top of RAM, or
- * 2GB, so that it can be processed with 32-bit code if
- * necessary */
- spapr->fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE;
+ /* Allocate RMA if necessary */
+ rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
+
+ if (rma_alloc_size == -1) {
+ hw_error("qemu: Unable to create RMA\n");
+ exit(1);
+ }
+ if (rma_alloc_size && (rma_alloc_size < ram_size)) {
+ rma_size = rma_alloc_size;
+ } else {
+ rma_size = ram_size;
+ }
+
+ /* We place the device tree just below either the top of the RMA,
+ * or just below 2GB, whichever is lower, so that it can be
+ * processed with 32-bit real mode code if necessary */
+ spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE;
spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE;
/* init CPUs */
if (cpu_model == NULL) {
- cpu_model = "POWER7";
+ cpu_model = kvm_enabled() ? "host" : "POWER7";
}
for (i = 0; i < smp_cpus; i++) {
env = cpu_init(cpu_model);
@@ -364,8 +451,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
/* allocate RAM */
spapr->ram_limit = ram_size;
- memory_region_init_ram(ram, NULL, "ppc_spapr.ram", spapr->ram_limit);
- memory_region_add_subregion(sysmem, 0, ram);
+ if (spapr->ram_limit > rma_alloc_size) {
+ ram_addr_t nonrma_base = rma_alloc_size;
+ ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
+
+ memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size);
+ memory_region_add_subregion(sysmem, nonrma_base, ram);
+ }
/* allocate hash page table. For now we always make this 16mb,
* later we should probably make it scale to the size of guest
@@ -411,6 +503,12 @@ static void ppc_spapr_init(ram_addr_t ram_size,
}
}
+ /* Set up PCI */
+ spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
+ SPAPR_PCI_MEM_WIN_ADDR,
+ SPAPR_PCI_MEM_WIN_SIZE,
+ SPAPR_PCI_IO_WIN_ADDR);
+
for (i = 0; i < nb_nics; i++) {
NICInfo *nd = &nd_table[i];
@@ -421,10 +519,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
if (strcmp(nd->model, "ibmveth") == 0) {
spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd);
} else {
- fprintf(stderr, "pSeries (sPAPR) platform does not support "
- "NIC model '%s' (only ibmveth is supported)\n",
- nd->model);
- exit(1);
+ pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
}
}
@@ -489,7 +584,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
}
/* Prepare the device tree */
- spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
+ spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
initrd_base, initrd_size,
boot_device, kernel_cmdline,
pteg_shift + 7);
diff --git a/hw/spapr.h b/hw/spapr.h
index 6657c336f6..df88f6abad 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -4,10 +4,12 @@
#include "hw/xics.h"
struct VIOsPAPRBus;
+struct sPAPRPHBState;
struct icp_state;
typedef struct sPAPREnvironment {
struct VIOsPAPRBus *vio_bus;
+ QLIST_HEAD(, sPAPRPHBState) phbs;
struct icp_state *icp;
target_phys_addr_t ram_limit;
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
new file mode 100644
index 0000000000..2a5e6374c3
--- /dev/null
+++ b/hw/spapr_pci.c
@@ -0,0 +1,508 @@
+/*
+ * QEMU sPAPR PCI host originated from Uninorth PCI host
+ *
+ * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
+ * Copyright (C) 2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "pci.h"
+#include "pci_host.h"
+#include "hw/spapr.h"
+#include "hw/spapr_pci.h"
+#include "exec-memory.h"
+#include <libfdt.h>
+
+#include "hw/pci_internals.h"
+
+/* Config-space register offset of each standard BAR, indexed by BAR
+ * number.  The expansion ROM register (PCI_ROM_ADDRESS) is not listed. */
+static const uint32_t bars[] = {
+    PCI_BASE_ADDRESS_0,
+    PCI_BASE_ADDRESS_1,
+    PCI_BASE_ADDRESS_2,
+    PCI_BASE_ADDRESS_3,
+    PCI_BASE_ADDRESS_4,
+    PCI_BASE_ADDRESS_5,
+};
+
+/*
+ * Find the PCI device that an RTAS config-space request refers to.
+ *
+ * Only PHBs whose buid field equals @buid are searched; bits 8..15 of
+ * @config_addr are compared against each device's devfn.
+ * Returns the matching device, or NULL if there is none.
+ */
+static PCIDevice *find_dev(sPAPREnvironment *spapr,
+                           uint64_t buid, uint32_t config_addr)
+{
+    int wanted_devfn = (config_addr >> 8) & 0xFF;
+    sPAPRPHBState *bridge;
+    DeviceState *child;
+
+    QLIST_FOREACH(bridge, &spapr->phbs, list) {
+        if (bridge->buid == buid) {
+            QLIST_FOREACH(child, &bridge->host_state.bus->qbus.children,
+                          sibling) {
+                PCIDevice *candidate = (PCIDevice *)child;
+
+                if (candidate->devfn == wanted_devfn) {
+                    return candidate;
+                }
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * RTAS "ibm,read-pci-config" handler.
+ *
+ * Arguments read from @args: [0] config_addr (devfn in bits 8..15,
+ * register offset in bits 0..7), [1]/[2] high/low word of the PHB's
+ * BUID, [3] access size in bytes.
+ * Results stored to @rets: [0] status (0 on success, -1 on failure),
+ * [1] the value read (only on success).
+ */
+static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
+                                     uint32_t token, uint32_t nargs,
+                                     target_ulong args,
+                                     uint32_t nret, target_ulong rets)
+{
+    uint32_t val, size, addr;
+    uint64_t buid;
+    PCIDevice *dev;
+
+    /* Refuse calls that do not provide every argument/return slot used
+     * below, instead of reading or writing beyond what the caller gave us.
+     * NOTE(review): assumes nargs/nret count the slots behind args/rets. */
+    if ((nargs < 4) || (nret < 2)) {
+        if (nret >= 1) {
+            rtas_st(rets, 0, -1);
+        }
+        return;
+    }
+
+    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    dev = find_dev(spapr, buid, rtas_ld(args, 0));
+    size = rtas_ld(args, 3);
+
+    /* PCI config accesses are 1, 2 or 4 bytes wide; reject anything else
+     * rather than hand an arbitrary length to the config accessor */
+    if (!dev || ((size != 1) && (size != 2) && (size != 4))) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    addr = rtas_ld(args, 0) & 0xFF;
+    val = pci_default_read_config(dev, addr, size);
+    rtas_st(rets, 0, 0);
+    rtas_st(rets, 1, val);
+}
+
+/*
+ * RTAS "read-pci-config" handler (variant without a BUID argument; the
+ * device is looked up with a BUID of 0).
+ *
+ * Arguments read from @args: [0] config_addr (devfn in bits 8..15,
+ * register offset in bits 0..7), [1] access size in bytes.
+ * Results stored to @rets: [0] status (0 on success, -1 on failure),
+ * [1] the value read (only on success).
+ */
+static void rtas_read_pci_config(sPAPREnvironment *spapr,
+                                 uint32_t token, uint32_t nargs,
+                                 target_ulong args,
+                                 uint32_t nret, target_ulong rets)
+{
+    uint32_t val, size, addr;
+    PCIDevice *dev;
+
+    /* Refuse calls that do not provide every argument/return slot used
+     * below, instead of reading or writing beyond what the caller gave us.
+     * NOTE(review): assumes nargs/nret count the slots behind args/rets. */
+    if ((nargs < 2) || (nret < 2)) {
+        if (nret >= 1) {
+            rtas_st(rets, 0, -1);
+        }
+        return;
+    }
+
+    dev = find_dev(spapr, 0, rtas_ld(args, 0));
+    size = rtas_ld(args, 1);
+
+    /* PCI config accesses are 1, 2 or 4 bytes wide; reject anything else
+     * rather than hand an arbitrary length to the config accessor */
+    if (!dev || ((size != 1) && (size != 2) && (size != 4))) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    addr = rtas_ld(args, 0) & 0xFF;
+    val = pci_default_read_config(dev, addr, size);
+    rtas_st(rets, 0, 0);
+    rtas_st(rets, 1, val);
+}
+
+/*
+ * RTAS "ibm,write-pci-config" handler.
+ *
+ * Arguments read from @args: [0] config_addr (devfn in bits 8..15,
+ * register offset in bits 0..7), [1]/[2] high/low word of the PHB's
+ * BUID, [3] access size in bytes, [4] value to write.
+ * Result stored to @rets: [0] status (0 on success, -1 on failure).
+ */
+static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr,
+                                      uint32_t token, uint32_t nargs,
+                                      target_ulong args,
+                                      uint32_t nret, target_ulong rets)
+{
+    uint32_t val, size, addr;
+    uint64_t buid;
+    PCIDevice *dev;
+
+    /* Without a return slot there is nowhere to report a status */
+    if (nret < 1) {
+        return;
+    }
+    /* Refuse calls that do not provide every argument read below.
+     * NOTE(review): assumes nargs counts the slots behind args. */
+    if (nargs < 5) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    dev = find_dev(spapr, buid, rtas_ld(args, 0));
+    size = rtas_ld(args, 3);
+
+    /* PCI config accesses are 1, 2 or 4 bytes wide; reject anything else
+     * rather than hand an arbitrary length to the config accessor */
+    if (!dev || ((size != 1) && (size != 2) && (size != 4))) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    val = rtas_ld(args, 4);
+    addr = rtas_ld(args, 0) & 0xFF;
+    pci_default_write_config(dev, addr, val, size);
+    rtas_st(rets, 0, 0);
+}
+
+/*
+ * RTAS "write-pci-config" handler (variant without a BUID argument; the
+ * device is looked up with a BUID of 0).
+ *
+ * Arguments read from @args: [0] config_addr (devfn in bits 8..15,
+ * register offset in bits 0..7), [1] access size in bytes, [2] value to
+ * write.
+ * Result stored to @rets: [0] status (0 on success, -1 on failure).
+ */
+static void rtas_write_pci_config(sPAPREnvironment *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+{
+    uint32_t val, size, addr;
+    PCIDevice *dev;
+
+    /* Without a return slot there is nowhere to report a status */
+    if (nret < 1) {
+        return;
+    }
+    /* Refuse calls that do not provide every argument read below.
+     * NOTE(review): assumes nargs counts the slots behind args. */
+    if (nargs < 3) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    dev = find_dev(spapr, 0, rtas_ld(args, 0));
+    size = rtas_ld(args, 1);
+
+    /* PCI config accesses are 1, 2 or 4 bytes wide; reject anything else
+     * rather than hand an arbitrary length to the config accessor */
+    if (!dev || ((size != 1) && (size != 2) && (size != 4))) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    val = rtas_ld(args, 2);
+    addr = rtas_ld(args, 0) & 0xFF;
+    pci_default_write_config(dev, addr, val, size);
+    rtas_st(rets, 0, 0);
+}
+
+/*
+ * Map a device's interrupt to an index into the PHB's LSI table.
+ *
+ * The index must be below the number of IRQs on the bus
+ * (SPAPR_PCI_NUM_LSI).  For now it depends only on the device (slot)
+ * number; @irq_num is ignored.
+ * FIXME: we should swizzle in fn and irq_num
+ */
+static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    int slot = pci_dev->devfn >> 3;
+
+    return slot % SPAPR_PCI_NUM_LSI;
+}
+
+/*
+ * Bus IRQ callback: @irq_num is the index produced by
+ * pci_spapr_map_irq(); drive the qemu_irq stored at that position of the
+ * PHB's LSI table to @level.  @opaque is the sPAPRPHBState.
+ */
+static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
+{
+    sPAPRPHBState *bridge = opaque;
+    qemu_irq line = bridge->lsi_table[irq_num].qirq;
+
+    qemu_set_irq(line, level);
+}
+
+/*
+ * Sysbus init hook of the host bridge: allocate one interrupt per LSI
+ * table slot and record both its number and its qemu_irq.
+ * Returns 0 on success, -1 as soon as an allocation fails.
+ */
+static int spapr_phb_init(SysBusDevice *s)
+{
+    sPAPRPHBState *bridge = FROM_SYSBUS(sPAPRPHBState, s);
+    int slot;
+
+    for (slot = 0; slot < SPAPR_PCI_NUM_LSI; slot++) {
+        uint32_t irq_number;
+        qemu_irq line = spapr_allocate_irq(0, &irq_number);
+
+        if (!line) {
+            return -1;
+        }
+
+        bridge->lsi_table[slot].dt_irq = irq_number;
+        bridge->lsi_table[slot].qirq = line;
+    }
+
+    return 0;
+}
+
+/* Init hook of the bridge's PCI-side device: nothing to set up, always
+ * reports success. */
+static int spapr_main_pci_host_init(PCIDevice *d)
+{
+    (void)d;
+
+    return 0;
+}
+
+/* Registration record for the PCI-side device of the host bridge */
+static PCIDeviceInfo spapr_main_pci_host_info = {
+    .init = spapr_main_pci_host_init,
+    .qdev = {
+        .name = "spapr-pci-host-bridge",
+        .size = sizeof(PCIDevice),
+    },
+};
+
+/*
+ * Register both faces of the host bridge under the same name: first the
+ * sysbus device (state: sPAPRPHBState), then the PCI device.
+ */
+static void spapr_register_devices(void)
+{
+    /* sysbus side */
+    sysbus_register_dev("spapr-pci-host-bridge",
+                        sizeof(sPAPRPHBState),
+                        spapr_phb_init);
+
+    /* PCI side */
+    pci_qdev_register(&spapr_main_pci_host_info);
+}
+
+device_init(spapr_register_devices)
+
+/*
+ * Read callback of the PHB IO window: forward the access to the system
+ * IO port space at the same offset.  Only 1, 2 and 4 byte accesses are
+ * supported.
+ */
+static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
+                              unsigned size)
+{
+    switch (size) {
+    case 1:
+        return cpu_inb(addr);
+    case 2:
+        return cpu_inw(addr);
+    case 4:
+        return cpu_inl(addr);
+    default:
+        break;
+    }
+
+    /* Unsupported width.  The explicit return keeps the function
+     * well-defined when assertions are compiled out (NDEBUG). */
+    assert(0);
+    return 0;
+}
+
+/*
+ * Write callback of the PHB IO window: forward the access to the system
+ * IO port space at the same offset.  Widths other than 1, 2 and 4 bytes
+ * trip an assertion.
+ */
+static void spapr_io_write(void *opaque, target_phys_addr_t addr,
+                           uint64_t data, unsigned size)
+{
+    if (size == 1) {
+        cpu_outb(addr, data);
+        return;
+    }
+    if (size == 2) {
+        cpu_outw(addr, data);
+        return;
+    }
+    if (size == 4) {
+        cpu_outl(addr, data);
+        return;
+    }
+
+    assert(0);
+}
+
+/* Access callbacks for the PHB IO window; the region is declared
+ * little-endian. */
+static MemoryRegionOps spapr_io_ops = {
+    .read = spapr_io_read,
+    .write = spapr_io_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Create a PCI host bridge (PHB), map its memory and IO windows into the
+ * system address space, register its PCI bus and the RTAS config-space
+ * calls, and link it into @spapr's list of PHBs.
+ *
+ * @busname:      name of the PCI bus; also the prefix of the names of the
+ *                memory regions created here
+ * @buid:         identifier of this PHB; stored in the PHB state, where
+ *                find_dev() and the device tree code read it back
+ * @mem_win_addr: system address at which the PCI memory window is mapped
+ * @mem_win_size: size of the PCI memory window
+ * @io_win_addr:  system address at which the PCI IO window is mapped
+ */
+void spapr_create_phb(sPAPREnvironment *spapr,
+                      const char *busname, uint64_t buid,
+                      uint64_t mem_win_addr, uint64_t mem_win_size,
+                      uint64_t io_win_addr)
+{
+    DeviceState *dev;
+    SysBusDevice *s;
+    sPAPRPHBState *phb;
+    /* Room for busname plus the longest suffix ("-memwindow") and NUL */
+    char namebuf[strlen(busname)+11];
+
+    dev = qdev_create(NULL, "spapr-pci-host-bridge");
+    qdev_init_nofail(dev);
+    s = sysbus_from_qdev(dev);
+    phb = FROM_SYSBUS(sPAPRPHBState, s);
+
+    /* Record the BUID: the lookup in find_dev() and the "pci@<buid>" node
+     * built by spapr_populate_pci_devices() both rely on this field.
+     * NOTE(review): the non-"ibm," RTAS config calls look devices up with
+     * BUID 0, so they only reach a PHB created with buid == 0. */
+    phb->buid = buid;
+
+    phb->mem_win_addr = mem_win_addr;
+
+    snprintf(namebuf, sizeof(namebuf), "%s-mem", busname);
+    memory_region_init(&phb->memspace, namebuf, INT64_MAX);
+
+    snprintf(namebuf, sizeof(namebuf), "%s-memwindow", busname);
+    memory_region_init_alias(&phb->memwindow, namebuf, &phb->memspace,
+                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, mem_win_size);
+    memory_region_add_subregion(get_system_memory(), mem_win_addr,
+                                &phb->memwindow);
+
+    phb->io_win_addr = io_win_addr;
+
+    /* On ppc, we only have MMIO no specific IO space from the CPU
+     * perspective.  In theory we ought to be able to embed the PCI IO
+     * memory region directly in the system memory space.  However,
+     * if any of the IO BAR subregions use the old_portio mechanism,
+     * that won't be processed properly unless accessed from the
+     * system io address space.  This hack to bounce things via
+     * system_io works around the problem until all the users of
+     * old_portio are updated */
+    snprintf(namebuf, sizeof(namebuf), "%s-io", busname);
+    memory_region_init(&phb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE);
+    /* FIXME: fix to support multiple PHBs */
+    memory_region_add_subregion(get_system_io(), 0, &phb->iospace);
+
+    snprintf(namebuf, sizeof(namebuf), "%s-iowindow", busname);
+    memory_region_init_io(&phb->iowindow, &spapr_io_ops, phb,
+                          namebuf, SPAPR_PCI_IO_WIN_SIZE);
+    memory_region_add_subregion(get_system_memory(), io_win_addr,
+                                &phb->iowindow);
+
+    phb->host_state.bus = pci_register_bus(&phb->busdev.qdev, busname,
+                                           pci_spapr_set_irq,
+                                           pci_spapr_map_irq,
+                                           phb,
+                                           &phb->memspace, &phb->iospace,
+                                           PCI_DEVFN(0, 0),
+                                           SPAPR_PCI_NUM_LSI);
+
+    spapr_rtas_register("read-pci-config", rtas_read_pci_config);
+    spapr_rtas_register("write-pci-config", rtas_write_pci_config);
+    spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
+    spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+
+    QLIST_INSERT_HEAD(&spapr->phbs, phb, list);
+
+    /* pci_bus_set_mem_base(bus, mem_va_start - SPAPR_PCI_MEM_BAR_START); */
+}
+
+/* Macros to operate with address in OF binding to PCI.
+ * b_x(x, p, l) places the low 'l' bits of 'x' at bit position 'p'.  The
+ * arithmetic is done in uint32_t so that setting bit 31 (b_n(1)) never
+ * shifts a 1 into the sign bit of an int, which is undefined in C. */
+#define b_x(x, p, l)    (((uint32_t)(x) & (((uint32_t)1 << (l)) - 1)) << (p))
+#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
+#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
+#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
+#define b_ss(x)         b_x((x), 24, 2) /* the space code */
+#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
+#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
+#define b_fff(x)        b_x((x), 8, 3)  /* function number */
+#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
+
+/*
+ * Translate a PCI region type into the space code used in the "ss"
+ * field: 3 when the 64-bit memory flag is set, 1 when the type is
+ * exactly PCI_BASE_ADDRESS_SPACE_IO, 2 otherwise.
+ */
+static uint32_t regtype_to_ss(uint8_t type)
+{
+    uint32_t space_code = 2;
+
+    if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+        space_code = 3;
+    } else if (type == PCI_BASE_ADDRESS_SPACE_IO) {
+        space_code = 1;
+    }
+
+    return space_code;
+}
+
+/*
+ * Describe @phb and the devices on its bus in the device tree @fdt.
+ *
+ * A "pci@<buid>" node is added under the root node, followed by one
+ * "pci@<dev>,<fn>" child per device carrying its IDs, "reg",
+ * "assigned-addresses" and "interrupts" properties.  The PHB's
+ * "interrupt-map" routes every device to the interrupt controller whose
+ * phandle is @xics_phandle, using the IRQ numbers stored in the PHB's
+ * LSI table.
+ *
+ * Returns 0 on success, or the negative value returned by the first
+ * failing fdt_* call.  Terminates the process if a device cannot be
+ * entered into the interrupt map.
+ */
+int spapr_populate_pci_devices(sPAPRPHBState *phb,
+                               uint32_t xics_phandle,
+                               void *fdt)
+{
+    PCIBus *bus = phb->host_state.bus;
+    int bus_off, node_off = 0, devid, fn, i, n, devices;
+    /* Only BARs whose config-space offset is listed in bars[] can be
+     * described; the table is shorter than PCI_NUM_REGIONS when that
+     * count also covers the expansion ROM. */
+    const int num_bars = sizeof(bars) / sizeof(bars[0]);
+    DeviceState *qdev;
+    char nodename[256];
+    struct {
+        uint32_t hi;
+        uint64_t addr;
+        uint64_t size;
+    } __attribute__((packed)) reg[PCI_NUM_REGIONS + 1],
+          assigned_addresses[PCI_NUM_REGIONS];
+    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
+    struct {
+        uint32_t hi;
+        uint64_t child;
+        uint64_t parent;
+        uint64_t size;
+    } __attribute__((packed)) ranges[] = {
+        {
+            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
+            cpu_to_be64(phb->io_win_addr),
+            cpu_to_be64(memory_region_size(&phb->iospace)),
+        },
+        {
+            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
+            cpu_to_be64(phb->mem_win_addr),
+            cpu_to_be64(memory_region_size(&phb->memwindow)),
+        },
+    };
+    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
+    uint32_t interrupt_map_mask[] = {
+        cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0};
+    uint32_t interrupt_map[bus->nirq][7];
+
+    /* Start populating the FDT */
+    sprintf(nodename, "pci@%" PRIx64, phb->buid);
+    bus_off = fdt_add_subnode(fdt, 0, nodename);
+    if (bus_off < 0) {
+        return bus_off;
+    }
+
+#define _FDT(exp) \
+    do { \
+        int ret = (exp); \
+        if (ret < 0) { \
+            return ret; \
+        } \
+    } while (0)
+
+    /* Write PHB properties */
+    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
+    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
+    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
+    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
+    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
+    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
+    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
+
+    /* Populate PCI devices and allocate IRQs */
+    devices = 0;
+    QLIST_FOREACH(qdev, &bus->qbus.children, sibling) {
+        PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev);
+        int irq_index = pci_spapr_map_irq(dev, 0);
+        uint32_t *irqmap;
+        uint8_t *config = dev->config;
+
+        devid = dev->devfn >> 3;
+        fn = dev->devfn & 7;
+
+        sprintf(nodename, "pci@%u,%u", devid, fn);
+
+        /* Allocate interrupt from the map */
+        if (devid > bus->nirq) {
+            printf("Unexpected behaviour in spapr_populate_pci_devices,"
+                    "wrong devid %u\n", devid);
+            exit(-1);
+        }
+        /* interrupt_map has bus->nirq rows and is filled one row per
+         * device: stop cleanly instead of writing past its end */
+        if (devices >= bus->nirq) {
+            fprintf(stderr, "spapr_populate_pci_devices: too many devices "
+                    "for the interrupt map (limit %d)\n", bus->nirq);
+            exit(1);
+        }
+        irqmap = interrupt_map[devices];
+        irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn));
+        irqmap[1] = 0;
+        irqmap[2] = 0;
+        irqmap[3] = 0;
+        irqmap[4] = cpu_to_be32(xics_phandle);
+        irqmap[5] = cpu_to_be32(phb->lsi_table[irq_index].dt_irq);
+        irqmap[6] = cpu_to_be32(0x8);
+
+        /* Add node to FDT */
+        node_off = fdt_add_subnode(fdt, bus_off, nodename);
+        if (node_off < 0) {
+            return node_off;
+        }
+
+        _FDT(fdt_setprop_cell(fdt, node_off, "vendor-id",
+                              pci_get_word(&config[PCI_VENDOR_ID])));
+        _FDT(fdt_setprop_cell(fdt, node_off, "device-id",
+                              pci_get_word(&config[PCI_DEVICE_ID])));
+        _FDT(fdt_setprop_cell(fdt, node_off, "revision-id",
+                              pci_get_byte(&config[PCI_REVISION_ID])));
+        _FDT(fdt_setprop_cell(fdt, node_off, "class-code",
+                              pci_get_long(&config[PCI_CLASS_REVISION]) >> 8));
+        _FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id",
+                              pci_get_word(&config[PCI_SUBSYSTEM_ID])));
+        _FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id",
+                              pci_get_word(&config[PCI_SUBSYSTEM_VENDOR_ID])));
+
+        /* Config space region comes first */
+        reg[0].hi = cpu_to_be32(
+            b_n(0) |
+            b_p(0) |
+            b_t(0) |
+            b_ss(0/*config*/) |
+            b_bbbbbbbb(0) |
+            b_ddddd(devid) |
+            b_fff(fn));
+        reg[0].addr = 0;
+        reg[0].size = 0;
+
+        n = 0;
+        /* Bounded by both tables indexed with i: io_regions[] and bars[] */
+        for (i = 0; i < PCI_NUM_REGIONS && i < num_bars; ++i) {
+            if (0 == dev->io_regions[i].size) {
+                continue;
+            }
+
+            reg[n+1].hi = cpu_to_be32(
+                b_n(0) |
+                b_p(0) |
+                b_t(0) |
+                b_ss(regtype_to_ss(dev->io_regions[i].type)) |
+                b_bbbbbbbb(0) |
+                b_ddddd(devid) |
+                b_fff(fn) |
+                b_rrrrrrrr(bars[i]));
+            reg[n+1].addr = 0;
+            reg[n+1].size = cpu_to_be64(dev->io_regions[i].size);
+
+            assigned_addresses[n].hi = cpu_to_be32(
+                b_n(1) |
+                b_p(0) |
+                b_t(0) |
+                b_ss(regtype_to_ss(dev->io_regions[i].type)) |
+                b_bbbbbbbb(0) |
+                b_ddddd(devid) |
+                b_fff(fn) |
+                b_rrrrrrrr(bars[i]));
+
+            /*
+             * Writing zeroes to assigned_addresses causes the guest kernel to
+             * reassign BARs
+             */
+            assigned_addresses[n].addr = cpu_to_be64(dev->io_regions[i].addr);
+            assigned_addresses[n].size = reg[n+1].size;
+
+            ++n;
+        }
+        _FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1)));
+        _FDT(fdt_setprop(fdt, node_off, "assigned-addresses",
+                         assigned_addresses,
+                         sizeof(assigned_addresses[0])*(n)));
+        _FDT(fdt_setprop_cell(fdt, node_off, "interrupts",
+                              pci_get_byte(&config[PCI_INTERRUPT_PIN])));
+
+        ++devices;
+    }
+
+    /* Write interrupt map */
+    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
+                     devices * sizeof(interrupt_map[0])));
+
+    return 0;
+}
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
new file mode 100644
index 0000000000..213340c915
--- /dev/null
+++ b/hw/spapr_pci.h
@@ -0,0 +1,61 @@
+/*
+ * QEMU SPAPR PCI BUS definitions
+ *
+ * Copyright (c) 2011 Alexey Kardashevskiy <aik@au1.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#if !defined(__HW_SPAPR_H__)
+#error Please include spapr.h before this file!
+#endif
+
+#if !defined(__HW_SPAPR_PCI_H__)
+#define __HW_SPAPR_PCI_H__
+
+#include "hw/pci_host.h"
+#include "hw/xics.h"
+
+#define SPAPR_PCI_NUM_LSI 16
+
+/* State of one sPAPR PCI host bridge (PHB) */
+typedef struct sPAPRPHBState {
+    SysBusDevice busdev;                /* sysbus device of the bridge */
+    PCIHostState host_state;            /* holds the PCI bus pointer */
+
+    uint64_t buid;                      /* identifier matched by find_dev() */
+
+    MemoryRegion memspace;              /* PCI memory space */
+    MemoryRegion iospace;               /* PCI IO space */
+    target_phys_addr_t mem_win_addr;    /* system address of memwindow */
+    target_phys_addr_t io_win_addr;     /* system address of iowindow */
+    MemoryRegion memwindow;             /* alias into memspace */
+    MemoryRegion iowindow;              /* window served by spapr_io_ops */
+
+    /* One entry per level-signalled interrupt of the bus */
+    struct {
+        uint32_t dt_irq;                /* IRQ number put in the device tree */
+        qemu_irq qirq;                  /* line raised by pci_spapr_set_irq() */
+    } lsi_table[SPAPR_PCI_NUM_LSI];
+
+    QLIST_ENTRY(sPAPRPHBState) list;    /* link in sPAPREnvironment.phbs */
+} sPAPRPHBState;
+
+#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
+#define SPAPR_PCI_IO_WIN_SIZE 0x10000
+
+void spapr_create_phb(sPAPREnvironment *spapr,
+ const char *busname, uint64_t buid,
+ uint64_t mem_win_addr, uint64_t mem_win_size,
+ uint64_t io_win_addr);
+
+int spapr_populate_pci_devices(sPAPRPHBState *phb,
+ uint32_t xics_phandle,
+ void *fdt);
+
+#endif /* __HW_SPAPR_PCI_H__ */
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index 977603f81e..25cfc9d912 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -165,7 +165,13 @@ static void rtce_init(VIOsPAPRDevice *dev)
* sizeof(VIOsPAPR_RTCE);
if (size) {
- dev->rtce_table = g_malloc0(size);
+ dev->rtce_table = kvmppc_create_spapr_tce(dev->reg,
+ dev->rtce_window_size,
+ &dev->kvmtce_fd);
+
+ if (!dev->rtce_table) {
+ dev->rtce_table = g_malloc0(size);
+ }
}
}
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index 4fe5f742c2..a325a5f4b3 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -57,6 +57,7 @@ typedef struct VIOsPAPRDevice {
target_ulong signal_state;
uint32_t rtce_window_size;
VIOsPAPR_RTCE *rtce_table;
+ int kvmtce_fd;
VIOsPAPR_CRQ crq;
} VIOsPAPRDevice;