aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-01-11 19:46:28 +0000
committerAlexander Graf <agraf@suse.de>2012-01-21 05:17:02 +0100
commit4d8d5467cd6e324fb49ae97b9d5dcee3973d9a19 (patch)
tree9e7a863dc3bd53e1aa58b72f9f789714cd103dca /hw
parentc9c3c80af71dd2b7813d1ada9b14cb51df584221 (diff)
pseries: SLOF PCI flag day
Currently on the pseries machine the SLOF firmware is used normally, but we bypass it when -kernel is specified. Having these two different boot paths can cause some confusion. In particular, at present we need to "probe" the (emulated) PCI bus and produce device tree nodes for the PCI devices in qemu, for the -kernel case. In the SLOF case, SLOF takes the device tree from qemu, adds some stuff to it, then passes it on to the kernel. It's been decided that a better approach is to always boot through SLOF, even when using -kernel. With this approach we can leave PCI probing and device node creation to SLOF in all cases, which removes a bunch of code in qemu, and avoids iterating the PCI devices from the machine-specific init code, which we're not supposed to do. This patch changes qemu to always boot through SLOF, and not to create PCI nodes. Simultaneously it updates the included version of SLOF (submodule and binary image) to one which supports (and requires) the new approach. The new SLOF version also includes a number of unrelated enhancements: support for booting from virtio-pci devices and e1000, greatly improved FCode support and many bugfixes. It also makes SLOF ready to be used even when specifying a kernel on the qemu command line. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'hw')
-rw-r--r--hw/spapr.c135
-rw-r--r--hw/spapr_pci.c134
2 files changed, 103 insertions, 166 deletions
diff --git a/hw/spapr.c b/hw/spapr.c
index 0e1f80dfdc..b011371813 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -50,19 +50,29 @@
#include <libfdt.h>
-#define KERNEL_LOAD_ADDR 0x00000000
-#define INITRD_LOAD_ADDR 0x02800000
+/* SLOF memory layout:
+ *
+ * SLOF raw image is loaded at 0, copies its romfs right below the flat
+ * device-tree, then positions SLOF itself 31M below that
+ *
+ * So we set FW_OVERHEAD to 40MB which should account for all of that
+ * and more
+ *
+ * We load our kernel at 4M, leaving space for SLOF initial image
+ */
#define FDT_MAX_SIZE 0x10000
#define RTAS_MAX_SIZE 0x10000
#define FW_MAX_SIZE 0x400000
#define FW_FILE_NAME "slof.bin"
+#define FW_OVERHEAD 0x2800000
+#define KERNEL_LOAD_ADDR FW_MAX_SIZE
-#define MIN_RMA_SLOF 128UL
+#define MIN_RMA_SLOF 128UL
#define TIMEBASE_FREQ 512000000ULL
#define MAX_CPUS 256
-#define XICS_IRQS 1024
+#define XICS_IRQS 1024
#define SPAPR_PCI_BUID 0x800000020000001ULL
#define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000)
@@ -139,6 +149,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
target_phys_addr_t rma_size,
target_phys_addr_t initrd_base,
target_phys_addr_t initrd_size,
+ target_phys_addr_t kernel_size,
const char *boot_device,
const char *kernel_cmdline,
long hash_shift)
@@ -176,6 +187,12 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
fdt = g_malloc0(FDT_MAX_SIZE);
_FDT((fdt_create(fdt, FDT_MAX_SIZE)));
+ if (kernel_size) {
+ _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
+ }
+ if (initrd_size) {
+ _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
+ }
_FDT((fdt_finish_reservemap(fdt)));
/* Root node */
@@ -197,15 +214,13 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
&start_prop, sizeof(start_prop))));
_FDT((fdt_property(fdt, "linux,initrd-end",
&end_prop, sizeof(end_prop))));
- _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
+ if (kernel_size) {
+ uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
+ cpu_to_be64(kernel_size) };
- /*
- * Because we don't always invoke any firmware, we can't rely on
- * that to do BAR allocation. Long term, we should probably do
- * that ourselves, but for now, this setting (plus advertising the
- * current BARs as 0) causes sufficiently recent kernels to to the
- * BAR assignment themselves */
- _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0)));
+ _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
+ }
+ _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
_FDT((fdt_end_node(fdt)));
@@ -445,6 +460,12 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
_FDT((fdt_pack(fdt)));
+ if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
+ hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
+ fdt_totalsize(fdt), FDT_MAX_SIZE);
+ exit(1);
+ }
+
cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
g_free(fdt);
@@ -494,8 +515,9 @@ static void ppc_spapr_init(ram_addr_t ram_size,
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
target_phys_addr_t rma_alloc_size, rma_size;
- uint32_t initrd_base;
- long kernel_size, initrd_size, fw_size;
+ uint32_t initrd_base = 0;
+ long kernel_size = 0, initrd_size = 0;
+ long load_limit, rtas_limit, fw_size;
long pteg_shift = 17;
char *filename;
@@ -517,11 +539,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
rma_size = ram_size;
}
- /* We place the device tree just below either the top of the RMA,
+ /* We place the device tree and RTAS just below either the top of the RMA,
* or just below 2GB, whichever is lowere, so that it can be
* processed with 32-bit real mode code if necessary */
- spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE;
- spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE;
+ rtas_limit = MIN(rma_size, 0x80000000);
+ spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
+ spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
+ load_limit = spapr->fdt_addr - FW_OVERHEAD;
/* init CPUs */
if (cpu_model == NULL) {
@@ -577,13 +601,19 @@ static void ppc_spapr_init(ram_addr_t ram_size,
filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
- ram_size - spapr->rtas_addr);
+ rtas_limit - spapr->rtas_addr);
if (spapr->rtas_size < 0) {
hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
exit(1);
}
+ if (spapr->rtas_size > RTAS_MAX_SIZE) {
+ hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
+ spapr->rtas_size, RTAS_MAX_SIZE);
+ exit(1);
+ }
g_free(filename);
+
/* Set up Interrupt Controller */
spapr->icp = xics_system_init(XICS_IRQS);
spapr->next_irq = 16;
@@ -622,6 +652,20 @@ static void ppc_spapr_init(ram_addr_t ram_size,
spapr_vscsi_create(spapr->vio_bus, 0x2000 + i);
}
+ if (rma_size < (MIN_RMA_SLOF << 20)) {
+ fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
+ "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
+ exit(1);
+ }
+
+ fprintf(stderr, "sPAPR memory map:\n");
+ fprintf(stderr, "RTAS : 0x%08lx..%08lx\n",
+ (unsigned long)spapr->rtas_addr,
+ (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1));
+ fprintf(stderr, "FDT : 0x%08lx..%08lx\n",
+ (unsigned long)spapr->fdt_addr,
+ (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1));
+
if (kernel_filename) {
uint64_t lowaddr = 0;
@@ -630,57 +674,60 @@ static void ppc_spapr_init(ram_addr_t ram_size,
if (kernel_size < 0) {
kernel_size = load_image_targphys(kernel_filename,
KERNEL_LOAD_ADDR,
- ram_size - KERNEL_LOAD_ADDR);
+ load_limit - KERNEL_LOAD_ADDR);
}
if (kernel_size < 0) {
fprintf(stderr, "qemu: could not load kernel '%s'\n",
kernel_filename);
exit(1);
}
+ fprintf(stderr, "Kernel : 0x%08x..%08lx\n",
+ KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1);
/* load initrd */
if (initrd_filename) {
- initrd_base = INITRD_LOAD_ADDR;
+ /* Try to locate the initrd in the gap between the kernel
+ * and the firmware. Add a bit of space just in case
+ */
+ initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
initrd_size = load_image_targphys(initrd_filename, initrd_base,
- ram_size - initrd_base);
+ load_limit - initrd_base);
if (initrd_size < 0) {
fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
initrd_filename);
exit(1);
}
+ fprintf(stderr, "Ramdisk : 0x%08lx..%08lx\n",
+ (long)initrd_base, (long)(initrd_base + initrd_size - 1));
} else {
initrd_base = 0;
initrd_size = 0;
}
+ }
- spapr->entry_point = KERNEL_LOAD_ADDR;
- } else {
- if (rma_size < (MIN_RMA_SLOF << 20)) {
- fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
- "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
- exit(1);
- }
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
- fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
- if (fw_size < 0) {
- hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
- exit(1);
- }
- g_free(filename);
- spapr->entry_point = 0x100;
- initrd_base = 0;
- initrd_size = 0;
-
- /* SLOF will startup the secondary CPUs using RTAS,
- rather than expecting a kexec() style entry */
- for (env = first_cpu; env != NULL; env = env->next_cpu) {
- env->halted = 1;
- }
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
+ fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
+ if (fw_size < 0) {
+ hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
+ exit(1);
+ }
+ g_free(filename);
+ fprintf(stderr, "Firmware load : 0x%08x..%08lx\n",
+ 0, fw_size);
+ fprintf(stderr, "Firmware runtime : 0x%08lx..%08lx\n",
+ load_limit, (unsigned long)spapr->fdt_addr);
+
+ spapr->entry_point = 0x100;
+
+ /* SLOF will startup the secondary CPUs using RTAS */
+ for (env = first_cpu; env != NULL; env = env->next_cpu) {
+ env->halted = 1;
}
/* Prepare the device tree */
spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
initrd_base, initrd_size,
+ kernel_size,
boot_device, kernel_cmdline,
pteg_shift + 7);
assert(spapr->fdt_skel != NULL);
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index f3f9246ee0..cf37628292 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -324,31 +324,13 @@ void spapr_create_phb(sPAPREnvironment *spapr,
#define b_fff(x) b_x((x), 8, 3) /* function number */
#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */
-static uint32_t regtype_to_ss(uint8_t type)
-{
- if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- return 3;
- }
- if (type == PCI_BASE_ADDRESS_SPACE_IO) {
- return 1;
- }
- return 2;
-}
-
int spapr_populate_pci_devices(sPAPRPHBState *phb,
uint32_t xics_phandle,
void *fdt)
{
PCIBus *bus = phb->host_state.bus;
- int bus_off, node_off = 0, devid, fn, i, n, devices;
- DeviceState *qdev;
+ int bus_off, i;
char nodename[256];
- struct {
- uint32_t hi;
- uint64_t addr;
- uint64_t size;
- } __attribute__((packed)) reg[PCI_NUM_REGIONS + 1],
- assigned_addresses[PCI_NUM_REGIONS];
uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
struct {
uint32_t hi;
@@ -369,7 +351,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
};
uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
uint32_t interrupt_map_mask[] = {
- cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0};
+ cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, 0x0};
uint32_t interrupt_map[bus->nirq][7];
/* Start populating the FDT */
@@ -397,118 +379,26 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
_FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
_FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
_FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
- _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
- &interrupt_map_mask, sizeof(interrupt_map_mask)));
_FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
- /* Populate PCI devices and allocate IRQs */
- devices = 0;
- QTAILQ_FOREACH(qdev, &bus->qbus.children, sibling) {
- PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev);
- int irq_index = pci_spapr_map_irq(dev, 0);
- uint32_t *irqmap = interrupt_map[devices];
- uint8_t *config = dev->config;
-
- devid = dev->devfn >> 3;
- fn = dev->devfn & 7;
-
- sprintf(nodename, "pci@%u,%u", devid, fn);
-
- /* Allocate interrupt from the map */
- if (devid > bus->nirq) {
- printf("Unexpected behaviour in spapr_populate_pci_devices,"
- "wrong devid %u\n", devid);
- exit(-1);
- }
- irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn));
+ /* Build the interrupt-map, this must match what is done
+ * in pci_spapr_map_irq
+ */
+ _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+ &interrupt_map_mask, sizeof(interrupt_map_mask)));
+ for (i = 0; i < 7; i++) {
+ uint32_t *irqmap = interrupt_map[i];
+ irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
irqmap[1] = 0;
irqmap[2] = 0;
irqmap[3] = 0;
irqmap[4] = cpu_to_be32(xics_phandle);
- irqmap[5] = cpu_to_be32(phb->lsi_table[irq_index].dt_irq);
+ irqmap[5] = cpu_to_be32(phb->lsi_table[i % SPAPR_PCI_NUM_LSI].dt_irq);
irqmap[6] = cpu_to_be32(0x8);
-
- /* Add node to FDT */
- node_off = fdt_add_subnode(fdt, bus_off, nodename);
- if (node_off < 0) {
- return node_off;
- }
-
- _FDT(fdt_setprop_cell(fdt, node_off, "vendor-id",
- pci_get_word(&config[PCI_VENDOR_ID])));
- _FDT(fdt_setprop_cell(fdt, node_off, "device-id",
- pci_get_word(&config[PCI_DEVICE_ID])));
- _FDT(fdt_setprop_cell(fdt, node_off, "revision-id",
- pci_get_byte(&config[PCI_REVISION_ID])));
- _FDT(fdt_setprop_cell(fdt, node_off, "class-code",
- pci_get_long(&config[PCI_CLASS_REVISION]) >> 8));
- _FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id",
- pci_get_word(&config[PCI_SUBSYSTEM_ID])));
- _FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id",
- pci_get_word(&config[PCI_SUBSYSTEM_VENDOR_ID])));
-
- /* Config space region comes first */
- reg[0].hi = cpu_to_be32(
- b_n(0) |
- b_p(0) |
- b_t(0) |
- b_ss(0/*config*/) |
- b_bbbbbbbb(0) |
- b_ddddd(devid) |
- b_fff(fn));
- reg[0].addr = 0;
- reg[0].size = 0;
-
- n = 0;
- for (i = 0; i < ARRAY_SIZE(bars); ++i) {
- if (0 == dev->io_regions[i].size) {
- continue;
- }
-
- reg[n+1].hi = cpu_to_be32(
- b_n(0) |
- b_p(0) |
- b_t(0) |
- b_ss(regtype_to_ss(dev->io_regions[i].type)) |
- b_bbbbbbbb(0) |
- b_ddddd(devid) |
- b_fff(fn) |
- b_rrrrrrrr(bars[i]));
- reg[n+1].addr = 0;
- reg[n+1].size = cpu_to_be64(dev->io_regions[i].size);
-
- assigned_addresses[n].hi = cpu_to_be32(
- b_n(1) |
- b_p(0) |
- b_t(0) |
- b_ss(regtype_to_ss(dev->io_regions[i].type)) |
- b_bbbbbbbb(0) |
- b_ddddd(devid) |
- b_fff(fn) |
- b_rrrrrrrr(bars[i]));
-
- /*
- * Writing zeroes to assigned_addresses causes the guest kernel to
- * reassign BARs
- */
- assigned_addresses[n].addr = cpu_to_be64(dev->io_regions[i].addr);
- assigned_addresses[n].size = reg[n+1].size;
-
- ++n;
- }
- _FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1)));
- _FDT(fdt_setprop(fdt, node_off, "assigned-addresses",
- assigned_addresses,
- sizeof(assigned_addresses[0])*(n)));
- _FDT(fdt_setprop_cell(fdt, node_off, "interrupts",
- pci_get_byte(&config[PCI_INTERRUPT_PIN])));
-
- ++devices;
}
-
/* Write interrupt map */
_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
- devices * sizeof(interrupt_map[0])));
+ 7 * sizeof(interrupt_map[0])));
return 0;
}