Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.8-20161028' into staging

ppc patch queue 2016-10-28 This pull request supersedes and extends the one from 2016-10-26 (which had a build bug). Highlights: * SLOF (pseries guest firmware) update * Enable a number of extra testcases on ppc / pseries * Added the 'powernv' machine type - Almost enough to be minimally usable - But still missing necessary interrupt controller updates * Cleanup and consolidation of NVRAM handling on several platforms with related firmware * Substantial cleanup to device tree construction * Some more POWER9 instruction emulation * Cleanup to handling of pseries option vectors and CAS reboot handling (host/guest feature negotiation mechanism) * Significant cleanups to handling of PCI devices in test cases * New hotplug event infrastructure * Memory hot unplug support for pseries * Several bug fixes The NVRAM cleanup affects some Sun sparc platforms as well as ppc ones, but has been tested by the sparc maintainer (Mark Cave-Ayland). The test additions also include substantial general changes to the test framework that aren't strictly ppc related. They don't seem to break tests on other platforms, they're for the benefit of enabling tests on ppc and there isn't a specific maintainer for them, so they're included in this tree. 
# gpg: Signature made Fri 28 Oct 2016 02:37:19 BST # gpg: using RSA key 0x6C38CACA20D9B392 # gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" # gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" # gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" # gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" # Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392 * remotes/dgibson/tags/ppc-for-2.8-20161028: (73 commits) ppc: allow certain HV interrupts to be delivered to guests spapr: Memory hot-unplug support spapr: use count+index for memory hotplug spapr: Add DRC count indexed hotplug identifier type spapr: add hotplug interrupt machine options spapr_events: add support for dedicated hotplug event source spapr: update spapr hotplug documentation target-ppc: Add xvcmpnesp, xvcmpnedp instructions target-ppc: add xscmp[eq,gt,ge,ne]dp instructions tests: Add pseries machine to the prom-env-test, too spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter libqos: Change PCI accessors to take opaque BAR handle tests: Don't assume structure of PCI IO base in ahci-test tests: Use qpci_mem{read,write} in ivshmem-test libqos: Add 64-bit PCI IO accessors tests: Clean up IO handling in ide-test libqos: Implement mmio accessors in terms of mem{read,write} libqos: Add streaming accessors for PCI MMIO tests: Adjust tco-test to use qpci_legacy_iomap() libqos: Better handling of PCI legacy IO ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2016-10-28 16:31:59 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2016-10-28 16:31:59 +0100
commit: 66a77ea676aea48092500bcddb015aa0aee42388 (patch)
tree: 9fe8c7f1a6bf8828a8ec239a6ced021486e883d3 /hw
parent: 01b601f06154c0d35f945b1321ddb3f39530cc43 (diff)
parent: 10c21b5c20bf3d20b7b0ad279db37ae89cc7937d (diff)
24 files changed, 3118 insertions, 625 deletions
diff --git a/hw/input/adb.c b/hw/input/adb.c
index 3d39368909..43d3205472 100644
--- a/hw/input/adb.c
+++ b/hw/input/adb.c
@@ -396,9 +396,15 @@ static int adb_kbd_request(ADBDevice *d, uint8_t *obuf,
                 d->devaddr = buf[1] & 0xf;
                 break;
             default:
-                /* XXX: check this */
                 d->devaddr = buf[1] & 0xf;
-                d->handler = buf[2];
+                /* we support handlers:
+                 * 1: Apple Standard Keyboard
+                 * 2: Apple Extended Keyboard (LShift = RShift)
+                 * 3: Apple Extended Keyboard (LShift != RShift)
+                 */
+                if (buf[2] == 1 || buf[2] == 2 || buf[2] == 3) {
+                    d->handler = buf[2];
+                }
                 break;
             }
         }
@@ -437,6 +443,7 @@ static void adb_keyboard_event(DeviceState *dev, QemuConsole *src,
     if (qcode >= ARRAY_SIZE(qcode_to_adb_keycode)) {
         return;
     }
+    /* FIXME: take handler into account when translating qcode */
     keycode = qcode_to_adb_keycode[qcode];
     if (keycode == NO_KEY) {  /* We don't want to send this to the guest */
         ADB_DPRINTF("Ignoring NO_KEY\n");
@@ -631,8 +638,21 @@ static int adb_mouse_request(ADBDevice *d, uint8_t *obuf,
                 d->devaddr = buf[1] & 0xf;
                 break;
             default:
-                /* XXX: check this */
                 d->devaddr = buf[1] & 0xf;
+                /* we support handlers:
+                 * 0x01: Classic Apple Mouse Protocol / 100 cpi operations
+                 * 0x02: Classic Apple Mouse Protocol / 200 cpi operations
+                 * we don't support handlers (at least):
+                 * 0x03: Mouse systems A3 trackball
+                 * 0x04: Extended Apple Mouse Protocol
+                 * 0x2f: Microspeed mouse
+                 * 0x42: Macally
+                 * 0x5f: Microspeed mouse
+                 * 0x66: Microspeed mouse
+                 */
+                if (buf[2] == 1 || buf[2] == 2) {
+                    d->handler = buf[2];
+                }
                 break;
             }
         }
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index f40b00003a..095c16a300 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -35,6 +35,8 @@
 #include "hw/ppc/xics.h"
 #include "qemu/error-report.h"
 #include "qapi/visitor.h"
+#include "monitor/monitor.h"
+#include "hw/intc/intc.h"
 
 int xics_get_cpu_index_by_dt_id(int cpu_dt_id)
 {
@@ -90,6 +92,47 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu)
     }
 }
 
+static void xics_common_pic_print_info(InterruptStatsProvider *obj,
+                                       Monitor *mon)
+{
+    XICSState *xics = XICS_COMMON(obj);
+    ICSState *ics;
+    uint32_t i;
+
+    for (i = 0; i < xics->nr_servers; i++) {
+        ICPState *icp = &xics->ss[i];
+
+        if (!icp->output) {
+            continue;
+        }
+        monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n",
+                       i, icp->xirr, icp->xirr_owner,
+                       icp->pending_priority, icp->mfrr);
+    }
+
+    QLIST_FOREACH(ics, &xics->ics, list) {
+        monitor_printf(mon, "ICS %4x..%4x %p\n",
+                       ics->offset, ics->offset + ics->nr_irqs - 1, ics);
+
+        if (!ics->irqs) {
+            continue;
+        }
+
+        for (i = 0; i < ics->nr_irqs; i++) {
+            ICSIRQState *irq = ics->irqs + i;
+
+            if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) {
+                continue;
+            }
+            monitor_printf(mon, "  %4x %s %02x %02x\n",
+                           ics->offset + i,
+                           (irq->flags & XICS_FLAGS_IRQ_LSI) ?
+                           "LSI" : "MSI",
+                           irq->priority, irq->status);
+        }
+    }
+}
+
 /*
  * XICS Common class - parent for emulated XICS and KVM-XICS
  */
@@ -140,6 +183,25 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name,
     info->set_nr_irqs(xics, value, errp);
 }
 
+void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers,
+                         const char *typename, Error **errp)
+{
+    int i;
+
+    xics->nr_servers = nr_servers;
+
+    xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
+    for (i = 0; i < xics->nr_servers; i++) {
+        char name[32];
+        ICPState *icp = &xics->ss[i];
+
+        object_initialize(icp, sizeof(*icp), typename);
+        snprintf(name, sizeof(name), "icp[%d]", i);
+        object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp);
+        icp->xics = xics;
+    }
+}
+
 static void xics_prop_get_nr_servers(Object *obj, Visitor *v,
                                      const char *name, void *opaque,
                                      Error **errp)
@@ -155,7 +217,7 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
                                      Error **errp)
 {
     XICSState *xics = XICS_COMMON(obj);
-    XICSStateClass *info = XICS_COMMON_GET_CLASS(xics);
+    XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics);
     Error *error = NULL;
     int64_t value;
 
@@ -170,8 +232,8 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v,
         return;
     }
 
-    assert(info->set_nr_servers);
-    info->set_nr_servers(xics, value, errp);
+    assert(xsc->set_nr_servers);
+    xsc->set_nr_servers(xics, value, errp);
 }
 
 static void xics_common_initfn(Object *obj)
@@ -190,8 +252,10 @@ static void xics_common_initfn(Object *obj)
 static void xics_common_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
+    InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc);
 
     dc->reset = xics_common_reset;
+    ic->print_info = xics_common_pic_print_info;
 }
 
 static const TypeInfo xics_common_info = {
@@ -201,6 +265,10 @@ static const TypeInfo xics_common_info = {
     .class_size    = sizeof(XICSStateClass),
     .instance_init = xics_common_initfn,
     .class_init    = xics_common_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_INTERRUPT_STATS_PROVIDER },
+        { }
+    },
 };
 
 /*
@@ -258,22 +326,20 @@ static void icp_check_ipi(ICPState *ss)
     qemu_irq_raise(ss->output);
 }
 
-static void icp_resend(XICSState *xics, int server)
+static void icp_resend(ICPState *ss)
 {
-    ICPState *ss = xics->ss + server;
     ICSState *ics;
 
     if (ss->mfrr < CPPR(ss)) {
         icp_check_ipi(ss);
     }
-    QLIST_FOREACH(ics, &xics->ics, list) {
+    QLIST_FOREACH(ics, &ss->xics->ics, list) {
         ics_resend(ics);
     }
 }
 
-void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
+void icp_set_cppr(ICPState *ss, uint8_t cppr)
 {
-    ICPState *ss = xics->ss + server;
     uint8_t old_cppr;
     uint32_t old_xisr;
 
@@ -293,15 +359,13 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t cppr)
         }
     } else {
         if (!XISR(ss)) {
-            icp_resend(xics, server);
+            icp_resend(ss);
         }
     }
 }
 
-void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr)
+void icp_set_mfrr(ICPState *ss, uint8_t mfrr)
 {
-    ICPState *ss = xics->ss + server;
-
     ss->mfrr = mfrr;
     if (mfrr < CPPR(ss)) {
         icp_check_ipi(ss);
@@ -330,23 +394,22 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr)
     return ss->xirr;
 }
 
-void icp_eoi(XICSState *xics, int server, uint32_t xirr)
+void icp_eoi(ICPState *ss, uint32_t xirr)
 {
-    ICPState *ss = xics->ss + server;
     ICSState *ics;
     uint32_t irq;
 
     /* Send EOI -> ICS */
     ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
-    trace_xics_icp_eoi(server, xirr, ss->xirr);
+    trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr);
     irq = xirr & XISR_MASK;
-    QLIST_FOREACH(ics, &xics->ics, list) {
+    QLIST_FOREACH(ics, &ss->xics->ics, list) {
         if (ics_valid_irq(ics, irq)) {
             ics_eoi(ics, irq);
         }
     }
     if (!XISR(ss)) {
-        icp_resend(xics, server);
+        icp_resend(ss);
     }
 }
 
@@ -605,7 +668,7 @@ static int ics_simple_post_load(ICSState *ics, int version_id)
     int i;
 
     for (i = 0; i < ics->xics->nr_servers; i++) {
-        icp_resend(ics->xics, i);
+        icp_resend(&ics->xics->ss[i]);
     }
 
     return 0;
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 9c2f198fd1..17694eaa87 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -373,18 +373,7 @@ static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
 static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers,
                                     Error **errp)
 {
-    int i;
-
-    xics->nr_servers = nr_servers;
-
-    xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
-    for (i = 0; i < xics->nr_servers; i++) {
-        char buffer[32];
-        object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_KVM_ICP);
-        snprintf(buffer, sizeof(buffer), "icp[%d]", i);
-        object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]),
-                                  errp);
-    }
+    xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp);
 }
 
 static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c
index e8d0623c2c..2e3f1c5e95 100644
--- a/hw/intc/xics_spapr.c
+++ b/hw/intc/xics_spapr.c
@@ -32,6 +32,7 @@
 #include "qemu/timer.h"
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/xics.h"
+#include "hw/ppc/fdt.h"
 #include "qapi/visitor.h"
 #include "qapi/error.h"
 
@@ -43,9 +44,10 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
+    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
     target_ulong cppr = args[0];
 
-    icp_set_cppr(spapr->xics, cs->cpu_index, cppr);
+    icp_set_cppr(icp, cppr);
     return H_SUCCESS;
 }
 
@@ -59,7 +61,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         return H_PARAMETER;
     }
 
-    icp_set_mfrr(spapr->xics, server, mfrr);
+    icp_set_mfrr(spapr->xics->ss + server, mfrr);
     return H_SUCCESS;
 }
 
@@ -67,7 +69,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    uint32_t xirr = icp_accept(spapr->xics->ss + cs->cpu_index);
+    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    uint32_t xirr = icp_accept(icp);
 
     args[0] = xirr;
     return H_SUCCESS;
@@ -77,8 +80,8 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
-    ICPState *ss = &spapr->xics->ss[cs->cpu_index];
-    uint32_t xirr = icp_accept(ss);
+    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
+    uint32_t xirr = icp_accept(icp);
 
     args[0] = xirr;
     args[1] = cpu_get_host_ticks();
@@ -89,9 +92,10 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
+    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
     target_ulong xirr = args[0];
 
-    icp_eoi(spapr->xics, cs->cpu_index, xirr);
+    icp_eoi(icp, xirr);
     return H_SUCCESS;
 }
 
@@ -99,8 +103,9 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
+    ICPState *icp = &spapr->xics->ss[cs->cpu_index];
     uint32_t mfrr;
-    uint32_t xirr = icp_ipoll(spapr->xics->ss + cs->cpu_index, &mfrr);
+    uint32_t xirr = icp_ipoll(icp, &mfrr);
 
     args[0] = xirr;
     args[1] = mfrr;
@@ -249,18 +254,7 @@ static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs,
 static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers,
                                       Error **errp)
 {
-    int i;
-
-    xics->nr_servers = nr_servers;
-
-    xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState));
-    for (i = 0; i < xics->nr_servers; i++) {
-        char buffer[32];
-        object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_ICP);
-        snprintf(buffer, sizeof(buffer), "icp[%d]", i);
-        object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]),
-                                  errp);
-    }
+    xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp);
 }
 
 static void xics_spapr_realize(DeviceState *dev, Error **errp)
@@ -456,6 +450,27 @@ void xics_spapr_free(XICSState *xics, int irq, int num)
     }
 }
 
+void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle)
+{
+    uint32_t interrupt_server_ranges_prop[] = {
+        0, cpu_to_be32(xics->nr_servers),
+    };
+    int node;
+
+    _FDT(node = fdt_add_subnode(fdt, 0, "interrupt-controller"));
+
+    _FDT(fdt_setprop_string(fdt, node, "device_type",
+                            "PowerPC-External-Interrupt-Presentation"));
+    _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,ppc-xicp"));
+    _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0));
+    _FDT(fdt_setprop(fdt, node, "ibm,interrupt-server-ranges",
+                     interrupt_server_ranges_prop,
+                     sizeof(interrupt_server_ranges_prop)));
+    _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2));
+    _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle));
+    _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle));
+}
+
 static void xics_spapr_register_types(void)
 {
     type_register_static(&xics_spapr_info);
diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs
index e9a66940e0..c018f6b2ff 100644
--- a/hw/nvram/Makefile.objs
+++ b/hw/nvram/Makefile.objs
@@ -1,5 +1,6 @@
 common-obj-$(CONFIG_DS1225Y) += ds1225y.o
 common-obj-y += eeprom93xx.o
 common-obj-y += fw_cfg.o
+common-obj-y += chrp_nvram.o
 common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o
 obj-$(CONFIG_PSERIES) += spapr_nvram.o
diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c
new file mode 100644
index 0000000000..3837510dd2
--- /dev/null
+++ b/hw/nvram/chrp_nvram.c
@@ -0,0 +1,85 @@
+/*
+ * Common Hardware Reference Platform NVRAM helper functions.
+ *
+ * The CHRP NVRAM layout is used by OpenBIOS and SLOF. See CHRP
+ * specification, chapter 8, or the LoPAPR specification for details
+ * about the NVRAM layout.
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "hw/hw.h"
+#include "hw/nvram/chrp_nvram.h"
+#include "sysemu/sysemu.h"
+
+static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str)
+{
+    int len;
+
+    len = strlen(str) + 1;
+    memcpy(&nvram[addr], str, len);
+
+    return addr + len;
+}
+
+/**
+ * Create a "system partition", used for the Open Firmware
+ * environment variables.
+ */
+int chrp_nvram_create_system_partition(uint8_t *data, int min_len)
+{
+    ChrpNvramPartHdr *part_header;
+    unsigned int i;
+    int end;
+
+    part_header = (ChrpNvramPartHdr *)data;
+    part_header->signature = CHRP_NVPART_SYSTEM;
+    pstrcpy(part_header->name, sizeof(part_header->name), "system");
+
+    end = sizeof(ChrpNvramPartHdr);
+    for (i = 0; i < nb_prom_envs; i++) {
+        end = chrp_nvram_set_var(data, end, prom_envs[i]);
+    }
+
+    /* End marker */
+    data[end++] = '\0';
+
+    end = (end + 15) & ~15;
+    /* XXX: OpenBIOS is not able to grow up a partition. Leave some space for
+       new variables. */
+    if (end < min_len) {
+        end = min_len;
+    }
+    chrp_nvram_finish_partition(part_header, end);
+
+    return end;
+}
+
+/**
+ * Create a "free space" partition
+ */
+int chrp_nvram_create_free_partition(uint8_t *data, int len)
+{
+    ChrpNvramPartHdr *part_header;
+
+    part_header = (ChrpNvramPartHdr *)data;
+    part_header->signature = CHRP_NVPART_FREE;
+    pstrcpy(part_header->name, sizeof(part_header->name), "free");
+
+    chrp_nvram_finish_partition(part_header, len);
+
+    return len;
+}
diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 24f61212ba..63f9ed1d82 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -24,8 +24,7 @@
  */
 #include "qemu/osdep.h"
 #include "hw/hw.h"
-#include "hw/nvram/openbios_firmware_abi.h"
-#include "sysemu/sysemu.h"
+#include "hw/nvram/chrp_nvram.h"
 #include "hw/ppc/mac.h"
 #include "qemu/cutils.h"
 #include <zlib.h>
@@ -146,38 +145,14 @@ static void macio_nvram_register_types(void)
 static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off,
                                            int len)
 {
-    unsigned int i;
-    uint32_t start = off, end;
-    struct OpenBIOS_nvpart_v1 *part_header;
-
-    // OpenBIOS nvram variables
-    // Variable partition
-    part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start];
-    part_header->signature = OPENBIOS_PART_SYSTEM;
-    pstrcpy(part_header->name, sizeof(part_header->name), "system");
-
-    end = start + sizeof(struct OpenBIOS_nvpart_v1);
-    for (i = 0; i < nb_prom_envs; i++)
-        end = OpenBIOS_set_var(nvr->data, end, prom_envs[i]);
-
-    // End marker
-    nvr->data[end++] = '\0';
-
-    end = start + ((end - start + 15) & ~15);
-    /* XXX: OpenBIOS is not able to grow up a partition. Leave some space for
-       new variables. */
-    if (end < DEF_SYSTEM_SIZE)
-        end = DEF_SYSTEM_SIZE;
-    OpenBIOS_finish_partition(part_header, end - start);
-
-    // free partition
-    start = end;
-    part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start];
-    part_header->signature = OPENBIOS_PART_FREE;
-    pstrcpy(part_header->name, sizeof(part_header->name), "free");
-
-    end = len;
-    OpenBIOS_finish_partition(part_header, end - start);
+    int sysp_end;
+
+    /* OpenBIOS nvram variables partition */
+    sysp_end = chrp_nvram_create_system_partition(&nvr->data[off],
+                                                  DEF_SYSTEM_SIZE) + off;
+
+    /* Free space partition */
+    chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end);
 }
 
 #define OSX_NVRAM_SIGNATURE     (0x5A)
@@ -187,15 +162,15 @@ static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off,
                                             int len)
 {
     uint32_t start = off;
-    struct OpenBIOS_nvpart_v1 *part_header;
+    ChrpNvramPartHdr *part_header;
     unsigned char *data = &nvr->data[start];
 
     /* empty partition */
-    part_header = (struct OpenBIOS_nvpart_v1 *)data;
+    part_header = (ChrpNvramPartHdr *)data;
     part_header->signature = OSX_NVRAM_SIGNATURE;
     pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww");
 
-    OpenBIOS_finish_partition(part_header, len);
+    chrp_nvram_finish_partition(part_header, len);
 
     /* Generation */
     stl_be_p(&data[20], 2);
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 4de5f705d8..eb42ea323f 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -31,6 +31,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/device_tree.h"
 #include "hw/sysbus.h"
+#include "hw/nvram/chrp_nvram.h"
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
 
@@ -162,6 +163,11 @@ static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp)
             error_setg(errp, "can't read spapr-nvram contents");
             return;
         }
+    } else if (nb_prom_envs > 0) {
+        /* Create a system partition to pass the -prom-env variables */
+        chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4);
+        chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4],
+                                         nvram->size - MIN_NVRAM_SIZE / 4);
     }
 
     spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch);
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 99a0d4e581..8025129377 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,7 +4,9 @@ obj-y += ppc.o ppc_booke.o fdt.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
 obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
-obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
+obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o
+# IBM PowerNV
+obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
new file mode 100644
index 0000000000..82276e0857
--- /dev/null
+++ b/hw/ppc/pnv.c
@@ -0,0 +1,819 @@
+/*
+ * QEMU PowerPC PowerNV machine model
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/numa.h"
+#include "hw/hw.h"
+#include "target-ppc/cpu.h"
+#include "qemu/log.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_core.h"
+#include "hw/loader.h"
+#include "exec/address-spaces.h"
+#include "qemu/cutils.h"
+#include "qapi/visitor.h"
+
+#include "hw/ppc/pnv_xscom.h"
+
+#include "hw/isa/isa.h"
+#include "hw/char/serial.h"
+#include "hw/timer/mc146818rtc.h"
+
+#include <libfdt.h>
+
+#define FDT_MAX_SIZE            0x00100000
+
+#define FW_FILE_NAME            "skiboot.lid"
+#define FW_LOAD_ADDR            0x0
+#define FW_MAX_SIZE             0x00400000
+
+#define KERNEL_LOAD_ADDR        0x20000000
+#define INITRD_LOAD_ADDR        0x40000000
+
+/*
+ * On Power Systems E880 (POWER8), the max cpus (threads) should be :
+ *     4 * 4 sockets * 12 cores * 8 threads = 1536
+ * Let's make it 2^11
+ */
+#define MAX_CPUS                2048
+
+/*
+ * Memory nodes are created by hostboot, one for each range of memory
+ * that has a different "affinity". In practice, it means one range
+ * per chip.
+ */
+static void powernv_populate_memory_node(void *fdt, int chip_id, hwaddr start,
+                                         hwaddr size)
+{
+    char *mem_name;
+    uint64_t mem_reg_property[2];
+    int off;
+
+    mem_reg_property[0] = cpu_to_be64(start);
+    mem_reg_property[1] = cpu_to_be64(size);
+
+    mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start);
+    off = fdt_add_subnode(fdt, 0, mem_name);
+    g_free(mem_name);
+
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                       sizeof(mem_reg_property))));
+    _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
+}
+
+static int get_cpus_node(void *fdt)
+{
+    int cpus_offset = fdt_path_offset(fdt, "/cpus");
+
+    if (cpus_offset < 0) {
+        cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+                                      "cpus");
+        if (cpus_offset) {
+            _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+            _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+        }
+    }
+    _FDT(cpus_offset);
+    return cpus_offset;
+}
+
+/*
+ * The PowerNV cores (and threads) need to use real HW ids and not an
+ * incremental index like it has been done on other platforms. This HW
+ * id is stored in the CPU PIR, it is used to create cpu nodes in the
+ * device tree, used in XSCOM to address cores and in interrupt
+ * servers.
+ */
+static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt)
+{
+    CPUState *cs = CPU(DEVICE(pc->threads));
+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    int smt_threads = ppc_get_compat_smt_threads(cpu);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    uint32_t servers_prop[smt_threads];
+    int i;
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
+    uint32_t cpufreq = 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    const uint8_t pa_features[] = { 24, 0,
+                                    0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
+                                    0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+                                    0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+                                    0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
+    int offset;
+    char *nodename;
+    int cpus_offset = get_cpus_node(fdt);
+
+    nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
+    offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+    _FDT(offset);
+    g_free(nodename);
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                            env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                            env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                            env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                               pcc->l1_dcache_size)));
+    } else {
+        error_report("Warning: Unknown L1 dcache size for cpu");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                               pcc->l1_icache_size)));
+    } else {
+        error_report("Warning: Unknown L1 icache size for cpu");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+    }
+
+    if (env->mmu_model & POWERPC_MMU_1TSEG) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                           segs, sizeof(segs))));
+    }
+
+    /* Advertise VMX/VSX (vector extensions) if available
+     *   0 / no property == no vector extensions
+     *   1               == VMX / Altivec available
+     *   2               == VSX available */
+    if (env->insns_flags & PPC_ALTIVEC) {
+        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (env->insns_flags2 & PPC2_DFP) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
+                                                  sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                           page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
+                       pa_features, sizeof(pa_features))));
+
+    if (cpu->cpu_version) {
+        _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", cpu->cpu_version)));
+    }
+
+    /* Build interrupt servers properties */
+    for (i = 0; i < smt_threads; i++) {
+        servers_prop[i] = cpu_to_be32(pc->pir + i);
+    }
+    _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+                       servers_prop, sizeof(servers_prop))));
+}
+
+/*
+ * Add to @fdt everything that belongs to @chip: its XSCOM description,
+ * one node per core, and a memory node when the chip has RAM attached.
+ */
+static void powernv_populate_chip(PnvChip *chip, void *fdt)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
+    char *core_typename = pnv_core_typename(chip_class->cpu_model);
+    size_t core_size = object_type_get_instance_size(core_typename);
+    int idx;
+
+    /* The type name was only needed to look up the instance size */
+    g_free(core_typename);
+
+    pnv_xscom_populate(chip, fdt, 0);
+
+    /* chip->cores is walked as an array of core_size-byte objects */
+    for (idx = 0; idx < chip->nr_cores; idx++) {
+        powernv_create_core_node(chip,
+                                 PNV_CORE(chip->cores + idx * core_size),
+                                 fdt);
+    }
+
+    if (!chip->ram_size) {
+        return;
+    }
+    powernv_populate_memory_node(fdt, chip->chip_id, chip->ram_start,
+                                 chip->ram_size);
+}
+
+/*
+ * Build the flattened device tree handed to the firmware.
+ *
+ * The tree is created in a freshly allocated FDT_MAX_SIZE buffer using
+ * the libfdt read-write API, then filled with the root properties, the
+ * "chosen" node (kernel command line, initrd location) and the per-chip
+ * content.
+ *
+ * Returns the buffer holding the (unpacked) tree; the caller owns it and
+ * must g_free() it.  Any libfdt failure wrapped in _FDT() is fatal.
+ */
+static void *powernv_create_fdt(MachineState *machine)
+{
+    const char plat_compat[] = "qemu,powernv\0ibm,powernv";
+    PnvMachineState *pnv = POWERNV_MACHINE(machine);
+    void *fdt;
+    char *buf;
+    int off;
+    int i;
+
+    fdt = g_malloc0(FDT_MAX_SIZE);
+    _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
+
+    /* Root node */
+    _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2)));
+    _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
+    _FDT((fdt_setprop_string(fdt, 0, "model",
+                             "IBM PowerNV (emulated by qemu)")));
+    /* sizeof() keeps the embedded NUL separating the two strings */
+    _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
+                      sizeof(plat_compat))));
+
+    buf = qemu_uuid_unparse_strdup(&qemu_uuid);
+    _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
+    if (qemu_uuid_set) {
+        /*
+         * The tree was created with fdt_create_empty_tree(), so properties
+         * must be added with the read-write fdt_setprop*() calls;
+         * fdt_property_string() belongs to the sequential-write API and
+         * does not work on such a tree.
+         */
+        _FDT((fdt_setprop_string(fdt, 0, "system-id", buf)));
+    }
+    g_free(buf);
+
+    off = fdt_add_subnode(fdt, 0, "chosen");
+    if (machine->kernel_cmdline) {
+        _FDT((fdt_setprop_string(fdt, off, "bootargs",
+                                 machine->kernel_cmdline)));
+    }
+
+    if (pnv->initrd_size) {
+        /* Both bounds are emitted as single big-endian 32-bit cells */
+        uint32_t start_prop = cpu_to_be32(pnv->initrd_base);
+        uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
+
+        _FDT((fdt_setprop(fdt, off, "linux,initrd-start",
+                               &start_prop, sizeof(start_prop))));
+        _FDT((fdt_setprop(fdt, off, "linux,initrd-end",
+                               &end_prop, sizeof(end_prop))));
+    }
+
+    /* Populate device tree for each chip */
+    for (i = 0; i < pnv->num_chips; i++) {
+        powernv_populate_chip(pnv->chips[i], fdt);
+    }
+    return fdt;
+}
+
+/*
+ * Machine reset hook: reset all devices, then rebuild the device tree and
+ * copy it into guest memory at PNV_FDT_ADDR.
+ */
+static void ppc_powernv_reset(void)
+{
+    MachineState *machine = MACHINE(qdev_get_machine());
+    void *fdt;
+
+    qemu_devices_reset();
+
+    fdt = powernv_create_fdt(machine);
+
+    /* Pack resulting tree */
+    _FDT((fdt_pack(fdt)));
+
+    cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
+
+    /*
+     * The tree has been copied into guest memory; release the host
+     * buffer so that it is not leaked on every machine reset.
+     */
+    g_free(fdt);
+}
+
+/* If we don't use the built-in LPC interrupt deserializer, we need
+ * to provide a set of qirqs for the ISA bus or things will go bad.
+ *
+ * Most machines using pre-Naples chips (without said deserializer)
+ * have a CPLD that will collect the SerIRQ and shoot them as a
+ * single level interrupt to the P8 chip. So let's set up a hook
+ * for doing just that.
+ *
+ * Note: The actual interrupt input isn't emulated yet, this will
+ * come with the PSI bridge model.
+ *
+ * All three arguments are currently ignored.
+ */
+static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level)
+{
+    /* We don't yet emulate the PSI bridge which provides the external
+     * interrupt, so just drop interrupts on the floor
+     */
+}
+
+/*
+ * ISA IRQ handler installed for chips of type PNV_CHIP_POWER8NVL.
+ * Not implemented yet: every interrupt is silently ignored.
+ */
+static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
+{
+     /* XXX TODO: interrupt delivery is not implemented */
+}
+
+/*
+ * Create the ISA bus that sits behind the LPC controller of @chip and
+ * wire ISA_NUM_IRQS interrupt lines to it.
+ *
+ * Returns the new bus; bus creation failure is fatal.
+ */
+static ISABus *pnv_isa_create(PnvChip *chip)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
+    PnvLpcController *lpc = &chip->lpc;
+    qemu_irq_handler handler;
+    ISABus *bus;
+
+    /* Let isa_bus_new() create its own bridge on SysBus, otherwise
+     * devices specified on the command line won't find the bus and
+     * will fail to create.
+     */
+    bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, &error_fatal);
+
+    /* Not all variants have a working serial irq decoder. If not,
+     * handling of LPC interrupts becomes a platform issue (some
+     * platforms have a CPLD to do it).
+     */
+    handler = (pcc_is_nvl(chip_class))
+        ? pnv_lpc_isa_irq_handler
+        : pnv_lpc_isa_irq_handler_cpld;
+
+    isa_bus_irqs(bus, qemu_allocate_irqs(handler, chip, ISA_NUM_IRQS));
+    return bus;
+}
+
+/*
+ * Machine init hook for the PowerNV machine.
+ *
+ * Allocates guest RAM, loads the skiboot firmware plus the optional
+ * kernel and initrd images, creates and realizes pnv->num_chips processor
+ * chips, then attaches an ISA bus with serial ports and an RTC to chip 0.
+ *
+ * Any failure to find or load an image, or an unknown CPU model, is
+ * reported and terminates QEMU with exit status 1.
+ */
+static void ppc_powernv_init(MachineState *machine)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(machine);
+    MemoryRegion *ram;
+    char *fw_filename;
+    long fw_size;
+    int i;
+    char *chip_typename;
+
+    /* allocate RAM */
+    if (machine->ram_size < (1 * G_BYTE)) {
+        error_report("Warning: skiboot may not work with < 1GB of RAM");
+    }
+
+    ram = g_new(MemoryRegion, 1);
+    memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram",
+                                         machine->ram_size);
+    memory_region_add_subregion(get_system_memory(), 0, ram);
+
+    /* load skiboot firmware  */
+    if (bios_name == NULL) {
+        bios_name = FW_FILE_NAME;
+    }
+
+    fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (!fw_filename) {
+        /* Do not hand a NULL path to the loader or to a '%s' format */
+        error_report("qemu: could not find OPAL firmware '%s'", bios_name);
+        exit(1);
+    }
+
+    fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
+    if (fw_size < 0) {
+        error_report("qemu: could not load OPAL '%s'", fw_filename);
+        exit(1);
+    }
+    g_free(fw_filename);
+
+    /* load kernel */
+    if (machine->kernel_filename) {
+        long kernel_size;
+
+        kernel_size = load_image_targphys(machine->kernel_filename,
+                                          KERNEL_LOAD_ADDR, 0x2000000);
+        if (kernel_size < 0) {
+            error_report("qemu: could not load kernel '%s'",
+                         machine->kernel_filename);
+            exit(1);
+        }
+    }
+
+    /* load initrd */
+    if (machine->initrd_filename) {
+        pnv->initrd_base = INITRD_LOAD_ADDR;
+        pnv->initrd_size = load_image_targphys(machine->initrd_filename,
+                                  pnv->initrd_base, 0x10000000); /* 256MB max */
+        if (pnv->initrd_size < 0) {
+            error_report("qemu: could not load initial ram disk '%s'",
+                         machine->initrd_filename);
+            exit(1);
+        }
+    }
+
+    /* We need some cpu model to instantiate the PnvChip class */
+    if (machine->cpu_model == NULL) {
+        machine->cpu_model = "POWER8";
+    }
+
+    /* Create the processor chips */
+    chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model);
+    if (!object_class_by_name(chip_typename)) {
+        error_report("qemu: invalid CPU model '%s' for %s machine",
+                     machine->cpu_model, MACHINE_GET_CLASS(machine)->name);
+        exit(1);
+    }
+
+    pnv->chips = g_new0(PnvChip *, pnv->num_chips);
+    for (i = 0; i < pnv->num_chips; i++) {
+        char chip_name[32];
+        Object *chip = object_new(chip_typename);
+
+        pnv->chips[i] = PNV_CHIP(chip);
+
+        /* TODO: put all the memory in one node on chip 0 until we find a
+         * way to specify different ranges for each chip
+         */
+        if (i == 0) {
+            object_property_set_int(chip, machine->ram_size, "ram-size",
+                                    &error_fatal);
+        }
+
+        snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i));
+        object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
+        object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
+                                &error_fatal);
+        object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
+        object_property_set_bool(chip, true, "realized", &error_fatal);
+    }
+    g_free(chip_typename);
+
+    /* Instantiate ISA bus on chip 0 */
+    pnv->isa_bus = pnv_isa_create(pnv->chips[0]);
+
+    /* Create serial port */
+    serial_hds_isa_init(pnv->isa_bus, 0, MAX_SERIAL_PORTS);
+
+    /* Create an RTC ISA device too */
+    rtc_init(pnv->isa_bus, 2000, NULL);
+}
+
+/*
+ * POWER8 PIR layout (IBM bit numbering, bit 0 is the most significant):
+ *
+ *    0:21  Reserved - Read as zeros
+ *   22:24  Chip ID
+ *   25:28  Core number
+ *   29:31  Thread ID
+ *
+ * Returns the PIR of @core_id on @chip with the thread field left at 0.
+ */
+static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
+{
+    uint32_t chip_field = chip->chip_id << 7;
+    uint32_t core_field = core_id << 3;
+
+    return chip_field | core_field;
+}
+
+/*
+ * POWER9 PIR layout (IBM bit numbering, bit 0 is the most significant):
+ *
+ *    0:48  Reserved - Read as zeroes
+ *   49:52  Node ID
+ *   53:55  Chip ID
+ *   56     Reserved - Read as zero
+ *   57:61  Core number
+ *   62:63  Thread ID
+ *
+ * Only the low-order bits matter here, so a uint32_t result is enough
+ * for the moment.  The thread field is left at 0.
+ */
+static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
+{
+    uint32_t chip_field = chip->chip_id << 8;
+    uint32_t core_field = core_id << 2;
+
+    return chip_field | core_field;
+}
+
+/* Allowed core identifiers on a POWER8 Processor Chip :
+ *
+ * <EX0 reserved>
+ *  EX1  - Venice only
+ *  EX2  - Venice only
+ *  EX3  - Venice only
+ *  EX4
+ *  EX5
+ *  EX6
+ * <EX7,8 reserved> <reserved>
+ *  EX9  - Venice only
+ *  EX10 - Venice only
+ *  EX11 - Venice only
+ *  EX12
+ *  EX13
+ *  EX14
+ * <EX15 reserved>
+ *
+ * In the masks below, bit N set means that core identifier N is allowed:
+ * POWER8E_CORE_MASK covers EX4-6 and EX12-14, POWER8_CORE_MASK adds the
+ * "Venice only" identifiers EX1-3 and EX9-11.
+ */
+#define POWER8E_CORE_MASK  (0x7070ull)
+#define POWER8_CORE_MASK   (0x7e7eull)
+
+/*
+ * POWER9 has 24 cores, ids starting at 0x20
+ * (bits 32 to 55 of the mask are set)
+ */
+#define POWER9_CORE_MASK   (0xffffff00000000ull)
+
+/* Fill in the class-level constants of the POWER8E chip model. */
+static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER8E";
+
+    chip_class->chip_type = PNV_CHIP_POWER8E;
+    chip_class->cpu_model = "POWER8E";
+    chip_class->chip_cfam_id = 0x221ef04980000000ull;  /* P8 Murano DD2.1 */
+    chip_class->xscom_base = 0x003fc0000000000ull;
+    chip_class->cores_mask = POWER8E_CORE_MASK;
+    chip_class->core_pir = pnv_chip_core_pir_p8;
+}
+
+/* QOM type description of the POWER8E chip, derived from TYPE_PNV_CHIP. */
+static const TypeInfo pnv_chip_power8e_info = {
+    .name          = TYPE_PNV_CHIP_POWER8E,
+    .parent        = TYPE_PNV_CHIP,
+    .instance_size = sizeof(PnvChip),
+    .class_init    = pnv_chip_power8e_class_init,
+};
+
+/* Fill in the class-level constants of the POWER8 chip model. */
+static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER8";
+
+    chip_class->chip_type = PNV_CHIP_POWER8;
+    chip_class->cpu_model = "POWER8";
+    chip_class->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
+    chip_class->xscom_base = 0x003fc0000000000ull;
+    chip_class->cores_mask = POWER8_CORE_MASK;
+    chip_class->core_pir = pnv_chip_core_pir_p8;
+}
+
+/* QOM type description of the POWER8 chip, derived from TYPE_PNV_CHIP. */
+static const TypeInfo pnv_chip_power8_info = {
+    .name          = TYPE_PNV_CHIP_POWER8,
+    .parent        = TYPE_PNV_CHIP,
+    .instance_size = sizeof(PnvChip),
+    .class_init    = pnv_chip_power8_class_init,
+};
+
+/* Fill in the class-level constants of the POWER8NVL chip model. */
+static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER8NVL";
+
+    chip_class->chip_type = PNV_CHIP_POWER8NVL;
+    chip_class->cpu_model = "POWER8NVL";
+    chip_class->chip_cfam_id = 0x120d304980000000ull;  /* P8 Naples DD1.0 */
+    chip_class->xscom_base = 0x003fc0000000000ull;
+    chip_class->cores_mask = POWER8_CORE_MASK;
+    chip_class->core_pir = pnv_chip_core_pir_p8;
+}
+
+/* QOM type description of the POWER8NVL chip, derived from TYPE_PNV_CHIP. */
+static const TypeInfo pnv_chip_power8nvl_info = {
+    .name          = TYPE_PNV_CHIP_POWER8NVL,
+    .parent        = TYPE_PNV_CHIP,
+    .instance_size = sizeof(PnvChip),
+    .class_init    = pnv_chip_power8nvl_class_init,
+};
+
+/* Fill in the class-level constants of the POWER9 chip model. */
+static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER9";
+
+    chip_class->chip_type = PNV_CHIP_POWER9;
+    chip_class->cpu_model = "POWER9";
+    chip_class->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */
+    chip_class->xscom_base = 0x00603fc00000000ull;
+    chip_class->cores_mask = POWER9_CORE_MASK;
+    chip_class->core_pir = pnv_chip_core_pir_p9;
+}
+
+/* QOM type description of the POWER9 chip, derived from TYPE_PNV_CHIP. */
+static const TypeInfo pnv_chip_power9_info = {
+    .name          = TYPE_PNV_CHIP_POWER9,
+    .parent        = TYPE_PNV_CHIP,
+    .instance_size = sizeof(PnvChip),
+    .class_init    = pnv_chip_power9_class_init,
+};
+
+/*
+ * Validate the core configuration of @chip before its cores are created.
+ *
+ * If no custom "cores-mask" was given, the mask of the chip class is
+ * used.  An error is set in @errp when the mask selects core ids the
+ * chip class does not allow, or when more cores are requested than the
+ * mask has bits set.
+ */
+static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
+{
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+    int cores_max;
+
+    /*
+     * No custom mask for this chip, let's use the default one from
+     * the chip class
+     */
+    if (!chip->cores_mask) {
+        chip->cores_mask = pcc->cores_mask;
+    }
+
+    /* filter alien core ids ! some are reserved */
+    if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) {
+        error_setg(errp, "warning: invalid core mask for chip 0x%"PRIx64" !",
+                   chip->cores_mask);
+        return;
+    }
+
+    /*
+     * Now that we have a sane layout, let's check the number of cores.
+     * The mask is 64 bits wide, so count with ctpop64(): hweight_long()
+     * only looks at the low 32 bits on hosts where long is 32 bits.
+     */
+    cores_max = ctpop64(chip->cores_mask);
+    if (chip->nr_cores > cores_max) {
+        error_setg(errp, "warning: too many cores for chip ! Limit is %d",
+                   cores_max);
+        return;
+    }
+}
+
+/*
+ * Instance initializer of a chip: set up the embedded LPC controller as
+ * child "lpc" and copy the XSCOM base address from the chip class.
+ */
+static void pnv_chip_init(Object *obj)
+{
+    PnvChip *chip = PNV_CHIP(obj);
+
+    object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC);
+    object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL);
+
+    chip->xscom_base = PNV_CHIP_GET_CLASS(chip)->xscom_base;
+}
+
+/*
+ * Realize a chip: set up and map its XSCOM bridge, validate the core
+ * configuration, create and realize chip->nr_cores cores (each with its
+ * own XSCOM region), then realize the LPC controller.
+ *
+ * Failures of the core type lookup, of the XSCOM bridge or of the core
+ * sanity check are reported through @errp; failures while creating the
+ * cores or the LPC controller are fatal.
+ */
+static void pnv_chip_realize(DeviceState *dev, Error **errp)
+{
+    PnvChip *chip = PNV_CHIP(dev);
+    Error *error = NULL;
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+    char *typename = pnv_core_typename(pcc->cpu_model);
+    size_t typesize;
+    int i, core_hwid;
+
+    if (!object_class_by_name(typename)) {
+        error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
+        goto out;
+    }
+    /* Only query the size once the type is known to exist */
+    typesize = object_type_get_instance_size(typename);
+
+    /* XSCOM bridge */
+    pnv_xscom_realize(chip, &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto out;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip));
+
+    /* Cores */
+    pnv_chip_core_sanitize(chip, &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto out;
+    }
+
+    chip->cores = g_malloc0(typesize * chip->nr_cores);
+
+    /*
+     * Walk the core ids enabled in the mask, lowest first, until
+     * nr_cores cores have been created.  'i' indexes chip->cores,
+     * 'core_hwid' is the hardware id of the core.
+     */
+    for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8)
+             && (i < chip->nr_cores); core_hwid++) {
+        char core_name[32];
+        void *pnv_core = chip->cores + i * typesize;
+
+        if (!(chip->cores_mask & (1ull << core_hwid))) {
+            continue;
+        }
+
+        object_initialize(pnv_core, typesize, typename);
+        snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid);
+        object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core),
+                                  &error_fatal);
+        object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads",
+                                &error_fatal);
+        object_property_set_int(OBJECT(pnv_core), core_hwid,
+                                CPU_CORE_PROP_CORE_ID, &error_fatal);
+        object_property_set_int(OBJECT(pnv_core),
+                                pcc->core_pir(chip, core_hwid),
+                                "pir", &error_fatal);
+        object_property_set_bool(OBJECT(pnv_core), true, "realized",
+                                 &error_fatal);
+        object_unref(OBJECT(pnv_core));
+
+        /* Each core has an XSCOM MMIO region */
+        pnv_xscom_add_subregion(chip, PNV_XSCOM_EX_CORE_BASE(core_hwid),
+                                &PNV_CORE(pnv_core)->xscom_regs);
+        i++;
+    }
+
+    /* Create LPC controller */
+    object_property_set_bool(OBJECT(&chip->lpc), true, "realized",
+                             &error_fatal);
+    pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs);
+
+out:
+    /* Single exit point so the type name is released on every path */
+    g_free(typename);
+}
+
+/*
+ * qdev properties of a chip.  A "cores-mask" of 0 (the default) makes
+ * pnv_chip_core_sanitize() fall back to the mask of the chip class.
+ */
+static Property pnv_chip_properties[] = {
+    DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
+    DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0),
+    DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0),
+    DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
+    DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class initializer of the abstract chip type shared by all chip models. */
+static void pnv_chip_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip";
+    dev_class->props = pnv_chip_properties;
+    dev_class->realize = pnv_chip_realize;
+}
+
+/*
+ * Abstract base type of all chip models; the concrete POWER8E, POWER8,
+ * POWER8NVL and POWER9 types use it as their parent.
+ */
+static const TypeInfo pnv_chip_info = {
+    .name          = TYPE_PNV_CHIP,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .class_init    = pnv_chip_class_init,
+    .instance_init = pnv_chip_init,
+    .class_size    = sizeof(PnvChipClass),
+    .abstract      = true,
+};
+
+/* Getter of the "num-chips" machine property. */
+static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(obj);
+
+    visit_type_uint32(v, name, &pnv->num_chips, errp);
+}
+
+/*
+ * Setter of the "num-chips" machine property.
+ *
+ * Only powers of two up to 4 are accepted; any other value sets an error
+ * in @errp and leaves the machine state untouched.
+ */
+static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    PnvMachineState *pnv = POWERNV_MACHINE(obj);
+    uint32_t num_chips;
+    Error *local_err = NULL;
+
+    visit_type_uint32(v, name, &num_chips, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /*
+     * TODO: should we decide on how many chips we can create based
+     * on #cores and Venice vs. Murano vs. Naples chip type etc...,
+     */
+    if (!is_power_of_2(num_chips) || num_chips > 4) {
+        /* num_chips is unsigned: print it as such */
+        error_setg(errp, "invalid number of chips: '%" PRIu32 "'", num_chips);
+        return;
+    }
+
+    pnv->num_chips = num_chips;
+}
+
+/* Instance initializer of the machine: default to a single chip. */
+static void powernv_machine_initfn(Object *obj)
+{
+    POWERNV_MACHINE(obj)->num_chips = 1;
+}
+
+/*
+ * Register the class properties of the machine: "num-chips", backed by
+ * pnv_get_num_chips() and pnv_set_num_chips(), with its description.
+ */
+static void powernv_machine_class_props_init(ObjectClass *oc)
+{
+    object_class_property_add(oc, "num-chips", "uint32_t",
+                              pnv_get_num_chips, pnv_set_num_chips,
+                              NULL, NULL, NULL);
+    object_class_property_set_description(oc, "num-chips",
+                              "Specifies the number of processor chips",
+                              NULL);
+}
+
+/* Class initializer of the PowerNV machine type. */
+static void powernv_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+
+    machine_class->desc = "IBM PowerNV (Non-Virtualized)";
+
+    /* Entry points */
+    machine_class->init = ppc_powernv_init;
+    machine_class->reset = ppc_powernv_reset;
+
+    /* Defaults and limits */
+    machine_class->max_cpus = MAX_CPUS;
+    machine_class->default_ram_size = 1 * G_BYTE;
+    machine_class->default_boot_order = NULL;
+    machine_class->no_parallel = 1;
+    /* Pnv provides an AHCI device for storage */
+    machine_class->block_default_type = IF_IDE;
+
+    powernv_machine_class_props_init(oc);
+}
+
+/* QOM type description of the PowerNV machine. */
+static const TypeInfo powernv_machine_info = {
+    .name          = TYPE_POWERNV_MACHINE,
+    .parent        = TYPE_MACHINE,
+    .instance_size = sizeof(PnvMachineState),
+    .instance_init = powernv_machine_initfn,
+    .class_init    = powernv_machine_class_init,
+};
+
+/* Register the machine type and all chip types defined in this file. */
+static void powernv_machine_register_types(void)
+{
+    static const TypeInfo *const pnv_types[] = {
+        &powernv_machine_info,
+        &pnv_chip_info,
+        &pnv_chip_power8e_info,
+        &pnv_chip_power8_info,
+        &pnv_chip_power8nvl_info,
+        &pnv_chip_power9_info,
+    };
+    int n;
+
+    for (n = 0; n < ARRAY_SIZE(pnv_types); n++) {
+        type_register_static(pnv_types[n]);
+    }
+}
+
+type_init(powernv_machine_register_types)
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
new file mode 100644
index 0000000000..2acda9637d
--- /dev/null
+++ b/hw/ppc/pnv_core.c
@@ -0,0 +1,232 @@
+/*
+ * QEMU PowerPC PowerNV CPU Core model
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "sysemu/sysemu.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "target-ppc/cpu.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_core.h"
+
+/*
+ * Reset handler of one CPU thread; @opaque is the PowerPCCPU registered
+ * by powernv_cpu_init().
+ */
+static void powernv_cpu_reset(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+
+    cpu_reset(CPU(cpu));
+
+    /*
+     * skiboot elects a primary thread to initialize the system, and it
+     * can be any of them, so every thread gets the same entry state:
+     * hypervisor mode, entry point 0x10, device tree address in r3.
+     */
+    env->msr |= MSR_HVB; /* Hypervisor mode */
+    env->nip = 0x10;
+    env->gpr[3] = PNV_FDT_ADDR;
+}
+
+/*
+ * Per-thread setup done once the CPU object is realized: default PIR
+ * value, time base, and registration of the reset handler.
+ * @errp is currently never set.
+ */
+static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp)
+{
+    CPUPPCState *env = &cpu->env;
+    /* TODO: TCG supports only one thread */
+    int thread_index = 0;
+    int core_pir = object_property_get_int(OBJECT(cpu), "core-pir",
+                                           &error_abort);
+
+    /*
+     * The PIR of a thread is the core PIR + the thread index. We will
+     * need to find a way to get the thread index when TCG supports
+     * more than 1. We could use the object name ?
+     */
+    env->spr_cb[SPR_PIR].default_value = core_pir + thread_index;
+
+    /* Set time-base frequency to 512 MHz */
+    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
+
+    qemu_register_reset(powernv_cpu_reset, cpu);
+}
+
+/*
+ * These values are read by the PowerNV HW monitors under Linux.
+ * They are compared against the access address shifted right by 3
+ * in pnv_core_xscom_read().
+ */
+#define PNV_XSCOM_EX_DTS_RESULT0     0x50000
+#define PNV_XSCOM_EX_DTS_RESULT1     0x50001
+
+/*
+ * Read handler of the per-core XSCOM region.
+ *
+ * Only the two DTS result registers are implemented and return fixed
+ * values; any other register reads as 0 and is logged as unimplemented.
+ * @opaque and @width are not used.
+ */
+static uint64_t pnv_core_xscom_read(void *opaque, hwaddr addr,
+                                    unsigned int width)
+{
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    /* The result should be 38 C */
+    switch (offset) {
+    case PNV_XSCOM_EX_DTS_RESULT0:
+        val = 0x26f024f023f0000ull;
+        break;
+    case PNV_XSCOM_EX_DTS_RESULT1:
+        val = 0x24f000000000000ull;
+        break;
+    default:
+        /* Terminate the message so log entries do not run together */
+        qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
+                  addr);
+    }
+
+    return val;
+}
+
+/*
+ * Write handler of the per-core XSCOM region: no register is writable
+ * yet, so the value is dropped and the access logged as unimplemented.
+ */
+static void pnv_core_xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                                 unsigned int width)
+{
+    /* Terminate the message so log entries do not run together */
+    qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n",
+                  addr);
+}
+
+/*
+ * Access callbacks of the per-core XSCOM region: big-endian, with both
+ * the valid and the implemented access sizes fixed at 8 bytes.
+ */
+static const MemoryRegionOps pnv_core_xscom_ops = {
+    .read = pnv_core_xscom_read,
+    .write = pnv_core_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * Realize one thread object of a core, then run the PowerNV specific
+ * CPU setup on it.  The first failure is reported through @errp.
+ */
+static void pnv_core_realize_child(Object *child, Error **errp)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(CPU(child));
+    Error *err = NULL;
+
+    object_property_set_bool(child, true, "realized", &err);
+    if (!err) {
+        powernv_cpu_init(cpu, &err);
+    }
+
+    if (err) {
+        error_propagate(errp, err);
+    }
+}
+
+/*
+ * Realize a core: create cc->nr_threads CPU thread objects as children
+ * "thread[N]", give each a "core-pir" alias of the core's "pir"
+ * property, realize them, and set up the core's XSCOM region.
+ *
+ * On failure the threads created so far are unparented, the thread
+ * array is released and the error is reported through @errp.
+ */
+static void pnv_core_realize(DeviceState *dev, Error **errp)
+{
+    PnvCore *pc = PNV_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(OBJECT(dev));
+    PnvCoreClass *pcc = PNV_CORE_GET_CLASS(OBJECT(dev));
+    const char *typename = object_class_get_name(pcc->cpu_oc);
+    size_t size = object_type_get_instance_size(typename);
+    Error *local_err = NULL;
+    void *obj;
+    int i, j;
+    char name[32];
+
+    pc->threads = g_malloc0(size * cc->nr_threads);
+    for (i = 0; i < cc->nr_threads; i++) {
+        obj = pc->threads + i * size;
+
+        object_initialize(obj, size, typename);
+
+        snprintf(name, sizeof(name), "thread[%d]", i);
+        object_property_add_child(OBJECT(pc), name, obj, &local_err);
+        if (local_err) {
+            /*
+             * Stop here: an Error that is already set must not be
+             * handed to another call.
+             */
+            goto err;
+        }
+        object_property_add_alias(obj, "core-pir", OBJECT(pc),
+                                  "pir", &local_err);
+        if (local_err) {
+            goto err;
+        }
+        object_unref(obj);
+    }
+
+    for (j = 0; j < cc->nr_threads; j++) {
+        obj = pc->threads + j * size;
+
+        pnv_core_realize_child(obj, &local_err);
+        if (local_err) {
+            goto err;
+        }
+    }
+
+    snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id);
+    pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), &pnv_core_xscom_ops,
+                          pc, name, PNV_XSCOM_EX_CORE_SIZE);
+    return;
+
+err:
+    /* 'i' is the number of threads that were fully set up */
+    while (--i >= 0) {
+        obj = pc->threads + i * size;
+        object_unparent(obj);
+    }
+    g_free(pc->threads);
+    pc->threads = NULL; /* do not leave a dangling pointer behind */
+    error_propagate(errp, local_err);
+}
+
+/*
+ * qdev properties of a core.  "pir" is the target of the "core-pir"
+ * alias added to each thread in pnv_core_realize().
+ */
+static Property pnv_core_properties[] = {
+    DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer shared by all concrete core types; @data is the CPU
+ * model name used to look up the matching PowerPC CPU class.
+ */
+static void pnv_core_class_init(ObjectClass *oc, void *data)
+{
+    PnvCoreClass *core_class = PNV_CORE_CLASS(oc);
+    DeviceClass *dev_class = DEVICE_CLASS(oc);
+
+    core_class->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data);
+
+    dev_class->props = pnv_core_properties;
+    dev_class->realize = pnv_core_realize;
+}
+
+/*
+ * Abstract base type of all PowerNV cores; the per-model types are
+ * registered in pnv_core_register_types().
+ */
+static const TypeInfo pnv_core_info = {
+    .name           = TYPE_PNV_CORE,
+    .parent         = TYPE_CPU_CORE,
+    .instance_size  = sizeof(PnvCore),
+    .class_size     = sizeof(PnvCoreClass),
+    .abstract       = true,
+};
+
+/* CPU models for which a concrete core type is registered. */
+static const char *pnv_core_models[] = {
+    "POWER8E", "POWER8", "POWER8NVL", "POWER9"
+};
+
+/*
+ * Register the abstract core type, then one concrete core type per
+ * entry of pnv_core_models.
+ */
+static void pnv_core_register_types(void)
+{
+    int idx;
+
+    type_register_static(&pnv_core_info);
+
+    for (idx = 0; idx < ARRAY_SIZE(pnv_core_models); ++idx) {
+        const char *model = pnv_core_models[idx];
+        char *type_name = pnv_core_typename(model);
+        TypeInfo core_type = {
+            .name = type_name,
+            .parent = TYPE_PNV_CORE,
+            .instance_size = sizeof(PnvCore),
+            .class_init = pnv_core_class_init,
+            .class_data = (void *) model,
+        };
+
+        type_register(&core_type);
+        /* NOTE(review): presumably type_register() keeps its own copy */
+        g_free(type_name);
+    }
+}
+
+type_init(pnv_core_register_types)
+
+/*
+ * Build the QOM type name of the core for CPU @model, i.e.
+ * TYPE_PNV_CORE "-" @model.  The caller must g_free() the result.
+ */
+char *pnv_core_typename(const char *model)
+{
+    char *type_name = g_strdup_printf(TYPE_PNV_CORE "-%s", model);
+
+    return type_name;
+}
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
new file mode 100644
index 0000000000..00dbd8b07b
--- /dev/null
+++ b/hw/ppc/pnv_lpc.c
@@ -0,0 +1,471 @@
+/*
+ * QEMU PowerPC PowerNV LPC controller
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/sysemu.h"
+#include "target-ppc/cpu.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+
+#include "hw/ppc/pnv_lpc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/fdt.h"
+
+#include <libfdt.h>
+
+enum {
+    ECCB_CTL    = 0,
+    ECCB_RESET  = 1,
+    ECCB_STAT   = 2,
+    ECCB_DATA   = 3,
+};
+
+/* OPB Master LS registers */
+#define OPB_MASTER_LS_IRQ_STAT  0x50
+#define   OPB_MASTER_IRQ_LPC            0x00000800
+#define OPB_MASTER_LS_IRQ_MASK  0x54
+#define OPB_MASTER_LS_IRQ_POL   0x58
+#define OPB_MASTER_LS_IRQ_INPUT 0x5c
+
+/* LPC HC registers */
+#define LPC_HC_FW_SEG_IDSEL     0x24
+#define LPC_HC_FW_RD_ACC_SIZE   0x28
+#define   LPC_HC_FW_RD_1B               0x00000000
+#define   LPC_HC_FW_RD_2B               0x01000000
+#define   LPC_HC_FW_RD_4B               0x02000000
+#define   LPC_HC_FW_RD_16B              0x04000000
+#define   LPC_HC_FW_RD_128B             0x07000000
+#define LPC_HC_IRQSER_CTRL      0x30
+#define   LPC_HC_IRQSER_EN              0x80000000
+#define   LPC_HC_IRQSER_QMODE           0x40000000
+#define   LPC_HC_IRQSER_START_MASK      0x03000000
+#define   LPC_HC_IRQSER_START_4CLK      0x00000000
+#define   LPC_HC_IRQSER_START_6CLK      0x01000000
+#define   LPC_HC_IRQSER_START_8CLK      0x02000000
+#define LPC_HC_IRQMASK          0x34    /* same bit defs as LPC_HC_IRQSTAT */
+#define LPC_HC_IRQSTAT          0x38
+#define   LPC_HC_IRQ_SERIRQ0            0x80000000 /* all bits down to ... */
+#define   LPC_HC_IRQ_SERIRQ16           0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */
+#define   LPC_HC_IRQ_SERIRQ_ALL         0xffff8000
+#define   LPC_HC_IRQ_LRESET             0x00000400
+#define   LPC_HC_IRQ_SYNC_ABNORM_ERR    0x00000080
+#define   LPC_HC_IRQ_SYNC_NORESP_ERR    0x00000040
+#define   LPC_HC_IRQ_SYNC_NORM_ERR      0x00000020
+#define   LPC_HC_IRQ_SYNC_TIMEOUT_ERR   0x00000010
+#define   LPC_HC_IRQ_SYNC_TARG_TAR_ERR  0x00000008
+#define   LPC_HC_IRQ_SYNC_BM_TAR_ERR    0x00000004
+#define   LPC_HC_IRQ_SYNC_BM0_REQ       0x00000002
+#define   LPC_HC_IRQ_SYNC_BM1_REQ       0x00000001
+#define LPC_HC_ERROR_ADDRESS    0x40
+
+#define LPC_OPB_SIZE            0x100000000ull
+
+#define ISA_IO_SIZE             0x00010000
+#define ISA_MEM_SIZE            0x10000000
+#define LPC_IO_OPB_ADDR         0xd0010000
+#define LPC_IO_OPB_SIZE         0x00010000
+#define LPC_MEM_OPB_ADDR        0xe0000000 /* window ends at the FW base */
+#define LPC_MEM_OPB_SIZE        0x10000000
+#define LPC_FW_OPB_ADDR         0xf0000000
+#define LPC_FW_OPB_SIZE         0x10000000
+
+#define LPC_OPB_REGS_OPB_ADDR   0xc0010000
+#define LPC_OPB_REGS_OPB_SIZE   0x00002000
+#define LPC_HC_REGS_OPB_ADDR    0xc0012000
+#define LPC_HC_REGS_OPB_SIZE    0x00001000
+
+
+/*
+ * TODO: the "primary" cell should only be added on chip 0. This is
+ * how skiboot chooses the default LPC controller on multichip
+ * systems.
+ *
+ * It would be easily done if we could change the populate() interface to
+ * replace the PnvXScomInterface parameter by a PnvChip one
+ */
+static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset)
+{
+    const char compat[] = "ibm,power8-lpc\0ibm,lpc";
+    char *name;
+    int offset;
+    uint32_t lpc_pcba = PNV_XSCOM_LPC_BASE;
+    uint32_t reg[] = {
+        cpu_to_be32(lpc_pcba),
+        cpu_to_be32(PNV_XSCOM_LPC_SIZE)
+    };
+
+    name = g_strdup_printf("isa@%x", lpc_pcba);
+    offset = fdt_add_subnode(fdt, xscom_offset, name);
+    _FDT(offset);
+    g_free(name);
+
+    _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+    _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
+    _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
+    _FDT((fdt_setprop(fdt, offset, "primary", NULL, 0)));
+    _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
+    return 0;
+}
+
+/*
+ * These read/write handlers of the OPB address space should be common
+ * with the P9 LPC Controller which uses direct MMIOs.
+ *
+ * TODO: rework to use address_space_stq() and address_space_ldq()
+ * instead.
+ */
+/*
+ * Read @sz bytes from OPB address @addr into @data.
+ * Returns true when address_space_rw() returned zero, false otherwise.
+ */
+static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data,
+                     int sz)
+{
+    /* XXX Handle access size limits and FW read caching here */
+    return !address_space_rw(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED,
+                             data, sz, false);
+}
+
+/*
+ * Write @sz bytes from @data to OPB address @addr.
+ * Returns true when address_space_rw() returned zero, false otherwise.
+ */
+static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data,
+                      int sz)
+{
+    /* XXX Handle access size limits here */
+    return !address_space_rw(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED,
+                             data, sz, true);
+}
+
+#define ECCB_CTL_READ           (1ull << (63 - 15))
+#define ECCB_CTL_SZ_LSH         (63 - 7)
+#define ECCB_CTL_SZ_MASK        (0xfull << ECCB_CTL_SZ_LSH)
+#define ECCB_CTL_ADDR_MASK      0xffffffffu
+
+#define ECCB_STAT_OP_DONE       (1ull << (63 - 52))
+#define ECCB_STAT_OP_ERR        (1ull << (63 - 52)) /* XXX same as OP_DONE */
+#define ECCB_STAT_RD_DATA_LSH   (63 - 37)
+#define ECCB_STAT_RD_DATA_MASK  (0xffffffffull << ECCB_STAT_RD_DATA_LSH)
+
+/* Execute ECCB command @cmd as an OPB read or write and update the status */
+static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd)
+{
+    /* XXX Check for magic bits at the top, addr size etc... */
+    unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH;
+    uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK;
+    uint8_t data[4] = { 0 };    /* zeroed so short reads cannot leak stack */
+    uint64_t rd_data = 0xffffffffull;   /* value reported if the read fails */
+    /* sz is a guest-written 4-bit field: it must not overrun data[] */
+    if (sz > sizeof(data)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "ECCB: invalid operation at @0x%08x"
+                      " size %u\n", opb_addr, sz);
+        return;
+    }
+    if (cmd & ECCB_CTL_READ) {
+        if (opb_read(lpc, opb_addr, data, sz)) {
+            rd_data = (uint64_t)data[0] << 24 | (uint64_t)data[1] << 16 |
+                      (uint64_t)data[2] <<  8 | (uint64_t)data[3];
+        }
+        lpc->eccb_stat_reg = ECCB_STAT_OP_DONE |
+                             (rd_data << ECCB_STAT_RD_DATA_LSH);
+    } else {
+        data[0] = lpc->eccb_data_reg >> 24;
+        data[1] = lpc->eccb_data_reg >> 16;
+        data[2] = lpc->eccb_data_reg >>  8;
+        data[3] = lpc->eccb_data_reg;
+        opb_write(lpc, opb_addr, data, sz);
+        lpc->eccb_stat_reg = ECCB_STAT_OP_DONE;
+    }
+    /* XXX Which error bit (if any) to signal OPB error ? */
+}
+
+/*
+ * XSCOM read handler for the ECCB registers, selected by (@addr >> 3) & 3.
+ * Reading ECCB_STAT returns the stored status and clears it.
+ */
+static uint64_t pnv_lpc_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+    uint64_t status;
+
+    switch ((uint32_t)(addr >> 3) & 3) {
+    case ECCB_STAT:
+        status = lpc->eccb_stat_reg;
+        lpc->eccb_stat_reg = 0;
+        return status;
+    case ECCB_DATA:
+        return ((uint64_t)lpc->eccb_data_reg) << 32;
+    default:
+        /* ECCB_CTL and ECCB_RESET read back as zero */
+        return 0;
+    }
+}
+
+/* XSCOM write handler: a write to ECCB_CTL runs the command immediately */
+static void pnv_lpc_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+
+    switch ((uint32_t)(addr >> 3) & 3) {
+    case ECCB_CTL:
+        pnv_lpc_do_eccb(lpc, val);
+        break;
+    case ECCB_DATA:
+        /* only the upper 32 bits of the written value are kept */
+        lpc->eccb_data_reg = val >> 32;
+        break;
+    case ECCB_RESET:
+        /*  XXXX  */
+    case ECCB_STAT:
+        break;
+    }
+}
+
+static const MemoryRegionOps pnv_lpc_xscom_ops = {
+    .read = pnv_lpc_xscom_read,
+    .write = pnv_lpc_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+    uint64_t val = 0xfffffffffffffffful; /* unimplemented: all-ones */
+
+    switch (addr) {
+    case LPC_HC_FW_SEG_IDSEL:
+        val = lpc->lpc_hc_fw_seg_idsel;
+        break;
+    case LPC_HC_FW_RD_ACC_SIZE:
+        val = lpc->lpc_hc_fw_rd_acc_size;
+        break;
+    case LPC_HC_IRQSER_CTRL:
+        val = lpc->lpc_hc_irqser_ctrl;
+        break;
+    case LPC_HC_IRQMASK:
+        val = lpc->lpc_hc_irqmask;
+        break;
+    case LPC_HC_IRQSTAT:
+        val = lpc->lpc_hc_irqstat;
+        break;
+    case LPC_HC_ERROR_ADDRESS:
+        val = lpc->lpc_hc_error_addr;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+    }
+    return val;
+}
+
+static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val,
+                         unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    /* XXX Filter out reserved bits */
+
+    switch (addr) {
+    case LPC_HC_FW_SEG_IDSEL:
+        /* XXX Actually figure out how that works as this impacts
+         * memory regions/aliases
+         */
+        lpc->lpc_hc_fw_seg_idsel = val;
+        break;
+    case LPC_HC_FW_RD_ACC_SIZE:
+        lpc->lpc_hc_fw_rd_acc_size = val;
+        break;
+    case LPC_HC_IRQSER_CTRL:
+        lpc->lpc_hc_irqser_ctrl = val;
+        break;
+    case LPC_HC_IRQMASK:
+        lpc->lpc_hc_irqmask = val;
+        break;
+    case LPC_HC_IRQSTAT:
+        lpc->lpc_hc_irqstat &= ~val; /* bits written as 1 are cleared */
+        break;
+    case LPC_HC_ERROR_ADDRESS:
+        break; /* writes are ignored */
+    default:
+        qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+    }
+}
+
+static const MemoryRegionOps lpc_hc_ops = {
+    .read = lpc_hc_read,
+    .write = lpc_hc_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+    uint64_t val = 0xfffffffffffffffful; /* unimplemented: all-ones */
+
+    switch (addr) {
+    case OPB_MASTER_LS_IRQ_STAT:
+        val = lpc->opb_irq_stat;
+        break;
+    case OPB_MASTER_LS_IRQ_MASK:
+        val = lpc->opb_irq_mask;
+        break;
+    case OPB_MASTER_LS_IRQ_POL:
+        val = lpc->opb_irq_pol;
+        break;
+    case OPB_MASTER_LS_IRQ_INPUT:
+        val = lpc->opb_irq_input;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OPB MASTER Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+    }
+
+    return val;
+}
+
+static void opb_master_write(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    switch (addr) {
+    case OPB_MASTER_LS_IRQ_STAT:
+        lpc->opb_irq_stat &= ~val; /* bits written as 1 are cleared */
+        break;
+    case OPB_MASTER_LS_IRQ_MASK:
+        /* XXX Filter out reserved bits */
+        lpc->opb_irq_mask = val;
+        break;
+    case OPB_MASTER_LS_IRQ_POL:
+        /* XXX Filter out reserved bits */
+        lpc->opb_irq_pol = val;
+        break;
+    case OPB_MASTER_LS_IRQ_INPUT:
+        /* Read only */
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OPB MASTER Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+    }
+}
+
+static const MemoryRegionOps opb_master_ops = {
+    .read = opb_master_read,
+    .write = opb_master_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void pnv_lpc_realize(DeviceState *dev, Error **errp)
+{
+    PnvLpcController *lpc = PNV_LPC(dev);
+
+    /* Reg inits */
+    lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B;
+
+    /* Create address space and backing MR for the OPB bus */
+    memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", LPC_OPB_SIZE);
+    address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb");
+
+    /* Create ISA IO and Mem space regions which are the root of
+     * the ISA bus (ie, ISA address spaces). We don't create a
+     * separate one for FW which we alias to memory.
+     */
+    memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE);
+    memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE);
+
+    /* Create windows from the OPB space to the ISA space */
+    memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io",
+                             &lpc->isa_io, 0, LPC_IO_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR,
+                                &lpc->opb_isa_io);
+    memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem",
+                             &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR,
+                                &lpc->opb_isa_mem);
+    memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw",
+                             &lpc->isa_mem, 0, LPC_FW_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR,
+                                &lpc->opb_isa_fw);
+
+    /* Create MMIO regions for LPC HC and OPB registers */
+    memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops,
+                          lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR,
+                                &lpc->opb_master_regs);
+    memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc,
+                          "lpc-hc", LPC_HC_REGS_OPB_SIZE);
+    memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR,
+                                &lpc->lpc_hc_regs);
+
+    /* XScom region for LPC registers */
+    pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(dev),
+                          &pnv_lpc_xscom_ops, lpc, "xscom-lpc",
+                          PNV_XSCOM_LPC_SIZE);
+}
+
+static void pnv_lpc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+    xdc->populate = pnv_lpc_populate;
+
+    dc->realize = pnv_lpc_realize;
+}
+
+static const TypeInfo pnv_lpc_info = {
+    .name          = TYPE_PNV_LPC,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(PnvLpcController),
+    .class_init    = pnv_lpc_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+static void pnv_lpc_register_types(void)
+{
+    type_register_static(&pnv_lpc_info);
+}
+
+type_init(pnv_lpc_register_types)
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
new file mode 100644
index 0000000000..5aaa264bd7
--- /dev/null
+++ b/hw/ppc/pnv_xscom.c
@@ -0,0 +1,275 @@
+/*
+ * QEMU PowerPC PowerNV XSCOM bus
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/hw.h"
+#include "qemu/log.h"
+#include "sysemu/kvm.h"
+#include "target-ppc/cpu.h"
+#include "hw/sysbus.h"
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/pnv.h"
+
+#include <libfdt.h>
+
+static void xscom_complete(CPUState *cs, uint64_t hmer_bits)
+{
+    /*
+     * TODO: When the read/write comes from the monitor, NULL is
+     * passed for the cpu, and no CPU completion is generated.
+     */
+    if (cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *env = &cpu->env;
+
+        /*
+         * TODO: Need a CPU helper to set HMER, also handle generation
+         * of HMIs
+         */
+        cpu_synchronize_state(cs);
+        env->spr[SPR_HMER] |= hmer_bits;
+    }
+}
+
+/* Map an XSCOM window offset to a pcba; the encoding depends on chip type */
+static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr)
+{
+    bool is_p9 = PNV_CHIP_GET_CLASS(chip)->chip_type == PNV_CHIP_POWER9;
+
+    addr &= (PNV_XSCOM_SIZE - 1);
+    if (is_p9) {
+        return addr >> 3;
+    }
+    return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf);
+}
+
+static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba)
+{
+    switch (pcba) {
+    case 0xf000f:
+        return PNV_CHIP_GET_CLASS(chip)->chip_cfam_id;
+    case 0x1010c00:     /* PIBAM FIR */
+    case 0x1010c03:     /* PIBAM FIR MASK */
+    case 0x2020007:     /* ADU stuff */
+    case 0x2020009:     /* ADU stuff */
+    case 0x202000f:     /* ADU stuff */
+        return 0;
+    case 0x2013f00:     /* PBA stuff */
+    case 0x2013f01:     /* PBA stuff */
+    case 0x2013f02:     /* PBA stuff */
+    case 0x2013f03:     /* PBA stuff */
+    case 0x2013f04:     /* PBA stuff */
+    case 0x2013f05:     /* PBA stuff */
+    case 0x2013f06:     /* PBA stuff */
+    case 0x2013f07:     /* PBA stuff */
+        return 0;
+    case 0x2013028:     /* CAPP stuff */
+    case 0x201302a:     /* CAPP stuff */
+    case 0x2013801:     /* CAPP stuff */
+    case 0x2013802:     /* CAPP stuff */
+        return 0;
+    default:
+        return -1;
+    }
+}
+
+static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val)
+{
+    /* We ignore writes to these */
+    switch (pcba) {
+    case 0xf000f:       /* chip id is RO */
+    case 0x1010c00:     /* PIBAM FIR */
+    case 0x1010c01:     /* PIBAM FIR */
+    case 0x1010c02:     /* PIBAM FIR */
+    case 0x1010c03:     /* PIBAM FIR MASK */
+    case 0x1010c04:     /* PIBAM FIR MASK */
+    case 0x1010c05:     /* PIBAM FIR MASK */
+    case 0x2020007:     /* ADU stuff */
+    case 0x2020009:     /* ADU stuff */
+    case 0x202000f:     /* ADU stuff */
+        return true;
+    default:
+        return false;
+    }
+}
+
+/*
+ * XSCOM window read: try the built-in defaults, else forward to xscom_as.
+ */
+static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width)
+{
+    PnvChip *chip = opaque;
+    uint32_t pcba = pnv_xscom_pcba(chip, addr);
+    MemTxResult result;
+    uint64_t val;
+
+    /* Handle some SCOMs here before dispatch */
+    val = xscom_read_default(chip, pcba);
+    if (val == -1) {
+        val = address_space_ldq(&chip->xscom_as, pcba << 3,
+                                MEMTXATTRS_UNSPECIFIED, &result);
+        if (result != MEMTX_OK) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XSCOM read failed at @0x%"
+                          HWADDR_PRIx " pcba=0x%08x\n", addr, pcba);
+            xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
+            return 0;
+        }
+    }
+
+    xscom_complete(current_cpu, HMER_XSCOM_DONE);
+    return val;
+}
+
+/*
+ * XSCOM window write: skip the ignored defaults, else forward to xscom_as.
+ */
+static void xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                        unsigned width)
+{
+    PnvChip *chip = opaque;
+    uint32_t pcba = pnv_xscom_pcba(chip, addr);
+    MemTxResult result;
+
+    /* Handle some SCOMs here before dispatch */
+    if (!xscom_write_default(chip, pcba, val)) {
+        address_space_stq(&chip->xscom_as, pcba << 3, val,
+                          MEMTXATTRS_UNSPECIFIED, &result);
+        if (result != MEMTX_OK) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XSCOM write failed at @0x%"
+                          HWADDR_PRIx " pcba=0x%08x data=0x%" PRIx64 "\n",
+                          addr, pcba, val);
+            xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
+            return;
+        }
+    }
+
+    xscom_complete(current_cpu, HMER_XSCOM_DONE);
+}
+
+const MemoryRegionOps pnv_xscom_ops = {
+    .read = xscom_read,
+    .write = xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+void pnv_xscom_realize(PnvChip *chip, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(chip);
+    char *name;
+
+    name = g_strdup_printf("xscom-%x", chip->chip_id);
+    memory_region_init_io(&chip->xscom_mmio, OBJECT(chip), &pnv_xscom_ops,
+                          chip, name, PNV_XSCOM_SIZE);
+    sysbus_init_mmio(sbd, &chip->xscom_mmio);
+
+    memory_region_init(&chip->xscom, OBJECT(chip), name, PNV_XSCOM_SIZE);
+    address_space_init(&chip->xscom_as, &chip->xscom, name);
+    g_free(name);
+}
+
+static const TypeInfo pnv_xscom_interface_info = {
+    .name = TYPE_PNV_XSCOM_INTERFACE,
+    .parent = TYPE_INTERFACE,
+    .class_size = sizeof(PnvXScomInterfaceClass),
+};
+
+static void pnv_xscom_register_types(void)
+{
+    type_register_static(&pnv_xscom_interface_info);
+}
+
+type_init(pnv_xscom_register_types)
+
+typedef struct ForeachPopulateArgs {
+    void *fdt;
+    int xscom_offset;
+} ForeachPopulateArgs;
+
+static int xscom_populate_child(Object *child, void *opaque)
+{
+    if (object_dynamic_cast(child, TYPE_PNV_XSCOM_INTERFACE)) {
+        ForeachPopulateArgs *args = opaque;
+        PnvXScomInterface *xd = PNV_XSCOM_INTERFACE(child);
+        PnvXScomInterfaceClass *xc = PNV_XSCOM_INTERFACE_GET_CLASS(xd);
+
+        if (xc->populate) {
+            _FDT((xc->populate(xd, args->fdt, args->xscom_offset)));
+        }
+    }
+    return 0;
+}
+
+static const char compat_p8[] = "ibm,power8-xscom\0ibm,xscom";
+static const char compat_p9[] = "ibm,power9-xscom\0ibm,xscom";
+
+int pnv_xscom_populate(PnvChip *chip, void *fdt, int root_offset)
+{
+    uint64_t reg[] = { cpu_to_be64(PNV_XSCOM_BASE(chip)),
+                       cpu_to_be64(PNV_XSCOM_SIZE) };
+    int xscom_offset;
+    ForeachPopulateArgs args;
+    char *name;
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+
+    name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0]));
+    xscom_offset = fdt_add_subnode(fdt, root_offset, name);
+    _FDT(xscom_offset);
+    g_free(name);
+    _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id)));
+    _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1)));
+    _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1)));
+    _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg))));
+
+    if (pcc->chip_type == PNV_CHIP_POWER9) {
+        _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p9,
+                          sizeof(compat_p9))));
+    } else {
+        _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p8,
+                          sizeof(compat_p8))));
+    }
+
+    _FDT((fdt_setprop(fdt, xscom_offset, "scom-controller", NULL, 0)));
+
+    args.fdt = fdt;
+    args.xscom_offset = xscom_offset;
+
+    object_child_foreach(OBJECT(chip), xscom_populate_child, &args);
+    return 0;
+}
+
+void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, MemoryRegion *mr)
+{
+    memory_region_add_subregion(&chip->xscom, offset << 3, mr);
+}
+
+void pnv_xscom_region_init(MemoryRegion *mr,
+                           struct Object *owner,
+                           const MemoryRegionOps *ops,
+                           void *opaque,
+                           const char *name,
+                           uint64_t size)
+{
+    memory_region_init_io(mr, owner, ops, opaque, name, size << 3);
+}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 486f57d6f6..c8e29212cb 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -271,205 +271,6 @@ static void add_str(GString *s, const gchar *s1)
     g_string_append_len(s, s1, strlen(s1) + 1);
 }
 
-static void *spapr_create_fdt_skel(hwaddr initrd_base,
-                                   hwaddr initrd_size,
-                                   hwaddr kernel_size,
-                                   bool little_endian,
-                                   const char *kernel_cmdline,
-                                   uint32_t epow_irq)
-{
-    void *fdt;
-    uint32_t start_prop = cpu_to_be32(initrd_base);
-    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
-    GString *hypertas = g_string_sized_new(256);
-    GString *qemu_hypertas = g_string_sized_new(256);
-    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
-    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
-    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
-    char *buf;
-
-    add_str(hypertas, "hcall-pft");
-    add_str(hypertas, "hcall-term");
-    add_str(hypertas, "hcall-dabr");
-    add_str(hypertas, "hcall-interrupt");
-    add_str(hypertas, "hcall-tce");
-    add_str(hypertas, "hcall-vio");
-    add_str(hypertas, "hcall-splpar");
-    add_str(hypertas, "hcall-bulk");
-    add_str(hypertas, "hcall-set-mode");
-    add_str(hypertas, "hcall-sprg0");
-    add_str(hypertas, "hcall-copy");
-    add_str(hypertas, "hcall-debug");
-    add_str(qemu_hypertas, "hcall-memop1");
-
-    fdt = g_malloc0(FDT_MAX_SIZE);
-    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
-
-    if (kernel_size) {
-        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
-    }
-    if (initrd_size) {
-        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
-    }
-    _FDT((fdt_finish_reservemap(fdt)));
-
-    /* Root node */
-    _FDT((fdt_begin_node(fdt, "")));
-    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
-    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
-    _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
-
-    /*
-     * Add info to guest to indentify which host is it being run on
-     * and what is the uuid of the guest
-     */
-    if (kvmppc_get_host_model(&buf)) {
-        _FDT((fdt_property_string(fdt, "host-model", buf)));
-        g_free(buf);
-    }
-    if (kvmppc_get_host_serial(&buf)) {
-        _FDT((fdt_property_string(fdt, "host-serial", buf)));
-        g_free(buf);
-    }
-
-    buf = qemu_uuid_unparse_strdup(&qemu_uuid);
-
-    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
-    if (qemu_uuid_set) {
-        _FDT((fdt_property_string(fdt, "system-id", buf)));
-    }
-    g_free(buf);
-
-    if (qemu_get_vm_name()) {
-        _FDT((fdt_property_string(fdt, "ibm,partition-name",
-                                  qemu_get_vm_name())));
-    }
-
-    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
-    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
-
-    /* /chosen */
-    _FDT((fdt_begin_node(fdt, "chosen")));
-
-    /* Set Form1_affinity */
-    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
-
-    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
-    _FDT((fdt_property(fdt, "linux,initrd-start",
-                       &start_prop, sizeof(start_prop))));
-    _FDT((fdt_property(fdt, "linux,initrd-end",
-                       &end_prop, sizeof(end_prop))));
-    if (kernel_size) {
-        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
-                              cpu_to_be64(kernel_size) };
-
-        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
-        if (little_endian) {
-            _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
-        }
-    }
-    if (boot_menu) {
-        _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
-    }
-    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
-    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
-    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
-
-    _FDT((fdt_end_node(fdt)));
-
-    /* RTAS */
-    _FDT((fdt_begin_node(fdt, "rtas")));
-
-    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
-        add_str(hypertas, "hcall-multi-tce");
-    }
-    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
-                       hypertas->len)));
-    g_string_free(hypertas, TRUE);
-    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
-                       qemu_hypertas->len)));
-    g_string_free(qemu_hypertas, TRUE);
-
-    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
-        refpoints, sizeof(refpoints))));
-
-    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
-    _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate",
-                            RTAS_EVENT_SCAN_RATE)));
-
-    if (msi_nonbroken) {
-        _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0)));
-    }
-
-    /*
-     * According to PAPR, rtas ibm,os-term does not guarantee a return
-     * back to the guest cpu.
-     *
-     * While an additional ibm,extended-os-term property indicates that
-     * rtas call return will always occur. Set this property.
-     */
-    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));
-
-    _FDT((fdt_end_node(fdt)));
-
-    /* interrupt controller */
-    _FDT((fdt_begin_node(fdt, "interrupt-controller")));
-
-    _FDT((fdt_property_string(fdt, "device_type",
-                              "PowerPC-External-Interrupt-Presentation")));
-    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
-    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
-    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
-                       interrupt_server_ranges_prop,
-                       sizeof(interrupt_server_ranges_prop))));
-    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
-    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
-    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
-
-    _FDT((fdt_end_node(fdt)));
-
-    /* vdevice */
-    _FDT((fdt_begin_node(fdt, "vdevice")));
-
-    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
-    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
-    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
-    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
-    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
-    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
-
-    _FDT((fdt_end_node(fdt)));
-
-    /* event-sources */
-    spapr_events_fdt_skel(fdt, epow_irq);
-
-    /* /hypervisor node */
-    if (kvm_enabled()) {
-        uint8_t hypercall[16];
-
-        /* indicate KVM hypercall interface */
-        _FDT((fdt_begin_node(fdt, "hypervisor")));
-        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
-        if (kvmppc_has_cap_fixup_hcalls()) {
-            /*
-             * Older KVM versions with older guest kernels were broken with the
-             * magic page, don't allow the guest to map it.
-             */
-            if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
-                                      sizeof(hypercall))) {
-                _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
-                                   sizeof(hypercall))));
-            }
-        }
-        _FDT((fdt_end_node(fdt)));
-    }
-
-    _FDT((fdt_end_node(fdt))); /* close root node */
-    _FDT((fdt_finish(fdt)));
-
-    return fdt;
-}
-
 static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
                                        hwaddr size)
 {
@@ -854,13 +655,42 @@ out:
     return ret;
 }
 
+static int spapr_dt_cas_updates(sPAPRMachineState *spapr, void *fdt,
+                                sPAPROptionVector *ov5_updates)
+{
+    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    int ret = 0, offset;
+
+    /* Generate ibm,dynamic-reconfiguration-memory node if required */
+    if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) {
+        g_assert(smc->dr_lmb_enabled);
+        ret = spapr_populate_drconf_memory(spapr, fdt);
+        if (ret) {
+            goto out;
+        }
+    }
+
+    offset = fdt_path_offset(fdt, "/chosen");
+    if (offset < 0) {
+        offset = fdt_add_subnode(fdt, 0, "chosen");
+        if (offset < 0) {
+            return offset;
+        }
+    }
+    ret = spapr_ovec_populate_dt(fdt, offset, spapr->ov5_cas,
+                                 "ibm,architecture-vec-5");
+
+out:
+    return ret;
+}
+
 int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
                                  target_ulong addr, target_ulong size,
-                                 bool cpu_update, bool memory_update)
+                                 bool cpu_update,
+                                 sPAPROptionVector *ov5_updates)
 {
     void *fdt, *fdt_skel;
     sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
-    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
 
     size -= sizeof(hdr);
 
@@ -879,9 +709,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
         _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
     }
 
-    /* Generate ibm,dynamic-reconfiguration-memory node if required */
-    if (memory_update && smc->dr_lmb_enabled) {
-        _FDT((spapr_populate_drconf_memory(spapr, fdt)));
+    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
+        return -1;
     }
 
     /* Pack resulting tree */
@@ -900,25 +729,206 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
     return 0;
 }
 
-static void spapr_finalize_fdt(sPAPRMachineState *spapr,
-                               hwaddr fdt_addr,
-                               hwaddr rtas_addr,
-                               hwaddr rtas_size)
+static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
+{
+    int rtas;
+    GString *hypertas = g_string_sized_new(256);
+    GString *qemu_hypertas = g_string_sized_new(256);
+    uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) };
+    uint64_t max_hotplug_addr = spapr->hotplug_memory.base +
+        memory_region_size(&spapr->hotplug_memory.mr);
+    uint32_t lrdr_capacity[] = {
+        cpu_to_be32(max_hotplug_addr >> 32),
+        cpu_to_be32(max_hotplug_addr & 0xffffffff),
+        0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
+        cpu_to_be32(max_cpus / smp_threads),
+    };
+
+    _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
+
+    /* hypertas */
+    add_str(hypertas, "hcall-pft");
+    add_str(hypertas, "hcall-term");
+    add_str(hypertas, "hcall-dabr");
+    add_str(hypertas, "hcall-interrupt");
+    add_str(hypertas, "hcall-tce");
+    add_str(hypertas, "hcall-vio");
+    add_str(hypertas, "hcall-splpar");
+    add_str(hypertas, "hcall-bulk");
+    add_str(hypertas, "hcall-set-mode");
+    add_str(hypertas, "hcall-sprg0");
+    add_str(hypertas, "hcall-copy");
+    add_str(hypertas, "hcall-debug");
+    add_str(qemu_hypertas, "hcall-memop1");
+
+    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
+        add_str(hypertas, "hcall-multi-tce");
+    }
+    _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
+                     hypertas->str, hypertas->len));
+    g_string_free(hypertas, TRUE);
+    _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions",
+                     qemu_hypertas->str, qemu_hypertas->len));
+    g_string_free(qemu_hypertas, TRUE);
+
+    _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
+                     refpoints, sizeof(refpoints)));
+
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
+                          RTAS_ERROR_LOG_MAX));
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
+                          RTAS_EVENT_SCAN_RATE));
+
+    if (msi_nonbroken) {
+        _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0));
+    }
+
+    /*
+     * According to PAPR, rtas ibm,os-term does not guarantee a return
+     * back to the guest cpu.
+     *
+     * While an additional ibm,extended-os-term property indicates
+     * that rtas call return will always occur. Set this property.
+     */
+    _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0));
+
+    _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity",
+                     lrdr_capacity, sizeof(lrdr_capacity)));
+
+    spapr_dt_rtas_tokens(fdt, rtas);
+}
+
+static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
+{
+    MachineState *machine = MACHINE(spapr);
+    int chosen;
+    const char *boot_device = machine->boot_order;
+    char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+    size_t cb = 0;
+    char *bootlist = get_boot_devices_list(&cb, true);
+
+    _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
+
+    _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline));
+    _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
+                          spapr->initrd_base));
+    _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
+                          spapr->initrd_base + spapr->initrd_size));
+
+    if (spapr->kernel_size) {
+        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
+                              cpu_to_be64(spapr->kernel_size) };
+
+        _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
+                         &kprop, sizeof(kprop)));
+        if (spapr->kernel_le) {
+            _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0));
+        }
+    }
+    if (boot_menu) {
+        _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu)));
+    }
+    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width));
+    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height));
+    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth));
+
+    if (cb && bootlist) {
+        int i;
+
+        for (i = 0; i < cb; i++) {
+            if (bootlist[i] == '\n') {
+                bootlist[i] = ' ';
+            }
+        }
+        _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist));
+    }
+
+    if (boot_device && strlen(boot_device)) {
+        _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device));
+    }
+
+    if (!spapr->has_graphics && stdout_path) {
+        _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path));
+    }
+
+    g_free(stdout_path);
+    g_free(bootlist);
+}
+
+static void spapr_dt_hypervisor(sPAPRMachineState *spapr, void *fdt)
+{
+    /* The /hypervisor node isn't in PAPR - this is a hack to allow PR
+     * KVM to work under pHyp with some guest co-operation */
+    int hypervisor;
+    uint8_t hypercall[16];
+
+    _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor"));
+    /* indicate KVM hypercall interface */
+    _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm"));
+    if (kvmppc_has_cap_fixup_hcalls()) {
+        /*
+         * Older KVM versions with older guest kernels were broken
+         * with the magic page, don't allow the guest to map it.
+         */
+        if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
+                                  sizeof(hypercall))) {
+            _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions",
+                             hypercall, sizeof(hypercall)));
+        }
+    }
+}
+
+static void *spapr_build_fdt(sPAPRMachineState *spapr,
+                             hwaddr rtas_addr,
+                             hwaddr rtas_size)
 {
     MachineState *machine = MACHINE(qdev_get_machine());
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
-    const char *boot_device = machine->boot_order;
-    int ret, i;
-    size_t cb = 0;
-    char *bootlist;
+    int ret;
     void *fdt;
     sPAPRPHBState *phb;
+    char *buf;
+
+    fdt = g_malloc0(FDT_MAX_SIZE);
+    _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
+
+    /* Root node */
+    _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp"));
+    _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)"));
+    _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries"));
+
+    /*
+     * Add info to guest to identify which host it is being run on
+     * and what the uuid of the guest is
+     */
+    if (kvmppc_get_host_model(&buf)) {
+        _FDT(fdt_setprop_string(fdt, 0, "host-model", buf));
+        g_free(buf);
+    }
+    if (kvmppc_get_host_serial(&buf)) {
+        _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf));
+        g_free(buf);
+    }
+
+    buf = qemu_uuid_unparse_strdup(&qemu_uuid);
+
+    _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf));
+    if (qemu_uuid_set) {
+        _FDT(fdt_setprop_string(fdt, 0, "system-id", buf));
+    }
+    g_free(buf);
+
+    if (qemu_get_vm_name()) {
+        _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name",
+                                qemu_get_vm_name()));
+    }
 
-    fdt = g_malloc(FDT_MAX_SIZE);
+    _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2));
+    _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
 
-    /* open out the base tree into a temp buffer for the final tweaks */
-    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
+    /* /interrupt controller */
+    spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP);
 
     ret = spapr_populate_memory(spapr, fdt);
     if (ret < 0) {
@@ -926,11 +936,8 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
         exit(1);
     }
 
-    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
-    if (ret < 0) {
-        error_report("couldn't setup vio devices in fdt");
-        exit(1);
-    }
+    /* /vdevice */
+    spapr_dt_vdevice(spapr->vio_bus, fdt);
 
     if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
         ret = spapr_rng_populate_dt(fdt);
@@ -948,43 +955,9 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
         }
     }
 
-    /* RTAS */
-    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
-    if (ret < 0) {
-        error_report("Couldn't set up RTAS device tree properties");
-    }
-
     /* cpus */
     spapr_populate_cpus_dt_node(fdt, spapr);
 
-    bootlist = get_boot_devices_list(&cb, true);
-    if (cb && bootlist) {
-        int offset = fdt_path_offset(fdt, "/chosen");
-        if (offset < 0) {
-            exit(1);
-        }
-        for (i = 0; i < cb; i++) {
-            if (bootlist[i] == '\n') {
-                bootlist[i] = ' ';
-            }
-
-        }
-        ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist);
-    }
-
-    if (boot_device && strlen(boot_device)) {
-        int offset = fdt_path_offset(fdt, "/chosen");
-
-        if (offset < 0) {
-            exit(1);
-        }
-        fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device);
-    }
-
-    if (!spapr->has_graphics) {
-        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
-    }
-
     if (smc->dr_lmb_enabled) {
         _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
     }
@@ -999,19 +972,36 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
         }
     }
 
-    _FDT((fdt_pack(fdt)));
+    /* /event-sources */
+    spapr_dt_events(spapr, fdt);
 
-    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
-        error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
-                     fdt_totalsize(fdt), FDT_MAX_SIZE);
-        exit(1);
+    /* /rtas */
+    spapr_dt_rtas(spapr, fdt);
+
+    /* /chosen */
+    spapr_dt_chosen(spapr, fdt);
+
+    /* /hypervisor */
+    if (kvm_enabled()) {
+        spapr_dt_hypervisor(spapr, fdt);
     }
 
-    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
-    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+    /* Build memory reserve map */
+    if (spapr->kernel_size) {
+        _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size)));
+    }
+    if (spapr->initrd_size) {
+        _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size)));
+    }
 
-    g_free(bootlist);
-    g_free(fdt);
+    /* ibm,client-architecture-support updates */
+    ret = spapr_dt_cas_updates(spapr, fdt, spapr->ov5_cas);
+    if (ret < 0) {
+        error_report("couldn't setup CAS properties fdt");
+        exit(1);
+    }
+
+    return fdt;
 }
 
 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
@@ -1147,6 +1137,9 @@ static void ppc_spapr_reset(void)
     sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     PowerPCCPU *first_ppc_cpu;
     uint32_t rtas_limit;
+    hwaddr rtas_addr, fdt_addr;
+    void *fdt;
+    int rc;
 
     /* Check for unknown sysbus devices */
     foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
@@ -1170,24 +1163,44 @@ static void ppc_spapr_reset(void)
      * processed with 32-bit real mode code if necessary
      */
     rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
-    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
-    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
+    rtas_addr = rtas_limit - RTAS_MAX_SIZE;
+    fdt_addr = rtas_addr - FDT_MAX_SIZE;
 
-    /* Load the fdt */
-    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
-                       spapr->rtas_size);
+    /* if this reset wasn't generated by CAS, we should reset our
+     * negotiated options and start from scratch */
+    if (!spapr->cas_reboot) {
+        spapr_ovec_cleanup(spapr->ov5_cas);
+        spapr->ov5_cas = spapr_ovec_new();
+    }
+
+    fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size);
+
+    spapr_load_rtas(spapr, fdt, rtas_addr);
 
-    /* Copy RTAS over */
-    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
-                              spapr->rtas_size);
+    rc = fdt_pack(fdt);
+
+    /* Should only fail if we've built a corrupted tree */
+    assert(rc == 0);
+
+    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
+        error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
+                     fdt_totalsize(fdt), FDT_MAX_SIZE);
+        exit(1);
+    }
+
+    /* Load the fdt */
+    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+    g_free(fdt);
 
     /* Set up the entry state */
     first_ppc_cpu = POWERPC_CPU(first_cpu);
-    first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
+    first_ppc_cpu->env.gpr[3] = fdt_addr;
     first_ppc_cpu->env.gpr[5] = 0;
     first_cpu->halted = 0;
     first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;
 
+    spapr->cas_reboot = false;
 }
 
 static void spapr_create_nvram(sPAPRMachineState *spapr)
@@ -1682,7 +1695,6 @@ static void ppc_spapr_init(MachineState *machine)
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
     const char *kernel_filename = machine->kernel_filename;
-    const char *kernel_cmdline = machine->kernel_cmdline;
     const char *initrd_filename = machine->initrd_filename;
     PCIHostState *phb;
     int i;
@@ -1692,10 +1704,7 @@ static void ppc_spapr_init(MachineState *machine)
     void *rma = NULL;
     hwaddr rma_alloc_size;
     hwaddr node0_size = spapr_node0_size();
-    uint32_t initrd_base = 0;
-    long kernel_size = 0, initrd_size = 0;
     long load_limit, fw_size;
-    bool kernel_le = false;
     char *filename;
     int smt = kvmppc_smt_threads();
     int spapr_cores = smp_cpus / smp_threads;
@@ -1769,10 +1778,22 @@ static void ppc_spapr_init(MachineState *machine)
                                    DIV_ROUND_UP(max_cpus * smt, smp_threads),
                                    XICS_IRQS_SPAPR, &error_fatal);
 
+    /* Set up containers for ibm,client-architecture-support negotiated options */
+    spapr->ov5 = spapr_ovec_new();
+    spapr->ov5_cas = spapr_ovec_new();
+
     if (smc->dr_lmb_enabled) {
+        spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY);
         spapr_validate_node_memory(machine, &error_fatal);
     }
 
+    spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
+
+    /* advertise support for dedicated HP event source to guests */
+    if (spapr->use_hotplug_event_source) {
+        spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
+    }
+
     /* init CPUs */
     if (machine->cpu_model == NULL) {
         machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu;
@@ -1896,7 +1917,7 @@ static void ppc_spapr_init(MachineState *machine)
     }
     g_free(filename);
 
-    /* Set up EPOW events infrastructure */
+    /* Set up RTAS event infrastructure */
     spapr_events_init(spapr);
 
     /* Set up the RTC RTAS interfaces */
@@ -1968,19 +1989,19 @@ static void ppc_spapr_init(MachineState *machine)
     if (kernel_filename) {
         uint64_t lowaddr = 0;
 
-        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
-                               NULL, &lowaddr, NULL, 1, PPC_ELF_MACHINE,
-                               0, 0);
-        if (kernel_size == ELF_LOAD_WRONG_ENDIAN) {
-            kernel_size = load_elf(kernel_filename,
-                                   translate_kernel_address, NULL,
-                                   NULL, &lowaddr, NULL, 0, PPC_ELF_MACHINE,
-                                   0, 0);
-            kernel_le = kernel_size > 0;
-        }
-        if (kernel_size < 0) {
-            error_report("error loading %s: %s",
-                         kernel_filename, load_elf_strerror(kernel_size));
+        spapr->kernel_size = load_elf(kernel_filename, translate_kernel_address,
+                                      NULL, NULL, &lowaddr, NULL, 1,
+                                      PPC_ELF_MACHINE, 0, 0);
+        if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) {
+            spapr->kernel_size = load_elf(kernel_filename,
+                                          translate_kernel_address, NULL, NULL,
+                                          &lowaddr, NULL, 0, PPC_ELF_MACHINE,
+                                          0, 0);
+            spapr->kernel_le = spapr->kernel_size > 0;
+        }
+        if (spapr->kernel_size < 0) {
+            error_report("error loading %s: %s", kernel_filename,
+                         load_elf_strerror(spapr->kernel_size));
             exit(1);
         }
 
@@ -1989,17 +2010,17 @@ static void ppc_spapr_init(MachineState *machine)
             /* Try to locate the initrd in the gap between the kernel
              * and the firmware. Add a bit of space just in case
              */
-            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
-            initrd_size = load_image_targphys(initrd_filename, initrd_base,
-                                              load_limit - initrd_base);
-            if (initrd_size < 0) {
+            spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size
+                                  + 0x1ffff) & ~0xffff;
+            spapr->initrd_size = load_image_targphys(initrd_filename,
+                                                     spapr->initrd_base,
+                                                     load_limit
+                                                     - spapr->initrd_base);
+            if (spapr->initrd_size < 0) {
                 error_report("could not load initial ram disk '%s'",
                              initrd_filename);
                 exit(1);
             }
-        } else {
-            initrd_base = 0;
-            initrd_size = 0;
         }
     }
 
@@ -2025,13 +2046,6 @@ static void ppc_spapr_init(MachineState *machine)
     register_savevm_live(NULL, "spapr/htab", -1, 1,
                          &savevm_htab_handlers, spapr);
 
-    /* Prepare the device tree */
-    spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
-                                            kernel_size, kernel_le,
-                                            kernel_cmdline,
-                                            spapr->check_exception_irq);
-    assert(spapr->fdt_skel != NULL);
-
     /* used by RTAS */
     QTAILQ_INIT(&spapr->ccs_list);
     qemu_register_reset(spapr_ccs_reset_hook, spapr);
@@ -2129,16 +2143,41 @@ static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
     spapr->kvm_type = g_strdup(value);
 }
 
+static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    return spapr->use_hotplug_event_source;
+}
+
+static void spapr_set_modern_hotplug_events(Object *obj, bool value,
+                                            Error **errp)
+{
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
+
+    spapr->use_hotplug_event_source = value;
+}
+
 static void spapr_machine_initfn(Object *obj)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
 
     spapr->htab_fd = -1;
+    spapr->use_hotplug_event_source = true;
     object_property_add_str(obj, "kvm-type",
                             spapr_get_kvm_type, spapr_set_kvm_type, NULL);
     object_property_set_description(obj, "kvm-type",
                                     "Specifies the KVM virtualization mode (HV, PR)",
                                     NULL);
+    object_property_add_bool(obj, "modern-hotplug-events",
+                            spapr_get_modern_hotplug_events,
+                            spapr_set_modern_hotplug_events,
+                            NULL);
+    object_property_set_description(obj, "modern-hotplug-events",
+                                    "Use dedicated hotplug event mechanism in"
+                                    " place of standard EPOW events when possible"
+                                    " (required for memory hot-unplug support)",
+                                    NULL);
 }
 
 static void spapr_machine_finalizefn(Object *obj)
@@ -2163,14 +2202,16 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
     }
 }
 
-static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
-                           uint32_t node, Error **errp)
+static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
+                           uint32_t node, bool dedicated_hp_event_source,
+                           Error **errp)
 {
     sPAPRDRConnector *drc;
     sPAPRDRConnectorClass *drck;
     uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
     int i, fdt_offset, fdt_size;
     void *fdt;
+    uint64_t addr = addr_start;
 
     for (i = 0; i < nr_lmbs; i++) {
         drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
@@ -2189,7 +2230,17 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
      * guest only in case of hotplugged memory
      */
     if (dev->hotplugged) {
-       spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs);
+        if (dedicated_hp_event_source) {
+            drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                    addr_start / SPAPR_MEMORY_BLOCK_SIZE);
+            drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+            spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                                   nr_lmbs,
+                                                   drck->get_index(drc));
+        } else {
+            spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                           nr_lmbs);
+        }
     }
 }
 
@@ -2222,8 +2273,94 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    spapr_add_lmbs(dev, addr, size, node, &error_abort);
+    spapr_add_lmbs(dev, addr, size, node,
+                   spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
+                   &error_abort);
+
+out:
+    error_propagate(errp, local_err);
+}
+
+typedef struct sPAPRDIMMState {
+    uint32_t nr_lmbs;
+} sPAPRDIMMState;
+
+static void spapr_lmb_release(DeviceState *dev, void *opaque)
+{
+    sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
+    HotplugHandler *hotplug_ctrl;
+
+    if (--ds->nr_lmbs) {
+        return;
+    }
+
+    g_free(ds);
+
+    /*
+     * Now that all the LMBs have been removed by the guest, call the
+     * pc-dimm unplug handler to clean up the pc-dimm device.
+     */
+    hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+}
+
+static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
+                           Error **errp)
+{
+    sPAPRDRConnector *drc;
+    sPAPRDRConnectorClass *drck;
+    uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+    int i;
+    sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState));
+    uint64_t addr = addr_start;
+
+    ds->nr_lmbs = nr_lmbs;
+    for (i = 0; i < nr_lmbs; i++) {
+        drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                addr / SPAPR_MEMORY_BLOCK_SIZE);
+        g_assert(drc);
+
+        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+        drck->detach(drc, dev, spapr_lmb_release, ds, errp);
+        addr += SPAPR_MEMORY_BLOCK_SIZE;
+    }
+
+    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                   addr_start / SPAPR_MEMORY_BLOCK_SIZE);
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                              nr_lmbs,
+                                              drck->get_index(drc));
+}
+
+static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+                                Error **errp)
+{
+    sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
+
+    pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
+    object_unparent(OBJECT(dev));
+}
+
+static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    Error *local_err = NULL;
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *mr = ddc->get_memory_region(dimm);
+    uint64_t size = memory_region_size(mr);
+    uint64_t addr;
 
+    addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    spapr_del_lmbs(dev, addr, size, &error_abort);
 out:
     error_propagate(errp, local_err);
 }
@@ -2301,10 +2438,42 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
 static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp)
 {
+    sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine());
     MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
 
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
-        error_setg(errp, "Memory hot unplug not supported by sPAPR");
+        if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
+            spapr_memory_unplug(hotplug_dev, dev, errp);
+        } else {
+            error_setg(errp, "Memory hot unplug not supported for this guest");
+        }
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+        if (!mc->query_hotpluggable_cpus) {
+            error_setg(errp, "CPU hot unplug not supported on this machine");
+            return;
+        }
+        spapr_core_unplug(hotplug_dev, dev, errp);
+    }
+}
+
+static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
+                                                DeviceState *dev, Error **errp)
+{
+    sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
+            spapr_memory_unplug_request(hotplug_dev, dev, errp);
+        } else {
+            /* NOTE: this means there is a window after guest reset, prior to
+             * CAS negotiation, where unplug requests will fail due to the
+             * capability not being detected yet. This is a bit different than
+             * the case with PCI unplug, where the events will be queued and
+             * eventually handled by the guest after boot
+             */
+            error_setg(errp, "Memory hot unplug not supported for this guest");
+        }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
         if (!mc->query_hotpluggable_cpus) {
             error_setg(errp, "CPU hot unplug not supported on this machine");
@@ -2450,6 +2619,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     hc->plug = spapr_machine_device_plug;
     hc->unplug = spapr_machine_device_unplug;
     mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
+    hc->unplug_request = spapr_machine_device_unplug_request;
 
     smc->dr_lmb_enabled = true;
     smc->tcg_default_cpu = "POWER8";
@@ -2585,7 +2755,10 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index,
 
 static void spapr_machine_2_7_instance_options(MachineState *machine)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
+
     spapr_machine_2_8_instance_options(machine);
+    spapr->use_hotplug_event_source = false;
 }
 
 static void spapr_machine_2_7_class_options(MachineClass *mc)
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index bc922bc86f..e0c14f6b77 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -184,7 +184,7 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     /*
      * Setup CPU DT entries only for hotplugged CPUs. For boot time or
-     * coldplugged CPUs DT entries are setup in spapr_finalize_fdt().
+     * coldplugged CPUs DT entries are setup in spapr_build_fdt().
      */
     if (dev->hotplugged) {
         fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 6e54fd4743..a0c44ee593 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -68,6 +68,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc,
         }
     }
 
+    /*
+     * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't
+     * belong to a DIMM device that is marked for removal.
+     *
+     * Currently the guest userspace tool drmgr that drives the memory
+     * hotplug/unplug will just try to remove a set of 'removable' LMBs
+     * in response to a hot unplug request that is based on drc-count.
+     * If the LMB being removed doesn't belong to a DIMM device that is
+     * actually being unplugged, fail the isolation request here.
+     */
+    if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) {
+        if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
+             !drc->awaiting_release) {
+            return RTAS_OUT_HW_ERROR;
+        }
+    }
+
     drc->isolation_state = state;
 
     if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 6d3534541c..f85a9c32a7 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
 
 struct rtas_error_log {
@@ -174,6 +175,16 @@ struct epow_log_full {
     struct rtas_event_log_v6_epow epow;
 } QEMU_PACKED;
 
+union drc_identifier {
+    uint32_t index;
+    uint32_t count;
+    struct {
+        uint32_t count;
+        uint32_t index;
+    } count_indexed;
+    char name[1];
+} QEMU_PACKED;
+
 struct rtas_event_log_v6_hp {
 #define RTAS_LOG_V6_SECTION_ID_HOTPLUG              0x4850 /* HP */
     struct rtas_event_log_v6_section_header hdr;
@@ -190,12 +201,9 @@ struct rtas_event_log_v6_hp {
 #define RTAS_LOG_V6_HP_ID_DRC_NAME                       1
 #define RTAS_LOG_V6_HP_ID_DRC_INDEX                      2
 #define RTAS_LOG_V6_HP_ID_DRC_COUNT                      3
+#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED              4
     uint8_t reserved;
-    union {
-        uint32_t index;
-        uint32_t count;
-        char name[1];
-    } drc;
+    union drc_identifier drc_id;
 } QEMU_PACKED;
 
 struct hp_log_full {
@@ -206,28 +214,132 @@ struct hp_log_full {
     struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
-#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
-#define EVENT_MASK_EPOW                      0x40000000
-#define EVENT_MASK_HOTPLUG                   0x10000000
-#define EVENT_MASK_IO                        0x08000000
+typedef enum EventClass {
+    EVENT_CLASS_INTERNAL_ERRORS     = 0,
+    EVENT_CLASS_EPOW                = 1,
+    EVENT_CLASS_RESERVED            = 2,
+    EVENT_CLASS_HOT_PLUG            = 3,
+    EVENT_CLASS_IO                  = 4,
+    EVENT_CLASS_MAX
+} EventClassIndex;
+#define EVENT_CLASS_MASK(index) (1U << (31 - (index))) /* class 0 is the MSB */
+
+static const char * const event_names[EVENT_CLASS_MAX] = {
+    [EVENT_CLASS_INTERNAL_ERRORS]       = "internal-errors",
+    [EVENT_CLASS_EPOW]                  = "epow-events",
+    [EVENT_CLASS_HOT_PLUG]              = "hot-plug-events",
+    [EVENT_CLASS_IO]                    = "ibm,io-events",
+};
+
+struct sPAPREventSource {
+    int irq;
+    uint32_t mask;
+    bool enabled;
+};
+
+static sPAPREventSource *spapr_event_sources_new(void)
+{
+    return g_new0(sPAPREventSource, EVENT_CLASS_MAX);
+}
+
+static void spapr_event_sources_register(sPAPREventSource *event_sources,
+                                        EventClassIndex index, int irq)
+{
+    /* we only support 1 irq per event class at the moment */
+    g_assert(event_sources && index < EVENT_CLASS_MAX);
+    g_assert(!event_sources[index].enabled);
+    event_sources[index].irq = irq;
+    event_sources[index].mask = EVENT_CLASS_MASK(index);
+    event_sources[index].enabled = true;
+}
+
+static const sPAPREventSource *
+spapr_event_sources_get_source(sPAPREventSource *event_sources,
+                               EventClassIndex index)
+{
+    g_assert(index < EVENT_CLASS_MAX);
+    g_assert(event_sources);
+
+    return &event_sources[index];
+}
+
+void spapr_dt_events(sPAPRMachineState *spapr, void *fdt)
+{
+    uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
+    int i, count = 0, event_sources;
+    sPAPREventSource *events = spapr->event_sources;
+
+    g_assert(events);
+
+    _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources"));
+
+    for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
+        int node_offset;
+        uint32_t interrupts[2];
+        const sPAPREventSource *source =
+            spapr_event_sources_get_source(events, i);
+        const char *source_name = event_names[i];
+
+        if (!source->enabled) {
+            continue;
+        }
+
+        interrupts[0] = cpu_to_be32(source->irq);
+        interrupts[1] = 0;
 
-void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
+        _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name));
+        _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts,
+                         sizeof(interrupts)));
+
+        irq_ranges[count++] = interrupts[0];
+        irq_ranges[count++] = cpu_to_be32(1);
+    }
+
+    /* "interrupt-ranges" is a list of <first irq, nr irqs> pairs: do not
+     * append a trailing cell, which could also overrun irq_ranges[] */
+
+    _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2)));
+    _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges",
+                      irq_ranges, count * sizeof(uint32_t))));
+}
+
+static const sPAPREventSource *
+rtas_event_log_to_source(sPAPRMachineState *spapr, int log_type)
 {
-    uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
-    uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
+    const sPAPREventSource *source;
+
+    g_assert(spapr->event_sources);
+
+    switch (log_type) {
+    case RTAS_LOG_TYPE_HOTPLUG:
+        source = spapr_event_sources_get_source(spapr->event_sources,
+                                                EVENT_CLASS_HOT_PLUG);
+        if (spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)) {
+            g_assert(source->enabled);
+            break;
+        }
+        /* fall back to epow for legacy hotplug interrupt source */
+    case RTAS_LOG_TYPE_EPOW:
+        source = spapr_event_sources_get_source(spapr->event_sources,
+                                                EVENT_CLASS_EPOW);
+        break;
+    default:
+        source = NULL;
+    }
 
-    _FDT((fdt_begin_node(fdt, "event-sources")));
+    return source;
+}
 
-    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
-    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
-    _FDT((fdt_property(fdt, "interrupt-ranges",
-                       irq_ranges, sizeof(irq_ranges))));
+static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type)
+{
+    const sPAPREventSource *source;
 
-    _FDT((fdt_begin_node(fdt, "epow-events")));
-    _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
-    _FDT((fdt_end_node(fdt)));
+    source = rtas_event_log_to_source(spapr, log_type);
+    g_assert(source);
+    g_assert(source->enabled);
 
-    _FDT((fdt_end_node(fdt)));
+    return source->irq;
 }
 
 static void rtas_event_log_queue(int log_type, void *data, bool exception)
@@ -248,19 +360,15 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
-    /* we only queue EPOW events atm. */
-    if ((event_mask & EVENT_MASK_EPOW) == 0) {
-        return NULL;
-    }
-
     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
+        const sPAPREventSource *source =
+            rtas_event_log_to_source(spapr, entry->log_type);
+
         if (entry->exception != exception) {
             continue;
         }
 
-        /* EPOW and hotplug events are surfaced in the same manner */
-        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
-            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
+        if (source->mask & event_mask) {
             break;
         }
     }
@@ -277,19 +385,15 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
-    /* we only queue EPOW events atm. */
-    if ((event_mask & EVENT_MASK_EPOW) == 0) {
-        return false;
-    }
-
     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
+        const sPAPREventSource *source =
+            rtas_event_log_to_source(spapr, entry->log_type);
+
         if (entry->exception != exception) {
             continue;
         }
 
-        /* EPOW and hotplug events are surfaced in the same manner */
-        if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
-            entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
+        if (source->mask & event_mask) {
             return true;
         }
     }
@@ -377,7 +481,9 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
 
     rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+                                 rtas_event_log_to_irq(spapr,
+                                                       RTAS_LOG_TYPE_EPOW)));
 }
 
 static void spapr_hotplug_set_signalled(uint32_t drc_index)
@@ -389,7 +495,7 @@ static void spapr_hotplug_set_signalled(uint32_t drc_index)
 
 static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
                                     sPAPRDRConnectorType drc_type,
-                                    uint32_t drc)
+                                    union drc_identifier *drc_id)
 {
     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     struct hp_log_full *new_hp;
@@ -434,7 +540,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
     case SPAPR_DR_CONNECTOR_TYPE_PCI:
         hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
         if (hp->hotplug_action == RTAS_LOG_V6_HP_ACTION_ADD) {
-            spapr_hotplug_set_signalled(drc);
+            spapr_hotplug_set_signalled(drc_id->index);
         }
         break;
     case SPAPR_DR_CONNECTOR_TYPE_LMB:
@@ -452,48 +558,89 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
     }
 
     if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) {
-        hp->drc.count = cpu_to_be32(drc);
+        hp->drc_id.count = cpu_to_be32(drc_id->count);
     } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) {
-        hp->drc.index = cpu_to_be32(drc);
+        hp->drc_id.index = cpu_to_be32(drc_id->index);
+    } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) {
+        /* we should not be using count_indexed value unless the guest
+         * supports dedicated hotplug event source
+         */
+        g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT));
+        hp->drc_id.count_indexed.count =
+            cpu_to_be32(drc_id->count_indexed.count);
+        hp->drc_id.count_indexed.index =
+            cpu_to_be32(drc_id->count_indexed.index);
     }
 
     rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
 
-    qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
+    qemu_irq_pulse(xics_get_qirq(spapr->xics,
+                                 rtas_event_log_to_irq(spapr,
+                                                       RTAS_LOG_TYPE_HOTPLUG)));
 }
 
 void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc)
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     sPAPRDRConnectorType drc_type = drck->get_type(drc);
-    uint32_t index = drck->get_index(drc);
+    union drc_identifier drc_id;
 
+    drc_id.index = drck->get_index(drc);
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
-                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index);
+                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
 }
 
 void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc)
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     sPAPRDRConnectorType drc_type = drck->get_type(drc);
-    uint32_t index = drck->get_index(drc);
+    union drc_identifier drc_id;
 
+    drc_id.index = drck->get_index(drc);
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
-                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index);
+                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
 void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
                                        uint32_t count)
 {
+    union drc_identifier drc_id;
+
+    drc_id.count = count;
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
-                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count);
+                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
 }
 
 void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
                                           uint32_t count)
 {
+    union drc_identifier drc_id;
+
+    drc_id.count = count;
     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
-                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count);
+                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
+}
+
+void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type,
+                                            uint32_t count, uint32_t index)
+{
+    union drc_identifier drc_id;
+
+    drc_id.count_indexed.count = count;
+    drc_id.count_indexed.index = index;
+    spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
+                            RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
+}
+
+void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
+                                               uint32_t count, uint32_t index)
+{
+    union drc_identifier drc_id;
+
+    drc_id.count_indexed.count = count;
+    drc_id.count_indexed.index = index;
+    spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
+                            RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
 static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
@@ -505,6 +652,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     uint64_t xinfo;
     sPAPREventLogEntry *event;
     struct rtas_error_log *hdr;
+    int i;
 
     if ((nargs < 6) || (nargs > 7) || nret != 1) {
         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -541,8 +689,14 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
      * do the latter here, since our code relies on edge-triggered
      * interrupts.
      */
-    if (rtas_event_log_contains(mask, true)) {
-        qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq));
+    for (i = 0; i < EVENT_CLASS_MAX; i++) {
+        if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) {
+            const sPAPREventSource *source =
+                spapr_event_sources_get_source(spapr->event_sources, i);
+
+            g_assert(source->enabled);
+            qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq));
+        }
     }
 
     return;
@@ -594,8 +748,27 @@ out_no_events:
 void spapr_events_init(sPAPRMachineState *spapr)
 {
     QTAILQ_INIT(&spapr->pending_events);
-    spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, false,
-                                            &error_fatal);
+
+    spapr->event_sources = spapr_event_sources_new();
+
+    spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
+                                 xics_spapr_alloc(spapr->xics, 0, false,
+                                                  &error_fatal));
+
+    /* NOTE: if machine supports modern/dedicated hotplug event source,
+     * we add it to the device-tree unconditionally. This means we may
+     * have cases where the source is enabled in QEMU, but unused by the
+     * guest because it does not support modern hotplug events, so we
+     * take care to rely on checking for negotiation of OV5_HP_EVT option
+     * before attempting to use it to signal events, rather than simply
+     * checking that it's enabled.
+     */
+    if (spapr->use_hotplug_event_source) {
+        spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
+                                     xics_spapr_alloc(spapr->xics, 0, false,
+                                                      &error_fatal));
+    }
+
     spapr->epow_notifier.notify = spapr_powerdown_req;
     qemu_register_powerdown_notifier(&spapr->epow_notifier);
     spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index c5e7e8c995..7c46d4625b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -11,6 +11,7 @@
 #include "trace.h"
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
+#include "hw/ppc/spapr_ovec.h"
 
 struct SPRSyncState {
     int spr;
@@ -880,32 +881,6 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr,
     return ret;
 }
 
-/*
- * Return the offset to the requested option vector @vector in the
- * option vector table @table.
- */
-static target_ulong cas_get_option_vector(int vector, target_ulong table)
-{
-    int i;
-    char nr_vectors, nr_entries;
-
-    if (!table) {
-        return 0;
-    }
-
-    nr_vectors = (ldl_phys(&address_space_memory, table) >> 24) + 1;
-    if (!vector || vector > nr_vectors) {
-        return 0;
-    }
-    table++; /* skip nr option vectors */
-
-    for (i = 0; i < vector - 1; i++) {
-        nr_entries = ldl_phys(&address_space_memory, table) >> 24;
-        table += nr_entries + 2;
-    }
-    return table;
-}
-
 typedef struct {
     uint32_t cpu_version;
     Error *err;
@@ -961,23 +936,21 @@ static void cas_handle_compat_cpu(PowerPCCPUClass *pcc, uint32_t pvr,
     }
 }
 
-#define OV5_DRCONF_MEMORY 0x20
-
 static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
                                                   sPAPRMachineState *spapr,
                                                   target_ulong opcode,
                                                   target_ulong *args)
 {
     target_ulong list = ppc64_phys_to_real(args[0]);
-    target_ulong ov_table, ov5;
+    target_ulong ov_table;
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu_);
     CPUState *cs;
-    bool cpu_match = false, cpu_update = true, memory_update = false;
+    bool cpu_match = false, cpu_update = true;
     unsigned old_cpu_version = cpu_->cpu_version;
     unsigned compat_lvl = 0, cpu_version = 0;
     unsigned max_lvl = get_compat_level(cpu_->max_compat);
     int counter;
-    char ov5_byte2;
+    sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates;
 
     /* Parse PVR list */
     for (counter = 0; counter < 512; ++counter) {
@@ -1033,19 +1006,34 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
     /* For the future use: here @ov_table points to the first option vector */
     ov_table = list;
 
-    ov5 = cas_get_option_vector(5, ov_table);
-    if (!ov5) {
-        return H_SUCCESS;
-    }
+    ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+
+    /* NOTE: there are actually a number of ov5 bits where input from the
+     * guest is always zero, and the platform/QEMU enables them independently
+     * of guest input. To model these properly we'd want some sort of mask,
+     * but since they only currently apply to memory migration as defined
+     * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need
+     * to worry about this for now.
+     */
+    ov5_cas_old = spapr_ovec_clone(spapr->ov5_cas);
+    /* full range of negotiated ov5 capabilities */
+    spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest);
+    spapr_ovec_cleanup(ov5_guest);
+    /* capabilities that have been added since CAS-generated guest reset.
+     * if capabilities have since been removed, generate another reset
+     */
+    ov5_updates = spapr_ovec_new();
+    spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
+                                        ov5_cas_old, spapr->ov5_cas);
 
-    /* @list now points to OV 5 */
-    ov5_byte2 = ldub_phys(&address_space_memory, ov5 + 2);
-    if (ov5_byte2 & OV5_DRCONF_MEMORY) {
-        memory_update = true;
+    if (!spapr->cas_reboot) {
+        spapr->cas_reboot =
+            (spapr_h_cas_compose_response(spapr, args[1], args[2], cpu_update,
+                                          ov5_updates) != 0);
     }
+    spapr_ovec_cleanup(ov5_updates);
 
-    if (spapr_h_cas_compose_response(spapr, args[1], args[2],
-                                     cpu_update, memory_update)) {
+    if (spapr->cas_reboot) {
         qemu_system_reset_request();
     }
 
diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
new file mode 100644
index 0000000000..c2a0d18577
--- /dev/null
+++ b/hw/ppc/spapr_ovec.c
@@ -0,0 +1,242 @@
+/*
+ * QEMU SPAPR Architecture Option Vector Helper Functions
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Authors:
+ *  Bharata B Rao     <bharata@linux.vnet.ibm.com>
+ *  Michael Roth      <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/ppc/spapr_ovec.h"
+#include "qemu/bitmap.h"
+#include "exec/address-spaces.h"
+#include "qemu/error-report.h"
+#include <libfdt.h>
+
+/* #define DEBUG_SPAPR_OVEC */
+
+#ifdef DEBUG_SPAPR_OVEC
+#define DPRINTFN(fmt, ...) \
+    do { fprintf(stderr, fmt "\n", ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTFN(fmt, ...) \
+    do { } while (0)
+#endif
+
+#define OV_MAXBYTES 256 /* not including length byte */
+#define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE)
+
+/* we *could* work with bitmaps directly, but handling the bitmap privately
+ * allows us to more safely make assumptions about the bitmap size and
+ * simplify the calling code somewhat
+ */
+struct sPAPROptionVector {
+    unsigned long *bitmap;
+};
+
+sPAPROptionVector *spapr_ovec_new(void)
+{
+    sPAPROptionVector *ov;
+
+    ov = g_new0(sPAPROptionVector, 1);
+    ov->bitmap = bitmap_new(OV_MAXBITS);
+
+    return ov;
+}
+
+sPAPROptionVector *spapr_ovec_clone(sPAPROptionVector *ov_orig)
+{
+    sPAPROptionVector *ov;
+
+    g_assert(ov_orig);
+
+    ov = spapr_ovec_new();
+    bitmap_copy(ov->bitmap, ov_orig->bitmap, OV_MAXBITS);
+
+    return ov;
+}
+
+void spapr_ovec_intersect(sPAPROptionVector *ov,
+                          sPAPROptionVector *ov1,
+                          sPAPROptionVector *ov2)
+{
+    g_assert(ov);
+    g_assert(ov1);
+    g_assert(ov2);
+
+    bitmap_and(ov->bitmap, ov1->bitmap, ov2->bitmap, OV_MAXBITS);
+}
+
+/* returns true if option bits were removed, false otherwise */
+bool spapr_ovec_diff(sPAPROptionVector *ov,
+                     sPAPROptionVector *ov_old,
+                     sPAPROptionVector *ov_new)
+{
+    unsigned long *change_mask = bitmap_new(OV_MAXBITS);
+    unsigned long *removed_bits = bitmap_new(OV_MAXBITS);
+    bool bits_were_removed = false;
+
+    g_assert(ov);
+    g_assert(ov_old);
+    g_assert(ov_new);
+
+    bitmap_xor(change_mask, ov_old->bitmap, ov_new->bitmap, OV_MAXBITS);
+    bitmap_and(ov->bitmap, ov_new->bitmap, change_mask, OV_MAXBITS);
+    bitmap_and(removed_bits, ov_old->bitmap, change_mask, OV_MAXBITS);
+
+    if (!bitmap_empty(removed_bits, OV_MAXBITS)) {
+        bits_were_removed = true;
+    }
+
+    g_free(change_mask);
+    g_free(removed_bits);
+
+    return bits_were_removed;
+}
+
+void spapr_ovec_cleanup(sPAPROptionVector *ov)
+{
+    if (ov) {
+        g_free(ov->bitmap);
+        g_free(ov);
+    }
+}
+
+void spapr_ovec_set(sPAPROptionVector *ov, long bitnr)
+{
+    g_assert(ov);
+    g_assert_cmpint(bitnr, <, OV_MAXBITS);
+
+    set_bit(bitnr, ov->bitmap);
+}
+
+void spapr_ovec_clear(sPAPROptionVector *ov, long bitnr)
+{
+    g_assert(ov);
+    g_assert_cmpint(bitnr, <, OV_MAXBITS);
+
+    clear_bit(bitnr, ov->bitmap);
+}
+
+bool spapr_ovec_test(sPAPROptionVector *ov, long bitnr)
+{
+    g_assert(ov);
+    g_assert_cmpint(bitnr, <, OV_MAXBITS);
+
+    return test_bit(bitnr, ov->bitmap) ? true : false;
+}
+
+static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
+                                 long bitmap_offset)
+{
+    int i;
+
+    for (i = 0; i < BITS_PER_BYTE; i++) {
+        if (entry & (1 << (BITS_PER_BYTE - 1 - i))) {
+            bitmap_set(bitmap, bitmap_offset + i, 1);
+        }
+    }
+}
+
+static uint8_t guest_byte_from_bitmap(unsigned long *bitmap, long bitmap_offset)
+{
+    uint8_t entry = 0;
+    int i;
+
+    for (i = 0; i < BITS_PER_BYTE; i++) {
+        if (test_bit(bitmap_offset + i, bitmap)) {
+            entry |= (1 << (BITS_PER_BYTE - 1 - i));
+        }
+    }
+
+    return entry;
+}
+
+static target_ulong vector_addr(target_ulong table_addr, int vector)
+{
+    uint16_t vector_count, vector_len;
+    int i;
+
+    vector_count = ldub_phys(&address_space_memory, table_addr) + 1;
+    if (vector > vector_count) {
+        return 0;
+    }
+    table_addr++; /* skip nr option vectors */
+
+    for (i = 0; i < vector - 1; i++) {
+        vector_len = ldub_phys(&address_space_memory, table_addr) + 1;
+        table_addr += vector_len + 1; /* bit-vector + length byte */
+    }
+    return table_addr;
+}
+
+sPAPROptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector)
+{
+    sPAPROptionVector *ov;
+    target_ulong addr;
+    uint16_t vector_len;
+    int i;
+
+    g_assert(table_addr);
+    g_assert_cmpint(vector, >=, 1); /* vector numbering starts at 1 */
+
+    addr = vector_addr(table_addr, vector);
+    if (!addr) {
+        /* specified vector isn't present */
+        return NULL;
+    }
+
+    vector_len = ldub_phys(&address_space_memory, addr++) + 1;
+    g_assert_cmpint(vector_len, <=, OV_MAXBYTES);
+    ov = spapr_ovec_new();
+
+    for (i = 0; i < vector_len; i++) {
+        uint8_t entry = ldub_phys(&address_space_memory, addr + i);
+        if (entry) {
+            DPRINTFN("read guest vector %2d, byte %3d / %3d: 0x%.2x",
+                     vector, i + 1, vector_len, entry);
+            guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE);
+        }
+    }
+
+    return ov;
+}
+
+int spapr_ovec_populate_dt(void *fdt, int fdt_offset,
+                           sPAPROptionVector *ov, const char *name)
+{
+    uint8_t vec[OV_MAXBYTES + 1];
+    uint16_t vec_len;
+    unsigned long lastbit;
+    int i;
+
+    g_assert(ov);
+
+    lastbit = find_last_bit(ov->bitmap, OV_MAXBITS);
+    /* if no bits are set, include at least 1 byte of the vector so we can
+     * still encode this in the device tree while abiding by the same
+     * encoding/sizing expected in ibm,client-architecture-support
+     */
+    vec_len = (lastbit == OV_MAXBITS) ? 1 : lastbit / BITS_PER_BYTE + 1;
+    g_assert_cmpint(vec_len, <=, OV_MAXBYTES);
+    /* guest expects vector len encoded as vec_len - 1, since the length byte
+     * is assumed and not included, and the first byte of the vector
+     * is assumed as well
+     */
+    vec[0] = vec_len - 1;
+
+    for (i = 1; i < vec_len + 1; i++) {
+        vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE);
+        if (vec[i]) {
+            DPRINTFN("encoding guest vector byte %3d / %3d: 0x%.2x",
+                     i, vec_len, vec[i]);
+        }
+    }
+    /* the property is the length byte followed by vec_len vector bytes */
+    return fdt_setprop(fdt, fdt_offset, name, vec, vec_len + 1);
+}
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 2a1ccf59ea..7cde30ee09 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1392,6 +1392,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    if (sphb->numa_node != -1 &&
+        (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) {
+        error_setg(errp, "Invalid NUMA node ID for PCI host bridge");
+        return;
+    }
+
     sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
 
     namebuf = alloca(strlen(sphb->dtbusname) + 32);
@@ -1880,7 +1886,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     }
 
     /* Advertise NUMA via ibm,associativity */
-    if (nb_numa_nodes > 1) {
+    if (phb->numa_node != -1) {
         _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
                          sizeof(associativity)));
     }
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 0db84c816d..bb19944686 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -46,6 +46,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/cutils.h"
 #include "trace.h"
+#include "hw/ppc/fdt.h"
 
 static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr,
                                                     uint32_t drc_index)
@@ -710,78 +711,60 @@ void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
     rtas_table[token].fn = fn;
 }
 
-int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
-                                 hwaddr rtas_size)
+void spapr_dt_rtas_tokens(void *fdt, int rtas)
 {
-    int ret;
     int i;
-    uint32_t lrdr_capacity[5];
-    MachineState *machine = MACHINE(qdev_get_machine());
-    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
-    uint64_t max_hotplug_addr = spapr->hotplug_memory.base +
-                                memory_region_size(&spapr->hotplug_memory.mr);
 
-    ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size);
+    for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) {
+        struct rtas_call *call = &rtas_table[i];
+
+        if (!call->name) {
+            continue;
+        }
+
+        _FDT(fdt_setprop_cell(fdt, rtas, call->name, i + RTAS_TOKEN_BASE));
+    }
+}
+
+void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr)
+{
+    int rtas_node;
+    int ret;
+
+    /* Copy RTAS blob into guest RAM */
+    cpu_physical_memory_write(addr, spapr->rtas_blob, spapr->rtas_size);
+
+    ret = fdt_add_mem_rsv(fdt, addr, spapr->rtas_size);
     if (ret < 0) {
         error_report("Couldn't add RTAS reserve entry: %s",
-                fdt_strerror(ret));
-        return ret;
+                     fdt_strerror(ret));
+        exit(1);
     }
 
-    ret = qemu_fdt_setprop_cell(fdt, "/rtas", "linux,rtas-base",
-                                rtas_addr);
+    /* Update the device tree with the blob's location */
+    rtas_node = fdt_path_offset(fdt, "/rtas");
+    assert(rtas_node >= 0);
+
+    ret = fdt_setprop_cell(fdt, rtas_node, "linux,rtas-base", addr);
     if (ret < 0) {
         error_report("Couldn't add linux,rtas-base property: %s",
-                fdt_strerror(ret));
-        return ret;
+                     fdt_strerror(ret));
+        exit(1);
     }
 
-    ret = qemu_fdt_setprop_cell(fdt, "/rtas", "linux,rtas-entry",
-                                rtas_addr);
+    ret = fdt_setprop_cell(fdt, rtas_node, "linux,rtas-entry", addr);
     if (ret < 0) {
         error_report("Couldn't add linux,rtas-entry property: %s",
-                fdt_strerror(ret));
-        return ret;
+                     fdt_strerror(ret));
+        exit(1);
     }
 
-    ret = qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size",
-                                rtas_size);
+    ret = fdt_setprop_cell(fdt, rtas_node, "rtas-size", spapr->rtas_size);
     if (ret < 0) {
         error_report("Couldn't add rtas-size property: %s",
-                fdt_strerror(ret));
-        return ret;
+                     fdt_strerror(ret));
+        exit(1);
     }
-
-    for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) {
-        struct rtas_call *call = &rtas_table[i];
-
-        if (!call->name) {
-            continue;
-        }
-
-        ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name,
-                                    i + RTAS_TOKEN_BASE);
-        if (ret < 0) {
-            error_report("Couldn't add rtas token for %s: %s",
-                    call->name, fdt_strerror(ret));
-            return ret;
-        }
-
-    }
-
-    lrdr_capacity[0] = cpu_to_be32(max_hotplug_addr >> 32);
-    lrdr_capacity[1] = cpu_to_be32(max_hotplug_addr & 0xffffffff);
-    lrdr_capacity[2] = 0;
-    lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE);
-    lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads);
-    ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity,
-                     sizeof(lrdr_capacity));
-    if (ret < 0) {
-        error_report("Couldn't add ibm,lrdr-capacity rtas property");
-        return ret;
-    }
-
-    return 0;
 }
 
 static void core_rtas_register_types(void)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 3648aa5960..cc1e09c568 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -36,6 +36,7 @@
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
 #include "hw/ppc/xics.h"
+#include "hw/ppc/fdt.h"
 #include "trace.h"
 
 #include <libfdt.h>
@@ -624,11 +625,21 @@ static int compare_reg(const void *p1, const void *p2)
     return 1;
 }
 
-int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt)
+void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt)
 {
     DeviceState *qdev, **qdevs;
     BusChild *kid;
     int i, num, ret = 0;
+    int node;
+
+    _FDT(node = fdt_add_subnode(fdt, 0, "vdevice"));
+
+    _FDT(fdt_setprop_string(fdt, node, "device_type", "vdevice"));
+    _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,vdevice"));
+    _FDT(fdt_setprop_cell(fdt, node, "#address-cells", 1));
+    _FDT(fdt_setprop_cell(fdt, node, "#size-cells", 0));
+    _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2));
+    _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0));
 
     /* Count qdevs on the bus list */
     num = 0;
@@ -650,43 +661,32 @@ int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt)
      * to know that will mean they are in forward order in the tree. */
     for (i = num - 1; i >= 0; i--) {
         VIOsPAPRDevice *dev = (VIOsPAPRDevice *)(qdevs[i]);
+        VIOsPAPRDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
 
         ret = vio_make_devnode(dev, fdt);
-
         if (ret < 0) {
-            goto out;
+            error_report("Couldn't create device node /vdevice/%s@%"PRIx32,
+                         vdc->dt_name, dev->reg);
+            exit(1);
         }
     }
 
-    ret = 0;
-out:
     g_free(qdevs);
-
-    return ret;
 }
 
-int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus)
+gchar *spapr_vio_stdout_path(VIOsPAPRBus *bus)
 {
     VIOsPAPRDevice *dev;
     char *name, *path;
-    int ret, offset;
 
     dev = spapr_vty_get_default(bus);
-    if (!dev)
-        return 0;
-
-    offset = fdt_path_offset(fdt, "/chosen");
-    if (offset < 0) {
-        return offset;
+    if (!dev) {
+        return NULL;
     }
 
     name = spapr_vio_get_dev_name(DEVICE(dev));
     path = g_strdup_printf("/vdevice/%s", name);
 
-    ret = fdt_setprop_string(fdt, offset, "linux,stdout-path", path);
-
     g_free(name);
-    g_free(path);
-
-    return ret;
+    return path;
 }
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index b3915e4fd6..6224288ac3 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -35,10 +35,11 @@
 #include "sysemu/sysemu.h"
 #include "net/net.h"
 #include "hw/boards.h"
-#include "hw/nvram/openbios_firmware_abi.h"
 #include "hw/scsi/esp.h"
 #include "hw/i386/pc.h"
 #include "hw/isa/isa.h"
+#include "hw/nvram/sun_nvram.h"
+#include "hw/nvram/chrp_nvram.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/char/escc.h"
 #include "hw/empty_slot.h"
@@ -117,39 +118,17 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr,
                        int nvram_machine_id, const char *arch)
 {
     unsigned int i;
-    uint32_t start, end;
+    int sysp_end;
     uint8_t image[0x1ff0];
-    struct OpenBIOS_nvpart_v1 *part_header;
     NvramClass *k = NVRAM_GET_CLASS(nvram);
 
     memset(image, '\0', sizeof(image));
 
-    start = 0;
+    /* OpenBIOS nvram variables partition */
+    sysp_end = chrp_nvram_create_system_partition(image, 0);
 
-    // OpenBIOS nvram variables
-    // Variable partition
-    part_header = (struct OpenBIOS_nvpart_v1 *)&image[start];
-    part_header->signature = OPENBIOS_PART_SYSTEM;
-    pstrcpy(part_header->name, sizeof(part_header->name), "system");
-
-    end = start + sizeof(struct OpenBIOS_nvpart_v1);
-    for (i = 0; i < nb_prom_envs; i++)
-        end = OpenBIOS_set_var(image, end, prom_envs[i]);
-
-    // End marker
-    image[end++] = '\0';
-
-    end = start + ((end - start + 15) & ~15);
-    OpenBIOS_finish_partition(part_header, end - start);
-
-    // free partition
-    start = end;
-    part_header = (struct OpenBIOS_nvpart_v1 *)&image[start];
-    part_header->signature = OPENBIOS_PART_FREE;
-    pstrcpy(part_header->name, sizeof(part_header->name), "free");
-
-    end = 0x1fd0;
-    OpenBIOS_finish_partition(part_header, end - start);
+    /* Free space partition */
+    chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end);
 
     Sun_init_header((struct Sun_nvram *)&image[0x1fd8], macaddr,
                     nvram_machine_id);
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 7b8134ef51..271d8bc592 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -36,7 +36,8 @@
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "hw/boards.h"
-#include "hw/nvram/openbios_firmware_abi.h"
+#include "hw/nvram/sun_nvram.h"
+#include "hw/nvram/chrp_nvram.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/sysbus.h"
 #include "hw/ide.h"
@@ -124,39 +125,17 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size,
                                   const uint8_t *macaddr)
 {
     unsigned int i;
-    uint32_t start, end;
+    int sysp_end;
     uint8_t image[0x1ff0];
-    struct OpenBIOS_nvpart_v1 *part_header;
     NvramClass *k = NVRAM_GET_CLASS(nvram);
 
     memset(image, '\0', sizeof(image));
 
-    start = 0;
+    /* OpenBIOS nvram variables partition */
+    sysp_end = chrp_nvram_create_system_partition(image, 0);
 
-    // OpenBIOS nvram variables
-    // Variable partition
-    part_header = (struct OpenBIOS_nvpart_v1 *)&image[start];
-    part_header->signature = OPENBIOS_PART_SYSTEM;
-    pstrcpy(part_header->name, sizeof(part_header->name), "system");
-
-    end = start + sizeof(struct OpenBIOS_nvpart_v1);
-    for (i = 0; i < nb_prom_envs; i++)
-        end = OpenBIOS_set_var(image, end, prom_envs[i]);
-
-    // End marker
-    image[end++] = '\0';
-
-    end = start + ((end - start + 15) & ~15);
-    OpenBIOS_finish_partition(part_header, end - start);
-
-    // free partition
-    start = end;
-    part_header = (struct OpenBIOS_nvpart_v1 *)&image[start];
-    part_header->signature = OPENBIOS_PART_FREE;
-    pstrcpy(part_header->name, sizeof(part_header->name), "free");
-
-    end = 0x1fd0;
-    OpenBIOS_finish_partition(part_header, end - start);
+    /* Free space partition */
+    chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end);
 
     Sun_init_header((struct Sun_nvram *)&image[0x1fd8], macaddr, 0x80);
author	Peter Maydell <peter.maydell@linaro.org>	2016-10-28 16:31:59 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2016-10-28 16:31:59 +0100
commit	66a77ea676aea48092500bcddb015aa0aee42388 (patch)
tree	9fe8c7f1a6bf8828a8ec239a6ced021486e883d3 /hw
parent	01b601f06154c0d35f945b1321ddb3f39530cc43 (diff)
parent	10c21b5c20bf3d20b7b0ad279db37ae89cc7937d (diff)