52 files changed, 3675 insertions, 275 deletions
diff --git a/backends/rng.c b/backends/rng.c index 85cb83f5e1..8b8d5a4973 100644 --- a/backends/rng.c +++ b/backends/rng.c @@ -12,6 +12,7 @@ #include "sysemu/rng.h" #include "qapi/qmp/qerror.h" +#include "qom/object_interfaces.h" void rng_backend_request_entropy(RngBackend *s, size_t size, EntropyReceiveFunc *receive_entropy, @@ -40,9 +41,9 @@ static bool rng_backend_prop_get_opened(Object *obj, Error **errp) return s->opened; } -void rng_backend_open(RngBackend *s, Error **errp) +static void rng_backend_complete(UserCreatable *uc, Error **errp) { - object_property_set_bool(OBJECT(s), true, "opened", errp); + object_property_set_bool(OBJECT(uc), true, "opened", errp); } static void rng_backend_prop_set_opened(Object *obj, bool value, Error **errp) @@ -76,13 +77,25 @@ static void rng_backend_init(Object *obj) NULL); } +static void rng_backend_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = rng_backend_complete; +} + static const TypeInfo rng_backend_info = { .name = TYPE_RNG_BACKEND, .parent = TYPE_OBJECT, .instance_size = sizeof(RngBackend), .instance_init = rng_backend_init, .class_size = sizeof(RngBackendClass), + .class_init = rng_backend_class_init, .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } }; static void register_types(void) @@ -325,7 +325,7 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x hwaddr *plen, bool resolve_subpage) { MemoryRegionSection *section; - Int128 diff; + Int128 diff, diff_page; section = address_space_lookup_region(d, addr, resolve_subpage); /* Compute offset within MemoryRegionSection */ @@ -334,7 +334,9 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x /* Compute offset within MemoryRegion */ *xlat = addr + section->offset_within_region; + diff_page = int128_make64(((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr); diff = int128_sub(section->mr->size, int128_make64(addr)); + diff = int128_min(diff, diff_page); *plen = int128_get64(int128_min(diff, int128_make64(*plen))); return section; } @@ -349,7 +351,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, hwaddr len = *plen; for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + section = address_space_translate_internal(as->dispatch, addr, &addr, &len, true); mr = section->mr; if (!mr->iommu_ops) { diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 1c1b0e5258..4036262f50 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -173,6 +173,11 @@ static void default_reset_secondary(ARMCPU *cpu, env->regs[15] = info->smp_loader_start; } +static inline bool have_dtb(const struct arm_boot_info *info) +{ + return info->dtb_filename || info->get_dtb; +} + #define WRITE_WORD(p, value) do { \ stl_phys_notdirty(p, value); \ p += 4; \ @@ -421,7 +426,7 @@ static void do_cpu_reset(void *opaque) env->regs[15] = info->loader_start; } - if (!info->dtb_filename) { + if (!have_dtb(info)) { if (old_param) { set_kernel_args_old(info); } else { @@ -542,7 +547,7 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info) /* for device tree boot, we pass the DTB directly in r2. Otherwise * we point to the kernel args. */ - if (info->dtb_filename || info->get_dtb) { + if (have_dtb(info)) { /* Place the DTB after the initrd in memory. Note that some * kernels will trash anything in the 4K page the initrd * ends in, so make sure the DTB isn't caught up in that. 
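The address_space_translate_internal hunk above additionally clamps the returned length so that a single translation never crosses a target page boundary: the result is the minimum of the bytes left in the MemoryRegion, the bytes left in the current page, and the caller's requested length. A minimal standalone sketch of that arithmetic, using plain uint64_t instead of Int128 and an assumed 4 KiB page size (the function and constant names here are invented for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL                 /* stand-in for TARGET_PAGE_SIZE */
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* 'addr' is the already-adjusted offset used by the min() computation in
 * address_space_translate_internal; returns the clamped length. */
static uint64_t clamp_translate_len(uint64_t addr, uint64_t region_size,
                                    uint64_t requested_len)
{
    uint64_t diff_page = ((addr & PAGE_MASK) + PAGE_SIZE) - addr; /* bytes to end of page */
    uint64_t diff = region_size - addr;                           /* bytes to end of region */

    if (diff_page < diff) {
        diff = diff_page;
    }
    return diff < requested_len ? diff : requested_len;
}

int main(void)
{
    /* Only 0xc0 bytes remain before the next page boundary, so a 0x2000-byte
     * request inside a 64 KiB region is clamped to 0xc0. */
    printf("0x%" PRIx64 "\n", clamp_translate_len(0x1f40, 0x10000, 0x2000));
    return 0;
}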
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index 98e0958a77..9ee21e726a 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -37,6 +37,7 @@ #define IRQ_OFFSET 32 /* pic interrupts start from index 32 */ #define MPCORE_PERIPHBASE 0xF8F00000 +#define ZYNQ_BOARD_MIDR 0x413FC090 static const int dma_irqs[8] = { 46, 47, 48, 49, 72, 73, 74, 75 @@ -125,6 +126,12 @@ static void zynq_init(QEMUMachineInitArgs *args) cpu = ARM_CPU(object_new(object_class_get_name(cpu_oc))); + object_property_set_int(OBJECT(cpu), ZYNQ_BOARD_MIDR, "midr", &err); + if (err) { + error_report("%s", error_get_pretty(err)); + exit(1); + } + object_property_set_int(OBJECT(cpu), MPCORE_PERIPHBASE, "reset-cbar", &err); if (err) { error_report("%s", error_get_pretty(err)); diff --git a/hw/cris/Makefile.objs b/hw/cris/Makefile.objs index 776db7c5cd..7624173f77 100644 --- a/hw/cris/Makefile.objs +++ b/hw/cris/Makefile.objs @@ -1,3 +1,2 @@ -obj-y += pic_cpu.o obj-y += boot.o obj-y += axis_dev88.o diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c index 55240886f5..645e45ccdf 100644 --- a/hw/cris/axis_dev88.c +++ b/hw/cris/axis_dev88.c @@ -254,7 +254,7 @@ void axisdev88_init(QEMUMachineInitArgs *args) DeviceState *dev; SysBusDevice *s; DriveInfo *nand; - qemu_irq irq[30], nmi[2], *cpu_irq; + qemu_irq irq[30], nmi[2]; void *etraxfs_dmac; struct etraxfs_dma_client *dma_eth; int i; @@ -296,15 +296,14 @@ void axisdev88_init(QEMUMachineInitArgs *args) &gpio_state.iomem); - cpu_irq = cris_pic_init_cpu(env); dev = qdev_create(NULL, "etraxfs,pic"); /* FIXME: Is there a proper way to signal vectors to the CPU core? */ qdev_prop_set_ptr(dev, "interrupt_vector", &env->interrupt_vector); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); sysbus_mmio_map(s, 0, 0x3001c000); - sysbus_connect_irq(s, 0, cpu_irq[0]); - sysbus_connect_irq(s, 1, cpu_irq[1]); + sysbus_connect_irq(s, 0, qdev_get_gpio_in(DEVICE(cpu), CRIS_CPU_IRQ)); + sysbus_connect_irq(s, 1, qdev_get_gpio_in(DEVICE(cpu), CRIS_CPU_NMI)); for (i = 0; i < 30; i++) { irq[i] = qdev_get_gpio_in(dev, i); } diff --git a/hw/cris/pic_cpu.c b/hw/cris/pic_cpu.c deleted file mode 100644 index bd47bf1a5d..0000000000 --- a/hw/cris/pic_cpu.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * QEMU CRIS CPU interrupt wrapper logic. - * - * Copyright (c) 2009 Edgar E. Iglesias, Axis Communications AB. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "hw/sysbus.h" -#include "hw/hw.h" -#include "hw/cris/etraxfs.h" - -#define D(x) - -static void cris_pic_cpu_handler(void *opaque, int irq, int level) -{ - CRISCPU *cpu = opaque; - CPUState *cs = CPU(cpu); - int type = irq ? CPU_INTERRUPT_NMI : CPU_INTERRUPT_HARD; - - if (level) { - cpu_interrupt(cs, type); - } else { - cpu_reset_interrupt(cs, type); - } -} - -qemu_irq *cris_pic_init_cpu(CPUCRISState *env) -{ - return qemu_allocate_irqs(cris_pic_cpu_handler, cris_env_get_cpu(env), 2); -} diff --git a/hw/display/blizzard_template.h b/hw/display/blizzard_template.h index a8a8899478..b7ef27c808 100644 --- a/hw/display/blizzard_template.h +++ b/hw/display/blizzard_template.h @@ -18,25 +18,35 @@ * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#define SKIP_PIXEL(to) to += deststep +#define SKIP_PIXEL(to) (to += deststep) #if DEPTH == 8 -# define PIXEL_TYPE uint8_t -# define COPY_PIXEL(to, from) *to = from; SKIP_PIXEL(to) -# define COPY_PIXEL1(to, from) *to ++ = from +# define PIXEL_TYPE uint8_t +# define COPY_PIXEL(to, from) do { *to = from; SKIP_PIXEL(to); } while (0) +# define COPY_PIXEL1(to, from) (*to++ = from) #elif DEPTH == 15 || DEPTH == 16 -# define PIXEL_TYPE uint16_t -# define COPY_PIXEL(to, from) *to = from; SKIP_PIXEL(to) -# define COPY_PIXEL1(to, from) *to ++ = from +# define PIXEL_TYPE uint16_t +# define COPY_PIXEL(to, from) do { *to = from; SKIP_PIXEL(to); } while (0) +# define COPY_PIXEL1(to, from) (*to++ = from) #elif DEPTH == 24 -# define PIXEL_TYPE uint8_t -# define COPY_PIXEL(to, from) \ - to[0] = from; to[1] = (from) >> 8; to[2] = (from) >> 16; SKIP_PIXEL(to) -# define COPY_PIXEL1(to, from) \ - *to ++ = from; *to ++ = (from) >> 8; *to ++ = (from) >> 16 +# define PIXEL_TYPE uint8_t +# define COPY_PIXEL(to, from) \ + do { \ + to[0] = from; \ + to[1] = (from) >> 8; \ + to[2] = (from) >> 16; \ + SKIP_PIXEL(to); \ + } while (0) + +# define COPY_PIXEL1(to, from) \ + do { \ + *to++ = from; \ + *to++ = (from) >> 8; \ + *to++ = (from) >> 16; \ + } while (0) #elif DEPTH == 32 -# define PIXEL_TYPE uint32_t -# define COPY_PIXEL(to, from) *to = from; SKIP_PIXEL(to) -# define COPY_PIXEL1(to, from) *to ++ = from +# define PIXEL_TYPE uint32_t +# define COPY_PIXEL(to, from) do { *to = from; SKIP_PIXEL(to); } while (0) +# define COPY_PIXEL1(to, from) (*to++ = from) #else # error unknown bit depth #endif diff --git a/hw/display/pl110_template.h b/hw/display/pl110_template.h index e738e4a241..36ba791c6f 100644 --- a/hw/display/pl110_template.h +++ b/hw/display/pl110_template.h @@ -14,12 +14,16 @@ #if BITS == 8 #define COPY_PIXEL(to, from) *(to++) = from #elif BITS == 15 || BITS == 16 -#define COPY_PIXEL(to, from) *(uint16_t *)to = from; to += 2; +#define COPY_PIXEL(to, from) do { *(uint16_t *)to = from; to += 2; } while (0) #elif BITS == 24 -#define COPY_PIXEL(to, from) \ - *(to++) = from; *(to++) = (from) >> 8; *(to++) = (from) >> 16 +#define COPY_PIXEL(to, from) \ + do { \ + *(to++) = from; \ + *(to++) = (from) >> 8; \ + *(to++) = (from) >> 16; \ + } while (0) #elif BITS == 32 -#define COPY_PIXEL(to, from) *(uint32_t *)to = from; to += 4; +#define COPY_PIXEL(to, from) do { *(uint32_t *)to = from; to += 4; } while (0) #else #error unknown bit depth #endif diff --git a/hw/display/pxa2xx_template.h b/hw/display/pxa2xx_template.h index 1cbe36cb80..c64eebc4b6 100644 --- a/hw/display/pxa2xx_template.h +++ b/hw/display/pxa2xx_template.h @@ -11,14 +11,26 @@ # define SKIP_PIXEL(to) to += deststep #if BITS == 8 -# define COPY_PIXEL(to, from) *to = from; 
SKIP_PIXEL(to) +# define COPY_PIXEL(to, from) do { *to = from; SKIP_PIXEL(to); } while (0) #elif BITS == 15 || BITS == 16 -# define COPY_PIXEL(to, from) *(uint16_t *) to = from; SKIP_PIXEL(to) +# define COPY_PIXEL(to, from) \ + do { \ + *(uint16_t *) to = from; \ + SKIP_PIXEL(to); \ + } while (0) #elif BITS == 24 -# define COPY_PIXEL(to, from) \ - *(uint16_t *) to = from; *(to + 2) = (from) >> 16; SKIP_PIXEL(to) +# define COPY_PIXEL(to, from) \ + do { \ + *(uint16_t *) to = from; \ + *(to + 2) = (from) >> 16; \ + SKIP_PIXEL(to); \ + } while (0) #elif BITS == 32 -# define COPY_PIXEL(to, from) *(uint32_t *) to = from; SKIP_PIXEL(to) +# define COPY_PIXEL(to, from) \ + do { \ + *(uint32_t *) to = from; \ + SKIP_PIXEL(to); \ + } while (0) #else # error unknown bit depth #endif diff --git a/hw/display/tc6393xb_template.h b/hw/display/tc6393xb_template.h index 154aafd400..78629c07f9 100644 --- a/hw/display/tc6393xb_template.h +++ b/hw/display/tc6393xb_template.h @@ -22,14 +22,18 @@ */ #if BITS == 8 -# define SET_PIXEL(addr, color) *(uint8_t*)addr = color; +# define SET_PIXEL(addr, color) (*(uint8_t *)addr = color) #elif BITS == 15 || BITS == 16 -# define SET_PIXEL(addr, color) *(uint16_t*)addr = color; +# define SET_PIXEL(addr, color) (*(uint16_t *)addr = color) #elif BITS == 24 -# define SET_PIXEL(addr, color) \ - addr[0] = color; addr[1] = (color) >> 8; addr[2] = (color) >> 16; +# define SET_PIXEL(addr, color) \ + do { \ + addr[0] = color; \ + addr[1] = (color) >> 8; \ + addr[2] = (color) >> 16; \ + } while (0) #elif BITS == 32 -# define SET_PIXEL(addr, color) *(uint32_t*)addr = color; +# define SET_PIXEL(addr, color) (*(uint32_t *)addr = color) #else # error unknown bit depth #endif diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 9409684ce8..1c4a1143af 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -380,8 +380,10 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, irq = (offset - 0x100) * 8 + GIC_BASE_IRQ; if (irq >= s->num_irq) goto bad_reg; - if (irq < 16) - value = 0xff; + if (irq < GIC_NR_SGIS) { + value = 0xff; + } + for (i = 0; i < 8; i++) { if (value & (1 << i)) { int mask = @@ -406,8 +408,10 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, irq = (offset - 0x180) * 8 + GIC_BASE_IRQ; if (irq >= s->num_irq) goto bad_reg; - if (irq < 16) - value = 0; + if (irq < GIC_NR_SGIS) { + value = 0; + } + for (i = 0; i < 8; i++) { if (value & (1 << i)) { int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK; @@ -423,8 +427,9 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, irq = (offset - 0x200) * 8 + GIC_BASE_IRQ; if (irq >= s->num_irq) goto bad_reg; - if (irq < 16) - irq = 0; + if (irq < GIC_NR_SGIS) { + value = 0; + } for (i = 0; i < 8; i++) { if (value & (1 << i)) { @@ -436,6 +441,10 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, irq = (offset - 0x280) * 8 + GIC_BASE_IRQ; if (irq >= s->num_irq) goto bad_reg; + if (irq < GIC_NR_SGIS) { + value = 0; + } + for (i = 0; i < 8; i++) { /* ??? This currently clears the pending bit for all CPUs, even for per-CPU interrupts. 
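As an aside on the display template hunks earlier in this patch (blizzard, pl110, pxa2xx, tc6393xb): they rewrite multi-statement macros such as COPY_PIXEL and SET_PIXEL as single do { ... } while (0) statements. A short self-contained example, with invented macro names, of the bug this guards against when such a macro follows an if without braces:

#include <stdio.h>

/* Unsafe: expands to two statements, so only the first one is guarded. */
#define COPY_PIXEL_UNSAFE(to, from)  *(to) = (from); (to)++

/* Safe: behaves as one statement and still requires the trailing ';'. */
#define COPY_PIXEL_SAFE(to, from)  do { *(to) = (from); (to)++; } while (0)

int main(void)
{
    unsigned char buf[4] = {0}, *p = buf;
    int enabled = 0;

    if (enabled)
        COPY_PIXEL_UNSAFE(p, 0xff);   /* the (to)++ runs even though enabled == 0 */
    printf("unsafe advanced pointer by %td\n", p - buf);  /* prints 1 */

    p = buf;
    if (enabled)
        COPY_PIXEL_SAFE(p, 0xff);     /* nothing happens, as intended */
    printf("safe advanced pointer by %td\n", p - buf);    /* prints 0 */
    return 0;
}

The do { } while (0) form also composes safely with a following else, which the bare multi-statement version does not.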
It's unclear whether this is the diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 9aecaa82bc..8db182fa3d 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -135,12 +135,18 @@ enum { struct VFIOGroup; +typedef struct VFIOType1 { + MemoryListener listener; + int error; + bool initialized; +} VFIOType1; + typedef struct VFIOContainer { int fd; /* /dev/vfio/vfio, empowered by the attached groups */ struct { /* enable abstraction to support various iommu backends */ union { - MemoryListener listener; /* Used by type1 iommu */ + VFIOType1 type1; }; void (*release)(struct VFIOContainer *); } iommu_data; @@ -191,6 +197,7 @@ typedef struct VFIODevice { bool has_flr; bool has_pm_reset; bool needs_reset; + bool rom_read_failed; } VFIODevice; typedef struct VFIOGroup { @@ -592,7 +599,7 @@ static void vfio_msi_interrupt(void *opaque) return; } -#ifdef VFIO_DEBUG +#ifdef DEBUG_VFIO MSIMessage msg; if (vdev->interrupt == VFIO_INT_MSIX) { @@ -1125,6 +1132,14 @@ static void vfio_pci_load_rom(VFIODevice *vdev) vdev->rom_offset = reg_info.offset; if (!vdev->rom_size) { + vdev->rom_read_failed = true; + error_report("vfio-pci: Cannot read device rom at " + "%04x:%02x:%02x.%x\n", + vdev->host.domain, vdev->host.bus, vdev->host.slot, + vdev->host.function); + error_printf("Device option ROM contents are probably invalid " + "(check dmesg).\nSkip option ROM probe with rombar=0, " + "or load from file with romfile=\n"); return; } @@ -1156,6 +1171,9 @@ static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) /* Load the ROM lazily when the guest tries to read it */ if (unlikely(!vdev->rom)) { vfio_pci_load_rom(vdev); + if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { + vfio_pci_load_rom(vdev); + } } memcpy(&val, vdev->rom + addr, @@ -1223,6 +1241,7 @@ static void vfio_pci_size_rom(VFIODevice *vdev) PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); vdev->pdev.has_rom = true; + vdev->rom_read_failed = false; } static void vfio_vga_write(void *opaque, hwaddr addr, @@ -1968,6 +1987,7 @@ static void vfio_vga_quirk_teardown(VFIODevice *vdev) while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); + memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); } @@ -1990,6 +2010,7 @@ static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr) while (!QLIST_EMPTY(&bar->quirks)) { VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); memory_region_del_subregion(&bar->mem, &quirk->mem); + memory_region_destroy(&quirk->mem); QLIST_REMOVE(quirk, next); g_free(quirk); } @@ -2141,14 +2162,21 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, static bool vfio_listener_skipped_section(MemoryRegionSection *section) { - return !memory_region_is_ram(section->mr); + return !memory_region_is_ram(section->mr) || + /* + * Sizing an enabled 64-bit BAR can cause spurious mappings to + * addresses in the upper part of the 64-bit address space. These + * are never accessed by the CPU and beyond the address width of + * some IOMMU hardware. TODO: VFIO should tell us the IOMMU width. 
+ */ + section->offset_within_address_space & (1ULL << 63); } static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.listener); + iommu_data.type1.listener); hwaddr iova, end; void *vaddr; int ret; @@ -2190,6 +2218,19 @@ static void vfio_listener_region_add(MemoryListener *listener, error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%m)", container, iova, end - iova, vaddr, ret); + + /* + * On the initfn path, store the first error in the container so we + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. + */ + if (!container->iommu_data.type1.initialized) { + if (!container->iommu_data.type1.error) { + container->iommu_data.type1.error = ret; + } + } else { + hw_error("vfio: DMA mapping failed, unable to continue\n"); + } } } @@ -2197,7 +2238,7 @@ static void vfio_listener_region_del(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, - iommu_data.listener); + iommu_data.type1.listener); hwaddr iova, end; int ret; @@ -2242,7 +2283,7 @@ static MemoryListener vfio_memory_listener = { static void vfio_listener_release(VFIOContainer *container) { - memory_listener_unregister(&container->iommu_data.listener); + memory_listener_unregister(&container->iommu_data.type1.listener); } /* @@ -2412,10 +2453,12 @@ static void vfio_unmap_bar(VFIODevice *vdev, int nr) memory_region_del_subregion(&bar->mem, &bar->mmap_mem); munmap(bar->mmap, memory_region_size(&bar->mmap_mem)); + memory_region_destroy(&bar->mmap_mem); if (vdev->msix && vdev->msix->table_bar == nr) { memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); + memory_region_destroy(&vdev->msix->mmap_mem); } memory_region_destroy(&bar->mem); @@ -2501,7 +2544,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) * potentially insert a direct-mapped subregion before and after it. */ if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & TARGET_PAGE_MASK; + size = vdev->msix->table_offset & qemu_host_page_mask; } strncat(name, " mmap", sizeof(name) - strlen(name) - 1); @@ -2513,8 +2556,8 @@ static void vfio_map_bar(VFIODevice *vdev, int nr) if (vdev->msix && vdev->msix->table_bar == nr) { unsigned start; - start = TARGET_PAGE_ALIGN(vdev->msix->table_offset + - (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + start = HOST_PAGE_ALIGN(vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); size = start < bar->size ? 
bar->size - start : 0; strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); @@ -3212,10 +3255,23 @@ static int vfio_connect_container(VFIOGroup *group) return -errno; } - container->iommu_data.listener = vfio_memory_listener; + container->iommu_data.type1.listener = vfio_memory_listener; container->iommu_data.release = vfio_listener_release; - memory_listener_register(&container->iommu_data.listener, &address_space_memory); + memory_listener_register(&container->iommu_data.type1.listener, + &address_space_memory); + + if (container->iommu_data.type1.error) { + ret = container->iommu_data.type1.error; + vfio_listener_release(container); + g_free(container); + close(fd); + error_report("vfio: memory listener initialization failed for container\n"); + return ret; + } + + container->iommu_data.type1.initialized = true; + } else { error_report("vfio: No available IOMMU models"); g_free(container); diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 77e1218447..1ba6c3ab70 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -3,6 +3,7 @@ obj-y += s390-virtio-hcall.o obj-y += sclp.o obj-y += event-facility.o obj-y += sclpquiesce.o +obj-y += sclpcpu.o obj-y += ipl.o obj-y += css.o obj-y += s390-virtio-ccw.o diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c index 25951a020a..a73c0b924a 100644 --- a/hw/s390x/event-facility.c +++ b/hw/s390x/event-facility.c @@ -32,6 +32,8 @@ struct SCLPEventFacility { unsigned int receive_mask; }; +SCLPEvent cpu_hotplug; + /* return true if any child has event pending set */ static bool event_pending(SCLPEventFacility *ef) { @@ -335,6 +337,10 @@ static int init_event_facility(S390SCLPDevice *sdev) } qdev_init_nofail(quiesce); + object_initialize(&cpu_hotplug, sizeof(cpu_hotplug), TYPE_SCLP_CPU_HOTPLUG); + qdev_set_parent_bus(DEVICE(&cpu_hotplug), BUS(&event_facility->sbus)); + object_property_set_bool(OBJECT(&cpu_hotplug), true, "realized", NULL); + return 0; } diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index 86d6ae0023..4e0c564c5c 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -15,6 +15,7 @@ #include "cpu.h" #include "sysemu/kvm.h" #include "exec/memory.h" +#include "sysemu/sysemu.h" #include "hw/s390x/sclp.h" @@ -31,7 +32,26 @@ static inline S390SCLPDevice *get_event_facility(void) static void read_SCP_info(SCCB *sccb) { ReadInfo *read_info = (ReadInfo *) sccb; + CPUState *cpu; int shift = 0; + int cpu_count = 0; + int i = 0; + + CPU_FOREACH(cpu) { + cpu_count++; + } + + /* CPU information */ + read_info->entries_cpu = cpu_to_be16(cpu_count); + read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); + read_info->highest_cpu = cpu_to_be16(max_cpus); + + for (i = 0; i < cpu_count; i++) { + read_info->entries[i].address = i; + read_info->entries[i].type = 0; + } + + read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO); while ((ram_size >> (20 + shift)) > 65535) { shift++; @@ -41,15 +61,46 @@ static void read_SCP_info(SCCB *sccb) sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION); } +/* Provide information about the CPU */ +static void sclp_read_cpu_info(SCCB *sccb) +{ + ReadCpuInfo *cpu_info = (ReadCpuInfo *) sccb; + CPUState *cpu; + int cpu_count = 0; + int i = 0; + + CPU_FOREACH(cpu) { + cpu_count++; + } + + cpu_info->nr_configured = cpu_to_be16(cpu_count); + cpu_info->offset_configured = cpu_to_be16(offsetof(ReadCpuInfo, entries)); + cpu_info->nr_standby = cpu_to_be16(0); + + /* The standby offset is 16-byte for each CPU */ + cpu_info->offset_standby = 
cpu_to_be16(cpu_info->offset_configured + + cpu_info->nr_configured*sizeof(CPUEntry)); + + for (i = 0; i < cpu_count; i++) { + cpu_info->entries[i].address = i; + cpu_info->entries[i].type = 0; + } + + sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION); +} + static void sclp_execute(SCCB *sccb, uint64_t code) { S390SCLPDevice *sdev = get_event_facility(); - switch (code) { + switch (code & SCLP_CMD_CODE_MASK) { case SCLP_CMDW_READ_SCP_INFO: case SCLP_CMDW_READ_SCP_INFO_FORCED: read_SCP_info(sccb); break; + case SCLP_CMDW_READ_CPU_INFO: + sclp_read_cpu_info(sccb); + break; default: sdev->sclp_command_handler(sdev->ef, sccb, code); break; diff --git a/hw/s390x/sclpcpu.c b/hw/s390x/sclpcpu.c new file mode 100644 index 0000000000..b9c238a0da --- /dev/null +++ b/hw/s390x/sclpcpu.c @@ -0,0 +1,112 @@ +/* + * SCLP event type + * Signal CPU - Trigger SCLP interrupt for system CPU configure or + * de-configure + * + * Copyright IBM, Corp. 2013 + * + * Authors: + * Thang Pham <thang.pham@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ +#include "sysemu/sysemu.h" +#include "hw/s390x/sclp.h" +#include "hw/s390x/event-facility.h" +#include "cpu.h" +#include "sysemu/cpus.h" +#include "sysemu/kvm.h" + +typedef struct ConfigMgtData { + EventBufferHeader ebh; + uint8_t reserved; + uint8_t event_qualifier; +} QEMU_PACKED ConfigMgtData; + +static qemu_irq irq_cpu_hotplug; /* Only used in this file */ + +#define EVENT_QUAL_CPU_CHANGE 1 + +void raise_irq_cpu_hotplug(void) +{ + qemu_irq_raise(irq_cpu_hotplug); +} + +static unsigned int send_mask(void) +{ + return SCLP_EVENT_MASK_CONFIG_MGT_DATA; +} + +static unsigned int receive_mask(void) +{ + return 0; +} + +static int read_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr, + int *slen) +{ + ConfigMgtData *cdata = (ConfigMgtData *) evt_buf_hdr; + if (*slen < sizeof(ConfigMgtData)) { + return 0; + } + + /* Event is no longer pending */ + if (!event->event_pending) { + return 0; + } + event->event_pending = false; + + /* Event header data */ + cdata->ebh.length = cpu_to_be16(sizeof(ConfigMgtData)); + cdata->ebh.type = SCLP_EVENT_CONFIG_MGT_DATA; + cdata->ebh.flags |= SCLP_EVENT_BUFFER_ACCEPTED; + + /* Trigger a rescan of CPUs by setting event qualifier */ + cdata->event_qualifier = EVENT_QUAL_CPU_CHANGE; + *slen -= sizeof(ConfigMgtData); + + return 1; +} + +static void trigger_signal(void *opaque, int n, int level) +{ + SCLPEvent *event = opaque; + event->event_pending = true; + + /* Trigger SCLP read operation */ + sclp_service_interrupt(0); +} + +static int irq_cpu_hotplug_init(SCLPEvent *event) +{ + irq_cpu_hotplug = *qemu_allocate_irqs(trigger_signal, event, 1); + return 0; +} + +static void cpu_class_init(ObjectClass *oc, void *data) +{ + SCLPEventClass *k = SCLP_EVENT_CLASS(oc); + + k->init = irq_cpu_hotplug_init; + k->get_send_mask = send_mask; + k->get_receive_mask = receive_mask; + k->read_event_data = read_event_data; + k->write_event_data = NULL; +} + +static const TypeInfo sclp_cpu_info = { + .name = "sclp-cpu-hotplug", + .parent = TYPE_SCLP_EVENT, + .instance_size = sizeof(SCLPEvent), + .class_init = cpu_class_init, + .class_size = sizeof(SCLPEventClass), +}; + +static void sclp_cpu_register_types(void) +{ + type_register_static(&sclp_cpu_info); +} + +type_init(sclp_cpu_register_types) diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index 755fdee628..a16e3bc52e 100644 --- 
a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -15,6 +15,7 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-rng.h" #include "sysemu/rng.h" +#include "qom/object_interfaces.h" static bool is_guest_ready(VirtIORNG *vrng) { @@ -148,6 +149,14 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) if (vrng->conf.rng == NULL) { vrng->conf.default_backend = RNG_RANDOM(object_new(TYPE_RNG_RANDOM)); + user_creatable_complete(OBJECT(vrng->conf.default_backend), + &local_err); + if (local_err) { + error_propagate(errp, local_err); + object_unref(OBJECT(vrng->conf.default_backend)); + return; + } + object_property_add_child(OBJECT(dev), "default-backend", OBJECT(vrng->conf.default_backend), @@ -166,12 +175,6 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) return; } - rng_backend_open(vrng->rng, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - vrng->vq = virtio_add_queue(vdev, 8, handle_input); assert(vrng->conf.max_bytes <= INT64_MAX); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index ea90b649d4..3b03cbfcf8 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -81,6 +81,7 @@ void cpu_gen_init(void); int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb, int *gen_code_size_ptr); bool cpu_restore_state(CPUArchState *env, uintptr_t searched_pc); +void page_size_init(void); void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc); void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr); diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h index ab30559c79..73a6134c1e 100644 --- a/include/hw/cris/etraxfs.h +++ b/include/hw/cris/etraxfs.h @@ -28,8 +28,6 @@ #include "net/net.h" #include "hw/cris/etraxfs_dma.h" -qemu_irq *cris_pic_init_cpu(CPUCRISState *env); - /* Instantiate an ETRAXFS Ethernet MAC. */ static inline DeviceState * etraxfs_eth_init(NICInfo *nd, hwaddr base, int phyaddr, diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h index 0d232dfb67..8a2aa00cee 100644 --- a/include/hw/intc/arm_gic_common.h +++ b/include/hw/intc/arm_gic_common.h @@ -27,6 +27,7 @@ #define GIC_MAXIRQ 1020 /* First 32 are private to each CPU (SGIs and PPIs). 
*/ #define GIC_INTERNAL 32 +#define GIC_NR_SGIS 16 /* Maximum number of possible CPU interfaces, determined by GIC architecture */ #define GIC_NCPU 8 diff --git a/include/hw/s390x/event-facility.h b/include/hw/s390x/event-facility.h index 7ce7079f9f..870edd46f6 100644 --- a/include/hw/s390x/event-facility.h +++ b/include/hw/s390x/event-facility.h @@ -17,10 +17,12 @@ #include <hw/qdev.h> #include "qemu/thread.h" +#include "hw/s390x/sclp.h" /* SCLP event types */ #define SCLP_EVENT_OPRTNS_COMMAND 0x01 #define SCLP_EVENT_MESSAGE 0x02 +#define SCLP_EVENT_CONFIG_MGT_DATA 0x04 #define SCLP_EVENT_PMSGCMD 0x09 #define SCLP_EVENT_ASCII_CONSOLE_DATA 0x1a #define SCLP_EVENT_SIGNAL_QUIESCE 0x1d @@ -28,6 +30,7 @@ /* SCLP event masks */ #define SCLP_EVENT_MASK_SIGNAL_QUIESCE 0x00000008 #define SCLP_EVENT_MASK_MSG_ASCII 0x00000040 +#define SCLP_EVENT_MASK_CONFIG_MGT_DATA 0x10000000 #define SCLP_EVENT_MASK_OP_CMD 0x80000000 #define SCLP_EVENT_MASK_MSG 0x40000000 #define SCLP_EVENT_MASK_PMSGCMD 0x00800000 @@ -43,6 +46,8 @@ #define SCLP_EVENT_GET_CLASS(obj) \ OBJECT_GET_CLASS(SCLPEventClass, (obj), TYPE_SCLP_EVENT) +#define TYPE_SCLP_CPU_HOTPLUG "sclp-cpu-hotplug" + typedef struct WriteEventMask { SCCBHeader h; uint16_t _reserved; diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h index 231a38aa09..35112d92b1 100644 --- a/include/hw/s390x/sclp.h +++ b/include/hw/s390x/sclp.h @@ -17,21 +17,41 @@ #include <hw/sysbus.h> #include <hw/qdev.h> +#define SCLP_CMD_CODE_MASK 0xffff00ff + /* SCLP command codes */ #define SCLP_CMDW_READ_SCP_INFO 0x00020001 #define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 +#define SCLP_READ_STORAGE_ELEMENT_INFO 0x00040001 +#define SCLP_ATTACH_STORAGE_ELEMENT 0x00080001 +#define SCLP_ASSIGN_STORAGE 0x000D0001 +#define SCLP_UNASSIGN_STORAGE 0x000C0001 #define SCLP_CMD_READ_EVENT_DATA 0x00770005 #define SCLP_CMD_WRITE_EVENT_DATA 0x00760005 #define SCLP_CMD_READ_EVENT_DATA 0x00770005 #define SCLP_CMD_WRITE_EVENT_DATA 0x00760005 #define SCLP_CMD_WRITE_EVENT_MASK 0x00780005 +/* SCLP Memory hotplug codes */ +#define SCLP_FC_ASSIGN_ATTACH_READ_STOR 0xE00000000000ULL +#define SCLP_STARTING_SUBINCREMENT_ID 0x10001 +#define SCLP_INCREMENT_UNIT 0x10000 +#define MAX_AVAIL_SLOTS 32 + +/* CPU hotplug SCLP codes */ +#define SCLP_HAS_CPU_INFO 0x0C00000000000000ULL +#define SCLP_CMDW_READ_CPU_INFO 0x00010001 +#define SCLP_CMDW_CONFIGURE_CPU 0x00110001 +#define SCLP_CMDW_DECONFIGURE_CPU 0x00100001 + /* SCLP response codes */ #define SCLP_RC_NORMAL_READ_COMPLETION 0x0010 #define SCLP_RC_NORMAL_COMPLETION 0x0020 +#define SCLP_RC_SCCB_BOUNDARY_VIOLATION 0x0100 #define SCLP_RC_INVALID_SCLP_COMMAND 0x01f0 #define SCLP_RC_CONTAINED_EQUIPMENT_CHECK 0x0340 #define SCLP_RC_INSUFFICIENT_SCCB_LENGTH 0x0300 +#define SCLP_RC_STANDBY_READ_COMPLETION 0x0410 #define SCLP_RC_INVALID_FUNCTION 0x40f0 #define SCLP_RC_NO_EVENT_BUFFERS_STORED 0x60f0 #define SCLP_RC_INVALID_SELECTION_MASK 0x70f0 @@ -71,12 +91,66 @@ typedef struct SCCBHeader { #define SCCB_DATA_LEN (SCCB_SIZE - sizeof(SCCBHeader)) +/* CPU information */ +typedef struct CPUEntry { + uint8_t address; + uint8_t reserved0[13]; + uint8_t type; + uint8_t reserved1; +} QEMU_PACKED CPUEntry; + typedef struct ReadInfo { SCCBHeader h; uint16_t rnmax; uint8_t rnsize; + uint8_t _reserved1[16 - 11]; /* 11-15 */ + uint16_t entries_cpu; /* 16-17 */ + uint16_t offset_cpu; /* 18-19 */ + uint8_t _reserved2[24 - 20]; /* 20-23 */ + uint8_t loadparm[8]; /* 24-31 */ + uint8_t _reserved3[48 - 32]; /* 32-47 */ + uint64_t facilities; /* 48-55 */ + uint8_t _reserved0[100 - 
56]; + uint32_t rnsize2; + uint64_t rnmax2; + uint8_t _reserved4[120-112]; /* 112-119 */ + uint16_t highest_cpu; + uint8_t _reserved5[128 - 122]; /* 122-127 */ + struct CPUEntry entries[0]; } QEMU_PACKED ReadInfo; +typedef struct ReadCpuInfo { + SCCBHeader h; + uint16_t nr_configured; /* 8-9 */ + uint16_t offset_configured; /* 10-11 */ + uint16_t nr_standby; /* 12-13 */ + uint16_t offset_standby; /* 14-15 */ + uint8_t reserved0[24-16]; /* 16-23 */ + struct CPUEntry entries[0]; +} QEMU_PACKED ReadCpuInfo; + +typedef struct ReadStorageElementInfo { + SCCBHeader h; + uint16_t max_id; + uint16_t assigned; + uint16_t standby; + uint8_t _reserved0[16 - 14]; /* 14-15 */ + uint32_t entries[0]; +} QEMU_PACKED ReadStorageElementInfo; + +typedef struct AttachStorageElement { + SCCBHeader h; + uint8_t _reserved0[10 - 8]; /* 8-9 */ + uint16_t assigned; + uint8_t _reserved1[16 - 12]; /* 12-15 */ + uint32_t entries[0]; +} QEMU_PACKED AttachStorageElement; + +typedef struct AssignStorage { + SCCBHeader h; + uint16_t rn; +} QEMU_PACKED AssignStorage; + typedef struct SCCB { SCCBHeader h; char data[SCCB_DATA_LEN]; @@ -114,5 +188,6 @@ typedef struct S390SCLPDeviceClass { void s390_sclp_init(void); void sclp_service_interrupt(uint32_t sccb); +void raise_irq_cpu_hotplug(void); #endif diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h new file mode 100644 index 0000000000..b7922833e1 --- /dev/null +++ b/include/qom/object_interfaces.h @@ -0,0 +1,62 @@ +#ifndef OBJECT_INTERFACES_H +#define OBJECT_INTERFACES_H + +#include "qom/object.h" + +#define TYPE_USER_CREATABLE "user-creatable" + +#define USER_CREATABLE_CLASS(klass) \ + OBJECT_CLASS_CHECK(UserCreatableClass, (klass), \ + TYPE_USER_CREATABLE) +#define USER_CREATABLE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(UserCreatableClass, (obj), \ + TYPE_USER_CREATABLE) +#define USER_CREATABLE(obj) \ + INTERFACE_CHECK(UserCreatable, (obj), \ + TYPE_USER_CREATABLE) + + +typedef struct UserCreatable { + /* <private> */ + Object Parent; +} UserCreatable; + +/** + * UserCreatableClass: + * @parent_class: the base class + * @complete: callback to be called after @obj's properties are set. + * + * Interface is designed to work with -object/object-add/object_add + * commands. + * Interface is mandatory for objects that are designed to be user + * creatable (i.e. -object/object-add/object_add, will accept only + * objects that inherit this interface). + * + * Interface also provides an optional ability to do the second + * stage * initialization of the object after its properties were + * set. + * + * For objects created without using -object/object-add/object_add, + * @user_creatable_complete() wrapper should be called manually if + * object's type implements USER_CREATABLE interface and needs + * complete() callback to be called. + */ +typedef struct UserCreatableClass { + /* <private> */ + InterfaceClass parent_class; + + /* <public> */ + void (*complete)(UserCreatable *uc, Error **errp); +} UserCreatableClass; + +/** + * user_creatable_complete: + * @obj: the object whose complete() method is called if defined + * @errp: if an error occurs, a pointer to an area to store the error + * + * Wrapper to call complete() method if one of types it's inherited + * from implements USER_CREATABLE interface, otherwise the call does + * nothing. 
+ */ +void user_creatable_complete(Object *obj, Error **errp); +#endif diff --git a/include/sysemu/rng.h b/include/sysemu/rng.h index 7637fac52d..0a27c9b88c 100644 --- a/include/sysemu/rng.h +++ b/include/sysemu/rng.h @@ -79,15 +79,4 @@ void rng_backend_request_entropy(RngBackend *s, size_t size, * to stop tracking any request. */ void rng_backend_cancel_requests(RngBackend *s); - -/** - * rng_backend_open: - * @s: the backend to open - * @errp: a pointer to return the #Error object if an error occurs. - * - * This function will open the backend if it is not already open. Calling this - * function on an already opened backend will not result in an error. - */ -void rng_backend_open(RngBackend *s, Error **errp); - #endif @@ -1360,6 +1360,7 @@ int kvm_init(void) * page size for the system though. */ assert(TARGET_PAGE_SIZE <= getpagesize()); + page_size_init(); #ifdef KVM_CAP_SET_GUEST_DEBUG QTAILQ_INIT(&s->kvm_sw_breakpoints); diff --git a/linux-user/signal.c b/linux-user/signal.c index 01d7c393df..82e8592546 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -3659,7 +3659,7 @@ struct target_sigcontext { struct target_signal_frame { struct target_sigcontext sc; uint32_t extramask[TARGET_NSIG_WORDS - 1]; - uint8_t retcode[8]; /* Trampoline code. */ + uint16_t retcode[4]; /* Trampoline code. */ }; struct rt_signal_frame { @@ -3667,7 +3667,7 @@ struct rt_signal_frame { void *puc; siginfo_t info; struct ucontext uc; - uint8_t retcode[8]; /* Trampoline code. */ + uint16_t retcode[4]; /* Trampoline code. */ }; static void setup_sigcontext(struct target_sigcontext *sc, CPUCRISState *env) @@ -3745,8 +3745,8 @@ static void setup_frame(int sig, struct target_sigaction *ka, */ err |= __put_user(0x9c5f, frame->retcode+0); err |= __put_user(TARGET_NR_sigreturn, - frame->retcode+2); - err |= __put_user(0xe93d, frame->retcode+4); + frame->retcode + 1); + err |= __put_user(0xe93d, frame->retcode + 2); /* Save the mask. */ err |= __put_user(set->sig[0], &frame->sc.oldmask); @@ -288,8 +288,8 @@ void monitor_flush(Monitor *mon) if (len && !mon->mux_out) { rc = qemu_chr_fe_write(mon->chr, (const uint8_t *) buf, len); - if (rc == len) { - /* all flushed */ + if ((rc < 0 && errno != EAGAIN) || (rc == len)) { + /* all flushed or error */ QDECREF(mon->outbuf); mon->outbuf = qstring_new(); return; diff --git a/pc-bios/QEMU,tcx.bin b/pc-bios/QEMU,tcx.bin Binary files differindex a8ddd70ef3..eed108f3f1 100644 --- a/pc-bios/QEMU,tcx.bin +++ b/pc-bios/QEMU,tcx.bin diff --git a/pc-bios/README b/pc-bios/README index a110125a97..f1900686dc 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -12,7 +12,7 @@ 1275-1994 (referred to as Open Firmware) compliant firmware. The included images for PowerPC (for 32 and 64 bit PPC CPUs), Sparc32 (including QEMU,tcx.bin) and Sparc64 are built from OpenBIOS SVN - revision 1229. + revision 1246. - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. 
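The TYPE_USER_CREATABLE interface declared in include/qom/object_interfaces.h above is what the rng backend and virtio-rng hunks earlier hook into: object-add/-object (and manual callers of user_creatable_complete()) run the class's complete() callback once all properties have been set. A hedged sketch of the minimal wiring a QOM type needs; the dummy-backend name and struct below are invented for illustration:

/* Hedged sketch, not part of the patch: a hypothetical "dummy-backend" type
 * showing the minimal wiring a QOM object needs to become user creatable. */
#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "qapi/error.h"
#include "qemu/module.h"

#define TYPE_DUMMY_BACKEND "dummy-backend"
#define DUMMY_BACKEND(obj) \
    OBJECT_CHECK(DummyBackend, (obj), TYPE_DUMMY_BACKEND)

typedef struct DummyBackend {
    Object parent_obj;
    bool opened;
} DummyBackend;

static void dummy_backend_complete(UserCreatable *uc, Error **errp)
{
    DummyBackend *s = DUMMY_BACKEND(uc);

    /* Second-stage init: runs after -object/object-add set all properties. */
    s->opened = true;
}

static void dummy_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = dummy_backend_complete;
}

static const TypeInfo dummy_backend_info = {
    .name          = TYPE_DUMMY_BACKEND,
    .parent        = TYPE_OBJECT,
    .instance_size = sizeof(DummyBackend),
    .class_init    = dummy_backend_class_init,
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void dummy_register_types(void)
{
    type_register_static(&dummy_backend_info);
}

type_init(dummy_register_types)

With this registered, user_creatable_complete(obj, &err) invokes dummy_backend_complete() for the object, and does nothing for types that do not implement the interface.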
The sources are at diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc Binary files differindex 550273a5ef..f4a3a396c4 100644 --- a/pc-bios/openbios-ppc +++ b/pc-bios/openbios-ppc diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 Binary files differindex 01105fc904..bb7cdfb4ec 100644 --- a/pc-bios/openbios-sparc32 +++ b/pc-bios/openbios-sparc32 diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 Binary files differindex 62c9e77983..46b4fddd08 100644 --- a/pc-bios/openbios-sparc64 +++ b/pc-bios/openbios-sparc64 @@ -27,6 +27,7 @@ #include "qapi/qmp/qobject.h" #include "qapi/qmp-input-visitor.h" #include "hw/boards.h" +#include "qom/object_interfaces.h" NameInfo *qmp_query_name(Error **errp) { @@ -549,15 +550,28 @@ void object_add(const char *type, const char *id, const QDict *qdict, for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) { object_property_set(obj, v, e->key, &local_err); if (local_err) { - error_propagate(errp, local_err); - object_unref(obj); - return; + goto out; } } } + if (!object_dynamic_cast(obj, TYPE_USER_CREATABLE)) { + error_setg(&local_err, "object '%s' isn't supported by object-add", + id); + goto out; + } + + user_creatable_complete(obj, &local_err); + if (local_err) { + goto out; + } + object_property_add_child(container_get(object_get_root(), "/objects"), - id, obj, errp); + id, obj, &local_err); +out: + if (local_err) { + error_propagate(errp, local_err); + } object_unref(obj); } diff --git a/qom/Makefile.objs b/qom/Makefile.objs index 6a93ac7398..985003bd03 100644 --- a/qom/Makefile.objs +++ b/qom/Makefile.objs @@ -1,2 +1,3 @@ common-obj-y = object.o container.o qom-qobject.o common-obj-y += cpu.o +common-obj-y += object_interfaces.o diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c new file mode 100644 index 0000000000..6360818397 --- /dev/null +++ b/qom/object_interfaces.c @@ -0,0 +1,32 @@ +#include "qom/object_interfaces.h" +#include "qemu/module.h" + +void user_creatable_complete(Object *obj, Error **errp) +{ + + UserCreatableClass *ucc; + UserCreatable *uc = + (UserCreatable *)object_dynamic_cast(obj, TYPE_USER_CREATABLE); + + if (!uc) { + return; + } + + ucc = USER_CREATABLE_GET_CLASS(uc); + if (ucc->complete) { + ucc->complete(uc, errp); + } +} + +static void register_types(void) +{ + static const TypeInfo uc_interface_info = { + .name = TYPE_USER_CREATABLE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(UserCreatableClass), + }; + + type_register_static(&uc_interface_info); +} + +type_init(register_types) diff --git a/roms/openbios b/roms/openbios -Subproject d363cf50c50c268da7e6d0bf707adde1893d1ab +Subproject 888126272f92294b0da45158393f1b862742cf6 diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 52efd5d66f..45ad7f0260 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -982,6 +982,7 @@ static const ARMCPUInfo arm_cpus[] = { static Property arm_cpu_properties[] = { DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), + DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), DEFINE_PROP_END_OF_LIST() }; diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 198b6b8d4e..383c58221e 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -496,6 +496,8 @@ enum arm_fprounding { FPROUNDING_ODD }; +int arm_rmode_to_sf(int rmode); + enum arm_cpu_mode { ARM_CPU_MODE_USR = 0x10, ARM_CPU_MODE_FIQ = 0x11, diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 4ce0d01a85..6ca958afb1 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -122,3 +122,34 @@ uint64_t 
HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status) { return float_rel_to_flags(float64_compare(x, y, fp_status)); } + +uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, + uint32_t rn, uint32_t numregs) +{ + /* Helper function for SIMD TBL and TBX. We have to do the table + * lookup part for the 64 bits worth of indices we're passed in. + * result is the initial results vector (either zeroes for TBL + * or some guest values for TBX), rn the register number where + * the table starts, and numregs the number of registers in the table. + * We return the results of the lookups. + */ + int shift; + + for (shift = 0; shift < 64; shift += 8) { + int index = extract64(indices, shift, 8); + if (index < 16 * numregs) { + /* Convert index (a byte offset into the virtual table + * which is a series of 128-bit vectors concatenated) + * into the correct vfp.regs[] element plus a bit offset + * into that element, bearing in mind that the table + * can wrap around from V31 to V0. + */ + int elt = (rn * 2 + (index >> 3)) % 64; + int bitidx = (index & 7) * 8; + uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8); + + result = deposit64(result, shift, 8, val); + } + } + return result; +} diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index bca19f3dea..99832ee55e 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -26,3 +26,4 @@ DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) +DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) diff --git a/target-arm/helper.c b/target-arm/helper.c index c708f15e27..ca5b0000ad 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -4048,6 +4048,23 @@ uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env) return prev_rmode; } +/* Set the current fp rounding mode in the standard fp status and return + * the old one. This is for NEON instructions that need to change the + * rounding mode but wish to use the standard FPSCR values for everything + * else. Always set the rounding mode back to the correct value after + * modifying it. + * The argument is a softfloat float_round_ value. + */ +uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env) +{ + float_status *fp_status = &env->vfp.standard_fp_status; + + uint32_t prev_rmode = get_float_rounding_mode(fp_status); + set_float_rounding_mode(rmode, fp_status); + + return prev_rmode; +} + /* Half precision conversions. 
*/ static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s) { @@ -4418,3 +4435,31 @@ float64 HELPER(rintd)(float64 x, void *fp_status) return ret; } + +/* Convert ARM rounding mode to softfloat */ +int arm_rmode_to_sf(int rmode) +{ + switch (rmode) { + case FPROUNDING_TIEAWAY: + rmode = float_round_ties_away; + break; + case FPROUNDING_ODD: + /* FIXME: add support for TIEAWAY and ODD */ + qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n", + rmode); + case FPROUNDING_TIEEVEN: + default: + rmode = float_round_nearest_even; + break; + case FPROUNDING_POSINF: + rmode = float_round_up; + break; + case FPROUNDING_NEGINF: + rmode = float_round_down; + break; + case FPROUNDING_ZERO: + rmode = float_round_to_zero; + break; + } + return rmode; +} diff --git a/target-arm/helper.h b/target-arm/helper.h index 70872dffc6..71b8411120 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -149,6 +149,7 @@ DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr) DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env) +DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env) DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env) DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env) diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index cf80c46b90..6c1ec1edc6 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -61,6 +61,20 @@ enum a64_shift_type { A64_SHIFT_TYPE_ROR = 3 }; +/* Table based decoder typedefs - used when the relevant bits for decode + * are too awkwardly scattered across the instruction (eg SIMD). + */ +typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); + +typedef struct AArch64DecodeTable { + uint32_t pattern; + uint32_t mask; + AArch64DecodeFn *disas_fn; +} AArch64DecodeTable; + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); + /* initialize TCG globals. */ void a64_translate_init(void) { @@ -308,6 +322,28 @@ static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) return v; } +/* Return the offset into CPUARMState of an element of specified + * size, 'element' places in from the least significant end of + * the FP/vector register Qn. + */ +static inline int vec_reg_offset(int regno, int element, TCGMemOp size) +{ + int offs = offsetof(CPUARMState, vfp.regs[regno * 2]); +#ifdef HOST_WORDS_BIGENDIAN + /* This is complicated slightly because vfp.regs[2n] is + * still the low half and vfp.regs[2n+1] the high half + * of the 128 bit vector, even on big endian systems. + * Calculate the offset assuming a fully bigendian 128 bits, + * then XOR to account for the order of the two 64 bit halves. + */ + offs += (16 - ((element + 1) * (1 << size))); + offs ^= 8; +#else + offs += element * (1 << size); +#endif + return offs; +} + /* Return the offset into CPUARMState of a slice (from * the least significant end) of FP register Qn (ie * Dn, Sn, Hn or Bn). @@ -661,6 +697,156 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) } /* + * Vector load/store helpers. + * + * The principal difference between this and a FP load is that we don't + * zero extend as we are filling a partial chunk of the vector register. + * These functions don't support 128 bit loads/stores, which would be + * normal load/store operations. + * + * The _i32 versions are useful when operating on 32 bit quantities + * (eg for floating point single or using Neon helper functions). 
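vec_reg_offset above has to account for vfp.regs[2n] still holding the low 64 bits of the 128-bit vector even on a big-endian host. A standalone model of just that offset arithmetic (relative to the start of the two 64-bit halves), with a boolean standing in for HOST_WORDS_BIGENDIAN and an invented function name:

#include <assert.h>
#include <stdbool.h>

/* Offset of an element of 'esize' bytes, counted from the start of the two
 * uint64_t halves that back one 128-bit vector register. */
static int model_vec_reg_offset(int element, int esize, bool big_endian_host)
{
    int offs;

    if (big_endian_host) {
        /* Compute the offset for a fully big-endian 128-bit value, then
         * XOR 8 to swap which uint64_t half it lands in, because
         * vfp.regs[2n] is still the low half on a big-endian host. */
        offs = 16 - (element + 1) * esize;
        offs ^= 8;
    } else {
        offs = element * esize;
    }
    return offs;
}

int main(void)
{
    assert(model_vec_reg_offset(0, 1, false) == 0);  /* LE: byte 0 at offset 0 */
    assert(model_vec_reg_offset(0, 1, true)  == 7);  /* BE: byte 0 is the last byte of the low half */
    assert(model_vec_reg_offset(8, 1, true)  == 15); /* BE: byte 8 is the last byte of the high half */
    assert(model_vec_reg_offset(1, 8, true)  == 8);  /* the high 64-bit element is at +8 either way */
    return 0;
}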
+ */ + +/* Get value of an element within a vector register */ +static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_8|MO_SIGN: + tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_16|MO_SIGN: + tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_32|MO_SIGN: + tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_64: + case MO_64|MO_SIGN: + tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_8|MO_SIGN: + tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_16|MO_SIGN: + tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_32: + case MO_32|MO_SIGN: + tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +/* Set value of an element within a vector register */ +static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_st8_i64(tcg_src, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_st16_i64(tcg_src, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_st32_i64(tcg_src, cpu_env, vect_off); + break; + case MO_64: + tcg_gen_st_i64(tcg_src, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, + int destidx, int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_st8_i32(tcg_src, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_st16_i32(tcg_src, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_st_i32(tcg_src, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +/* Clear the high 64 bits of a 128 bit vector (in general non-quad + * vector ops all need to do this). 
+ */ +static void clear_vec_high(DisasContext *s, int rd) +{ + TCGv_i64 tcg_zero = tcg_const_i64(0); + + write_vec_element(s, tcg_zero, rd, 1, MO_64); + tcg_temp_free_i64(tcg_zero); +} + +/* Store from vector register to memory */ +static void do_vec_st(DisasContext *s, int srcidx, int element, + TCGv_i64 tcg_addr, int size) +{ + TCGMemOp memop = MO_TE + size; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + read_vec_element(s, tcg_tmp, srcidx, element, size); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + + tcg_temp_free_i64(tcg_tmp); +} + +/* Load from memory to vector register */ +static void do_vec_ld(DisasContext *s, int destidx, int element, + TCGv_i64 tcg_addr, int size) +{ + TCGMemOp memop = MO_TE + size; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + write_vec_element(s, tcg_tmp, destidx, element, size); + + tcg_temp_free_i64(tcg_tmp); +} + +/* * This utility function is for doing register extension with an * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. @@ -722,6 +908,31 @@ static inline void gen_check_sp_alignment(DisasContext *s) } /* + * This provides a simple table based table lookup decoder. It is + * intended to be used when the relevant bits for decode are too + * awkwardly placed and switch/if based logic would be confusing and + * deeply nested. Since it's a linear search through the table, tables + * should be kept small. + * + * It returns the first handler where insn & mask == pattern, or + * NULL if there is no match. + * The table is terminated by an empty mask (i.e. 0) + */ +static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, + uint32_t insn) +{ + const AArch64DecodeTable *tptr = table; + + while (tptr->mask) { + if ((insn & tptr->mask) == tptr->pattern) { + return tptr->disas_fn; + } + tptr++; + } + return NULL; +} + +/* * the instruction disassembly implemented here matches * the instruction encoding classifications in chapter 3 (C3) * of the ARM Architecture Reference Manual (DDI0487A_a) @@ -1835,16 +2046,278 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) } } -/* AdvSIMD load/store multiple structures */ +/* C3.3.1 AdvSIMD load/store multiple structures + * + * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 + * +---+---+---------------+---+-------------+--------+------+------+------+ + * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | + * +---+---+---------------+---+-------------+--------+------+------+------+ + * + * C3.3.2 AdvSIMD load/store multiple structures (post-indexed) + * + * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---------------+---+---+---------+--------+------+------+------+ + * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | + * +---+---+---------------+---+---+---------+--------+------+------+------+ + * + * Rt: first (or only) SIMD&FP register to be transferred + * Rn: base address or SP + * Rm (post-index only): post-index register (when !31) or size dependent #imm + */ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 10, 2); + int opcode = extract32(insn, 12, 4); + bool is_store = !extract32(insn, 22, 1); + bool is_postidx = extract32(insn, 23, 1); + bool is_q = extract32(insn, 30, 1); + TCGv_i64 tcg_addr, tcg_rn; + + int ebytes = 1 << 
size; + int elements = (is_q ? 128 : 64) / (8 << size); + int rpt; /* num iterations */ + int selem; /* structure elements */ + int r; + + if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { + unallocated_encoding(s); + return; + } + + /* From the shared decode logic */ + switch (opcode) { + case 0x0: + rpt = 1; + selem = 4; + break; + case 0x2: + rpt = 4; + selem = 1; + break; + case 0x4: + rpt = 1; + selem = 3; + break; + case 0x6: + rpt = 3; + selem = 1; + break; + case 0x7: + rpt = 1; + selem = 1; + break; + case 0x8: + rpt = 1; + selem = 2; + break; + case 0xa: + rpt = 2; + selem = 1; + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 3 && !is_q && selem != 1) { + /* reserved */ + unallocated_encoding(s); + return; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_addr = tcg_temp_new_i64(); + tcg_gen_mov_i64(tcg_addr, tcg_rn); + + for (r = 0; r < rpt; r++) { + int e; + for (e = 0; e < elements; e++) { + int tt = (rt + r) % 32; + int xs; + for (xs = 0; xs < selem; xs++) { + if (is_store) { + do_vec_st(s, tt, e, tcg_addr, size); + } else { + do_vec_ld(s, tt, e, tcg_addr, size); + + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). We optimize + * by noticing that we only need to do this the first + * time we touch a register. + */ + if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) { + clear_vec_high(s, tt); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + tt = (tt + 1) % 32; + } + } + } + + if (is_postidx) { + int rm = extract32(insn, 16, 5); + if (rm == 31) { + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + } + } + tcg_temp_free_i64(tcg_addr); } -/* AdvSIMD load/store single structure */ +/* C3.3.3 AdvSIMD load/store single structure + * + * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * + * C3.3.4 AdvSIMD load/store single structure (post-indexed) + * + * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * + * Rt: first (or only) SIMD&FP register to be transferred + * Rn: base address or SP + * Rm (post-index only): post-index register (when !31) or size dependent #imm + * index = encoded in Q:S:size dependent on size + * + * lane_size = encoded in R, opc + * transfer width = encoded in opc, S, size + */ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) { - unsupported_encoding(s, insn); + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 10, 2); + int S = extract32(insn, 12, 1); + int opc = extract32(insn, 13, 3); + int R = extract32(insn, 21, 1); + int is_load = extract32(insn, 22, 1); + int is_postidx = extract32(insn, 23, 1); + int is_q = extract32(insn, 30, 1); + + int scale = extract32(opc, 1, 2); + int selem = (extract32(opc, 0, 1) << 1 | R) + 1; + bool replicate = false; + int index = is_q << 3 | S << 2 | size; + int ebytes, xs; + TCGv_i64 tcg_addr, tcg_rn; + + 
switch (scale) { + case 3: + if (!is_load || S) { + unallocated_encoding(s); + return; + } + scale = size; + replicate = true; + break; + case 0: + break; + case 1: + if (extract32(size, 0, 1)) { + unallocated_encoding(s); + return; + } + index >>= 1; + break; + case 2: + if (extract32(size, 1, 1)) { + unallocated_encoding(s); + return; + } + if (!extract32(size, 0, 1)) { + index >>= 2; + } else { + if (S) { + unallocated_encoding(s); + return; + } + index >>= 3; + scale = 3; + } + break; + default: + g_assert_not_reached(); + } + + ebytes = 1 << scale; + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_addr = tcg_temp_new_i64(); + tcg_gen_mov_i64(tcg_addr, tcg_rn); + + for (xs = 0; xs < selem; xs++) { + if (replicate) { + /* Load and replicate to all elements */ + uint64_t mulconst; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, + get_mem_index(s), MO_TE + scale); + switch (scale) { + case 0: + mulconst = 0x0101010101010101ULL; + break; + case 1: + mulconst = 0x0001000100010001ULL; + break; + case 2: + mulconst = 0x0000000100000001ULL; + break; + case 3: + mulconst = 0; + break; + default: + g_assert_not_reached(); + } + if (mulconst) { + tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst); + } + write_vec_element(s, tcg_tmp, rt, 0, MO_64); + if (is_q) { + write_vec_element(s, tcg_tmp, rt, 1, MO_64); + } else { + clear_vec_high(s, rt); + } + tcg_temp_free_i64(tcg_tmp); + } else { + /* Load/store one element per register */ + if (is_load) { + do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale); + } else { + do_vec_st(s, rt, index, tcg_addr, MO_TE + scale); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + rt = (rt + 1) % 32; + } + + if (is_postidx) { + int rm = extract32(insn, 16, 5); + if (rm == 31) { + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + } + } + tcg_temp_free_i64(tcg_addr); } /* C3.3 Loads and stores */ @@ -3186,34 +3659,6 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } } -/* Convert ARM rounding mode to softfloat */ -static inline int arm_rmode_to_sf(int rmode) -{ - switch (rmode) { - case FPROUNDING_TIEAWAY: - rmode = float_round_ties_away; - break; - case FPROUNDING_ODD: - /* FIXME: add support for TIEAWAY and ODD */ - qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n", - rmode); - case FPROUNDING_TIEEVEN: - default: - rmode = float_round_nearest_even; - break; - case FPROUNDING_POSINF: - rmode = float_round_up; - break; - case FPROUNDING_NEGINF: - rmode = float_round_down; - break; - case FPROUNDING_ZERO: - rmode = float_round_to_zero; - break; - } - return rmode; -} - static void handle_fp_compare(DisasContext *s, bool is_double, unsigned int rn, unsigned int rm, bool cmp_with_zero, bool signal_all_nans) @@ -4224,13 +4669,2201 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn) } } +static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, + int pos) +{ + /* Extract 64 bits from the middle of two concatenated 64 bit + * vector register slices left:right. The extracted bits start + * at 'pos' bits into the right (least significant) side. + * We return the result in tcg_right, and guarantee not to + * trash tcg_left. 
+ */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + assert(pos > 0 && pos < 64); + + tcg_gen_shri_i64(tcg_right, tcg_right, pos); + tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); + tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); + + tcg_temp_free_i64(tcg_tmp); +} + +/* C3.6.1 EXT + * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 + * +---+---+-------------+-----+---+------+---+------+---+------+------+ + * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | + * +---+---+-------------+-----+---+------+---+------+---+------+------+ + */ +static void disas_simd_ext(DisasContext *s, uint32_t insn) +{ + int is_q = extract32(insn, 30, 1); + int op2 = extract32(insn, 22, 2); + int imm4 = extract32(insn, 11, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int pos = imm4 << 3; + TCGv_i64 tcg_resl, tcg_resh; + + if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { + unallocated_encoding(s); + return; + } + + tcg_resh = tcg_temp_new_i64(); + tcg_resl = tcg_temp_new_i64(); + + /* Vd gets bits starting at pos bits into Vm:Vn. This is + * either extracting 128 bits from a 128:128 concatenation, or + * extracting 64 bits from a 64:64 concatenation. + */ + if (!is_q) { + read_vec_element(s, tcg_resl, rn, 0, MO_64); + if (pos != 0) { + read_vec_element(s, tcg_resh, rm, 0, MO_64); + do_ext64(s, tcg_resh, tcg_resl, pos); + } + tcg_gen_movi_i64(tcg_resh, 0); + } else { + TCGv_i64 tcg_hh; + typedef struct { + int reg; + int elt; + } EltPosns; + EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; + EltPosns *elt = eltposns; + + if (pos >= 64) { + elt++; + pos -= 64; + } + + read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); + elt++; + read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); + elt++; + if (pos != 0) { + do_ext64(s, tcg_resh, tcg_resl, pos); + tcg_hh = tcg_temp_new_i64(); + read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); + do_ext64(s, tcg_hh, tcg_resh, pos); + tcg_temp_free_i64(tcg_hh); + } + } + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +/* C3.6.2 TBL/TBX + * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 + * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ + * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | + * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ + */ +static void disas_simd_tb(DisasContext *s, uint32_t insn) +{ + int op2 = extract32(insn, 22, 2); + int is_q = extract32(insn, 30, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int is_tblx = extract32(insn, 12, 1); + int len = extract32(insn, 13, 2); + TCGv_i64 tcg_resl, tcg_resh, tcg_idx; + TCGv_i32 tcg_regno, tcg_numregs; + + if (op2 != 0) { + unallocated_encoding(s); + return; + } + + /* This does a table lookup: for every byte element in the input + * we index into a table formed from up to four vector registers, + * and then the output is the result of the lookups. Our helper + * function does the lookup operation for a single 64 bit part of + * the input. 
+ */ + tcg_resl = tcg_temp_new_i64(); + tcg_resh = tcg_temp_new_i64(); + + if (is_tblx) { + read_vec_element(s, tcg_resl, rd, 0, MO_64); + } else { + tcg_gen_movi_i64(tcg_resl, 0); + } + if (is_tblx && is_q) { + read_vec_element(s, tcg_resh, rd, 1, MO_64); + } else { + tcg_gen_movi_i64(tcg_resh, 0); + } + + tcg_idx = tcg_temp_new_i64(); + tcg_regno = tcg_const_i32(rn); + tcg_numregs = tcg_const_i32(len + 1); + read_vec_element(s, tcg_idx, rm, 0, MO_64); + gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx, + tcg_regno, tcg_numregs); + if (is_q) { + read_vec_element(s, tcg_idx, rm, 1, MO_64); + gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx, + tcg_regno, tcg_numregs); + } + tcg_temp_free_i64(tcg_idx); + tcg_temp_free_i32(tcg_regno); + tcg_temp_free_i32(tcg_numregs); + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +/* C3.6.3 ZIP/UZP/TRN + * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 + * +---+---+-------------+------+---+------+---+------------------+------+ + * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | + * +---+---+-------------+------+---+------+---+------------------+------+ + */ +static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + /* opc field bits [1:0] indicate ZIP/UZP/TRN; + * bit 2 indicates 1 vs 2 variant of the insn. + */ + int opcode = extract32(insn, 12, 2); + bool part = extract32(insn, 14, 1); + bool is_q = extract32(insn, 30, 1); + int esize = 8 << size; + int i, ofs; + int datasize = is_q ? 128 : 64; + int elements = datasize / esize; + TCGv_i64 tcg_res, tcg_resl, tcg_resh; + + if (opcode == 0 || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + + tcg_resl = tcg_const_i64(0); + tcg_resh = tcg_const_i64(0); + tcg_res = tcg_temp_new_i64(); + + for (i = 0; i < elements; i++) { + switch (opcode) { + case 1: /* UZP1/2 */ + { + int midpoint = elements / 2; + if (i < midpoint) { + read_vec_element(s, tcg_res, rn, 2 * i + part, size); + } else { + read_vec_element(s, tcg_res, rm, + 2 * (i - midpoint) + part, size); + } + break; + } + case 2: /* TRN1/2 */ + if (i & 1) { + read_vec_element(s, tcg_res, rm, (i & ~1) + part, size); + } else { + read_vec_element(s, tcg_res, rn, (i & ~1) + part, size); + } + break; + case 3: /* ZIP1/2 */ + { + int base = part * elements / 2; + if (i & 1) { + read_vec_element(s, tcg_res, rm, base + (i >> 1), size); + } else { + read_vec_element(s, tcg_res, rn, base + (i >> 1), size); + } + break; + } + default: + g_assert_not_reached(); + } + + ofs = i * esize; + if (ofs < 64) { + tcg_gen_shli_i64(tcg_res, tcg_res, ofs); + tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res); + } else { + tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64); + tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res); + } + } + + tcg_temp_free_i64(tcg_res); + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2, + int opc, bool is_min, TCGv_ptr fpst) +{ + /* Helper function for disas_simd_across_lanes: do a single precision + * min/max operation on the specified two inputs, + * and return the result in tcg_elt1. 
+ */ + if (opc == 0xc) { + if (is_min) { + gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } else { + gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } + } else { + assert(opc == 0xf); + if (is_min) { + gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } else { + gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } + } +} + +/* C3.6.4 AdvSIMD across lanes + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool is_q = extract32(insn, 30, 1); + bool is_u = extract32(insn, 29, 1); + bool is_fp = false; + bool is_min = false; + int esize; + int elements; + int i; + TCGv_i64 tcg_res, tcg_elt; + + switch (opcode) { + case 0x1b: /* ADDV */ + if (is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x3: /* SADDLV, UADDLV */ + case 0xa: /* SMAXV, UMAXV */ + case 0x1a: /* SMINV, UMINV */ + if (size == 3 || (size == 2 && !is_q)) { + unallocated_encoding(s); + return; + } + break; + case 0xc: /* FMAXNMV, FMINNMV */ + case 0xf: /* FMAXV, FMINV */ + if (!is_u || !is_q || extract32(size, 0, 1)) { + unallocated_encoding(s); + return; + } + /* Bit 1 of size field encodes min vs max, and actual size is always + * 32 bits: adjust the size variable so following code can rely on it + */ + is_min = extract32(size, 1, 1); + is_fp = true; + size = 2; + break; + default: + unallocated_encoding(s); + return; + } + + esize = 8 << size; + elements = (is_q ? 128 : 64) / esize; + + tcg_res = tcg_temp_new_i64(); + tcg_elt = tcg_temp_new_i64(); + + /* These instructions operate across all lanes of a vector + * to produce a single result. We can guarantee that a 64 + * bit intermediate is sufficient: + * + for [US]ADDLV the maximum element size is 32 bits, and + * the result type is 64 bits + * + for FMAX*V, FMIN*V, ADDV the intermediate type is the + * same as the element size, which is 32 bits at most + * For the integer operations we can choose to work at 64 + * or 32 bits and truncate at the end; for simplicity + * we use 64 bits always. The floating point + * ops do require 32 bit intermediates, though. + */ + if (!is_fp) { + read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); + + for (i = 1; i < elements; i++) { + read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); + + switch (opcode) { + case 0x03: /* SADDLV / UADDLV */ + case 0x1b: /* ADDV */ + tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); + break; + case 0x0a: /* SMAXV / UMAXV */ + tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, + tcg_res, + tcg_res, tcg_elt, tcg_res, tcg_elt); + break; + case 0x1a: /* SMINV / UMINV */ + tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE, + tcg_res, + tcg_res, tcg_elt, tcg_res, tcg_elt); + break; + break; + default: + g_assert_not_reached(); + } + + } + } else { + /* Floating point ops which work on 32 bit (single) intermediates. + * Note that correct NaN propagation requires that we do these + * operations in exactly the order specified by the pseudocode. 
+ */ + TCGv_i32 tcg_elt1 = tcg_temp_new_i32(); + TCGv_i32 tcg_elt2 = tcg_temp_new_i32(); + TCGv_i32 tcg_elt3 = tcg_temp_new_i32(); + TCGv_ptr fpst = get_fpstatus_ptr(); + + assert(esize == 32); + assert(elements == 4); + + read_vec_element(s, tcg_elt, rn, 0, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt); + read_vec_element(s, tcg_elt, rn, 1, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + + do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); + + read_vec_element(s, tcg_elt, rn, 2, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + read_vec_element(s, tcg_elt, rn, 3, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt); + + do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst); + + do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); + + tcg_gen_extu_i32_i64(tcg_res, tcg_elt1); + tcg_temp_free_i32(tcg_elt1); + tcg_temp_free_i32(tcg_elt2); + tcg_temp_free_i32(tcg_elt3); + tcg_temp_free_ptr(fpst); + } + + tcg_temp_free_i64(tcg_elt); + + /* Now truncate the result to the width required for the final output */ + if (opcode == 0x03) { + /* SADDLV, UADDLV: result is 2*esize */ + size++; + } + + switch (size) { + case 0: + tcg_gen_ext8u_i64(tcg_res, tcg_res); + break; + case 1: + tcg_gen_ext16u_i64(tcg_res, tcg_res); + break; + case 2: + tcg_gen_ext32u_i64(tcg_res, tcg_res); + break; + case 3: + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + tcg_temp_free_i64(tcg_res); +} + +/* C6.3.31 DUP (Element, Vector) + * + * 31 30 29 21 20 16 15 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int esize = 8 << size; + int elements = (is_q ? 128 : 64) / esize; + int index, i; + TCGv_i64 tmp; + + if (size > 3 || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + + index = imm5 >> (size + 1); + + tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, rn, index, size); + + for (i = 0; i < elements; i++) { + write_vec_element(s, tmp, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tmp); +} + +/* C6.3.31 DUP (element, scalar) + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+--------+-------------+------+------+ + * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | + * +-----------------------+--------+-------------+------+------+ + */ +static void handle_simd_dupes(DisasContext *s, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int index; + TCGv_i64 tmp; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + index = imm5 >> (size + 1); + + /* This instruction just extracts the specified element and + * zero-extends it into the bottom of the destination register. 
+ */ + tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, rn, index, size); + write_fp_dreg(s, rd, tmp); + tcg_temp_free_i64(tmp); +} + +/* C6.3.32 DUP (General) + * + * 31 30 29 21 20 16 15 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int esize = 8 << size; + int elements = (is_q ? 128 : 64)/esize; + int i = 0; + + if (size > 3 || ((size == 3) && !is_q)) { + unallocated_encoding(s); + return; + } + for (i = 0; i < elements; i++) { + write_vec_element(s, cpu_reg(s, rn), rd, i, size); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C6.3.150 INS (Element) + * + * 31 21 20 16 15 14 11 10 9 5 4 0 + * +-----------------------+--------+------------+---+------+------+ + * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +-----------------------+--------+------------+---+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + * index: encoded in imm5<4:size+1> + */ +static void handle_simd_inse(DisasContext *s, int rd, int rn, + int imm4, int imm5) +{ + int size = ctz32(imm5); + int src_index, dst_index; + TCGv_i64 tmp; + + if (size > 3) { + unallocated_encoding(s); + return; + } + dst_index = extract32(imm5, 1+size, 5); + src_index = extract32(imm4, size, 4); + + tmp = tcg_temp_new_i64(); + + read_vec_element(s, tmp, rn, src_index, size); + write_vec_element(s, tmp, rd, dst_index, size); + + tcg_temp_free_i64(tmp); +} + + +/* C6.3.151 INS (General) + * + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+--------+-------------+------+------+ + * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | + * +-----------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + * index: encoded in imm5<4:size+1> + */ +static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) +{ + int size = ctz32(imm5); + int idx; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + idx = extract32(imm5, 1 + size, 4 - size); + write_vec_element(s, cpu_reg(s, rn), rd, idx, size); +} + +/* + * C6.3.321 UMOV (General) + * C6.3.237 SMOV (General) + * + * 31 30 29 21 20 16 15 12 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * U: unsigned when set + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, + int rn, int rd, int imm5) +{ + int size = ctz32(imm5); + int element; + TCGv_i64 tcg_rd; + + /* Check for UnallocatedEncodings */ + if (is_signed) { + if (size > 2 || (size == 2 && !is_q)) { + unallocated_encoding(s); + return; + } + } else { + if (size > 3 + || (size < 3 && is_q) + || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + } + element = extract32(imm5, 1+size, 4); + + tcg_rd = cpu_reg(s, rd); + read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); + if (is_signed && !is_q) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } +} + +/* C3.6.5 AdvSIMD copy + * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 + * +---+---+----+-----------------+------+---+------+---+------+------+ + * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +---+---+----+-----------------+------+---+------+---+------+------+ + */ +static void disas_simd_copy(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm4 = extract32(insn, 11, 4); + int op = extract32(insn, 29, 1); + int is_q = extract32(insn, 30, 1); + int imm5 = extract32(insn, 16, 5); + + if (op) { + if (is_q) { + /* INS (element) */ + handle_simd_inse(s, rd, rn, imm4, imm5); + } else { + unallocated_encoding(s); + } + } else { + switch (imm4) { + case 0: + /* DUP (element - vector) */ + handle_simd_dupe(s, is_q, rd, rn, imm5); + break; + case 1: + /* DUP (general) */ + handle_simd_dupg(s, is_q, rd, rn, imm5); + break; + case 3: + if (is_q) { + /* INS (general) */ + handle_simd_insg(s, rd, rn, imm5); + } else { + unallocated_encoding(s); + } + break; + case 5: + case 7: + /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ + handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); + break; + default: + unallocated_encoding(s); + break; + } + } +} + +/* C3.6.6 AdvSIMD modified immediate + * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 + * +---+---+----+---------------------+-----+-------+----+---+-------+------+ + * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | + * +---+---+----+---------------------+-----+-------+----+---+-------+------+ + * + * There are a number of operations that can be carried out here: + * MOVI - move (shifted) imm into register + * MVNI - move inverted (shifted) imm into register + * ORR - bitwise OR of (shifted) imm with register + * BIC - bitwise clear of (shifted) imm with register + */ +static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int cmode = extract32(insn, 12, 4); + int cmode_3_1 = extract32(cmode, 1, 3); + int cmode_0 = extract32(cmode, 0, 1); + int o2 = extract32(insn, 11, 1); + uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); + bool is_neg = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + uint64_t imm = 0; + TCGv_i64 tcg_rd, tcg_imm; + int i; + + if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { + unallocated_encoding(s); + return; + } + + /* See AdvSIMDExpandImm() in ARM ARM */ + switch (cmode_3_1) { + case 0: /* Replicate(Zeros(24):imm8, 2) */ + case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */ + case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */ + case 3: /* Replicate(imm8:Zeros(24), 2) */ + { + int shift = cmode_3_1 * 8; + imm = bitfield_replicate(abcdefgh << shift, 32); + break; + } + case 4: /* Replicate(Zeros(8):imm8, 4) */ + case 5: /* Replicate(imm8:Zeros(8), 4) */ + { + int shift = (cmode_3_1 & 0x1) * 8; + imm = bitfield_replicate(abcdefgh << shift, 16); + break; + } + case 6: + if (cmode_0) { + /* Replicate(Zeros(8):imm8:Ones(16), 2) */ + imm = (abcdefgh << 16) | 0xffff; + } else { + /* Replicate(Zeros(16):imm8:Ones(8), 2) */ + imm = (abcdefgh << 8) | 0xff; + } + imm = bitfield_replicate(imm, 32); + break; + case 7: + if (!cmode_0 && !is_neg) { + imm = bitfield_replicate(abcdefgh, 8); + } else if (!cmode_0 && is_neg) { + int i; + imm = 0; + for (i = 0; i < 8; i++) { + if ((abcdefgh) & (1 << i)) { + imm |= 0xffULL << (i * 8); + } + } + } else 
if (cmode_0) { + if (is_neg) { + imm = (abcdefgh & 0x3f) << 48; + if (abcdefgh & 0x80) { + imm |= 0x8000000000000000ULL; + } + if (abcdefgh & 0x40) { + imm |= 0x3fc0000000000000ULL; + } else { + imm |= 0x4000000000000000ULL; + } + } else { + imm = (abcdefgh & 0x3f) << 19; + if (abcdefgh & 0x80) { + imm |= 0x80000000; + } + if (abcdefgh & 0x40) { + imm |= 0x3e000000; + } else { + imm |= 0x40000000; + } + imm |= (imm << 32); + } + } + break; + } + + if (cmode_3_1 != 7 && is_neg) { + imm = ~imm; + } + + tcg_imm = tcg_const_i64(imm); + tcg_rd = new_tmp_a64(s); + + for (i = 0; i < 2; i++) { + int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64); + + if (i == 1 && !is_q) { + /* non-quad ops clear high half of vector */ + tcg_gen_movi_i64(tcg_rd, 0); + } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) { + tcg_gen_ld_i64(tcg_rd, cpu_env, foffs); + if (is_neg) { + /* AND (BIC) */ + tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm); + } else { + /* ORR */ + tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm); + } + } else { + /* MOVI */ + tcg_gen_mov_i64(tcg_rd, tcg_imm); + } + tcg_gen_st_i64(tcg_rd, cpu_env, foffs); + } + + tcg_temp_free_i64(tcg_imm); +} + +/* C3.6.7 AdvSIMD scalar copy + * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 + * +-----+----+-----------------+------+---+------+---+------+------+ + * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +-----+----+-----------------+------+---+------+---+------+------+ + */ +static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm4 = extract32(insn, 11, 4); + int imm5 = extract32(insn, 16, 5); + int op = extract32(insn, 29, 1); + + if (op != 0 || imm4 != 0) { + unallocated_encoding(s); + return; + } + + /* DUP (element, scalar) */ + handle_simd_dupes(s, rd, rn, imm5); +} + +/* C3.6.8 AdvSIMD scalar pairwise + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* + * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) + * + * This code is handles the common shifting code and is used by both + * the vector and scalar code. 
+ */ +static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + TCGv_i64 tcg_rnd, bool accumulate, + bool is_u, int size, int shift) +{ + bool extended_result = false; + bool round = !TCGV_IS_UNUSED_I64(tcg_rnd); + int ext_lshift = 0; + TCGv_i64 tcg_src_hi; + + if (round && size == 3) { + extended_result = true; + ext_lshift = 64 - shift; + tcg_src_hi = tcg_temp_new_i64(); + } else if (shift == 64) { + if (!accumulate && is_u) { + /* result is zero */ + tcg_gen_movi_i64(tcg_res, 0); + return; + } + } + + /* Deal with the rounding step */ + if (round) { + if (extended_result) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + if (!is_u) { + /* take care of sign extending tcg_res */ + tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); + tcg_gen_add2_i64(tcg_src, tcg_src_hi, + tcg_src, tcg_src_hi, + tcg_rnd, tcg_zero); + } else { + tcg_gen_add2_i64(tcg_src, tcg_src_hi, + tcg_src, tcg_zero, + tcg_rnd, tcg_zero); + } + tcg_temp_free_i64(tcg_zero); + } else { + tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); + } + } + + /* Now do the shift right */ + if (round && extended_result) { + /* extended case, >64 bit precision required */ + if (ext_lshift == 0) { + /* special case, only high bits matter */ + tcg_gen_mov_i64(tcg_src, tcg_src_hi); + } else { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); + tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); + } + } else { + if (is_u) { + if (shift == 64) { + /* essentially shifting in 64 zeros */ + tcg_gen_movi_i64(tcg_src, 0); + } else { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + } + } else { + if (shift == 64) { + /* effectively extending the sign-bit */ + tcg_gen_sari_i64(tcg_src, tcg_src, 63); + } else { + tcg_gen_sari_i64(tcg_src, tcg_src, shift); + } + } + } + + if (accumulate) { + tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); + } else { + tcg_gen_mov_i64(tcg_res, tcg_src); + } + + if (extended_result) { + tcg_temp_free_i64(tcg_src_hi); + } +} + +/* Common SHL/SLI - Shift left with an optional insert */ +static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + bool insert, int shift) +{ + if (insert) { /* SLI */ + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift); + } else { /* SHL */ + tcg_gen_shli_i64(tcg_res, tcg_src, shift); + } +} + +/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ +static void handle_scalar_simd_shri(DisasContext *s, + bool is_u, int immh, int immb, + int opcode, int rn, int rd) +{ + const int size = 3; + int immhb = immh << 3 | immb; + int shift = 2 * (8 << size) - immhb; + bool accumulate = false; + bool round = false; + TCGv_i64 tcg_rn; + TCGv_i64 tcg_rd; + TCGv_i64 tcg_round; + + if (!extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x02: /* SSRA / USRA (accumulate) */ + accumulate = true; + break; + case 0x04: /* SRSHR / URSHR (rounding) */ + round = true; + break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + accumulate = round = true; + break; + } + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + tcg_rn = read_fp_dreg(s, rn); + tcg_rd = accumulate ? 
read_fp_dreg(s, rd) : tcg_temp_new_i64(); + + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + if (round) { + tcg_temp_free_i64(tcg_round); + } +} + +/* SHL/SLI - Scalar shift left */ +static void handle_scalar_simd_shli(DisasContext *s, bool insert, + int immh, int immb, int opcode, + int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + + if (!extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + tcg_rn = read_fp_dreg(s, rn); + tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); +} + +/* C3.6.9 AdvSIMD scalar shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +-----+---+-------------+------+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +-----+---+-------------+------+------+--------+---+------+------+ + * + * This is the scalar version so it works on a fixed sized registers + */ +static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + + switch (opcode) { + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA */ + case 0x04: /* SRSHR / URSHR */ + case 0x06: /* SRSRA / URSRA */ + handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); + break; + default: + unsupported_encoding(s, insn); + break; + } +} + +/* C3.6.10 AdvSIMD scalar three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +static void handle_3same_64(DisasContext *s, int opcode, bool u, + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) +{ + /* Handle 64x64->64 opcodes which are shared between the scalar + * and vector 3-same groups. We cover every opcode where size == 3 + * is valid in either the three-reg-same (integer, not pairwise) + * or scalar-three-reg-same groups. (Some opcodes are not yet + * implemented.) + */ + TCGCond cond; + + switch (opcode) { + case 0x6: /* CMGT, CMHI */ + /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. + * We implement this using setcond (test) and then negating. + */ + cond = u ? TCG_COND_GTU : TCG_COND_GT; + do_cmop: + tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x7: /* CMGE, CMHS */ + cond = u ? TCG_COND_GEU : TCG_COND_GE; + goto do_cmop; + case 0x11: /* CMTST, CMEQ */ + if (u) { + cond = TCG_COND_EQ; + goto do_cmop; + } + /* CMTST : test is "if (X & Y != 0)". 
*/ + tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); + tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x10: /* ADD, SUB */ + if (u) { + tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); + } + break; + case 0x1: /* SQADD */ + case 0x5: /* SQSUB */ + case 0x8: /* SSHL, USHL */ + case 0x9: /* SQSHL, UQSHL */ + case 0xa: /* SRSHL, URSHL */ + case 0xb: /* SQRSHL, UQRSHL */ + default: + g_assert_not_reached(); + } +} + +/* Handle the 3-same-operands float operations; shared by the scalar + * and vector encodings. The caller must filter out any encodings + * not allocated for the encoding it is dealing with. + */ +static void handle_3same_float(DisasContext *s, int size, int elements, + int fpopcode, int rd, int rn, int rm) +{ + int pass; + TCGv_ptr fpst = get_fpstatus_ptr(); + + for (pass = 0; pass < elements; pass++) { + if (size) { + /* Double */ + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + switch (fpopcode) { + case 0x18: /* FMAXNM */ + gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1a: /* FADD */ + gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1e: /* FMAX */ + gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x38: /* FMINNM */ + gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3a: /* FSUB */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3e: /* FMIN */ + gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5b: /* FMUL */ + gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5f: /* FDIV */ + gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7a: /* FABD */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_absd(tcg_res, tcg_res); + break; + default: + g_assert_not_reached(); + } + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } else { + /* Single */ + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); + read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); + + switch (fpopcode) { + case 0x1a: /* FADD */ + gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1e: /* FMAX */ + gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x18: /* FMAXNM */ + gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x38: /* FMINNM */ + gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3a: /* FSUB */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3e: /* FMIN */ + gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5b: /* FMUL */ + gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5f: /* FDIV */ + gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7a: /* FABD */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_abss(tcg_res, tcg_res); + break; + default: + g_assert_not_reached(); + } + + if (elements == 1) { + /* scalar single so clear high part */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tcg_tmp, 
tcg_res); + write_vec_element(s, tcg_tmp, rd, pass, MO_64); + tcg_temp_free_i64(tcg_tmp); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + } + + tcg_temp_free_ptr(fpst); + + if ((elements << size) < 4) { + /* scalar, or non-quad vector op */ + clear_vec_high(s, rd); + } +} + +/* C3.6.11 AdvSIMD scalar three same + * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+---+------+------+ + */ +static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + bool u = extract32(insn, 29, 1); + TCGv_i64 tcg_rn; + TCGv_i64 tcg_rm; + TCGv_i64 tcg_rd; + + if (opcode >= 0x18) { + /* Floating point: U, size[1] and opcode indicate operation */ + int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); + switch (fpopcode) { + case 0x1b: /* FMULX */ + case 0x1c: /* FCMEQ */ + case 0x1f: /* FRECPS */ + case 0x3f: /* FRSQRTS */ + case 0x5c: /* FCMGE */ + case 0x5d: /* FACGE */ + case 0x7c: /* FCMGT */ + case 0x7d: /* FACGT */ + unsupported_encoding(s, insn); + return; + case 0x7a: /* FABD */ + break; + default: + unallocated_encoding(s); + return; + } + + handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); + return; + } + + switch (opcode) { + case 0x1: /* SQADD, UQADD */ + case 0x5: /* SQSUB, UQSUB */ + case 0x8: /* SSHL, USHL */ + case 0xa: /* SRSHL, URSHL */ + unsupported_encoding(s, insn); + return; + case 0x6: /* CMGT, CMHI */ + case 0x7: /* CMGE, CMHS */ + case 0x11: /* CMTST, CMEQ */ + case 0x10: /* ADD, SUB (vector) */ + if (size != 3) { + unallocated_encoding(s); + return; + } + break; + case 0x9: /* SQSHL, UQSHL */ + case 0xb: /* SQRSHL, UQRSHL */ + unsupported_encoding(s, insn); + return; + case 0x16: /* SQDMULH, SQRDMULH (vector) */ + if (size != 1 && size != 2) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + default: + unallocated_encoding(s); + return; + } + + tcg_rn = read_fp_dreg(s, rn); /* op1 */ + tcg_rm = read_fp_dreg(s, rm); /* op2 */ + tcg_rd = tcg_temp_new_i64(); + + /* For the moment we only support the opcodes which are + * 64-bit-width only. The size != 3 cases will + * be handled later when the relevant ops are implemented. 
+ */ + handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + tcg_temp_free_i64(tcg_rd); +} + +/* C3.6.12 AdvSIMD scalar two reg misc + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.13 AdvSIMD scalar x indexed element + * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | + * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ + */ +static void disas_simd_scalar_indexed(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ +static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = 2 * (8 << size) - immhb; + bool accumulate = false; + bool round = false; + int dsize = is_q ? 128 : 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGMemOp memop = size | (is_u ? 0 : MO_SIGN); + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + TCGv_i64 tcg_round; + int i; + + if (extract32(immh, 3, 1) && !is_q) { + unallocated_encoding(s); + return; + } + + if (size > 3 && !is_q) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x02: /* SSRA / USRA (accumulate) */ + accumulate = true; + break; + case 0x04: /* SRSHR / URSHR (rounding) */ + round = true; + break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + accumulate = round = true; + break; + } + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, memop); + if (accumulate) { + read_vec_element(s, tcg_rd, rd, i, memop); + } + + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + + write_vec_element(s, tcg_rd, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } +} + +/* SHL/SLI - Vector shift left */ +static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + int dsize = is_q ? 
128 : 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + int i; + + if (extract32(immh, 3, 1) && !is_q) { + unallocated_encoding(s); + return; + } + + if (size > 3 && !is_q) { + unallocated_encoding(s); + return; + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, size); + if (insert) { + read_vec_element(s, tcg_rd, rd, i, size); + } + + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); + + write_vec_element(s, tcg_rd, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* USHLL/SHLL - Vector shift left with widening */ +static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + int dsize = 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + int i; + + if (size >= 3) { + unallocated_encoding(s); + return; + } + + /* For the LL variants the store is larger than the load, + * so if rd == rn we would overwrite parts of our input. + * So load everything right now and use shifts in the main loop. + */ + read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); + + for (i = 0; i < elements; i++) { + tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); + ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); + tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); + write_vec_element(s, tcg_rd, rd, i, size + 1); + } +} + + +/* C3.6.14 AdvSIMD shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +---+---+---+-------------+------+------+--------+---+------+------+ + * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +---+---+---+-------------+------+------+--------+---+------+------+ + */ +static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + + switch (opcode) { + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA (accumulate) */ + case 0x04: /* SRSHR / URSHR (rounding) */ + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x14: /* SSHLL / USHLL */ + handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + default: + /* We don't currently implement any of the Narrow or saturating shifts; + * nor do we implement the fixed-point conversions in this + * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU). + */ + unsupported_encoding(s, insn); + return; + } +} + +static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + /* 3-reg-different widening insns: 64 x 64 -> 128 */ + TCGv_i64 tcg_res[2]; + int pass, accop; + + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + /* Does this op do an adding accumulate, a subtracting accumulate, + * or no accumulate at all? 
+ */ + switch (opcode) { + case 5: + case 8: + case 9: + accop = 1; + break; + case 10: + case 11: + accop = -1; + break; + default: + accop = 0; + break; + } + + if (accop != 0) { + read_vec_element(s, tcg_res[0], rd, 0, MO_64); + read_vec_element(s, tcg_res[1], rd, 1, MO_64); + } + + /* size == 2 means two 32x32->64 operations; this is worth special + * casing because we can generally handle it inline. + */ + if (size == 2) { + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_passres; + TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); + + int elt = pass + is_q * 2; + + read_vec_element(s, tcg_op1, rn, elt, memop); + read_vec_element(s, tcg_op2, rm, elt, memop); + + if (accop == 0) { + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + switch (opcode) { + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + { + TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); + TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); + tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); + tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, + tcg_passres, + tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); + tcg_temp_free_i64(tcg_tmp1); + tcg_temp_free_i64(tcg_tmp2); + break; + } + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ + tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); + break; + default: + g_assert_not_reached(); + } + + if (accop > 0) { + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_temp_free_i64(tcg_passres); + } else if (accop < 0) { + tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_temp_free_i64(tcg_passres); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + /* size 0 or 1, generally helper functions */ + for (pass = 0; pass < 2; pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_passres; + int elt = pass + is_q * 2; + + read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); + read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); + + if (accop == 0) { + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + switch (opcode) { + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + if (size == 0) { + if (is_u) { + gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); + } + } else { + if (is_u) { + gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); + } + } + break; + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ + if (size == 0) { + if (is_u) { + gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); + } + } else { + if (is_u) { + gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); + } + } + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + + if (accop > 0) { + if (size == 0) { + gen_helper_neon_addl_u16(tcg_res[pass], tcg_res[pass], + tcg_passres); + } else { + gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], + tcg_passres); + } + 
tcg_temp_free_i64(tcg_passres); + } else if (accop < 0) { + if (size == 0) { + gen_helper_neon_subl_u16(tcg_res[pass], tcg_res[pass], + tcg_passres); + } else { + gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], + tcg_passres); + } + tcg_temp_free_i64(tcg_passres); + } + } + } + + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +/* C3.6.15 AdvSIMD three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) +{ + /* Instructions in this group fall into three basic classes + * (in each case with the operation working on each element in + * the input vectors): + * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra + * 128 bit input) + * (2) wide 64 x 128 -> 128 + * (3) narrowing 128 x 128 -> 64 + * Here we do initial decode, catch unallocated cases and + * dispatch to separate functions for each class. + */ + int is_q = extract32(insn, 30, 1); + int is_u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + switch (opcode) { + case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ + case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ + /* 64 x 128 -> 128 */ + unsupported_encoding(s, insn); + break; + case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ + case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ + /* 128 x 128 -> 64 */ + unsupported_encoding(s, insn); + break; + case 9: + case 11: + case 13: + case 14: + if (is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0: + case 2: + unsupported_encoding(s, insn); + break; + case 5: + case 7: + case 8: + case 10: + case 12: + /* 64 x 64 -> 128 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + default: + /* opcode 15 not allocated */ + unallocated_encoding(s); + break; + } +} + +/* Logic op (opcode == 3) subgroup of C3.6.16. */ +static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + bool is_u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res[2]; + int pass; + + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + for (pass = 0; pass < (is_q ? 
2 : 1); pass++) { + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + if (!is_u) { + switch (size) { + case 0: /* AND */ + tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 1: /* BIC */ + tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 2: /* ORR */ + tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 3: /* ORN */ + tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + } + } else { + if (size != 0) { + /* B* ops need res loaded to operate on */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + } + + switch (size) { + case 0: /* EOR */ + tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 1: /* BSL bitwise select */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1); + break; + case 2: /* BIT, bitwise insert if true */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + break; + case 3: /* BIF, bitwise insert if false */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + break; + } + } + } + + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + if (!is_q) { + tcg_gen_movi_i64(tcg_res[1], 0); + } + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +/* Pairwise op subgroup of C3.6.16. */ +static void disas_simd_3same_pair(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* Floating point op subgroup of C3.6.16. */ +static void disas_simd_3same_float(DisasContext *s, uint32_t insn) +{ + /* For floating point ops, the U, size[1] and opcode bits + * together indicate the operation. size[0] indicates single + * or double. + */ + int fpopcode = extract32(insn, 11, 5) + | (extract32(insn, 23, 1) << 5) + | (extract32(insn, 29, 1) << 6); + int is_q = extract32(insn, 30, 1); + int size = extract32(insn, 22, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + int datasize = is_q ? 128 : 64; + int esize = 32 << size; + int elements = datasize / esize; + + if (size == 1 && !is_q) { + unallocated_encoding(s); + return; + } + + switch (fpopcode) { + case 0x58: /* FMAXNMP */ + case 0x5a: /* FADDP */ + case 0x5e: /* FMAXP */ + case 0x78: /* FMINNMP */ + case 0x7e: /* FMINP */ + /* pairwise ops */ + unsupported_encoding(s, insn); + return; + case 0x1b: /* FMULX */ + case 0x1c: /* FCMEQ */ + case 0x1f: /* FRECPS */ + case 0x3f: /* FRSQRTS */ + case 0x5c: /* FCMGE */ + case 0x5d: /* FACGE */ + case 0x7c: /* FCMGT */ + case 0x7d: /* FACGT */ + case 0x19: /* FMLA */ + case 0x39: /* FMLS */ + unsupported_encoding(s, insn); + return; + case 0x18: /* FMAXNM */ + case 0x1a: /* FADD */ + case 0x1e: /* FMAX */ + case 0x38: /* FMINNM */ + case 0x3a: /* FSUB */ + case 0x3e: /* FMIN */ + case 0x5b: /* FMUL */ + case 0x5f: /* FDIV */ + case 0x7a: /* FABD */ + handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); + return; + default: + unallocated_encoding(s); + return; + } +} + +/* Integer op subgroup of C3.6.16. 
*/ +static void disas_simd_3same_int(DisasContext *s, uint32_t insn) +{ + int is_q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int pass; + + switch (opcode) { + case 0x13: /* MUL, PMUL */ + if (u && size != 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x0: /* SHADD, UHADD */ + case 0x2: /* SRHADD, URHADD */ + case 0x4: /* SHSUB, UHSUB */ + case 0xc: /* SMAX, UMAX */ + case 0xd: /* SMIN, UMIN */ + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ + case 0x12: /* MLA, MLS */ + if (size == 3) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + case 0x1: /* SQADD */ + case 0x5: /* SQSUB */ + case 0x8: /* SSHL, USHL */ + case 0x9: /* SQSHL, UQSHL */ + case 0xa: /* SRSHL, URSHL */ + case 0xb: /* SQRSHL, UQRSHL */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + case 0x16: /* SQDMULH, SQRDMULH */ + if (size == 0 || size == 3) { + unallocated_encoding(s); + return; + } + unsupported_encoding(s, insn); + return; + default: + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + } + + if (size == 3) { + for (pass = 0; pass < (is_q ? 2 : 1); pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + for (pass = 0; pass < (is_q ? 
4 : 2); pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + NeonGenTwoOpFn *genfn; + + read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); + read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); + + switch (opcode) { + case 0x6: /* CMGT, CMHI */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 }, + { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 }, + { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x7: /* CMGE, CMHS */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 }, + { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 }, + { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x10: /* ADD, SUB */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 }, + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x11: /* CMTST, CMEQ */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 }, + { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 }, + { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 }, + }; + genfn = fns[size][u]; + break; + } + default: + g_assert_not_reached(); + } + + genfn(tcg_res, tcg_op1, tcg_op2); + + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + } + + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C3.6.16 AdvSIMD three same + * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+---+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+---+------+------+ + */ +static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) +{ + int opcode = extract32(insn, 11, 5); + + switch (opcode) { + case 0x3: /* logic ops */ + disas_simd_3same_logic(s, insn); + break; + case 0x17: /* ADDP */ + case 0x14: /* SMAXP, UMAXP */ + case 0x15: /* SMINP, UMINP */ + /* Pairwise operations */ + disas_simd_3same_pair(s, insn); + break; + case 0x18 ... 
0x31: + /* floating point ops, sz[1] and U are part of opcode */ + disas_simd_3same_float(s, insn); + break; + default: + disas_simd_3same_int(s, insn); + break; + } +} + +/* C3.6.17 AdvSIMD two reg misc + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.18 AdvSIMD vector x indexed element + * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ + * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | + * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ + */ +static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.19 Crypto AES + * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----------------+------+-----------+--------+-----+------+------+ + * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | + * +-----------------+------+-----------+--------+-----+------+------+ + */ +static void disas_crypto_aes(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.20 Crypto three-reg SHA + * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 + * +-----------------+------+---+------+---+--------+-----+------+------+ + * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | + * +-----------------+------+---+------+---+--------+-----+------+------+ + */ +static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6.21 Crypto two-reg SHA + * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----------------+------+-----------+--------+-----+------+------+ + * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | + * +-----------------+------+-----------+--------+-----+------+------+ + */ +static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) +{ + unsupported_encoding(s, insn); +} + +/* C3.6 Data processing - SIMD, inc Crypto + * + * As the decode gets a little complex we are using a table based + * approach for this part of the decode. 
+ */ +static const AArch64DecodeTable data_proc_simd[] = { + /* pattern , mask , fn */ + { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, + { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, + { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, + { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, + { 0x0e000400, 0x9fe08400, disas_simd_copy }, + { 0x0f000000, 0x9f000400, disas_simd_indexed_vector }, + /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ + { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, + { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, + { 0x0e000000, 0xbf208c00, disas_simd_tb }, + { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, + { 0x2e000000, 0xbf208400, disas_simd_ext }, + { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, + { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, + { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, + { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, + { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, + { 0x5f000000, 0xdf000400, disas_simd_scalar_indexed }, + { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, + { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, + { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, + { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, + { 0x00000000, 0x00000000, NULL } +}; + static void disas_data_proc_simd(DisasContext *s, uint32_t insn) { /* Note that this is called with all non-FP cases from * table C3-6 so it must UNDEF for entries not specifically * allocated to instructions in that table. */ - unsupported_encoding(s, insn); + AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); + if (fn) { + fn(s, insn); + } else { + unallocated_encoding(s); + } } /* C3.6 Data processing - SIMD and floating point */ diff --git a/target-arm/translate.c b/target-arm/translate.c index 8d240e160d..e701c0f9e1 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2759,6 +2759,113 @@ static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn, return 0; } +static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, + int rounding) +{ + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode; + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (dp) { + TCGv_i64 tcg_op; + TCGv_i64 tcg_res; + tcg_op = tcg_temp_new_i64(); + tcg_res = tcg_temp_new_i64(); + tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm)); + gen_helper_rintd(tcg_res, tcg_op, fpst); + tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i64(tcg_op); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op; + TCGv_i32 tcg_res; + tcg_op = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm)); + gen_helper_rints(tcg_res, tcg_op, fpst); + tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i32(tcg_op); + tcg_temp_free_i32(tcg_res); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + tcg_temp_free_ptr(fpst); + return 0; +} + +static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, + int rounding) +{ + bool is_signed = extract32(insn, 7, 1); + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode, tcg_shift; + + tcg_shift = tcg_const_i32(0); + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (dp) { + TCGv_i64 tcg_double, tcg_res; 
+ TCGv_i32 tcg_tmp; + /* Rd is encoded as a single precision register even when the source + * is double precision. + */ + rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1); + tcg_double = tcg_temp_new_i64(); + tcg_res = tcg_temp_new_i64(); + tcg_tmp = tcg_temp_new_i32(); + tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm)); + if (is_signed) { + gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); + } else { + gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); + } + tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res); + tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd)); + tcg_temp_free_i32(tcg_tmp); + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single, tcg_res; + tcg_single = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm)); + if (is_signed) { + gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); + } + tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd)); + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_single); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + tcg_temp_free_i32(tcg_shift); + + tcg_temp_free_ptr(fpst); + + return 0; +} + +/* Table for converting the most common AArch32 encoding of + * rounding mode to arm_fprounding order (which matches the + * common AArch64 order); see ARM ARM pseudocode FPDecodeRM(). + */ +static const uint8_t fp_decode_rm[] = { + FPROUNDING_TIEAWAY, + FPROUNDING_TIEEVEN, + FPROUNDING_POSINF, + FPROUNDING_NEGINF, +}; + static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) { uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); @@ -2781,6 +2888,14 @@ static int disas_vfp_v8_insn(CPUARMState *env, DisasContext *s, uint32_t insn) return handle_vsel(insn, rd, rn, rm, dp); } else if ((insn & 0x0fb00e10) == 0x0e800a00) { return handle_vminmaxnm(insn, rd, rn, rm, dp); + } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) { + /* VRINTA, VRINTN, VRINTP, VRINTM */ + int rounding = fp_decode_rm[extract32(insn, 16, 2)]; + return handle_vrint(insn, rd, rm, dp, rounding); + } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) { + /* VCVTA, VCVTN, VCVTP, VCVTM */ + int rounding = fp_decode_rm[extract32(insn, 16, 2)]; + return handle_vcvt(insn, rd, rm, dp, rounding); } return 1; } @@ -3325,6 +3440,44 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) gen_vfp_F1_ld0(dp); gen_vfp_cmpe(dp); break; + case 12: /* vrintr */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_rintd(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints(cpu_F0s, cpu_F0s, fpst); + } + tcg_temp_free_ptr(fpst); + break; + } + case 13: /* vrintz */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode; + tcg_rmode = tcg_const_i32(float_round_to_zero); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + if (dp) { + gen_helper_rintd(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints(cpu_F0s, cpu_F0s, fpst); + } + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(fpst); + break; + } + case 14: /* vrintx */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst); + } + tcg_temp_free_ptr(fpst); + break; + } case 15: /* single<->double conversion */ if (dp) gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env); @@ -4617,8 
+4770,22 @@ static const uint8_t neon_3r_sizes[] = { #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ #define NEON_2RM_VSHLL 38 +#define NEON_2RM_VRINTN 40 +#define NEON_2RM_VRINTX 41 +#define NEON_2RM_VRINTA 42 +#define NEON_2RM_VRINTZ 43 #define NEON_2RM_VCVT_F16_F32 44 +#define NEON_2RM_VRINTM 45 #define NEON_2RM_VCVT_F32_F16 46 +#define NEON_2RM_VRINTP 47 +#define NEON_2RM_VCVTAU 48 +#define NEON_2RM_VCVTAS 49 +#define NEON_2RM_VCVTNU 50 +#define NEON_2RM_VCVTNS 51 +#define NEON_2RM_VCVTPU 52 +#define NEON_2RM_VCVTPS 53 +#define NEON_2RM_VCVTMU 54 +#define NEON_2RM_VCVTMS 55 #define NEON_2RM_VRECPE 56 #define NEON_2RM_VRSQRTE 57 #define NEON_2RM_VRECPE_F 58 @@ -4632,6 +4799,9 @@ static int neon_2rm_is_float_op(int op) { /* Return true if this neon 2reg-misc op is float-to-float */ return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F || + (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) || + op == NEON_2RM_VRINTM || + (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) || op >= NEON_2RM_VRECPE_F); } @@ -4676,8 +4846,22 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VMOVN] = 0x7, [NEON_2RM_VQMOVN] = 0x7, [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_VRINTN] = 0x4, + [NEON_2RM_VRINTX] = 0x4, + [NEON_2RM_VRINTA] = 0x4, + [NEON_2RM_VRINTZ] = 0x4, [NEON_2RM_VCVT_F16_F32] = 0x2, + [NEON_2RM_VRINTM] = 0x4, [NEON_2RM_VCVT_F32_F16] = 0x2, + [NEON_2RM_VRINTP] = 0x4, + [NEON_2RM_VCVTAU] = 0x4, + [NEON_2RM_VCVTAS] = 0x4, + [NEON_2RM_VCVTNU] = 0x4, + [NEON_2RM_VCVTNS] = 0x4, + [NEON_2RM_VCVTPU] = 0x4, + [NEON_2RM_VCVTPS] = 0x4, + [NEON_2RM_VCVTMU] = 0x4, + [NEON_2RM_VCVTMS] = 0x4, [NEON_2RM_VRECPE] = 0x4, [NEON_2RM_VRSQRTE] = 0x4, [NEON_2RM_VRECPE_F] = 0x4, @@ -6388,6 +6572,73 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins } neon_store_reg(rm, pass, tmp2); break; + case NEON_2RM_VRINTN: + case NEON_2RM_VRINTA: + case NEON_2RM_VRINTM: + case NEON_2RM_VRINTP: + case NEON_2RM_VRINTZ: + { + TCGv_i32 tcg_rmode; + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + int rmode; + + if (op == NEON_2RM_VRINTZ) { + rmode = FPROUNDING_ZERO; + } else { + rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; + } + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + tcg_temp_free_ptr(fpstatus); + tcg_temp_free_i32(tcg_rmode); + break; + } + case NEON_2RM_VRINTX: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCVTAU: + case NEON_2RM_VCVTAS: + case NEON_2RM_VCVTNU: + case NEON_2RM_VCVTNS: + case NEON_2RM_VCVTPU: + case NEON_2RM_VCVTPS: + case NEON_2RM_VCVTMU: + case NEON_2RM_VCVTMS: + { + bool is_signed = !extract32(insn, 7, 1); + TCGv_ptr fpst = get_fpstatus_ptr(1); + TCGv_i32 tcg_rmode, tcg_shift; + int rmode = fp_decode_rm[extract32(insn, 8, 2)]; + + tcg_shift = tcg_const_i32(0); + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + + if (is_signed) { + gen_helper_vfp_tosls(cpu_F0s, cpu_F0s, + tcg_shift, fpst); + } else { + gen_helper_vfp_touls(cpu_F0s, cpu_F0s, + tcg_shift, fpst); + } + + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_i32(tcg_shift); + tcg_temp_free_ptr(fpst); + break; + } case NEON_2RM_VRECPE: 
gen_helper_recpe_u32(tmp, tmp, cpu_env); break; diff --git a/target-cris/cpu.c b/target-cris/cpu.c index 44301a4b10..1ac8124d8c 100644 --- a/target-cris/cpu.c +++ b/target-cris/cpu.c @@ -66,6 +66,12 @@ static ObjectClass *cris_cpu_class_by_name(const char *cpu_model) return NULL; } +#if defined(CONFIG_USER_ONLY) + if (strcasecmp(cpu_model, "any") == 0) { + return object_class_by_name("crisv32-" TYPE_CRIS_CPU); + } +#endif + typename = g_strdup_printf("%s-" TYPE_CRIS_CPU, cpu_model); oc = object_class_by_name(typename); g_free(typename); @@ -146,6 +152,21 @@ static void cris_cpu_realizefn(DeviceState *dev, Error **errp) ccc->parent_realize(dev, errp); } +#ifndef CONFIG_USER_ONLY +static void cris_cpu_set_irq(void *opaque, int irq, int level) +{ + CRISCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + int type = irq == CRIS_CPU_IRQ ? CPU_INTERRUPT_HARD : CPU_INTERRUPT_NMI; + + if (level) { + cpu_interrupt(cs, type); + } else { + cpu_reset_interrupt(cs, type); + } +} +#endif + static void cris_cpu_initfn(Object *obj) { CPUState *cs = CPU(obj); @@ -159,6 +180,11 @@ static void cris_cpu_initfn(Object *obj) env->pregs[PR_VR] = ccc->vr; +#ifndef CONFIG_USER_ONLY + /* IRQ and NMI lines. */ + qdev_init_gpio_in(DEVICE(cpu), cris_cpu_set_irq, 2); +#endif + if (tcg_enabled() && !tcg_initialized) { tcg_initialized = true; if (env->pregs[PR_VR] < 32) { diff --git a/target-cris/cpu.h b/target-cris/cpu.h index 4b9fc4cb45..1d7d80d3dc 100644 --- a/target-cris/cpu.h +++ b/target-cris/cpu.h @@ -42,6 +42,10 @@ /* CRIS-specific interrupt pending bits. */ #define CPU_INTERRUPT_NMI CPU_INTERRUPT_TGT_EXT_3 +/* CRUS CPU device objects interrupt lines. */ +#define CRIS_CPU_IRQ 0 +#define CRIS_CPU_NMI 1 + /* Register aliases. R0 - R15 */ #define R_FP 8 #define R_SP 14 diff --git a/target-cris/helper.c b/target-cris/helper.c index d274b388b8..c940582132 100644 --- a/target-cris/helper.c +++ b/target-cris/helper.c @@ -126,6 +126,11 @@ void crisv10_cpu_do_interrupt(CPUState *cs) env->exception_index, cs->interrupt_request); + if (env->dslot) { + /* CRISv10 never takes interrupts while in a delay-slot. */ + cpu_abort(env, "CRIS: Interrupt on delay-slot\n"); + } + assert(!(env->pregs[PR_CCS] & PFIX_FLAG)); switch (env->exception_index) { case EXCP_BREAK: diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 68b5ab7056..96c2b4a7e9 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -78,11 +78,6 @@ typedef struct MchkQueue { uint16_t type; } MchkQueue; -/* Defined values for CPUS390XState.runtime_reg_dirty_mask */ -#define KVM_S390_RUNTIME_DIRTY_NONE 0 -#define KVM_S390_RUNTIME_DIRTY_PARTIAL 1 -#define KVM_S390_RUNTIME_DIRTY_FULL 2 - typedef struct CPUS390XState { uint64_t regs[16]; /* GP registers */ CPU_DoubleU fregs[16]; /* FP registers */ @@ -126,13 +121,6 @@ typedef struct CPUS390XState { uint64_t cputm; uint32_t todpr; - /* on S390 the runtime register set has two dirty states: - * a partial dirty state in which only the registers that - * are needed all the time are fetched. And a fully dirty - * state in which all runtime registers are fetched. 
- */ - uint32_t runtime_reg_dirty_mask; - CPU_COMMON /* reset does memset(0) up to here */ @@ -1076,7 +1064,6 @@ void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id, uint32_t io_int_word); void kvm_s390_crw_mchk(S390CPU *cpu); void kvm_s390_enable_css_support(S390CPU *cpu); -int kvm_s390_get_registers_partial(CPUState *cpu); int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, int vq, bool assign); int kvm_s390_cpu_restart(S390CPU *cpu); @@ -1094,10 +1081,6 @@ static inline void kvm_s390_crw_mchk(S390CPU *cpu) static inline void kvm_s390_enable_css_support(S390CPU *cpu) { } -static inline int kvm_s390_get_registers_partial(CPUState *cpu) -{ - return -ENOSYS; -} static inline int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, int vq, bool assign) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index f7b772668c..f60ccdc326 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -152,33 +152,30 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } - if (env->runtime_reg_dirty_mask == KVM_S390_RUNTIME_DIRTY_FULL) { - reg.id = KVM_REG_S390_CPU_TIMER; - reg.addr = (__u64)&(env->cputm); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); - if (ret < 0) { - return ret; - } + /* Do we need to save more than that? */ + if (level == KVM_PUT_RUNTIME_STATE) { + return 0; + } - reg.id = KVM_REG_S390_CLOCK_COMP; - reg.addr = (__u64)&(env->ckc); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); - if (ret < 0) { - return ret; - } + reg.id = KVM_REG_S390_CPU_TIMER; + reg.addr = (__u64)&(env->cputm); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); + if (ret < 0) { + return ret; + } - reg.id = KVM_REG_S390_TODPR; - reg.addr = (__u64)&(env->todpr); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); - if (ret < 0) { - return ret; - } + reg.id = KVM_REG_S390_CLOCK_COMP; + reg.addr = (__u64)&(env->ckc); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); + if (ret < 0) { + return ret; } - env->runtime_reg_dirty_mask = KVM_S390_RUNTIME_DIRTY_NONE; - /* Do we need to save more than that?
*/ - if (level == KVM_PUT_RUNTIME_STATE) { - return 0; + reg.id = KVM_REG_S390_TODPR; + reg.addr = (__u64)&(env->todpr); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); + if (ret < 0) { + return ret; } if (cap_sync_regs && @@ -216,50 +213,9 @@ int kvm_arch_get_registers(CPUState *cs) S390CPU *cpu = S390_CPU(cs); CPUS390XState *env = &cpu->env; struct kvm_one_reg reg; - int r; - - r = kvm_s390_get_registers_partial(cs); - if (r < 0) { - return r; - } - - reg.id = KVM_REG_S390_CPU_TIMER; - reg.addr = (__u64)&(env->cputm); - r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); - if (r < 0) { - return r; - } - - reg.id = KVM_REG_S390_CLOCK_COMP; - reg.addr = (__u64)&(env->ckc); - r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); - if (r < 0) { - return r; - } - - reg.id = KVM_REG_S390_TODPR; - reg.addr = (__u64)&(env->todpr); - r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); - if (r < 0) { - return r; - } - - env->runtime_reg_dirty_mask = KVM_S390_RUNTIME_DIRTY_FULL; - return 0; -} - -int kvm_s390_get_registers_partial(CPUState *cs) -{ - S390CPU *cpu = S390_CPU(cs); - CPUS390XState *env = &cpu->env; struct kvm_sregs sregs; struct kvm_regs regs; - int ret; - int i; - - if (env->runtime_reg_dirty_mask) { - return 0; - } + int i, r; /* get the PSW */ env->psw.addr = cs->kvm_run->psw_addr; @@ -271,9 +227,9 @@ int kvm_s390_get_registers_partial(CPUState *cs) env->regs[i] = cs->kvm_run->s.regs.gprs[i]; } } else { - ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs); - if (ret < 0) { - return ret; + r = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs); + if (r < 0) { + return r; } for (i = 0; i < 16; i++) { env->regs[i] = regs.gprs[i]; @@ -289,9 +245,9 @@ int kvm_s390_get_registers_partial(CPUState *cs) env->cregs[i] = cs->kvm_run->s.regs.crs[i]; } } else { - ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); - if (ret < 0) { - return ret; + r = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); + if (r < 0) { + return r; } for (i = 0; i < 16; i++) { env->aregs[i] = sregs.acrs[i]; @@ -299,14 +255,33 @@ int kvm_s390_get_registers_partial(CPUState *cs) } } - /* Finally the prefix */ + /* The prefix */ if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) { env->psa = cs->kvm_run->s.regs.prefix; - } else { - /* no prefix without sync regs */ } - env->runtime_reg_dirty_mask = KVM_S390_RUNTIME_DIRTY_PARTIAL; + /* One Regs */ + reg.id = KVM_REG_S390_CPU_TIMER; + reg.addr = (__u64)&(env->cputm); + r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); + if (r < 0) { + return r; + } + + reg.id = KVM_REG_S390_CLOCK_COMP; + reg.addr = (__u64)&(env->ckc); + r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); + if (r < 0) { + return r; + } + + reg.id = KVM_REG_S390_TODPR; + reg.addr = (__u64)&(env->todpr); + r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); + if (r < 0) { + return r; + } + return 0; } @@ -442,15 +417,13 @@ static int kvm_handle_css_inst(S390CPU *cpu, struct kvm_run *run, uint8_t ipa0, uint8_t ipa1, uint8_t ipb) { CPUS390XState *env = &cpu->env; - CPUState *cs = CPU(cpu); if (ipa0 != 0xb2) { /* Not handled for now.
*/ return -1; } - kvm_s390_get_registers_partial(cs); - cs->kvm_vcpu_dirty = true; + cpu_synchronize_state(CPU(cpu)); switch (ipa1) { case PRIV_XSCH: @@ -537,11 +510,9 @@ static int handle_priv(S390CPU *cpu, struct kvm_run *run, static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) { - CPUState *cs = CPU(cpu); CPUS390XState *env = &cpu->env; - kvm_s390_get_registers_partial(cs); - cs->kvm_vcpu_dirty = true; + cpu_synchronize_state(CPU(cpu)); env->regs[2] = s390_virtio_hypercall(env); return 0; @@ -767,8 +738,7 @@ static int handle_tsch(S390CPU *cpu) struct kvm_run *run = cs->kvm_run; int ret; - kvm_s390_get_registers_partial(cs); - cs->kvm_vcpu_dirty = true; + cpu_synchronize_state(cs); ret = ioinst_handle_tsch(env, env->regs[1], run->s390_tsch.ipb); if (ret >= 0) { diff --git a/translate-all.c b/translate-all.c index 105c25aff3..543e1ffe77 100644 --- a/translate-all.c +++ b/translate-all.c @@ -289,17 +289,15 @@ static inline void map_exec(void *addr, long size) } #endif -static void page_init(void) +void page_size_init(void) { /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */ #ifdef _WIN32 - { - SYSTEM_INFO system_info; + SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - qemu_real_host_page_size = system_info.dwPageSize; - } + GetSystemInfo(&system_info); + qemu_real_host_page_size = system_info.dwPageSize; #else qemu_real_host_page_size = getpagesize(); #endif @@ -310,7 +308,11 @@ static void page_init(void) qemu_host_page_size = TARGET_PAGE_SIZE; } qemu_host_page_mask = ~(qemu_host_page_size - 1); +} +static void page_init(void) +{ + page_size_init(); #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) { #ifdef HAVE_KINFO_GETVMMAP @@ -170,6 +170,7 @@ int main(int argc, char **argv) #include "ui/qemu-spice.h" #include "qapi/string-input-visitor.h" +#include "qom/object_interfaces.h" //#define DEBUG_NET //#define DEBUG_SLIRP @@ -2800,6 +2801,7 @@ static int object_create(QemuOpts *opts, void *opaque) { const char *type = qemu_opt_get(opts, "qom-type"); const char *id = qemu_opts_id(opts); + Error *local_err = NULL; Object *obj; g_assert(type != NULL); @@ -2815,9 +2817,27 @@ static int object_create(QemuOpts *opts, void *opaque) return -1; } + if (!object_dynamic_cast(obj, TYPE_USER_CREATABLE)) { + error_setg(&local_err, "object '%s' isn't supported by -object", + id); + goto out; + } + + user_creatable_complete(obj, &local_err); + if (local_err) { + goto out; + } + object_property_add_child(container_get(object_get_root(), "/objects"), - id, obj, NULL); + id, obj, &local_err); + +out: object_unref(obj); + if (local_err) { + qerror_report_err(local_err); + error_free(local_err); + return -1; + } return 0; }
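
The data_proc_simd[] table added above drives a simple pattern/mask decoder: an entry matches when (insn & mask) == pattern and the first hit wins, which is why the more specific simd_mod_imm entry has to sit before simd_shift_imm. A minimal standalone sketch of that lookup (the names and the sample encoding are illustrative, not the QEMU implementation itself):

#include <stdint.h>
#include <stdio.h>

typedef void DecodeFn(uint32_t insn);

typedef struct {
    uint32_t pattern;   /* required value of the fixed bits */
    uint32_t mask;      /* which bits are fixed for this entry */
    DecodeFn *fn;
} DecodeTable;

static void three_reg_same(uint32_t insn) { printf("three-reg-same: %08x\n", insn); }
static void mod_imm(uint32_t insn)        { printf("mod-imm: %08x\n", insn); }

/* As in data_proc_simd[], more specific entries (larger masks) come first. */
static const DecodeTable table[] = {
    { 0x0f000400, 0x9ff80400, mod_imm },
    { 0x0e200400, 0x9f200400, three_reg_same },
    { 0x00000000, 0x00000000, NULL }
};

static DecodeFn *lookup(const DecodeTable *tptr, uint32_t insn)
{
    for (; tptr->fn; tptr++) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->fn;        /* first match wins */
        }
    }
    return NULL;                    /* caller treats this as UNDEF */
}

int main(void)
{
    uint32_t insn = 0x4e228420;     /* intended as ADD V0.16B, V1.16B, V2.16B */
    DecodeFn *fn = lookup(table, insn);
    if (fn) {
        fn(insn);
    } else {
        printf("unallocated: %08x\n", insn);
    }
    return 0;
}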
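
The VRINT{A,N,P,M} and VCVT{A,N,P,M} paths added to disas_vfp_v8_insn, and the corresponding Neon 2-reg-misc cases, all go through fp_decode_rm[] to turn the 2-bit RM field of the A32 encoding into the common FPRounding ordering before calling gen_helper_set_rmode. A small sketch of that mapping for the VFP case; the enum values here are stand-ins, not the real arm_fprounding constants:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for arm_fprounding; the numeric values are illustrative only. */
enum { FPR_TIEEVEN, FPR_POSINF, FPR_NEGINF, FPR_ZERO, FPR_TIEAWAY };

/* RM field (bits [17:16] of the VFP VRINT and VCVT encodings) -> rounding
 * mode, per the FPDecodeRM() pseudocode: 00=A, 01=N, 10=P, 11=M. */
static const uint8_t fp_decode_rm[4] = {
    FPR_TIEAWAY,    /* VRINTA / VCVTA: to nearest, ties away from zero */
    FPR_TIEEVEN,    /* VRINTN / VCVTN: to nearest, ties to even */
    FPR_POSINF,     /* VRINTP / VCVTP: towards +infinity */
    FPR_NEGINF,     /* VRINTM / VCVTM: towards -infinity */
};

static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

int main(void)
{
    uint32_t insn = 2u << 16;   /* hypothetical word with RM = 0b10 */
    int rounding = fp_decode_rm[extract32(insn, 16, 2)];
    printf("RM=%u -> rounding mode %d (towards +infinity)\n",
           extract32(insn, 16, 2), rounding);
    return 0;
}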
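
In handle_vcvt the dp path converts a double to a 32-bit integer, so the destination is a single precision register even though rd was decoded from the D-register field layout (D in bit 4, Vd in bits 3:0). The expression ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1) rebuilds the S-register number (Vd:D) from that field. A tiny check of the rearrangement, assuming rd holds the raw 5-bit D:Vd value:

#include <assert.h>
#include <stdio.h>

/* rd arrives as D:Vd (D in bit 4, Vd in bits 3:0); the single precision
 * register it names is numbered Vd:D, so move the D bit from the top of
 * the index to the bottom. */
static int dreg_field_to_sreg(int rd)
{
    return ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
}

int main(void)
{
    assert(dreg_field_to_sreg(0x13) == 0x07);   /* D=1, Vd=0b0011 -> s7  */
    assert(dreg_field_to_sreg(0x0a) == 0x14);   /* D=0, Vd=0b1010 -> s20 */
    printf("field remap ok\n");
    return 0;
}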
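
The neon_2rm_sizes[] entries added for the VRINT and VCVT group are all 0x4. Each entry reads as a bitmask of the element sizes an op accepts (bit 0 = 8-bit, bit 1 = 16-bit, bit 2 = 32-bit), so 0x4 limits these ops to 32-bit elements, matching the single precision helpers they call. An illustrative version of that validity check, not the decoder itself:

#include <stdint.h>
#include <stdio.h>

/* Bit n set: element size n (0 = 8-bit, 1 = 16-bit, 2 = 32-bit) is valid. */
enum { OP_VRINTN, OP_VCVTAS, NUM_OPS };

static const uint8_t valid_sizes[NUM_OPS] = {
    [OP_VRINTN] = 0x4,      /* 32-bit elements only */
    [OP_VCVTAS] = 0x4,
};

static int size_is_valid(int op, int size)
{
    return (valid_sizes[op] & (1 << size)) != 0;
}

int main(void)
{
    printf("VRINTN, size 1 (16-bit): %s\n", size_is_valid(OP_VRINTN, 1) ? "ok" : "UNDEF");
    printf("VRINTN, size 2 (32-bit): %s\n", size_is_valid(OP_VRINTN, 2) ? "ok" : "UNDEF");
    return 0;
}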