250 files changed, 12016 insertions, 4627 deletions
diff --git a/Kconfig.host b/Kconfig.host
index d763d89269..2ee71578f3 100644
--- a/Kconfig.host
+++ b/Kconfig.host
@@ -46,3 +46,6 @@ config FUZZ
 config VFIO_USER_SERVER_ALLOWED
     bool
     imply VFIO_USER_SERVER
+
+config HV_BALLOON_POSSIBLE
+    bool
diff --git a/MAINTAINERS b/MAINTAINERS
index 4fad272d73..b86ea7f75a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -323,7 +323,7 @@ RISC-V TCG CPUs
 M: Palmer Dabbelt <palmer@dabbelt.com>
 M: Alistair Francis <alistair.francis@wdc.com>
 M: Bin Meng <bin.meng@windriver.com>
-R: Weiwei Li <liweiwei@iscas.ac.cn>
+R: Weiwei Li <liwei1518@gmail.com>
 R: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
 R: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
 L: qemu-riscv@nongnu.org
@@ -490,7 +490,7 @@ S: Supported
 F: include/sysemu/kvm_xen.h
 F: target/i386/kvm/xen*
 F: hw/i386/kvm/xen*
-F: tests/avocado/xen_guest.py
+F: tests/avocado/kvm_xen_guest.py
 
 Guest CPU Cores (other accelerators)
 ------------------------------------
@@ -2665,6 +2665,14 @@ F: hw/usb/canokey.c
 F: hw/usb/canokey.h
 F: docs/system/devices/canokey.rst
 
+Hyper-V Dynamic Memory Protocol
+M: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+S: Supported
+F: hw/hyperv/hv-balloon*.c
+F: hw/hyperv/hv-balloon*.h
+F: include/hw/hyperv/dynmem-proto.h
+F: include/hw/hyperv/hv-balloon.h
+
 Subsystems
 ----------
 Overall Audio backends
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index b8c5e345b8..f35c5f359b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -321,21 +321,6 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
     }
 }
 
-void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
-{
-    CPUState *cpu;
-    size_t full = 0, part = 0, elide = 0;
-
-    CPU_FOREACH(cpu) {
-        full += qatomic_read(&cpu->neg.tlb.c.full_flush_count);
-        part += qatomic_read(&cpu->neg.tlb.c.part_flush_count);
-        elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count);
-    }
-    *pfull = full;
-    *ppart = part;
-    *pelide = elide;
-}
-
 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 {
     uint16_t asked = data.host_int;
@@ -2706,7 +2691,7 @@ static uint64_t do_st16_leN(CPUState *cpu, MMULookupPageData *p,
 
     case MO_ATOM_WITHIN16_PAIR:
         /* Since size > 8, this is the half that must be atomic. */
-        if (!HAVE_ATOMIC128_RW) {
+        if (!HAVE_CMPXCHG128) {
             cpu_loop_exit_atomic(cpu, ra);
         }
         return store_whole_le16(p->haddr, p->size, val_le);
diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h
index 3b2277e6e9..edefd0dcb7 100644
--- a/accel/tcg/internal-common.h
+++ b/accel/tcg/internal-common.h
@@ -14,8 +14,6 @@
 extern int64_t max_delay;
 extern int64_t max_advance;
 
-void dump_exec_info(GString *buf);
-
 /*
  * Return true if CS is not running in parallel with other cpus, either
  * because there are no other cpus or we are within an exclusive context.
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index 1cf5b92166..33a04dec52 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -825,7 +825,7 @@ static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
     int sh = o * 8;
     Int128 m, v;
 
-    qemu_build_assert(HAVE_ATOMIC128_RW);
+    qemu_build_assert(HAVE_CMPXCHG128);
 
     /* Like MAKE_64BIT_MASK(0, sz), but larger. */
     if (sz <= 64) {
@@ -887,7 +887,7 @@ static void store_atom_2(CPUState *cpu, uintptr_t ra,
             return;
         }
     } else if ((pi & 15) == 7) {
-        if (HAVE_ATOMIC128_RW) {
+        if (HAVE_CMPXCHG128) {
             Int128 v = int128_lshift(int128_make64(val), 56);
             Int128 m = int128_lshift(int128_make64(0xffff), 56);
             store_atom_insert_al16(pv - 7, v, m);
@@ -956,7 +956,7 @@ static void store_atom_4(CPUState *cpu, uintptr_t ra,
                 return;
             }
         } else {
-            if (HAVE_ATOMIC128_RW) {
+            if (HAVE_CMPXCHG128) {
                 store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
                 return;
             }
@@ -1021,7 +1021,7 @@ static void store_atom_8(CPUState *cpu, uintptr_t ra,
         }
         break;
     case MO_64:
-        if (HAVE_ATOMIC128_RW) {
+        if (HAVE_CMPXCHG128) {
             store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
             return;
         }
@@ -1076,7 +1076,7 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra,
         }
         break;
     case -MO_64:
-        if (HAVE_ATOMIC128_RW) {
+        if (HAVE_CMPXCHG128) {
             uint64_t val_le;
             int s2 = pi & 15;
             int s1 = 16 - s2;
@@ -1103,10 +1103,6 @@ static void store_atom_16(CPUState *cpu, uintptr_t ra,
         }
         break;
     case MO_128:
-        if (HAVE_ATOMIC128_RW) {
-            atomic16_set(pv, val);
-            return;
-        }
         break;
     default:
         g_assert_not_reached();
diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index caf1189e0b..093efe9714 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/qht.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
@@ -17,6 +18,7 @@
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
 #include "internal-common.h"
+#include "tb-context.h"
 
 
 static void dump_drift_info(GString *buf)
@@ -50,6 +52,153 @@ static void dump_accel_info(GString *buf)
                            one_insn_per_tb ? "on" : "off");
 }
 
+static void print_qht_statistics(struct qht_stats hst, GString *buf)
+{
+    uint32_t hgram_opts; /* QDIST_PR_* flags handed to qdist_pr() */
+    size_t hgram_bins;   /* bin count used for the chain histogram */
+    char *hgram;         /* text returned by qdist_pr(), g_free()d after use */
+    /* Append head-bucket usage and the occupancy/chain histograms to @buf. */
+    if (!hst.head_buckets) {
+        return; /* no head buckets: nothing is appended */
+    }
+    g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
+                           "(%0.2f%% head buckets used)\n",
+                           hst.used_head_buckets, hst.head_buckets,
+                           (double)hst.used_head_buckets /
+                           hst.head_buckets * 100);
+
+    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
+    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
+    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
+        hgram_opts |= QDIST_PR_NODECIMAL;
+    }
+    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
+    g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
+                           "Histogram: %s\n",
+                           qdist_avg(&hst.occupancy) * 100, hgram);
+    g_free(hgram);
+
+    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
+    if (hgram_bins > 10) {
+        hgram_bins = 10; /* never request more than 10 bins */
+    } else { /* hgram_bins <= 10: request 0 bins and add the flags below */
+        hgram_bins = 0;
+        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
+    }
+    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
+    g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
+                           "Histogram: %s\n",
+                           qdist_avg(&hst.chain), hgram);
+    g_free(hgram);
+}
+
+struct tb_tree_stats {
+    size_t nb_tbs;            /* TBs visited by tb_tree_stats_iter() */
+    size_t host_size;         /* sum of each TB's tc.size */
+    size_t target_size;       /* sum of each TB's size */
+    size_t max_target_size;   /* largest TB size seen */
+    size_t direct_jmp_count;  /* TBs with a valid jmp_reset_offset[0] */
+    size_t direct_jmp2_count; /* ... that also have a valid [1] */
+    size_t cross_page;        /* TBs whose page_addr[1] is not -1 */
+};
+
+/* tcg_tb_foreach() callback: add the TB in @value to the stats in @data. */
+static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
+{
+    struct tb_tree_stats *acc = data;
+    const TranslationBlock *blk = value;
+
+    acc->nb_tbs += 1;
+    acc->host_size += blk->tc.size;
+    acc->target_size += blk->size;
+    if (acc->max_target_size < blk->size) {
+        acc->max_target_size = blk->size;
+    }
+    acc->cross_page += (blk->page_addr[1] != -1);
+    if (blk->jmp_reset_offset[0] == TB_JMP_OFFSET_INVALID) {
+        return false;
+    }
+    acc->direct_jmp_count += 1;
+    if (blk->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
+        acc->direct_jmp2_count += 1;
+    }
+    return false;
+}
+
+static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
+{
+    size_t sum_full = 0, sum_part = 0, sum_elide = 0;
+    CPUState *cs;
+
+    CPU_FOREACH(cs) { /* total each flush counter over every CPU */
+        sum_full += qatomic_read(&cs->neg.tlb.c.full_flush_count);
+        sum_part += qatomic_read(&cs->neg.tlb.c.part_flush_count);
+        sum_elide += qatomic_read(&cs->neg.tlb.c.elide_flush_count);
+    }
+    *pfull = sum_full;
+    *ppart = sum_part;
+    *pelide = sum_elide;
+}
+
+static void tcg_dump_info(GString *buf)
+{
+    g_string_append(buf, "[TCG profiler not compiled]\n"); /* fixed text */
+}
+
+static void dump_exec_info(GString *buf)
+{
+    struct tb_tree_stats tst = {}; /* zeroed, then filled by the iterator */
+    struct qht_stats hst;
+    size_t nb_tbs, flush_full, flush_part, flush_elide;
+    /* Append translation buffer, TB hash and flush statistics to @buf. */
+    tcg_tb_foreach(tb_tree_stats_iter, &tst);
+    nb_tbs = tst.nb_tbs; /* may be 0: each division by it below is guarded */
+    /* XXX: avoid using doubles ? */
+    g_string_append_printf(buf, "Translation buffer state:\n");
+    /*
+     * Report total code size including the padding and TB structs;
+     * otherwise users might think "-accel tcg,tb-size" is not honoured.
+     * For avg host size we use the precise numbers from tb_tree_stats though.
+     */
+    g_string_append_printf(buf, "gen code size       %zu/%zu\n",
+                           tcg_code_size(), tcg_code_capacity());
+    g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
+    g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
+                           nb_tbs ? tst.target_size / nb_tbs : 0,
+                           tst.max_target_size);
+    g_string_append_printf(buf, "TB avg host size    %zu bytes "
+                           "(expansion ratio: %0.1f)\n",
+                           nb_tbs ? tst.host_size / nb_tbs : 0,
+                           tst.target_size ?
+                           (double)tst.host_size / tst.target_size : 0);
+    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
+                           tst.cross_page,
+                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
+    g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
+                           "(2 jumps=%zu %zu%%)\n",
+                           tst.direct_jmp_count,
+                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
+                           tst.direct_jmp2_count,
+                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
+
+    qht_statistics_init(&tb_ctx.htable, &hst);
+    print_qht_statistics(hst, buf);
+    qht_statistics_destroy(&hst);
+
+    g_string_append_printf(buf, "\nStatistics:\n");
+    g_string_append_printf(buf, "TB flush count      %u\n",
+                           qatomic_read(&tb_ctx.tb_flush_count));
+    g_string_append_printf(buf, "TB invalidate count %u\n",
+                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));
+
+    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
+    g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
+    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
+    g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
+    tcg_dump_info(buf); /* ends with the "[TCG profiler not compiled]" line */
+}
+
 HumanReadableText *qmp_x_query_jit(Error **errp)
 {
     g_autoptr(GString) buf = g_string_new("");
@@ -66,6 +215,11 @@ HumanReadableText *qmp_x_query_jit(Error **errp)
     return human_readable_text_from_str(buf);
 }
 
+static void tcg_dump_op_count(GString *buf)
+{
+    g_string_append(buf, "[TCG profiler not compiled]\n"); /* fixed text */
+}
+
 HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
     g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 8cb6ad3511..e579b0891d 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -645,133 +645,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     cpu_loop_exit_noexc(cpu);
 }
 
-static void print_qht_statistics(struct qht_stats hst, GString *buf)
-{
-    uint32_t hgram_opts;
-    size_t hgram_bins;
-    char *hgram;
-
-    if (!hst.head_buckets) {
-        return;
-    }
-    g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
-                           "(%0.2f%% head buckets used)\n",
-                           hst.used_head_buckets, hst.head_buckets,
-                           (double)hst.used_head_buckets /
-                           hst.head_buckets * 100);
-
-    hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
-    hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
-    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
-        hgram_opts |= QDIST_PR_NODECIMAL;
-    }
-    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
-    g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
-                           "Histogram: %s\n",
-                           qdist_avg(&hst.occupancy) * 100, hgram);
-    g_free(hgram);
-
-    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
-    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
-    if (hgram_bins > 10) {
-        hgram_bins = 10;
-    } else {
-        hgram_bins = 0;
-        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
-    }
-    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
-    g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
-                           "Histogram: %s\n",
-                           qdist_avg(&hst.chain), hgram);
-    g_free(hgram);
-}
-
-struct tb_tree_stats {
-    size_t nb_tbs;
-    size_t host_size;
-    size_t target_size;
-    size_t max_target_size;
-    size_t direct_jmp_count;
-    size_t direct_jmp2_count;
-    size_t cross_page;
-};
-
-static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
-{
-    const TranslationBlock *tb = value;
-    struct tb_tree_stats *tst = data;
-
-    tst->nb_tbs++;
-    tst->host_size += tb->tc.size;
-    tst->target_size += tb->size;
-    if (tb->size > tst->max_target_size) {
-        tst->max_target_size = tb->size;
-    }
-    if (tb_page_addr1(tb) != -1) {
-        tst->cross_page++;
-    }
-    if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
-        tst->direct_jmp_count++;
-        if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
-            tst->direct_jmp2_count++;
-        }
-    }
-    return false;
-}
-
-void dump_exec_info(GString *buf)
-{
-    struct tb_tree_stats tst = {};
-    struct qht_stats hst;
-    size_t nb_tbs, flush_full, flush_part, flush_elide;
-
-    tcg_tb_foreach(tb_tree_stats_iter, &tst);
-    nb_tbs = tst.nb_tbs;
-    /* XXX: avoid using doubles ? */
-    g_string_append_printf(buf, "Translation buffer state:\n");
-    /*
-     * Report total code size including the padding and TB structs;
-     * otherwise users might think "-accel tcg,tb-size" is not honoured.
-     * For avg host size we use the precise numbers from tb_tree_stats though.
-     */
-    g_string_append_printf(buf, "gen code size       %zu/%zu\n",
-                           tcg_code_size(), tcg_code_capacity());
-    g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
-    g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
-                           nb_tbs ? tst.target_size / nb_tbs : 0,
-                           tst.max_target_size);
-    g_string_append_printf(buf, "TB avg host size    %zu bytes "
-                           "(expansion ratio: %0.1f)\n",
-                           nb_tbs ? tst.host_size / nb_tbs : 0,
-                           tst.target_size ?
-                           (double)tst.host_size / tst.target_size : 0);
-    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
-                           tst.cross_page,
-                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
-    g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
-                           "(2 jumps=%zu %zu%%)\n",
-                           tst.direct_jmp_count,
-                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
-                           tst.direct_jmp2_count,
-                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
-
-    qht_statistics_init(&tb_ctx.htable, &hst);
-    print_qht_statistics(hst, buf);
-    qht_statistics_destroy(&hst);
-
-    g_string_append_printf(buf, "\nStatistics:\n");
-    g_string_append_printf(buf, "TB flush count      %u\n",
-                           qatomic_read(&tb_ctx.tb_flush_count));
-    g_string_append_printf(buf, "TB invalidate count %u\n",
-                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));
-
-    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
-    g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
-    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
-    g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
-    tcg_dump_info(buf);
-}
-
 #else /* CONFIG_USER_ONLY */
 
 void cpu_interrupt(CPUState *cpu, int mask)
diff --git a/block/file-posix.c b/block/file-posix.c
index 50e2b20d5c..b862406c71 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -160,7 +160,6 @@ typedef struct BDRVRawState {
     bool has_write_zeroes:1;
     bool use_linux_aio:1;
     bool use_linux_io_uring:1;
-    int64_t *offset; /* offset of zone append operation */
     int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
     bool needs_alignment;
@@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return true;
 }
 
-static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
                                    uint64_t bytes, QEMUIOVector *qiov, int type)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
     int ret;
+    uint64_t offset = *offset_ptr;
 
     if (fd_open(bs) < 0)
         return -EIO;
@@ -2513,8 +2513,8 @@ out:
             uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
             if (!BDRV_ZT_IS_CONV(*wp)) {
                 if (type & QEMU_AIO_ZONE_APPEND) {
-                    *s->offset = *wp;
-                    trace_zbd_zone_append_complete(bs, *s->offset
+                    *offset_ptr = *wp;
+                    trace_zbd_zone_append_complete(bs, *offset_ptr
                         >> BDRV_SECTOR_BITS);
                 }
                 /* Advance the wp if needed */
@@ -2523,7 +2523,10 @@ out:
                 }
             }
         } else {
-            update_zones_wp(bs, s->fd, 0, 1);
+            /*
+             * write and append write are not allowed to cross zone boundaries
+             */
+            update_zones_wp(bs, s->fd, offset, 1);
         }
 
         qemu_co_mutex_unlock(&wps->colock);
@@ -2536,14 +2539,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
 }
 
 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -3470,7 +3473,7 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
                         len >> BDRV_SECTOR_BITS);
     ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb);
     if (ret != 0) {
-        update_zones_wp(bs, s->fd, offset, i);
+        update_zones_wp(bs, s->fd, offset, nrz);
         error_report("ioctl %s failed %d", op_name, ret);
         return ret;
     }
@@ -3506,8 +3509,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     int64_t zone_size_mask = bs->bl.zone_size - 1;
     int64_t iov_len = 0;
     int64_t len = 0;
-    BDRVRawState *s = bs->opaque;
-    s->offset = offset;
 
     if (*offset & zone_size_mask) {
         error_report("sector offset %" PRId64 " is not aligned to zone size "
@@ -3528,7 +3529,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }
 
     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
 }
 #endif
 
diff --git a/block/nvme.c b/block/nvme.c
index 96b3f8f2fa..0a0a0a6b36 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -417,9 +417,10 @@ static bool nvme_process_completion(NVMeQueuePair *q)
             q->cq_phase = !q->cq_phase;
         }
         cid = le16_to_cpu(c->cid);
-        if (cid == 0 || cid > NVME_QUEUE_SIZE) {
-            warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
-                        "queue size: %u", cid, NVME_QUEUE_SIZE);
+        if (cid == 0 || cid > NVME_NUM_REQS) {
+            warn_report("NVMe: Unexpected CID in completion queue: %" PRIu32
+                        ", should be within: 1..%u inclusively", cid,
+                        NVME_NUM_REQS);
             continue;
         }
         trace_nvme_complete_command(s, q->index, cid);
diff --git a/block/parallels-ext.c b/block/parallels-ext.c
index 8a109f005a..4d8ecf5047 100644
--- a/block/parallels-ext.c
+++ b/block/parallels-ext.c
@@ -130,7 +130,7 @@ static BdrvDirtyBitmap *parallels_load_bitmap(BlockDriverState *bs,
     g_autofree uint64_t *l1_table = NULL;
     BdrvDirtyBitmap *bitmap;
     QemuUUID uuid;
-    char uuidstr[UUID_FMT_LEN + 1];
+    char uuidstr[UUID_STR_LEN];
     int i;
 
     if (data_size < sizeof(bf)) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 904f00d1b3..5af439bd11 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1983,7 +1983,7 @@ discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
             /* If we keep the reference, pass on the discard still */
             bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
                           s->cluster_size);
-       }
+        }
     }
 
     qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
@@ -2061,9 +2061,15 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
         QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
         bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
             ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
-        uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
+        bool keep_reference =
+            (s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
+        uint64_t new_l2_entry = old_l2_entry;
         uint64_t new_l2_bitmap = old_l2_bitmap;
 
+        if (unmap && !keep_reference) {
+            new_l2_entry = 0;
+        }
+
         if (has_subclusters(s)) {
             new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
         } else {
@@ -2081,9 +2087,17 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
             set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
         }
 
-        /* Then decrease the refcount */
         if (unmap) {
-            qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+            if (!keep_reference) {
+                /* Then decrease the refcount */
+                qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+            } else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
+                       (type == QCOW2_CLUSTER_NORMAL ||
+                        type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+                /* If we keep the reference, pass on the discard still */
+                bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+                            s->cluster_size);
+            }
         }
     }
 
diff --git a/block/vdi.c b/block/vdi.c
index c647d72895..7cfd12b50d 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -239,7 +239,7 @@ static void vdi_header_to_le(VdiHeader *header)
 
 static void vdi_header_print(VdiHeader *header)
 {
-    char uuidstr[37];
+    char uuidstr[UUID_STR_LEN];
     QemuUUID uuid;
     logout("text        %s", header->text);
     logout("signature   0x%08x\n", header->signature);
diff --git a/blockdev.c b/blockdev.c
index 1517dc6210..e9b7e38dc4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -255,13 +255,13 @@ void drive_check_orphaned(void)
          * Ignore default drives, because we create certain default
          * drives unconditionally, then leave them unclaimed.  Not the
          * users fault.
-         * Ignore IF_VIRTIO, because it gets desugared into -device,
-         * so we can leave failing to -device.
+         * Ignore IF_VIRTIO or IF_XEN, because it gets desugared into
+         * -device, so we can leave failing to -device.
          * Ignore IF_NONE, because leaving unclaimed IF_NONE remains
          * available for device_add is a feature.
          */
         if (dinfo->is_default || dinfo->type == IF_VIRTIO
-            || dinfo->type == IF_NONE) {
+            || dinfo->type == IF_XEN || dinfo->type == IF_NONE) {
             continue;
         }
         if (!blk_get_attached_dev(blk)) {
@@ -977,6 +977,15 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
         qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
         qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
                      &error_abort);
+    } else if (type == IF_XEN) {
+        QemuOpts *devopts;
+        devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
+                                   &error_abort);
+        qemu_opt_set(devopts, "driver",
+                     (media == MEDIA_CDROM) ? "xen-cdrom" : "xen-disk",
+                     &error_abort);
+        qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
+                     &error_abort);
     }
 
     filename = qemu_opt_get(legacy_opts, "file");
diff --git a/configs/targets/hppa-linux-user.mak b/configs/targets/hppa-linux-user.mak
index 361ea39d71..8e0a80492f 100644
--- a/configs/targets/hppa-linux-user.mak
+++ b/configs/targets/hppa-linux-user.mak
@@ -1,4 +1,5 @@
 TARGET_ARCH=hppa
+TARGET_ABI32=y
 TARGET_SYSTBL_ABI=common,32
 TARGET_SYSTBL=syscall.tbl
 TARGET_BIG_ENDIAN=y
diff --git a/disas/riscv.c b/disas/riscv.c
index 8e89e1d115..e9458e574b 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -862,6 +862,47 @@ typedef enum {
     rv_op_fltq_q = 831,
     rv_op_fleq_h = 832,
     rv_op_fltq_h = 833,
+    rv_op_vaesdf_vv = 834,
+    rv_op_vaesdf_vs = 835,
+    rv_op_vaesdm_vv = 836,
+    rv_op_vaesdm_vs = 837,
+    rv_op_vaesef_vv = 838,
+    rv_op_vaesef_vs = 839,
+    rv_op_vaesem_vv = 840,
+    rv_op_vaesem_vs = 841,
+    rv_op_vaeskf1_vi = 842,
+    rv_op_vaeskf2_vi = 843,
+    rv_op_vaesz_vs = 844,
+    rv_op_vandn_vv = 845,
+    rv_op_vandn_vx = 846,
+    rv_op_vbrev_v = 847,
+    rv_op_vbrev8_v = 848,
+    rv_op_vclmul_vv = 849,
+    rv_op_vclmul_vx = 850,
+    rv_op_vclmulh_vv = 851,
+    rv_op_vclmulh_vx = 852,
+    rv_op_vclz_v = 853,
+    rv_op_vcpop_v = 854,
+    rv_op_vctz_v = 855,
+    rv_op_vghsh_vv = 856,
+    rv_op_vgmul_vv = 857,
+    rv_op_vrev8_v = 858,
+    rv_op_vrol_vv = 859,
+    rv_op_vrol_vx = 860,
+    rv_op_vror_vv = 861,
+    rv_op_vror_vx = 862,
+    rv_op_vror_vi = 863,
+    rv_op_vsha2ch_vv = 864,
+    rv_op_vsha2cl_vv = 865,
+    rv_op_vsha2ms_vv = 866,
+    rv_op_vsm3c_vi = 867,
+    rv_op_vsm3me_vv = 868,
+    rv_op_vsm4k_vi = 869,
+    rv_op_vsm4r_vv = 870,
+    rv_op_vsm4r_vs = 871,
+    rv_op_vwsll_vv = 872,
+    rv_op_vwsll_vx = 873,
+    rv_op_vwsll_vi = 874,
 } rv_op;
 
 /* register names */
@@ -2008,6 +2049,47 @@ const rv_opcode_data rvi_opcode_data[] = {
     { "fltq.q", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
     { "fleq.h", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
     { "fltq.h", rv_codec_r, rv_fmt_rd_frs1_frs2, NULL, 0, 0, 0 },
+    { "vaesdf.vv", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesdf.vs", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesdm.vv", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesdm.vs", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesef.vv", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesef.vs", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesem.vv", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaesem.vs", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vaeskf1.vi", rv_codec_v_i, rv_fmt_vd_vs2_uimm, NULL, 0, 0, 0 },
+    { "vaeskf2.vi", rv_codec_v_i, rv_fmt_vd_vs2_uimm, NULL, 0, 0, 0 },
+    { "vaesz.vs", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vandn.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1_vm, NULL, 0, 0, 0 },
+    { "vandn.vx", rv_codec_v_r, rv_fmt_vd_vs2_rs1_vm, NULL, 0, 0, 0 },
+    { "vbrev.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vbrev8.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vclmul.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1_vm, NULL, 0, 0, 0 },
+    { "vclmul.vx", rv_codec_v_r, rv_fmt_vd_vs2_rs1_vm, NULL, 0, 0, 0 },
+    { "vclmulh.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1_vm, NULL, 0, 0, 0 },
+    { "vclmulh.vx", rv_codec_v_r, rv_fmt_vd_vs2_rs1_vm, NULL, 0, 0, 0 },
+    { "vclz.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vcpop.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vctz.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vghsh.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1, NULL, 0, 0, 0 },
+    { "vgmul.vv", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vrev8.v", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, 0, 0, 0 },
+    { "vrol.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1_vm, NULL, 0, 0, 0 },
+    { "vrol.vx", rv_codec_v_r, rv_fmt_vd_vs2_rs1_vm, NULL, 0, 0, 0 },
+    { "vror.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1_vm, NULL, 0, 0, 0 },
+    { "vror.vx", rv_codec_v_r, rv_fmt_vd_vs2_rs1_vm, NULL, 0, 0, 0 },
+    { "vror.vi", rv_codec_vror_vi, rv_fmt_vd_vs2_uimm_vm, NULL, 0, 0, 0 },
+    { "vsha2ch.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1, NULL, 0, 0, 0 },
+    { "vsha2cl.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1, NULL, 0, 0, 0 },
+    { "vsha2ms.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1, NULL, 0, 0, 0 },
+    { "vsm3c.vi", rv_codec_v_i, rv_fmt_vd_vs2_uimm, NULL, 0, 0, 0 },
+    { "vsm3me.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1, NULL, 0, 0, 0 },
+    { "vsm4k.vi", rv_codec_v_i, rv_fmt_vd_vs2_uimm, NULL, 0, 0, 0 },
+    { "vsm4r.vv", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vsm4r.vs", rv_codec_v_r, rv_fmt_vd_vs2, NULL, 0, 0, 0 },
+    { "vwsll.vv", rv_codec_v_r, rv_fmt_vd_vs2_vs1_vm, NULL, 0, 0, 0 },
+    { "vwsll.vx", rv_codec_v_r, rv_fmt_vd_vs2_rs1_vm, NULL, 0, 0, 0 },
+    { "vwsll.vi", rv_codec_v_i, rv_fmt_vd_vs2_uimm_vm, NULL, 0, 0, 0 },
 };
 
 /* CSR names */
@@ -3054,12 +3136,12 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 }
                 break;
             case 89:
-		switch (((inst >> 12) & 0b111)) {
+                switch (((inst >> 12) & 0b111)) {
                 case 0: op = rv_op_fmvp_d_x; break;
                 }
                 break;
             case 91:
-		switch (((inst >> 12) & 0b111)) {
+                switch (((inst >> 12) & 0b111)) {
                 case 0: op = rv_op_fmvp_q_x; break;
                 }
                 break;
@@ -3176,6 +3258,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
             case 0:
                 switch ((inst >> 26) & 0b111111) {
                 case 0: op = rv_op_vadd_vv; break;
+                case 1: op = rv_op_vandn_vv; break;
                 case 2: op = rv_op_vsub_vv; break;
                 case 4: op = rv_op_vminu_vv; break;
                 case 5: op = rv_op_vmin_vv; break;
@@ -3198,6 +3281,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                     }
                     break;
                 case 19: op = rv_op_vmsbc_vvm; break;
+                case 20: op = rv_op_vror_vv; break;
+                case 21: op = rv_op_vrol_vv; break;
                 case 23:
                     if (((inst >> 20) & 0b111111) == 32)
                         op = rv_op_vmv_v_v;
@@ -3226,6 +3311,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 47: op = rv_op_vnclip_wv; break;
                 case 48: op = rv_op_vwredsumu_vs; break;
                 case 49: op = rv_op_vwredsum_vs; break;
+                case 53: op = rv_op_vwsll_vv; break;
                 }
                 break;
             case 1:
@@ -3323,6 +3409,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 9: op = rv_op_vaadd_vv; break;
                 case 10: op = rv_op_vasubu_vv; break;
                 case 11: op = rv_op_vasub_vv; break;
+                case 12: op = rv_op_vclmul_vv; break;
+                case 13: op = rv_op_vclmulh_vv; break;
                 case 16:
                     switch ((inst >> 15) & 0b11111) {
                     case 0: if ((inst >> 25) & 1) op = rv_op_vmv_x_s; break;
@@ -3338,6 +3426,12 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                     case 5: op = rv_op_vsext_vf4; break;
                     case 6: op = rv_op_vzext_vf2; break;
                     case 7: op = rv_op_vsext_vf2; break;
+                    case 8: op = rv_op_vbrev8_v; break;
+                    case 9: op = rv_op_vrev8_v; break;
+                    case 10: op = rv_op_vbrev_v; break;
+                    case 12: op = rv_op_vclz_v; break;
+                    case 13: op = rv_op_vctz_v; break;
+                    case 14: op = rv_op_vcpop_v; break;
                     }
                     break;
                 case 20:
@@ -3406,6 +3500,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                     }
                     break;
                 case 17: op = rv_op_vmadc_vim; break;
+                case 20: case 21: op = rv_op_vror_vi; break;
                 case 23:
                     if (((inst >> 20) & 0b111111) == 32)
                         op = rv_op_vmv_v_i;
@@ -3437,11 +3532,13 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 45: op = rv_op_vnsra_wi; break;
                 case 46: op = rv_op_vnclipu_wi; break;
                 case 47: op = rv_op_vnclip_wi; break;
+                case 53: op = rv_op_vwsll_vi; break;
                 }
                 break;
             case 4:
                 switch ((inst >> 26) & 0b111111) {
                 case 0: op = rv_op_vadd_vx; break;
+                case 1: op = rv_op_vandn_vx; break;
                 case 2: op = rv_op_vsub_vx; break;
                 case 3: op = rv_op_vrsub_vx; break;
                 case 4: op = rv_op_vminu_vx; break;
@@ -3466,6 +3563,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                     }
                     break;
                 case 19: op = rv_op_vmsbc_vxm; break;
+                case 20: op = rv_op_vror_vx; break;
+                case 21: op = rv_op_vrol_vx; break;
                 case 23:
                     if (((inst >> 20) & 0b111111) == 32)
                         op = rv_op_vmv_v_x;
@@ -3494,6 +3593,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 45: op = rv_op_vnsra_wx; break;
                 case 46: op = rv_op_vnclipu_wx; break;
                 case 47: op = rv_op_vnclip_wx; break;
+                case 53: op = rv_op_vwsll_vx; break;
                 }
                 break;
             case 5:
@@ -3554,6 +3654,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
                 case 9: op = rv_op_vaadd_vx; break;
                 case 10: op = rv_op_vasubu_vx; break;
                 case 11: op = rv_op_vasub_vx; break;
+                case 12: op = rv_op_vclmul_vx; break;
+                case 13: op = rv_op_vclmulh_vx; break;
                 case 14: op = rv_op_vslide1up_vx; break;
                 case 15: op = rv_op_vslide1down_vx; break;
                 case 16:
@@ -3686,6 +3788,41 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa)
             case 7: op = rv_op_csrrci; break;
             }
             break;
+        case 29:
+            if (((inst >> 25) & 1) == 1 && ((inst >> 12) & 0b111) == 2) {
+                switch ((inst >> 26) & 0b111111) {
+                case 32: op = rv_op_vsm3me_vv; break;
+                case 33: op = rv_op_vsm4k_vi; break;
+                case 34: op = rv_op_vaeskf1_vi; break;
+                case 40:
+                    switch ((inst >> 15) & 0b11111) {
+                    case 0: op = rv_op_vaesdm_vv; break;
+                    case 1: op = rv_op_vaesdf_vv; break;
+                    case 2: op = rv_op_vaesem_vv; break;
+                    case 3: op = rv_op_vaesef_vv; break;
+                    case 16: op = rv_op_vsm4r_vv; break;
+                    case 17: op = rv_op_vgmul_vv; break;
+                    }
+                    break;
+                case 41:
+                    switch ((inst >> 15) & 0b11111) {
+                    case 0: op = rv_op_vaesdm_vs; break;
+                    case 1: op = rv_op_vaesdf_vs; break;
+                    case 2: op = rv_op_vaesem_vs; break;
+                    case 3: op = rv_op_vaesef_vs; break;
+                    case 7: op = rv_op_vaesz_vs; break;
+                    case 16: op = rv_op_vsm4r_vs; break;
+                    }
+                    break;
+                case 42: op = rv_op_vaeskf2_vi; break;
+                case 43: op = rv_op_vsm3c_vi; break;
+                case 44: op = rv_op_vghsh_vv; break;
+                case 45: op = rv_op_vsha2ms_vv; break;
+                case 46: op = rv_op_vsha2ch_vv; break;
+                case 47: op = rv_op_vsha2cl_vv; break;
+                }
+            }
+            break;
         case 30:
             switch (((inst >> 22) & 0b1111111000) |
                     ((inst >> 12) & 0b0000000111)) {
@@ -4011,6 +4148,12 @@ static uint32_t operand_vzimm10(rv_inst inst)
     return (inst << 34) >> 54;
 }
 
+static uint32_t operand_vzimm6(rv_inst inst)
+{
+    /* uimm[5] comes from instruction bit 26, uimm[4:0] from bits 19:15. */
+    return (((inst >> 26) & 1) << 5) | ((inst >> 15) & 0x1f);
+}
+
 static uint32_t operand_bs(rv_inst inst)
 {
     return (inst << 32) >> 62;
@@ -4393,6 +4536,12 @@ static void decode_inst_operands(rv_decode *dec, rv_isa isa)
         dec->imm = operand_vimm(inst);
         dec->vm = operand_vm(inst);
         break;
+    case rv_codec_vror_vi:
+        dec->rd = operand_rd(inst);
+        dec->rs2 = operand_rs2(inst);
+        dec->imm = operand_vzimm6(inst);
+        dec->vm = operand_vm(inst);
+        break;
     case rv_codec_vsetvli:
         dec->rd = operand_rd(inst);
         dec->rs1 = operand_rs1(inst);
@@ -4430,7 +4579,7 @@ static void decode_inst_operands(rv_decode *dec, rv_isa isa)
         break;
     case rv_codec_zcmt_jt:
         dec->imm = operand_tbl_index(inst);
-	break;
+        break;
     case rv_codec_fli:
         dec->rd = operand_rd(inst);
         dec->imm = operand_rs1(inst);
@@ -4677,7 +4826,7 @@ static void format_inst(char *buf, size_t buflen, size_t tab, rv_decode *dec)
             append(buf, tmp, buflen);
             break;
         case 'u':
-            snprintf(tmp, sizeof(tmp), "%u", ((uint32_t)dec->imm & 0b11111));
+            snprintf(tmp, sizeof(tmp), "%u", ((uint32_t)dec->imm & 0b111111));
             append(buf, tmp, buflen);
             break;
         case 'j':
diff --git a/disas/riscv.h b/disas/riscv.h
index 8abb578b51..19e5ed2ce6 100644
--- a/disas/riscv.h
+++ b/disas/riscv.h
@@ -152,6 +152,7 @@ typedef enum {
     rv_codec_v_i,
     rv_codec_vsetvli,
     rv_codec_vsetivli,
+    rv_codec_vror_vi,
     rv_codec_zcb_ext,
     rv_codec_zcb_mul,
     rv_codec_zcb_lb,
@@ -274,6 +275,7 @@ enum {
 #define rv_fmt_vd_vs2_fs1_vm          "O\tD,F,4m"
 #define rv_fmt_vd_vs2_imm_vl          "O\tD,F,il"
 #define rv_fmt_vd_vs2_imm_vm          "O\tD,F,im"
+#define rv_fmt_vd_vs2_uimm            "O\tD,F,u"
 #define rv_fmt_vd_vs2_uimm_vm         "O\tD,F,um"
 #define rv_fmt_vd_vs1_vs2_vm          "O\tD,E,Fm"
 #define rv_fmt_vd_rs1_vs2_vm          "O\tD,1,Fm"
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index ecccd5d3fc..78550c07bf 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -413,6 +413,18 @@ Specifying the iSCSI password in plain text on the command line using the
 used instead, to refer to a ``--object secret...`` instance that provides
 a password via a file, or encrypted.
 
+CPU device properties
+'''''''''''''''''''''
+
+``pmu-num=n`` on RISC-V CPUs (since 8.2)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In order to support more flexible counter configurations this has been replaced
+by a ``pmu-mask`` property. If the set of counters is contiguous then the mask
+can be calculated with ``((1 << n) - 1) << 3``. The least significant three
+bits must be left clear.
+
+
 Backwards compatibility
 -----------------------
 
diff --git a/docs/devel/index-api.rst b/docs/devel/index-api.rst
index 539ad29c21..fe01b2b488 100644
--- a/docs/devel/index-api.rst
+++ b/docs/devel/index-api.rst
@@ -11,6 +11,7 @@ generated from in-code annotations to function prototypes.
    loads-stores
    memory
    modules
+   pci
    qom-api
    qdev-api
    ui
diff --git a/docs/devel/pci.rst b/docs/devel/pci.rst
new file mode 100644
index 0000000000..68739334f3
--- /dev/null
+++ b/docs/devel/pci.rst
@@ -0,0 +1,8 @@
+=============
+PCI subsystem
+=============
+
+API Reference
+-------------
+
+.. kernel-doc:: include/hw/pci/pci.h
diff --git a/docs/system/arm/vexpress.rst b/docs/system/arm/vexpress.rst
index 3e3839e923..38f29c73e7 100644
--- a/docs/system/arm/vexpress.rst
+++ b/docs/system/arm/vexpress.rst
@@ -58,6 +58,9 @@ Other differences between the hardware and the QEMU model:
   ``vexpress-a15``, and have IRQs from 40 upwards. If a dtb is
   provided on the command line then QEMU will edit it to include
   suitable entries describing these transports for the guest.
+- QEMU does not currently support either dynamic or static remapping
+  of the area of memory at address 0: it is always mapped to alias
+  the first flash bank.
 
 Booting a Linux kernel
 ----------------------
diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst
index f06765e88c..81898768ba 100644
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -15,46 +15,24 @@ Setup
 -----
 
 Xen mode is enabled by setting the ``xen-version`` property of the KVM
-accelerator, for example for Xen 4.10:
+accelerator, for example for Xen 4.17:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split
+  |qemu_system| --accel kvm,xen-version=0x40011,kernel-irqchip=split
 
 Additionally, virtual APIC support can be advertised to the guest through the
 ``xen-vapic`` CPU flag:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split --cpu host,+xen_vapic
+  |qemu_system| --accel kvm,xen-version=0x40011,kernel-irqchip=split --cpu host,+xen-vapic
 
 When Xen support is enabled, QEMU changes hypervisor identification (CPUID
 0x40000000..0x4000000A) to Xen. The KVM identification and features are not
 advertised to a Xen guest. If Hyper-V is also enabled, the Xen identification
 moves to leaves 0x40000100..0x4000010A.
 
-The Xen platform device is enabled automatically for a Xen guest. This allows
-a guest to unplug all emulated devices, in order to use Xen PV block and network
-drivers instead. Under Xen, the boot disk is typically available both via IDE
-emulation, and as a PV block device. Guest bootloaders typically use IDE to load
-the guest kernel, which then unplugs the IDE and continues with the Xen PV block
-device.
-
-This configuration can be achieved as follows
-
-.. parsed-literal::
-
-  |qemu_system| -M pc --accel kvm,xen-version=0x4000a,kernel-irqchip=split \\
-       -drive file=${GUEST_IMAGE},if=none,id=disk,file.locking=off -device xen-disk,drive=disk,vdev=xvda \\
-       -drive file=${GUEST_IMAGE},index=2,media=disk,file.locking=off,if=ide
-
-It is necessary to use the pc machine type, as the q35 machine uses AHCI instead
-of legacy IDE, and AHCI disks are not unplugged through the Xen PV unplug
-mechanism.
-
-VirtIO devices can also be used; Linux guests may need to be dissuaded from
-umplugging them by adding 'xen_emul_unplug=never' on their command line.
-
 Properties
 ----------
 
@@ -63,7 +41,10 @@ The following properties exist on the KVM accelerator object:
 ``xen-version``
   This property contains the Xen version in ``XENVER_version`` form, with the
   major version in the top 16 bits and the minor version in the low 16 bits.
-  Setting this property enables the Xen guest support.
+  Setting this property enables the Xen guest support. If Xen version 4.5 or
+  greater is specified, the HVM leaf in Xen CPUID is populated. Xen version
+  4.6 enables the vCPU ID in CPUID, and version 4.17 advertises vCPU upcall
+  vector support to the guest.
 
 ``xen-evtchn-max-pirq``
   Xen PIRQs represent an emulated physical interrupt, either GSI or MSI, which
@@ -83,8 +64,78 @@ The following properties exist on the KVM accelerator object:
   through simultaneous grants. For guests with large numbers of PV devices and
   high throughput, it may be desirable to increase this value.
 
-OS requirements
----------------
+Xen paravirtual devices
+-----------------------
+
+The Xen PCI platform device is enabled automatically for a Xen guest. This
+allows a guest to unplug all emulated devices, in order to use paravirtual
+block and network drivers instead.
+
+Those paravirtual Xen block, network (and console) devices can be created
+through the command line, and/or hot-plugged.
+
+To provide a Xen console device, define a character device and then a device
+of type ``xen-console`` to connect to it. For the Xen console equivalent of
+the handy ``-serial mon:stdio`` option, for example:
+
+.. parsed-literal::
+   -chardev stdio,mux=on,id=char0,signal=off -mon char0 \\
+   -device xen-console,chardev=char0
+
+The Xen network device is ``xen-net-device``, which becomes the default NIC
+model for emulated Xen guests, meaning that just the default NIC provided
+by QEMU should automatically work and present a Xen network device to the
+guest.
+
+Disks can be configured with '``-drive file=${GUEST_IMAGE},if=xen``' and will
+appear to the guest as ``xvda`` onwards.
+
+Under Xen, the boot disk is typically available both via IDE emulation, and
+as a PV block device. Guest bootloaders typically use IDE to load the guest
+kernel, which then unplugs the IDE and continues with the Xen PV block device.
+
+This configuration can be achieved as follows:
+
+.. parsed-literal::
+
+  |qemu_system| --accel kvm,xen-version=0x40011,kernel-irqchip=split \\
+       -drive file=${GUEST_IMAGE},if=xen \\
+       -drive file=${GUEST_IMAGE},file.locking=off,if=ide
+
+VirtIO devices can also be used; Linux guests may need to be dissuaded from
+unplugging them by adding '``xen_emul_unplug=never``' on their command line.
+
+Booting Xen PV guests
+---------------------
+
+Booting PV guest kernels is possible by using the Xen PV shim (a version of Xen
+itself, designed to run inside a Xen HVM guest and provide memory management
+services for one guest alone).
+
+The Xen binary is provided as the ``-kernel`` and the guest kernel itself (or
+PV Grub image) as the ``-initrd`` image, which actually just means the first
+multiboot "module". For example:
+
+.. parsed-literal::
+
+  |qemu_system| --accel kvm,xen-version=0x40011,kernel-irqchip=split \\
+       -chardev stdio,id=char0 -device xen-console,chardev=char0 \\
+       -display none  -m 1G  -kernel xen -initrd bzImage \\
+       -append "pv-shim console=xen,pv -- console=hvc0 root=/dev/xvda1" \\
+       -drive file=${GUEST_IMAGE},if=xen
+
+The Xen image must be built with the ``CONFIG_XEN_GUEST`` and ``CONFIG_PV_SHIM``
+options, and as of Xen 4.17, Xen's PV shim mode does not support using a serial
+port; it must have a Xen console or it will panic.
+
+The example above provides the guest kernel command line after a separator
+(" ``--`` ") on the Xen command line, and does not provide the guest kernel
+with an actual initramfs, which would need to be listed as a second multiboot
+module. For more complicated alternatives, see the command line
+documentation for the ``-initrd`` option.
+
+Host OS requirements
+--------------------
 
 The minimal Xen support in the KVM accelerator requires the host to be running
 Linux v5.12 or newer. Later versions add optimisations: Linux v5.17 added
diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
index f9a2eac544..f5fa7b8b29 100644
--- a/docs/system/riscv/virt.rst
+++ b/docs/system/riscv/virt.rst
@@ -12,7 +12,7 @@ Supported devices
 
 The ``virt`` machine supports the following devices:
 
-* Up to 8 generic RV32GC/RV64GC cores, with optional extensions
+* Up to 512 generic RV32GC/RV64GC cores, with optional extensions
 * Core Local Interruptor (CLINT)
 * Platform-Level Interrupt Controller (PLIC)
 * CFI parallel NOR flash memory
diff --git a/host/include/loongarch64/host/atomic128-ldst.h b/host/include/loongarch64/host/atomic128-ldst.h
new file mode 100644
index 0000000000..9a4a8f8b9e
--- /dev/null
+++ b/host/include/loongarch64/host/atomic128-ldst.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, LoongArch version.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef LOONGARCH_ATOMIC128_LDST_H
+#define LOONGARCH_ATOMIC128_LDST_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+#define HAVE_ATOMIC128_RO  likely(cpuinfo & CPUINFO_LSX)
+#define HAVE_ATOMIC128_RW  HAVE_ATOMIC128_RO
+
+/*
+ * As of gcc 13 and clang 16, there is no compiler support for LSX at all.
+ * Use inline assembly throughout.
+ */
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    uint64_t l, h;
+    /* One vld fetches all 16 bytes; each 64-bit half then moves to a GPR. */
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+	: "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+    /* NOTE(review): "f0" presumably stands for $vr0 as clobber; confirm. */
+    return int128_make128(l, h);
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    return atomic16_read_ro(ptr); /* same load as the read-only variant */
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l = int128_getlo(val), h = int128_gethi(val);
+    /* Insert both halves into $vr0, then write them out with a single vst. */
+    tcg_debug_assert(HAVE_ATOMIC128_RW);
+    asm("vinsgr2vr.d $vr0, %1, 0\n\t"
+        "vinsgr2vr.d $vr0, %2, 1\n\t"
+        "vst $vr0, %3, 0"
+	: "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+}
+
+#endif /* LOONGARCH_ATOMIC128_LDST_H */
diff --git a/host/include/loongarch64/host/cpuinfo.h b/host/include/loongarch64/host/cpuinfo.h
new file mode 100644
index 0000000000..fab664a10b
--- /dev/null
+++ b/host/include/loongarch64/host/cpuinfo.h
@@ -0,0 +1,21 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for LoongArch
+ */
+
+#ifndef HOST_CPUINFO_H
+#define HOST_CPUINFO_H
+
+#define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_LSX             (1u << 1)
+
+/* Initialized with a constructor. */
+extern unsigned cpuinfo;
+
+/*
+ * We cannot rely on constructor ordering, so other constructors must
+ * use the function interface rather than the variable above.
+ */
+unsigned cpuinfo_init(void);
+
+#endif /* HOST_CPUINFO_H */
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h b/host/include/loongarch64/host/load-extract-al16-al8.h
new file mode 100644
index 0000000000..d1fb59d8af
--- /dev/null
+++ b/host/include/loongarch64/host/load-extract-al16-al8.h
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, LoongArch version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0.  If [pv, pv+s-1] does not
+ * cross a 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    Int128 *ptr_align = (Int128 *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+    /* Read 16 bytes from the 8-byte-aligned address at or below @pv. */
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+	: "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+    /* Drop the bytes below @pv; assumes shr != 0, else all of h is ORed in. */
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/loongarch64/host/store-insert-al16.h b/host/include/loongarch64/host/store-insert-al16.h
new file mode 100644
index 0000000000..919fd8d744
--- /dev/null
+++ b/host/include/loongarch64/host/store-insert-al16.h
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, LoongArch version.
+ */
+
+#ifndef LOONGARCH_STORE_INSERT_AL16_H
+#define LOONGARCH_STORE_INSERT_AL16_H
+
+void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+    QEMU_ERROR("unsupported atomic");
+
+#endif /* LOONGARCH_STORE_INSERT_AL16_H */
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 49a80550c5..e8711ae16a 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -738,6 +738,10 @@ static AddressSpace *typhoon_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &s->pchip.iommu_as;
 }
 
+static const PCIIOMMUOps typhoon_iommu_ops = {
+    .get_address_space = typhoon_pci_dma_iommu,
+};
+
 static void typhoon_set_irq(void *opaque, int irq, int level)
 {
     TyphoonState *s = opaque;
@@ -897,7 +901,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, qemu_irq *p_isa_irq,
                              "iommu-typhoon", UINT64_MAX);
     address_space_init(&s->pchip.iommu_as, MEMORY_REGION(&s->pchip.iommu),
                        "pchip0-pci");
-    pci_setup_iommu(b, typhoon_pci_dma_iommu, s);
+    pci_setup_iommu(b, &typhoon_iommu_ops, s);
 
     /* Pchip0 PCI special/interrupt acknowledge, 0x801.F800.0000, 64MB.  */
     memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), &alpha_pci_iack_ops,
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index f35ae9aa22..9a8ac45431 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -605,6 +605,10 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn)
     return &sdev->as;
 }
 
+static const PCIIOMMUOps smmu_ops = {
+    .get_address_space = smmu_find_add_as,
+};
+
 IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
 {
     uint8_t bus_n, devfn;
@@ -661,7 +665,7 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
     s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL);
 
     if (s->primary_bus) {
-        pci_setup_iommu(s->primary_bus, smmu_find_add_as, s);
+        pci_setup_iommu(s->primary_bus, &smmu_ops, s);
     } else {
         error_setg(errp, "SMMU is not attached to any PCI bus!");
     }
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index 8ff37f52ca..c08ea34e92 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -177,7 +177,6 @@ struct VexpressMachineState {
     MemoryRegion vram;
     MemoryRegion sram;
     MemoryRegion flashalias;
-    MemoryRegion lowram;
     MemoryRegion a15sram;
     bool secure;
     bool virt;
@@ -276,7 +275,6 @@ static void a9_daughterboard_init(VexpressMachineState *vms,
 {
     MachineState *machine = MACHINE(vms);
     MemoryRegion *sysmem = get_system_memory();
-    ram_addr_t low_ram_size;
 
     if (ram_size > 0x40000000) {
         /* 1GB is the maximum the address space permits */
@@ -284,17 +282,11 @@ static void a9_daughterboard_init(VexpressMachineState *vms,
         exit(1);
     }
 
-    low_ram_size = ram_size;
-    if (low_ram_size > 0x4000000) {
-        low_ram_size = 0x4000000;
-    }
-    /* RAM is from 0x60000000 upwards. The bottom 64MB of the
+    /*
+     * RAM is from 0x60000000 upwards. The bottom 64MB of the
      * address space should in theory be remappable to various
-     * things including ROM or RAM; we always map the RAM there.
+     * things including ROM or RAM; we always map the flash there.
      */
-    memory_region_init_alias(&vms->lowram, NULL, "vexpress.lowmem",
-                             machine->ram, 0, low_ram_size);
-    memory_region_add_subregion(sysmem, 0x0, &vms->lowram);
     memory_region_add_subregion(sysmem, 0x60000000, machine->ram);
 
     /* 0x1e000000 A9MPCore (SCU) private memory region */
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 9ce136cd88..8bc35a483c 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -482,7 +482,7 @@ build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     build_append_int_noprefix(table_data, 3, 1); /* ARM PL011 UART */
     build_append_int_noprefix(table_data, 0, 3); /* Reserved */
     /* Base Address */
-    build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 8, 0, 1,
+    build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 32, 0, 3,
                      vms->memmap[VIRT_UART].base);
     /* Interrupt Type */
     build_append_int_noprefix(table_data,
@@ -673,7 +673,7 @@ build_dbg2(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     build_append_int_noprefix(table_data, 34, 2);
 
     /* BaseAddressRegister[] */
-    build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 8, 0, 1,
+    build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 32, 0, 3,
                      vms->memmap[VIRT_UART].base);
 
     /* AddressSize[] */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 92085d2d8f..0a16ab3095 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -631,7 +631,8 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms)
         qemu_fdt_setprop(ms->fdt, "/pmu", "compatible",
                          compat, sizeof(compat));
         qemu_fdt_setprop_cells(ms->fdt, "/pmu", "interrupts",
-                               GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, irqflags);
+                               GIC_FDT_IRQ_TYPE_PPI,
+                               INTID_TO_PPI(VIRTUAL_PMU_IRQ), irqflags);
     }
 }
 
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index a07cd7eb5d..6d64ede94f 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -27,13 +27,119 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "dataplane/xen-block.h"
+#include "hw/xen/interface/io/xs_wire.h"
 #include "trace.h"
 
+#define XVDA_MAJOR 202
+#define XVDQ_MAJOR (1 << 20)
+#define XVDBGQCV_MAJOR ((1 << 21) - 1)
+#define HDA_MAJOR 3
+#define HDC_MAJOR 22
+#define SDA_MAJOR 8
+
+
+static int vdev_to_diskno(unsigned int vdev_nr)
+{
+    /* Map a Xen vdev number to its disk index; -1 for an unknown major. */
+    switch (vdev_nr >> 8) {
+    case XVDA_MAJOR:
+    case SDA_MAJOR:
+        return (vdev_nr >> 4) & 0xf; /* 4-bit disk index in bits 7:4 */
+    /* IDE majors: bit 6 picks the disk; HDC_MAJOR holds disks 2 and 3. */
+    case HDA_MAJOR:
+        return (vdev_nr >> 6) & 1;
+    case HDC_MAJOR:
+        return ((vdev_nr >> 6) & 1) + 2;
+    /* Extended xvd encoding: 20-bit disk index above 8 partition bits. */
+    case XVDQ_MAJOR ... XVDBGQCV_MAJOR:
+        return (vdev_nr >> 8) & 0xfffff;
+
+    default:
+        return -1;
+    }
+}
+
+#define MAX_AUTO_VDEV 4096
+
+/*
+ * Find a free device name in the xvda → xvdfan range and set it in
+ * blockdev->props.vdev. Our definition of "free" is that there must
+ * be no other disk or partition with the same disk number.
+ *
+ * You are technically permitted to have all of hda, hda1, sda, sda1,
+ * xvda and xvda1 as *separate* PV block devices with separate backing
+ * stores. That doesn't make it a good idea. This code will skip xvda
+ * if *any* of those "conflicting" devices already exists.
+ *
+ * The limit of xvdfan (disk 4095) is fairly arbitrary just to avoid a
+ * stupidly sized bitmap, but Linux as of v6.6 doesn't support anything
+ * higher than that anyway.
+ */
+static bool xen_block_find_free_vdev(XenBlockDevice *blockdev, Error **errp)
+{
+    XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(blockdev)));
+    unsigned long used_devs[BITS_TO_LONGS(MAX_AUTO_VDEV)];
+    XenBlockVdev *vdev = &blockdev->props.vdev;
+    char fe_path[XENSTORE_ABS_PATH_MAX + 1];
+    char **existing_frontends;
+    unsigned int nr_existing = 0;
+    unsigned int vdev_nr;
+    int i, disk = 0;
+    /* Xenstore directory holding the frontend domain's vbd entries. */
+    snprintf(fe_path, sizeof(fe_path), "/local/domain/%u/device/vbd",
+             blockdev->xendev.frontend_id);
+
+    existing_frontends = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, fe_path,
+                                               &nr_existing);
+    if (!existing_frontends && errno != ENOENT) {
+        error_setg_errno(errp, errno, "cannot read %s", fe_path);
+        return false;
+    }
+    /* A missing directory (ENOENT) is tolerated rather than reported. */
+    memset(used_devs, 0, sizeof(used_devs));
+    for (i = 0; i < nr_existing; i++) {
+        if (qemu_strtoui(existing_frontends[i], NULL, 10, &vdev_nr)) {
+            free(existing_frontends[i]);
+            continue; /* entry did not parse as a base-10 number: ignore it */
+        }
+
+        free(existing_frontends[i]);
+
+        disk = vdev_to_diskno(vdev_nr);
+        if (disk < 0 || disk >= MAX_AUTO_VDEV) {
+            continue;
+        }
+
+        set_bit(disk, used_devs);
+    }
+    free(existing_frontends);
+    /* Pick the lowest disk number that no listed frontend maps to. */
+    disk = find_first_zero_bit(used_devs, MAX_AUTO_VDEV);
+    if (disk == MAX_AUTO_VDEV) {
+        error_setg(errp, "cannot find device vdev for block device");
+        return false;
+    }
+    /* Disks below 16 use the XVDA_MAJOR encoding, the rest XVDQ_MAJOR. */
+    vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
+    vdev->partition = 0;
+    vdev->disk = disk;
+    if (disk < (1 << 4)) {
+        vdev->number = (XVDA_MAJOR << 8) | (disk << 4);
+    } else {
+        vdev->number = (XVDQ_MAJOR << 8) | (disk << 8);
+    }
+    return true;
+}
+
 static char *xen_block_get_name(XenDevice *xendev, Error **errp)
 {
     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
     XenBlockVdev *vdev = &blockdev->props.vdev;
 
+    if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID &&
+        !xen_block_find_free_vdev(blockdev, errp)) {
+        return NULL;
+    }
     return g_strdup_printf("%lu", vdev->number);
 }
 
@@ -115,9 +221,13 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
         return;
     }
 
-    if (xen_device_frontend_scanf(xendev, "protocol", "%ms",
-                                  &str) != 1) {
-        protocol = BLKIF_PROTOCOL_NATIVE;
+    if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) {
+        /* x86 defaults to the 32-bit protocol even for 64-bit guests. */
+        if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) {
+            protocol = BLKIF_PROTOCOL_X86_32;
+        } else {
+            protocol = BLKIF_PROTOCOL_NATIVE;
+        }
     } else {
         if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) {
             protocol = BLKIF_PROTOCOL_X86_32;
@@ -478,10 +588,10 @@ static void xen_block_set_vdev(Object *obj, Visitor *v, const char *name,
     case XEN_BLOCK_VDEV_TYPE_DP:
     case XEN_BLOCK_VDEV_TYPE_XVD:
         if (vdev->disk < (1 << 4) && vdev->partition < (1 << 4)) {
-            vdev->number = (202 << 8) | (vdev->disk << 4) |
+            vdev->number = (XVDA_MAJOR << 8) | (vdev->disk << 4) |
                 vdev->partition;
         } else if (vdev->disk < (1 << 20) && vdev->partition < (1 << 8)) {
-            vdev->number = (1 << 28) | (vdev->disk << 8) |
+            vdev->number = (XVDQ_MAJOR << 8) | (vdev->disk << 8) |
                 vdev->partition;
         } else {
             goto invalid;
@@ -491,10 +601,11 @@ static void xen_block_set_vdev(Object *obj, Visitor *v, const char *name,
     case XEN_BLOCK_VDEV_TYPE_HD:
         if ((vdev->disk == 0 || vdev->disk == 1) &&
             vdev->partition < (1 << 6)) {
-            vdev->number = (3 << 8) | (vdev->disk << 6) | vdev->partition;
+            vdev->number = (HDA_MAJOR << 8) | (vdev->disk << 6) |
+                vdev->partition;
         } else if ((vdev->disk == 2 || vdev->disk == 3) &&
                    vdev->partition < (1 << 6)) {
-            vdev->number = (22 << 8) | ((vdev->disk - 2) << 6) |
+            vdev->number = (HDC_MAJOR << 8) | ((vdev->disk - 2) << 6) |
                 vdev->partition;
         } else {
             goto invalid;
@@ -503,7 +614,8 @@ static void xen_block_set_vdev(Object *obj, Visitor *v, const char *name,
 
     case XEN_BLOCK_VDEV_TYPE_SD:
         if (vdev->disk < (1 << 4) && vdev->partition < (1 << 4)) {
-            vdev->number = (8 << 8) | (vdev->disk << 4) | vdev->partition;
+            vdev->number = (SDA_MAJOR << 8) | (vdev->disk << 4) |
+                vdev->partition;
         } else {
             goto invalid;
         }
diff --git a/hw/char/trace-events b/hw/char/trace-events
index babf4d35ea..7a398c82a5 100644
--- a/hw/char/trace-events
+++ b/hw/char/trace-events
@@ -105,3 +105,11 @@ cadence_uart_baudrate(unsigned baudrate) "baudrate %u"
 # sh_serial.c
 sh_serial_read(char *id, unsigned size, uint64_t offs, uint64_t val) " %s size %d offs 0x%02" PRIx64 " -> 0x%02" PRIx64
 sh_serial_write(char *id, unsigned size, uint64_t offs, uint64_t val) "%s size %d offs 0x%02" PRIx64 " <- 0x%02" PRIx64
+
+# xen_console.c
+xen_console_connect(unsigned int idx, unsigned int ring_ref, unsigned int port, unsigned int limit) "idx %u ring_ref %u port %u limit %u"
+xen_console_disconnect(unsigned int idx) "idx %u"
+xen_console_unrealize(unsigned int idx) "idx %u"
+xen_console_realize(unsigned int idx, const char *chrdev) "idx %u chrdev %s"
+xen_console_device_create(unsigned int idx) "idx %u"
+xen_console_device_destroy(unsigned int idx) "idx %u"
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 810dae3f44..5cbee2f184 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -20,15 +20,22 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include <sys/select.h>
 #include <termios.h>
 
 #include "qapi/error.h"
 #include "sysemu/sysemu.h"
 #include "chardev/char-fe.h"
-#include "hw/xen/xen-legacy-backend.h"
-
+#include "hw/xen/xen-backend.h"
+#include "hw/xen/xen-bus-helper.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "hw/xen/interface/io/console.h"
+#include "hw/xen/interface/io/xs_wire.h"
+#include "hw/xen/interface/grant_table.h"
+#include "hw/i386/kvm/xen_primary_console.h"
+#include "trace.h"
 
 struct buffer {
     uint8_t *data;
@@ -39,16 +46,22 @@ struct buffer {
 };
 
 struct XenConsole {
-    struct XenLegacyDevice  xendev;  /* must be first */
+    struct XenDevice  xendev;  /* must be first */
+    XenEventChannel   *event_channel;
+    int               dev;
     struct buffer     buffer;
-    char              console[XEN_BUFSIZE];
-    int               ring_ref;
+    char              *fe_path;
+    unsigned int      ring_ref;
     void              *sring;
     CharBackend       chr;
     int               backlog;
 };
+typedef struct XenConsole XenConsole;
+
+#define TYPE_XEN_CONSOLE_DEVICE "xen-console"
+OBJECT_DECLARE_SIMPLE_TYPE(XenConsole, XEN_CONSOLE_DEVICE)
 
-static void buffer_append(struct XenConsole *con)
+static bool buffer_append(XenConsole *con)
 {
     struct buffer *buffer = &con->buffer;
     XENCONS_RING_IDX cons, prod, size;
@@ -60,7 +73,7 @@ static void buffer_append(struct XenConsole *con)
 
     size = prod - cons;
     if ((size == 0) || (size > sizeof(intf->out)))
-        return;
+        return false;
 
     if ((buffer->capacity - buffer->size) < size) {
         buffer->capacity += (size + 1024);
@@ -73,7 +86,7 @@ static void buffer_append(struct XenConsole *con)
 
     xen_mb();
     intf->out_cons = cons;
-    xen_pv_send_notify(&con->xendev);
+    xen_device_notify_event_channel(XEN_DEVICE(con), con->event_channel, NULL);
 
     if (buffer->max_capacity &&
         buffer->size > buffer->max_capacity) {
@@ -89,6 +102,7 @@ static void buffer_append(struct XenConsole *con)
         if (buffer->consumed > buffer->max_capacity - over)
             buffer->consumed = buffer->max_capacity - over;
     }
+    return true;
 }
 
 static void buffer_advance(struct buffer *buffer, size_t len)
@@ -100,7 +114,7 @@ static void buffer_advance(struct buffer *buffer, size_t len)
     }
 }
 
-static int ring_free_bytes(struct XenConsole *con)
+static int ring_free_bytes(XenConsole *con)
 {
     struct xencons_interface *intf = con->sring;
     XENCONS_RING_IDX cons, prod, space;
@@ -118,13 +132,13 @@ static int ring_free_bytes(struct XenConsole *con)
 
 static int xencons_can_receive(void *opaque)
 {
-    struct XenConsole *con = opaque;
+    XenConsole *con = opaque;
     return ring_free_bytes(con);
 }
 
 static void xencons_receive(void *opaque, const uint8_t *buf, int len)
 {
-    struct XenConsole *con = opaque;
+    XenConsole *con = opaque;
     struct xencons_interface *intf = con->sring;
     XENCONS_RING_IDX prod;
     int i, max;
@@ -141,10 +155,10 @@ static void xencons_receive(void *opaque, const uint8_t *buf, int len)
     }
     xen_wmb();
     intf->in_prod = prod;
-    xen_pv_send_notify(&con->xendev);
+    xen_device_notify_event_channel(XEN_DEVICE(con), con->event_channel, NULL);
 }
 
-static void xencons_send(struct XenConsole *con)
+static bool xencons_send(XenConsole *con)
 {
     ssize_t len, size;
 
@@ -159,174 +173,472 @@ static void xencons_send(struct XenConsole *con)
     if (len < 1) {
         if (!con->backlog) {
             con->backlog = 1;
-            xen_pv_printf(&con->xendev, 1,
-                          "backlog piling up, nobody listening?\n");
         }
     } else {
         buffer_advance(&con->buffer, len);
         if (con->backlog && len == size) {
             con->backlog = 0;
-            xen_pv_printf(&con->xendev, 1, "backlog is gone\n");
         }
     }
+    return len > 0;
 }
 
 /* -------------------------------------------------------------------- */
 
-static int store_con_info(struct XenConsole *con)
+static bool con_event(void *_xendev)
 {
-    Chardev *cs = qemu_chr_fe_get_driver(&con->chr);
-    char *pts = NULL;
-    char *dom_path;
-    g_autoptr(GString) path = NULL;
+    XenConsole *con = XEN_CONSOLE_DEVICE(_xendev);
+    bool done_something;
 
-    /* Only continue if we're talking to a pty. */
-    if (!CHARDEV_IS_PTY(cs)) {
-        return 0;
+    if (xen_device_backend_get_state(&con->xendev) != XenbusStateConnected) {
+        return false;
     }
-    pts = cs->filename + 4;
 
-    dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
-    if (!dom_path) {
-        return 0;
+    done_something = buffer_append(con);
+
+    if (con->buffer.size - con->buffer.consumed) {
+        done_something |= xencons_send(con);
     }
+    return done_something;
+}
 
-    path = g_string_new(dom_path);
-    free(dom_path);
+/* -------------------------------------------------------------------- */
 
-    if (con->xendev.dev) {
-        g_string_append_printf(path, "/device/console/%d", con->xendev.dev);
-    } else {
-        g_string_append(path, "/console");
+static bool xen_console_connect(XenDevice *xendev, Error **errp)
+{
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
+    unsigned int port, limit;
+
+    if (xen_device_frontend_scanf(xendev, "ring-ref", "%u",
+                                  &con->ring_ref) != 1) {
+        error_setg(errp, "failed to read ring-ref");
+        return false;
+    }
+
+    if (xen_device_frontend_scanf(xendev, "port", "%u", &port) != 1) {
+        error_setg(errp, "failed to read remote port");
+        return false;
+    }
+
+    if (xen_device_frontend_scanf(xendev, "limit", "%u", &limit) == 1) {
+        con->buffer.max_capacity = limit;
+    }
+
+    con->event_channel = xen_device_bind_event_channel(xendev, port,
+                                                       con_event,
+                                                       con,
+                                                       errp);
+    if (!con->event_channel) {
+        return false;
     }
-    g_string_append(path, "/tty");
 
-    if (xenstore_write_str(con->console, path->str, pts)) {
-        fprintf(stderr, "xenstore_write_str for '%s' fail", path->str);
-        return -1;
+    switch (con->dev) {
+    case 0:
+        /*
+         * The primary console is special. For real Xen the ring-ref is
+         * actually a GFN which needs to be mapped as foreignmem.
+         */
+        if (xen_mode != XEN_EMULATE) {
+            xen_pfn_t mfn = (xen_pfn_t)con->ring_ref;
+            con->sring = qemu_xen_foreignmem_map(xendev->frontend_id, NULL,
+                                                 PROT_READ | PROT_WRITE,
+                                                 1, &mfn, NULL);
+            if (!con->sring) {
+                error_setg(errp, "failed to map console page");
+                return false;
+            }
+            break;
+        }
+
+        /*
+         * For Xen emulation, we still follow the convention of ring-ref
+         * holding the GFN, but we map the fixed GNTTAB_RESERVED_CONSOLE
+         * grant ref because there is no implementation of foreignmem
+         * operations for emulated mode. The emulation code which handles
+         * the guest-side page and event channel also needs to be informed
+         * of the backend event channel port, in order to reconnect to it
+         * after a soft reset.
+         */
+        xen_primary_console_set_be_port(
+            xen_event_channel_get_local_port(con->event_channel));
+        con->ring_ref = GNTTAB_RESERVED_CONSOLE;
+        /* fallthrough */
+    default:
+        con->sring = xen_device_map_grant_refs(xendev,
+                                               &con->ring_ref, 1,
+                                               PROT_READ | PROT_WRITE,
+                                               errp);
+        if (!con->sring) {
+            error_prepend(errp, "failed to map console grant ref: ");
+            return false;
+        }
+        break;
     }
-    return 0;
+
+    trace_xen_console_connect(con->dev, con->ring_ref, port,
+                              con->buffer.max_capacity);
+
+    qemu_chr_fe_set_handlers(&con->chr, xencons_can_receive,
+                             xencons_receive, NULL, NULL, con, NULL,
+                             true);
+    return true;
 }
 
-static int con_init(struct XenLegacyDevice *xendev)
+static void xen_console_disconnect(XenDevice *xendev, Error **errp)
 {
-    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
-    char *type, *dom, label[32];
-    int ret = 0;
-    const char *output;
-
-    /* setup */
-    dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom);
-    if (!xendev->dev) {
-        snprintf(con->console, sizeof(con->console), "%s/console", dom);
-    } else {
-        snprintf(con->console, sizeof(con->console), "%s/device/console/%d", dom, xendev->dev);
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
+
+    trace_xen_console_disconnect(con->dev);
+
+    qemu_chr_fe_set_handlers(&con->chr, NULL, NULL, NULL, NULL,
+                             con, NULL, true);
+
+    if (con->event_channel) {
+        xen_device_unbind_event_channel(xendev, con->event_channel,
+                                        errp);
+        con->event_channel = NULL;
+
+        if (xen_mode == XEN_EMULATE && !con->dev) {
+            xen_primary_console_set_be_port(0);
+        }
     }
-    free(dom);
 
-    type = xenstore_read_str(con->console, "type");
-    if (!type || strcmp(type, "ioemu") != 0) {
-        xen_pv_printf(xendev, 1, "not for me (type=%s)\n", type);
-        ret = -1;
-        goto out;
+    if (con->sring) {
+        if (!con->dev && xen_mode != XEN_EMULATE) {
+            qemu_xen_foreignmem_unmap(con->sring, 1);
+        } else {
+            xen_device_unmap_grant_refs(xendev, con->sring,
+                                        &con->ring_ref, 1, errp);
+        }
+        con->sring = NULL;
     }
+}
+
+static void xen_console_frontend_changed(XenDevice *xendev,
+                                         enum xenbus_state frontend_state,
+                                         Error **errp)
+{
+    ERRP_GUARD();
+    enum xenbus_state backend_state = xen_device_backend_get_state(xendev);
+
+    switch (frontend_state) {
+    case XenbusStateInitialised:
+    case XenbusStateConnected:
+        if (backend_state == XenbusStateConnected) {
+            break;
+        }
 
-    output = xenstore_read_str(con->console, "output");
+        xen_console_disconnect(xendev, errp);
+        if (*errp) {
+            break;
+        }
 
-    /* no Xen override, use qemu output device */
-    if (output == NULL) {
-        if (con->xendev.dev) {
-            qemu_chr_fe_init(&con->chr, serial_hd(con->xendev.dev),
-                             &error_abort);
+        if (!xen_console_connect(xendev, errp)) {
+            xen_device_backend_set_state(xendev, XenbusStateClosing);
+            break;
         }
-    } else {
-        snprintf(label, sizeof(label), "xencons%d", con->xendev.dev);
-        qemu_chr_fe_init(&con->chr,
-                         /*
-                          * FIXME: sure we want to support implicit
-                          * muxed monitors here?
-                          */
-                         qemu_chr_new_mux_mon(label, output, NULL),
-                         &error_abort);
+
+        xen_device_backend_set_state(xendev, XenbusStateConnected);
+        break;
+
+    case XenbusStateClosing:
+        xen_device_backend_set_state(xendev, XenbusStateClosing);
+        break;
+
+    case XenbusStateClosed:
+    case XenbusStateUnknown:
+        xen_console_disconnect(xendev, errp);
+        if (*errp) {
+            break;
+        }
+
+        xen_device_backend_set_state(xendev, XenbusStateClosed);
+        break;
+
+    default:
+        break;
     }
+}
 
-    store_con_info(con);
+static char *xen_console_get_name(XenDevice *xendev, Error **errp)
+{
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
+
+    if (con->dev == -1) {
+        XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
+        char fe_path[XENSTORE_ABS_PATH_MAX + 1];
+        int idx = (xen_mode == XEN_EMULATE) ? 0 : 1;
+        char *value;
+
+        /* Theoretically we could go up to INT_MAX here but that's overkill */
+        while (idx < 100) {
+            if (!idx) {
+                snprintf(fe_path, sizeof(fe_path),
+                         "/local/domain/%u/console", xendev->frontend_id);
+            } else {
+                snprintf(fe_path, sizeof(fe_path),
+                         "/local/domain/%u/device/console/%u",
+                         xendev->frontend_id, idx);
+            }
+            value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL);
+            if (!value) {
+                if (errno == ENOENT) {
+                    con->dev = idx;
+                    goto found;
+                }
+                error_setg(errp, "cannot read %s: %s", fe_path,
+                           strerror(errno));
+                return NULL;
+            }
+            free(value);
+            idx++;
+        }
+        error_setg(errp, "cannot find device index for console device");
+        return NULL;
+    }
+ found:
+    return g_strdup_printf("%u", con->dev);
+}
 
-out:
-    g_free(type);
-    return ret;
+static void xen_console_unrealize(XenDevice *xendev)
+{
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
+
+    trace_xen_console_unrealize(con->dev);
+
+    /* Disconnect from the frontend in case this has not already happened */
+    xen_console_disconnect(xendev, NULL);
+
+    qemu_chr_fe_deinit(&con->chr, false);
 }
 
-static int con_initialise(struct XenLegacyDevice *xendev)
+static void xen_console_realize(XenDevice *xendev, Error **errp)
 {
-    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
-    int limit;
-
-    if (xenstore_read_int(con->console, "ring-ref", &con->ring_ref) == -1)
-        return -1;
-    if (xenstore_read_int(con->console, "port", &con->xendev.remote_port) == -1)
-        return -1;
-    if (xenstore_read_int(con->console, "limit", &limit) == 0)
-        con->buffer.max_capacity = limit;
+    ERRP_GUARD();
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
+    Chardev *cs = qemu_chr_fe_get_driver(&con->chr);
+    unsigned int u;
 
-    if (!xendev->dev) {
-        xen_pfn_t mfn = con->ring_ref;
-        con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL,
-                                             PROT_READ | PROT_WRITE,
-                                             1, &mfn, NULL);
+    if (!cs) {
+        error_setg(errp, "no backing character device");
+        return;
+    }
+
+    if (con->dev == -1) {
+        error_setg(errp, "no device index provided");
+        return;
+    }
+
+    /*
+     * The Xen primary console is special. The ring-ref is actually a GFN to
+     * be mapped directly as foreignmem (not a grant ref), and the guest port
+     * was allocated *for* the guest by the toolstack. The guest gets these
+     * through HVMOP_get_param and can use the console long before it's got
+     * XenStore up and running. We cannot create those for a true Xen guest,
+     * but we can for Xen emulation.
+     */
+    if (!con->dev) {
+        if (xen_mode == XEN_EMULATE) {
+            xen_primary_console_create();
+        } else if (xen_device_frontend_scanf(xendev, "ring-ref", "%u", &u)
+                   != 1 ||
+                   xen_device_frontend_scanf(xendev, "port", "%u", &u) != 1) {
+            error_setg(errp, "cannot create primary Xen console");
+            return;
+        }
+    }
+
+    trace_xen_console_realize(con->dev, object_get_typename(OBJECT(cs)));
+
+    if (CHARDEV_IS_PTY(cs)) {
+        /* Strip the leading 'pty:' */
+        xen_device_frontend_printf(xendev, "tty", "%s", cs->filename + 4);
+    }
+
+    /* No normal PV driver initialization for the primary console under Xen */
+    if (!con->dev && xen_mode != XEN_EMULATE) {
+        xen_console_connect(xendev, errp);
+    }
+}
+
+static char *console_frontend_path(struct qemu_xs_handle *xenstore,
+                                   unsigned int dom_id, unsigned int dev)
+{
+    if (!dev) {
+        return g_strdup_printf("/local/domain/%u/console", dom_id);
     } else {
-        con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
-                                          PROT_READ | PROT_WRITE);
+        return g_strdup_printf("/local/domain/%u/device/console/%u", dom_id,
+                               dev);
     }
-    if (!con->sring)
-        return -1;
+}
 
-    xen_be_bind_evtchn(&con->xendev);
-    qemu_chr_fe_set_handlers(&con->chr, xencons_can_receive,
-                             xencons_receive, NULL, NULL, con, NULL, true);
-
-    xen_pv_printf(xendev, 1,
-                  "ring mfn %d, remote port %d, local port %d, limit %zd\n",
-                  con->ring_ref,
-                  con->xendev.remote_port,
-                  con->xendev.local_port,
-                  con->buffer.max_capacity);
-    return 0;
+static char *xen_console_get_frontend_path(XenDevice *xendev, Error **errp)
+{
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
+    XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
+    char *ret = console_frontend_path(xenbus->xsh, xendev->frontend_id,
+                                      con->dev);
+
+    if (!ret) {
+        error_setg(errp, "failed to create frontend path");
+    }
+    return ret;
 }
 
-static void con_disconnect(struct XenLegacyDevice *xendev)
+
+static Property xen_console_properties[] = {
+    DEFINE_PROP_CHR("chardev", XenConsole, chr),
+    DEFINE_PROP_INT32("idx", XenConsole, dev, -1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xen_console_class_init(ObjectClass *class, void *data)
 {
-    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
+    DeviceClass *dev_class = DEVICE_CLASS(class);
+    XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
+
+    xendev_class->backend = "console";
+    xendev_class->device = "console";
+    xendev_class->get_name = xen_console_get_name;
+    xendev_class->realize = xen_console_realize;
+    xendev_class->frontend_changed = xen_console_frontend_changed;
+    xendev_class->unrealize = xen_console_unrealize;
+    xendev_class->get_frontend_path = xen_console_get_frontend_path;
+
+    device_class_set_props(dev_class, xen_console_properties);
+}
 
-    qemu_chr_fe_deinit(&con->chr, false);
-    xen_pv_unbind_evtchn(&con->xendev);
+static const TypeInfo xen_console_type_info = {
+    .name = TYPE_XEN_CONSOLE_DEVICE,
+    .parent = TYPE_XEN_DEVICE,
+    .instance_size = sizeof(XenConsole),
+    .class_init = xen_console_class_init,
+};
 
-    if (con->sring) {
-        if (!xendev->dev) {
-            qemu_xen_foreignmem_unmap(con->sring, 1);
-        } else {
-            xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
+static void xen_console_register_types(void)
+{
+    type_register_static(&xen_console_type_info);
+}
+
+type_init(xen_console_register_types)
+
+/* Called to instantiate a XenConsole when the backend is detected. */
+static void xen_console_device_create(XenBackendInstance *backend,
+                                      QDict *opts, Error **errp)
+{
+    ERRP_GUARD();
+    XenBus *xenbus = xen_backend_get_bus(backend);
+    const char *name = xen_backend_get_name(backend);
+    unsigned long number;
+    char *fe = NULL, *type = NULL, *output = NULL;
+    char label[32];
+    XenDevice *xendev = NULL;
+    XenConsole *con;
+    Chardev *cd = NULL;
+    struct qemu_xs_handle *xsh = xenbus->xsh;
+
+    if (qemu_strtoul(name, NULL, 10, &number) || number > INT_MAX) {
+        error_setg(errp, "failed to parse name '%s'", name);
+        goto fail;
+    }
+
+    trace_xen_console_device_create(number);
+
+    fe = console_frontend_path(xsh, xen_domid, number);
+    if (fe == NULL) {
+        error_setg(errp, "failed to generate frontend path");
+        goto fail;
+    }
+
+    if (xs_node_scanf(xsh, XBT_NULL, fe, "type", errp, "%ms", &type) != 1) {
+        error_prepend(errp, "failed to read console device type: ");
+        goto fail;
+    }
+
+    if (strcmp(type, "ioemu")) {
+        error_setg(errp, "declining to handle console type '%s'",
+                   type);
+        goto fail;
+    }
+
+    xendev = XEN_DEVICE(qdev_new(TYPE_XEN_CONSOLE_DEVICE));
+    con = XEN_CONSOLE_DEVICE(xendev);
+
+    con->dev = number;
+
+    snprintf(label, sizeof(label), "xencons%lu", number);
+
+    if (xs_node_scanf(xsh, XBT_NULL, fe, "output", NULL, "%ms", &output) == 1) {
+        /*
+         * FIXME: sure we want to support implicit
+         * muxed monitors here?
+         */
+        cd = qemu_chr_new_mux_mon(label, output, NULL);
+        if (!cd) {
+            error_setg(errp, "console: No valid chardev found at '%s'",
+                       output);
+            goto fail;
         }
-        con->sring = NULL;
+    } else if (number) {
+        cd = serial_hd(number);
+        if (!cd) {
+            /* Nothing has set *errp here, so set it rather than prepend. */
+            error_setg(errp, "console: No serial device #%lu found", number);
+            goto fail;
+        }
+    } else {
+        /* No 'output' node on primary console: use null. */
+        cd = qemu_chr_new(label, "null", NULL);
+        if (!cd) {
+            error_setg(errp, "console: failed to create null device");
+            goto fail;
+        }
+    }
+
+    if (!qemu_chr_fe_init(&con->chr, cd, errp)) {
+        error_prepend(errp, "console: failed to initialize backing chardev: ");
+        goto fail;
+    }
+
+    if (qdev_realize_and_unref(DEVICE(xendev), BUS(xenbus), errp)) {
+        xen_backend_set_device(backend, xendev);
+        goto done;
+    }
+
+    error_prepend(errp, "realization of console device %lu failed: ",
+                  number);
+
+ fail:
+    if (xendev) {
+        object_unparent(OBJECT(xendev));
     }
+ done:
+    g_free(fe);
+    free(type);
+    free(output);
 }
 
-static void con_event(struct XenLegacyDevice *xendev)
+static void xen_console_device_destroy(XenBackendInstance *backend,
+                                       Error **errp)
 {
-    struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
+    ERRP_GUARD();
+    XenDevice *xendev = xen_backend_get_device(backend);
+    XenConsole *con = XEN_CONSOLE_DEVICE(xendev);
 
-    buffer_append(con);
-    if (con->buffer.size - con->buffer.consumed)
-        xencons_send(con);
-}
+    trace_xen_console_device_destroy(con->dev);
 
-/* -------------------------------------------------------------------- */
+    object_unparent(OBJECT(xendev));
+}
 
-struct XenDevOps xen_console_ops = {
-    .size       = sizeof(struct XenConsole),
-    .flags      = DEVOPS_FLAG_IGNORE_STATE|DEVOPS_FLAG_NEED_GNTDEV,
-    .init       = con_init,
-    .initialise = con_initialise,
-    .event      = con_event,
-    .disconnect = con_disconnect,
+static const XenBackendInfo xen_console_backend_info  = {
+    .type = "console",
+    .create = xen_console_device_create,
+    .destroy = xen_console_device_destroy,
 };
+
+static void xen_console_register_backend(void)
+{
+    xen_backend_register(&xen_console_backend_info);
+}
+
+xen_backend_init(xen_console_register_backend);
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 4dd5a71fb7..b7bb44b7f7 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -558,7 +558,7 @@ static void zfree(void *x, void *addr)
 
 ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen)
 {
-    z_stream s;
+    z_stream s = {};
     ssize_t dstbytes;
     int r, i, flags;
 
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index 9a4b59c6f2..a6ff6a4875 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -253,6 +253,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
     MemoryDeviceInfo *value;
     PCDIMMDeviceInfo *di;
     SgxEPCDeviceInfo *se;
+    HvBalloonDeviceInfo *hi;
 
     for (info = info_list; info; info = info->next) {
         value = info->value;
@@ -310,6 +311,20 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
                 monitor_printf(mon, "  node: %" PRId64 "\n", se->node);
                 monitor_printf(mon, "  memdev: %s\n", se->memdev);
                 break;
+            case MEMORY_DEVICE_INFO_KIND_HV_BALLOON:
+                hi = value->u.hv_balloon.data;
+                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
+                               MemoryDeviceInfoKind_str(value->type),
+                               hi->id ? hi->id : "");
+                if (hi->has_memaddr) {
+                    monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n",
+                                   hi->memaddr);
+                }
+                monitor_printf(mon, "  max-size: %" PRIu64 "\n", hi->max_size);
+                if (hi->memdev) {
+                    monitor_printf(mon, "  memdev: %s\n", hi->memdev);
+                }
+                break;
             default:
                 g_assert_not_reached();
             }
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 2f1dbb3fd7..b46d16cd2c 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -705,7 +705,7 @@ static void get_reserved_region(Object *obj, Visitor *v, const char *name,
     int rc;
 
     rc = snprintf(buffer, sizeof(buffer), "0x%"PRIx64":0x%"PRIx64":%u",
-                  rr->low, rr->high, rr->type);
+                  range_lob(&rr->range), range_upb(&rr->range), rr->type);
     assert(rc < sizeof(buffer));
 
     visit_type_str(v, name, &p, errp);
@@ -717,6 +717,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name,
     Property *prop = opaque;
     ReservedRegion *rr = object_field_prop_ptr(obj, prop);
     const char *endptr;
+    uint64_t lob, upb;
     char *str;
     int ret;
 
@@ -724,7 +725,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    ret = qemu_strtou64(str, &endptr, 16, &rr->low);
+    ret = qemu_strtou64(str, &endptr, 16, &lob);
     if (ret) {
         error_setg(errp, "start address of '%s'"
                    " must be a hexadecimal integer", name);
@@ -734,7 +735,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name,
         goto separator_error;
     }
 
-    ret = qemu_strtou64(endptr + 1, &endptr, 16, &rr->high);
+    ret = qemu_strtou64(endptr + 1, &endptr, 16, &upb);
     if (ret) {
         error_setg(errp, "end address of '%s'"
                    " must be a hexadecimal integer", name);
@@ -744,6 +745,8 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name,
         goto separator_error;
     }
 
+    range_set_bounds(&rr->range, lob, upb);
+
     ret = qemu_strtoui(endptr + 1, &endptr, 10, &rr->type);
     if (ret) {
         error_setg(errp, "type of '%s'"
@@ -1111,7 +1114,7 @@ static void get_uuid(Object *obj, Visitor *v, const char *name, void *opaque,
 {
     Property *prop = opaque;
     QemuUUID *uuid = object_field_prop_ptr(obj, prop);
-    char buffer[UUID_FMT_LEN + 1];
+    char buffer[UUID_STR_LEN];
     char *p = buffer;
 
     qemu_uuid_unparse(uuid, buffer);
diff --git a/hw/display/ati.c b/hw/display/ati.c
index 6e38e00502..9a87a5504a 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -319,11 +319,13 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size)
     case DAC_CNTL:
         val = s->regs.dac_cntl;
         break;
-    case GPIO_VGA_DDC:
-        val = s->regs.gpio_vga_ddc;
+    case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3:
+        val = ati_reg_read_offs(s->regs.gpio_vga_ddc,
+                                addr - GPIO_VGA_DDC, size);
         break;
-    case GPIO_DVI_DDC:
-        val = s->regs.gpio_dvi_ddc;
+    case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3:
+        val = ati_reg_read_offs(s->regs.gpio_dvi_ddc,
+                                addr - GPIO_DVI_DDC, size);
         break;
     case GPIO_MONID ... GPIO_MONID + 3:
         val = ati_reg_read_offs(s->regs.gpio_monid,
@@ -337,6 +339,9 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size)
     case PALETTE_DATA:
         val = vga_ioport_read(&s->vga, VGA_PEL_D);
         break;
+    case PALETTE_30_DATA:
+        val = s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IR)];
+        break;
     case CNFG_CNTL:
         val = s->regs.config_cntl;
         break;
@@ -349,14 +354,17 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size)
                                       PCI_BASE_ADDRESS_0, size) & 0xfffffff0;
         break;
     case CONFIG_APER_SIZE:
-        val = s->vga.vram_size;
+        val = s->vga.vram_size / 2;
         break;
     case CONFIG_REG_1_BASE:
         val = pci_default_read_config(&s->dev,
                                       PCI_BASE_ADDRESS_2, size) & 0xfffffff0;
         break;
     case CONFIG_REG_APER_SIZE:
-        val = memory_region_size(&s->mm);
+        val = memory_region_size(&s->mm) / 2;
+        break;
+    case HOST_PATH_CNTL:
+        val = BIT(23); /* Radeon HDP_APER_CNTL */
         break;
     case MC_STATUS:
         val = 5;
@@ -612,29 +620,34 @@ static void ati_mm_write(void *opaque, hwaddr addr,
         s->regs.dac_cntl = data & 0xffffe3ff;
         s->vga.dac_8bit = !!(data & DAC_8BIT_EN);
         break;
-    case GPIO_VGA_DDC:
+    /*
+     * GPIO regs for DDC access. Because some drivers access these via
+     * multiple byte writes we have to be careful when we send bits to
+     * avoid spurious changes in bitbang_i2c state. Only do it when either
+     * the enable bits are changed or output bits changed while enabled.
+     */
+    case GPIO_VGA_DDC ... GPIO_VGA_DDC + 3:
         if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) {
             /* FIXME: Maybe add a property to select VGA or DVI port? */
         }
         break;
-    case GPIO_DVI_DDC:
+    case GPIO_DVI_DDC ... GPIO_DVI_DDC + 3:
         if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) {
-            s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, data, 0);
+            ati_reg_write_offs(&s->regs.gpio_dvi_ddc,
+                               addr - GPIO_DVI_DDC, data, size);
+            if ((addr <= GPIO_DVI_DDC + 2 && addr + size > GPIO_DVI_DDC + 2) ||
+                (addr == GPIO_DVI_DDC && (s->regs.gpio_dvi_ddc & 0x30000))) {
+                s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c,
+                                               s->regs.gpio_dvi_ddc, 0);
+            }
         }
         break;
     case GPIO_MONID ... GPIO_MONID + 3:
         /* FIXME What does Radeon have here? */
         if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) {
+            /* Rage128p accesses DDC via MONID(1-2) with additional mask bit */
             ati_reg_write_offs(&s->regs.gpio_monid,
                                addr - GPIO_MONID, data, size);
-            /*
-             * Rage128p accesses DDC used to get EDID via these bits.
-             * Because some drivers access this via multiple byte writes
-             * we have to be careful when we send bits to avoid spurious
-             * changes in bitbang_i2c state. So only do it when mask is set
-             * and either the enable bits are changed or output bits changed
-             * while enabled.
-             */
             if ((s->regs.gpio_monid & BIT(25)) &&
                 ((addr <= GPIO_MONID + 2 && addr + size > GPIO_MONID + 2) ||
                  (addr == GPIO_MONID && (s->regs.gpio_monid & 0x60000)))) {
@@ -663,6 +676,12 @@ static void ati_mm_write(void *opaque, hwaddr addr,
         data >>= 8;
         vga_ioport_write(&s->vga, VGA_PEL_D, data & 0xff);
         break;
+    case PALETTE_30_DATA:
+        s->regs.palette[vga_ioport_read(&s->vga, VGA_PEL_IW)] = data;
+        vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 22) & 0xff);
+        vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 12) & 0xff);
+        vga_ioport_write(&s->vga, VGA_PEL_D, (data >> 2) & 0xff);
+        break;
     case CNFG_CNTL:
         s->regs.config_cntl = data;
         break;
@@ -1014,6 +1033,7 @@ static Property ati_vga_properties[] = {
     DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
                        PCI_DEVICE_ID_ATI_RAGE128_PF),
     DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
+    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -1035,11 +1055,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
     k->exit = ati_vga_exit;
 }
 
+static void ati_vga_init(Object *o)
+{
+    const char *help = "Use pixman for: 1: fill, 2: blit"; /* bit values */
+    object_property_set_description(o, "x-pixman", help);
+}
+
 static const TypeInfo ati_vga_info = {
     .name = TYPE_ATI_VGA,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(ATIVGAState),
     .class_init = ati_vga_class_init,
+    .instance_init = ati_vga_init,
     .interfaces = (InterfaceInfo[]) {
           { INTERFACE_CONVENTIONAL_PCI_DEVICE },
           { },
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 7d786653e8..0e6b8e4367 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
     switch (s->regs.dp_mix & GMC_ROP3_MASK) {
     case ROP3_SRCCOPY:
     {
+        bool fallback = false;
         unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
                        s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
         unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
@@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
                 src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
                 src_x, src_y, dst_x, dst_y,
                 s->regs.dst_width, s->regs.dst_height);
-        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
+        if ((s->use_pixman & BIT(1)) &&
+            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
             s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
-            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
-                       src_stride, dst_stride, bpp, bpp,
-                       src_x, src_y, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
-        } else {
+            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
+                                   src_stride, dst_stride, bpp, bpp,
+                                   src_x, src_y, dst_x, dst_y,
+                                   s->regs.dst_width, s->regs.dst_height);
+        } else if (s->use_pixman & BIT(1)) {
             /* FIXME: We only really need a temporary if src and dst overlap */
             int llb = s->regs.dst_width * (bpp / 8);
             int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
             uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
                                      s->regs.dst_height);
-            pixman_blt((uint32_t *)src_bits, tmp,
-                       src_stride, tmp_stride, bpp, bpp,
-                       src_x, src_y, 0, 0,
-                       s->regs.dst_width, s->regs.dst_height);
-            pixman_blt(tmp, (uint32_t *)dst_bits,
-                       tmp_stride, dst_stride, bpp, bpp,
-                       0, 0, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
+            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
+                                   src_stride, tmp_stride, bpp, bpp,
+                                   src_x, src_y, 0, 0,
+                                   s->regs.dst_width, s->regs.dst_height);
+            if (!fallback) {
+                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
+                                       tmp_stride, dst_stride, bpp, bpp,
+                                       0, 0, dst_x, dst_y,
+                                       s->regs.dst_width, s->regs.dst_height);
+            }
             g_free(tmp);
+        } else {
+            fallback = true;
+        }
+        if (fallback) {
+            unsigned int y, i, j, bypp = bpp / 8;
+            unsigned int src_pitch = src_stride * sizeof(uint32_t);
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp;
+                j = src_x * bypp;
+                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
+                    i += (dst_y + y) * dst_pitch;
+                    j += (src_y + y) * src_pitch;
+                } else {
+                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
+                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
+                }
+                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
+            }
         }
         if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
             dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
@@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
 
         dst_stride /= sizeof(uint32_t);
         DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
-                dst_bits, dst_stride, bpp,
-                dst_x, dst_y,
-                s->regs.dst_width, s->regs.dst_height,
-                filler);
-        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
-                    dst_x, dst_y,
-                    s->regs.dst_width, s->regs.dst_height,
-                    filler);
+                dst_bits, dst_stride, bpp, dst_x, dst_y,
+                s->regs.dst_width, s->regs.dst_height, filler);
+        if (!(s->use_pixman & BIT(0)) ||
+            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
+                    s->regs.dst_width, s->regs.dst_height, filler)) {
+            /* fallback when pixman failed or we don't want to call it */
+            unsigned int x, y, i, bypp = bpp / 8;
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp + (dst_y + y) * dst_pitch;
+                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
+                    stn_he_p(&dst_bits[i], bypp, filler);
+                }
+            }
+        }
         if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
             dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
             s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
diff --git a/hw/display/ati_dbg.c b/hw/display/ati_dbg.c
index bd0ecd48c7..3ffa7f35df 100644
--- a/hw/display/ati_dbg.c
+++ b/hw/display/ati_dbg.c
@@ -30,6 +30,7 @@ static struct ati_regdesc ati_reg_names[] = {
     {"AMCGPIO_EN_MIR", 0x00a8},
     {"PALETTE_INDEX", 0x00b0},
     {"PALETTE_DATA", 0x00b4},
+    {"PALETTE_30_DATA", 0x00b8},
     {"CNFG_CNTL", 0x00e0},
     {"GEN_RESET_CNTL", 0x00f0},
     {"CNFG_MEMSIZE", 0x00f8},
@@ -38,6 +39,7 @@ static struct ati_regdesc ati_reg_names[] = {
     {"CONFIG_APER_SIZE", 0x0108},
     {"CONFIG_REG_1_BASE", 0x010c},
     {"CONFIG_REG_APER_SIZE", 0x0110},
+    {"HOST_PATH_CNTL", 0x0130},
     {"MEM_CNTL", 0x0140},
     {"MC_FB_LOCATION", 0x0148},
     {"MC_AGP_LOCATION", 0x014C},
diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
index e8d3c7af75..f5a47b82b0 100644
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@@ -44,6 +44,7 @@ typedef struct ATIVGARegs {
     uint32_t gpio_dvi_ddc;
     uint32_t gpio_monid;
     uint32_t config_cntl;
+    uint32_t palette[256];
     uint32_t crtc_h_total_disp;
     uint32_t crtc_h_sync_strt_wid;
     uint32_t crtc_v_total_disp;
@@ -89,6 +90,7 @@ struct ATIVGAState {
     char *model;
     uint16_t dev_id;
     uint8_t mode;
+    uint8_t use_pixman;
     bool cursor_guest_mode;
     uint16_t cursor_size;
     uint32_t cursor_offset;
diff --git a/hw/display/ati_regs.h b/hw/display/ati_regs.h
index d6282b2ef2..d7127748ff 100644
--- a/hw/display/ati_regs.h
+++ b/hw/display/ati_regs.h
@@ -48,6 +48,7 @@
 #define AMCGPIO_EN_MIR                          0x00a8
 #define PALETTE_INDEX                           0x00b0
 #define PALETTE_DATA                            0x00b4
+#define PALETTE_30_DATA                         0x00b8
 #define CNFG_CNTL                               0x00e0
 #define GEN_RESET_CNTL                          0x00f0
 #define CNFG_MEMSIZE                            0x00f8
@@ -56,6 +57,7 @@
 #define CONFIG_APER_SIZE                        0x0108
 #define CONFIG_REG_1_BASE                       0x010c
 #define CONFIG_REG_APER_SIZE                    0x0110
+#define HOST_PATH_CNTL                          0x0130
 #define MEM_CNTL                                0x0140
 #define MC_FB_LOCATION                          0x0148
 #define MC_AGP_LOCATION                         0x014C
diff --git a/hw/display/macfb.c b/hw/display/macfb.c
index 2f8e016566..d61541ccb5 100644
--- a/hw/display/macfb.c
+++ b/hw/display/macfb.c
@@ -36,8 +36,8 @@
 #define DAFB_INTR_MASK      0x104
 #define DAFB_INTR_STAT      0x108
 #define DAFB_INTR_CLEAR     0x10c
-#define DAFB_RESET          0x200
-#define DAFB_LUT            0x213
+#define DAFB_LUT_INDEX      0x200
+#define DAFB_LUT            0x210
 
 #define DAFB_INTR_VBL   0x4
 
@@ -537,6 +537,11 @@ static uint64_t macfb_ctrl_read(void *opaque,
     case DAFB_MODE_SENSE:
         val = macfb_sense_read(s);
         break;
+    case DAFB_LUT ... DAFB_LUT + 3:
+        val = s->color_palette[s->palette_current];
+        s->palette_current = (s->palette_current + 1) %
+                             ARRAY_SIZE(s->color_palette);
+        break;
     default:
         if (addr < MACFB_CTRL_TOPADDR) {
             val = s->regs[addr >> 2];
@@ -583,13 +588,11 @@ static void macfb_ctrl_write(void *opaque,
         s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL;
         macfb_update_irq(s);
         break;
-    case DAFB_RESET:
-        s->palette_current = 0;
-        s->regs[DAFB_INTR_STAT >> 2] &= ~DAFB_INTR_VBL;
-        macfb_update_irq(s);
+    case DAFB_LUT_INDEX:
+        s->palette_current = (val & 0xff) * 3;
         break;
-    case DAFB_LUT:
-        s->color_palette[s->palette_current] = val;
+    case DAFB_LUT ... DAFB_LUT + 3:
+        s->color_palette[s->palette_current] = val & 0xff;
         s->palette_current = (s->palette_current + 1) %
                              ARRAY_SIZE(s->color_palette);
         if (s->palette_current % 3) {
diff --git a/hw/display/virtio-gpu-pci-rutabaga.c b/hw/display/virtio-gpu-pci-rutabaga.c
index c96729e198..abbb898c65 100644
--- a/hw/display/virtio-gpu-pci-rutabaga.c
+++ b/hw/display/virtio-gpu-pci-rutabaga.c
@@ -36,6 +36,7 @@ static const TypeInfo virtio_gpu_rutabaga_pci_info[] = {
         .instance_init = virtio_gpu_rutabaga_initfn,
         .interfaces = (InterfaceInfo[]) {
             { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+            { },
         }
     },
 };
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 4265316cbb..2707bceea8 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1213,6 +1213,9 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size,
     assert(QTAILQ_EMPTY(&g->cmdq));
 
     QTAILQ_FOREACH(res, &g->reslist, next) {
+        if (res->blob_size) {
+            continue;
+        }
         qemu_put_be32(f, res->resource_id);
         qemu_put_be32(f, res->width);
         qemu_put_be32(f, res->height);
@@ -1230,12 +1233,40 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size,
     return vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL);
 }
 
+/* Map all iovs of @res for DMA and, on success, link @res into @g. */
+static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g,
+                                            struct virtio_gpu_simple_resource *res)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(g);
+
+    for (int n = 0; n < res->iov_cnt; n++) {
+        struct iovec *iov = &res->iov[n];
+        hwaddr mapped = iov->iov_len;
+        iov->iov_base = dma_memory_map(vdev->dma_as, res->addrs[n], &mapped,
+                                       DMA_DIRECTION_TO_DEVICE,
+                                       MEMTXATTRS_UNSPECIFIED);
+        if (!iov->iov_base || mapped != iov->iov_len) {
+            /* Undo a short mapping first, then every earlier complete one */
+            if (iov->iov_base) {
+                dma_memory_unmap(vdev->dma_as, iov->iov_base, mapped,
+                                 DMA_DIRECTION_TO_DEVICE, 0);
+            }
+            res->iov_cnt = n;
+            virtio_gpu_cleanup_mapping(g, res);
+            return false;
+        }
+    }
+
+    QTAILQ_INSERT_HEAD(&g->reslist, res, next);
+    g->hostmem += res->hostmem;
+    return true;
+}
+
 static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
                            const VMStateField *field)
 {
     VirtIOGPU *g = opaque;
     struct virtio_gpu_simple_resource *res;
-    struct virtio_gpu_scanout *scanout;
     uint32_t resource_id, pformat;
     void *bits = NULL;
     int i;
@@ -1294,40 +1325,96 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
         qemu_get_buffer(f, (void *)pixman_image_get_data(res->image),
                         pixman_image_get_stride(res->image) * res->height);
 
-        /* restore mapping */
-        for (i = 0; i < res->iov_cnt; i++) {
-            hwaddr len = res->iov[i].iov_len;
-            res->iov[i].iov_base =
-                dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len,
-                               DMA_DIRECTION_TO_DEVICE,
-                               MEMTXATTRS_UNSPECIFIED);
-
-            if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
-                /* Clean up the half-a-mapping we just created... */
-                if (res->iov[i].iov_base) {
-                    dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as,
-                                     res->iov[i].iov_base,
-                                     len,
-                                     DMA_DIRECTION_TO_DEVICE,
-                                     0);
-                }
-                /* ...and the mappings for previous loop iterations */
-                res->iov_cnt = i;
-                virtio_gpu_cleanup_mapping(g, res);
-                pixman_image_unref(res->image);
-                g_free(res);
-                return -EINVAL;
-            }
+        if (!virtio_gpu_load_restore_mapping(g, res)) {
+            pixman_image_unref(res->image);
+            g_free(res);
+            return -EINVAL;
         }
 
-        QTAILQ_INSERT_HEAD(&g->reslist, res, next);
-        g->hostmem += res->hostmem;
-
         resource_id = qemu_get_be32(f);
     }
 
     /* load & apply scanout state */
     vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1);
+
+    return 0;
+}
+
+/* Stream format: {id, blob_size, iov_cnt, iov_cnt * (addr, len)}*, then 0 */
+static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size,
+                                const VMStateField *field, JSONWriter *vmdesc)
+{
+    VirtIOGPU *gpu = opaque;
+    struct virtio_gpu_simple_resource *r;
+
+    /* in 2d mode we should never find unprocessed commands here */
+    assert(QTAILQ_EMPTY(&gpu->cmdq));
+
+    QTAILQ_FOREACH(r, &gpu->reslist, next) {
+        /* non-blob resources are written by virtio_gpu_save() instead */
+        if (r->blob_size) {
+            qemu_put_be32(f, r->resource_id);
+            qemu_put_be32(f, r->blob_size);
+            qemu_put_be32(f, r->iov_cnt);
+            for (int k = 0; k < r->iov_cnt; k++) {
+                qemu_put_be64(f, r->addrs[k]);
+                qemu_put_be32(f, r->iov[k].iov_len);
+            }
+        }
+    }
+    qemu_put_be32(f, 0); /* end of list */
+
+    return 0;
+}
+
+/* Rebuild blob resources from the stream written by virtio_gpu_blob_save() */
+static int virtio_gpu_blob_load(QEMUFile *f, void *opaque, size_t size,
+                                const VMStateField *field)
+{
+    VirtIOGPU *gpu = opaque;
+    uint32_t id;
+
+    /* a zero resource id marks the end of the list */
+    for (id = qemu_get_be32(f); id != 0; id = qemu_get_be32(f)) {
+        struct virtio_gpu_simple_resource *r;
+
+        if (virtio_gpu_find_resource(gpu, id)) {
+            /* a resource with this id already exists */
+            return -EINVAL;
+        }
+
+        r = g_new0(struct virtio_gpu_simple_resource, 1);
+        r->resource_id = id;
+        r->blob_size = qemu_get_be32(f);
+        r->iov_cnt = qemu_get_be32(f);
+        r->addrs = g_new(uint64_t, r->iov_cnt);
+        r->iov = g_new(struct iovec, r->iov_cnt);
+
+        /* DMA address and length of each iov entry */
+        for (int k = 0; k < r->iov_cnt; k++) {
+            r->addrs[k] = qemu_get_be64(f);
+            r->iov[k].iov_len = qemu_get_be32(f);
+        }
+
+        if (!virtio_gpu_load_restore_mapping(gpu, r)) {
+            /* NOTE(review): assumes the helper freed r->iov and r->addrs */
+            g_free(r);
+            return -EINVAL;
+        }
+
+        virtio_gpu_init_udmabuf(r);
+    }
+
+    return 0;
+}
+
+static int virtio_gpu_post_load(void *opaque, int version_id)
+{
+    VirtIOGPU *g = opaque;
+    struct virtio_gpu_scanout *scanout;
+    struct virtio_gpu_simple_resource *res;
+    int i;
+
     for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
         /* FIXME: should take scanout.r.{x,y} into account */
         scanout = &g->parent_obj.scanout[i];
@@ -1475,6 +1562,32 @@ virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
     }
 }
 
+/* .needed hook: send the blob subsection only if blob support is enabled */
+static bool virtio_gpu_blob_state_needed(void *opaque)
+{
+    VirtIOGPU *gpu = VIRTIO_GPU(opaque);
+    return virtio_gpu_blob_enabled(gpu->parent_obj.conf);
+}
+
+const VMStateDescription vmstate_virtio_gpu_blob_state = {
+    .name = "virtio-gpu/blob",
+    .minimum_version_id = VIRTIO_GPU_VM_VERSION,
+    .version_id = VIRTIO_GPU_VM_VERSION,
+    .needed = virtio_gpu_blob_state_needed,
+    .fields = (const VMStateField[]){
+        {
+            .name = "virtio-gpu/blob",
+            .info = &(const VMStateInfo) {
+                .name = "blob",
+                .get = virtio_gpu_blob_load,
+                .put = virtio_gpu_blob_save,
+            },
+            .flags = VMS_SINGLE,
+        } /* single field: get/put (de)serialize the whole blob list */,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 /*
  * For historical reasons virtio_gpu does not adhere to virtio migration
  * scheme as described in doc/virtio-migration.txt, in a sense that no
@@ -1500,6 +1613,11 @@ static const VMStateDescription vmstate_virtio_gpu = {
         } /* device */,
         VMSTATE_END_OF_LIST()
     },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_gpu_blob_state,
+        NULL
+    },
+    .post_load = virtio_gpu_post_load,
 };
 
 static Property virtio_gpu_properties[] = {
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index 67d4d1b5e0..a3222d3a96 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -87,7 +87,7 @@ static const MemoryRegionOps hppa_pci_ignore_ops = {
     },
 };
 
-static ISABus *hppa_isa_bus(void)
+static ISABus *hppa_isa_bus(hwaddr addr)
 {
     ISABus *isa_bus;
     qemu_irq *isa_irqs;
@@ -96,8 +96,7 @@ static ISABus *hppa_isa_bus(void)
     isa_region = g_new(MemoryRegion, 1);
     memory_region_init_io(isa_region, NULL, &hppa_pci_ignore_ops,
                           NULL, "isa-io", 0x800);
-    memory_region_add_subregion(get_system_memory(), IDE_HPA,
-                                isa_region);
+    memory_region_add_subregion(get_system_memory(), addr, isa_region);
 
     isa_bus = isa_bus_new(NULL, get_system_memory(), isa_region,
                           &error_abort);
@@ -163,13 +162,24 @@ static const MemoryRegionOps hppa_io_helper_ops = {
     },
 };
 
+typedef uint64_t TranslateFn(void *opaque, uint64_t addr);
 
-static uint64_t cpu_hppa_to_phys(void *opaque, uint64_t addr)
+static uint64_t linux_kernel_virt_to_phys(void *opaque, uint64_t addr)
 {
     addr &= (0x10000000 - 1);
     return addr;
 }
 
+static uint64_t translate_pa10(void *dummy, uint64_t addr)
+{
+    return addr & 0xffffffffULL; /* keep the low 32 bits (non-PA2.0 CPUs) */
+}
+
+static uint64_t translate_pa20(void *dummy, uint64_t addr)
+{
+    return hppa_abs_to_phys_pa2_w0(addr); /* PA2.0 layout, PSW_W treated clear */
+}
+
 static HPPACPU *cpu[HPPA_MAX_CPUS];
 static uint64_t firmware_entry;
 
@@ -179,15 +189,17 @@ static void fw_cfg_boot_set(void *opaque, const char *boot_device,
     fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]);
 }
 
-static FWCfgState *create_fw_cfg(MachineState *ms, PCIBus *pci_bus)
+static FWCfgState *create_fw_cfg(MachineState *ms, PCIBus *pci_bus,
+                                 hwaddr addr)
 {
     FWCfgState *fw_cfg;
     uint64_t val;
     const char qemu_version[] = QEMU_VERSION;
     MachineClass *mc = MACHINE_GET_CLASS(ms);
+    int btlb_entries = HPPA_BTLB_ENTRIES(&cpu[0]->env);
     int len;
 
-    fw_cfg = fw_cfg_init_mem(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4);
+    fw_cfg = fw_cfg_init_mem(addr, addr + 4);
     fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, HPPA_MAX_CPUS);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, ms->ram_size);
@@ -196,11 +208,11 @@ static FWCfgState *create_fw_cfg(MachineState *ms, PCIBus *pci_bus)
     fw_cfg_add_file(fw_cfg, "/etc/firmware-min-version",
                     g_memdup(&val, sizeof(val)), sizeof(val));
 
-    val = cpu_to_le64(HPPA_TLB_ENTRIES - HPPA_BTLB_ENTRIES);
+    val = cpu_to_le64(HPPA_TLB_ENTRIES - btlb_entries);
     fw_cfg_add_file(fw_cfg, "/etc/cpu/tlb_entries",
                     g_memdup(&val, sizeof(val)), sizeof(val));
 
-    val = cpu_to_le64(HPPA_BTLB_ENTRIES);
+    val = cpu_to_le64(btlb_entries);
     fw_cfg_add_file(fw_cfg, "/etc/cpu/btlb_entries",
                     g_memdup(&val, sizeof(val)), sizeof(val));
 
@@ -257,32 +269,45 @@ static DinoState *dino_init(MemoryRegion *addr_space)
 /*
  * Step 1: Create CPUs and Memory
  */
-static void machine_HP_common_init_cpus(MachineState *machine)
+static TranslateFn *machine_HP_common_init_cpus(MachineState *machine)
 {
     MemoryRegion *addr_space = get_system_memory();
-    MemoryRegion *cpu_region;
-    long i;
     unsigned int smp_cpus = machine->smp.cpus;
-    char *name;
+    TranslateFn *translate;
+    MemoryRegion *cpu_region;
 
     /* Create CPUs.  */
-    for (i = 0; i < smp_cpus; i++) {
-        name = g_strdup_printf("cpu%ld-io-eir", i);
+    for (unsigned int i = 0; i < smp_cpus; i++) {
         cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type));
+    }
+
+    /*
+     * For now, treat address layout as if PSW_W is clear.
+     * TODO: create a proper hppa64 board model and load elf64 firmware.
+     */
+    if (hppa_is_pa20(&cpu[0]->env)) {
+        translate = translate_pa20;
+    } else {
+        translate = translate_pa10;
+    }
+
+    for (unsigned int i = 0; i < smp_cpus; i++) {
+        g_autofree char *name = g_strdup_printf("cpu%u-io-eir", i);
 
         cpu_region = g_new(MemoryRegion, 1);
         memory_region_init_io(cpu_region, OBJECT(cpu[i]), &hppa_io_eir_ops,
                               cpu[i], name, 4);
-        memory_region_add_subregion(addr_space, CPU_HPA + i * 0x1000,
+        memory_region_add_subregion(addr_space,
+                                    translate(NULL, CPU_HPA + i * 0x1000),
                                     cpu_region);
-        g_free(name);
     }
 
     /* RTC and DebugOutputPort on CPU #0 */
     cpu_region = g_new(MemoryRegion, 1);
     memory_region_init_io(cpu_region, OBJECT(cpu[0]), &hppa_io_helper_ops,
                           cpu[0], "cpu0-io-rtc", 2 * sizeof(uint64_t));
-    memory_region_add_subregion(addr_space, CPU_HPA + 16, cpu_region);
+    memory_region_add_subregion(addr_space, translate(NULL, CPU_HPA + 16),
+                                cpu_region);
 
     /* Main memory region. */
     if (machine->ram_size > 3 * GiB) {
@@ -290,12 +315,15 @@ static void machine_HP_common_init_cpus(MachineState *machine)
         exit(EXIT_FAILURE);
     }
     memory_region_add_subregion_overlap(addr_space, 0, machine->ram, -1);
+
+    return translate;
 }
 
 /*
  * Last creation step: Add SCSI discs, NICs, graphics & load firmware
  */
-static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus)
+static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus,
+                                        TranslateFn *translate)
 {
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
@@ -323,13 +351,13 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus)
         dev = qdev_new("artist");
         s = SYS_BUS_DEVICE(dev);
         sysbus_realize_and_unref(s, &error_fatal);
-        sysbus_mmio_map(s, 0, LASI_GFX_HPA);
-        sysbus_mmio_map(s, 1, ARTIST_FB_ADDR);
+        sysbus_mmio_map(s, 0, translate(NULL, LASI_GFX_HPA));
+        sysbus_mmio_map(s, 1, translate(NULL, ARTIST_FB_ADDR));
     }
 
     /* Network setup. */
     if (enable_lasi_lan()) {
-        lasi_82596_init(addr_space, LASI_LAN_HPA,
+        lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
                         qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA));
     }
 
@@ -373,7 +401,7 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus)
     qemu_register_powerdown_notifier(&hppa_system_powerdown_notifier);
 
     /* fw_cfg configuration interface */
-    create_fw_cfg(machine, pci_bus);
+    create_fw_cfg(machine, pci_bus, translate(NULL, FW_CFG_IO_BASE));
 
     /* Load firmware.  Given that this is not "real" firmware,
        but one explicitly written for the emulation, we might as
@@ -385,15 +413,10 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus)
         exit(1);
     }
 
-    size = load_elf(firmware_filename, NULL, NULL, NULL,
+    size = load_elf(firmware_filename, NULL, translate, NULL,
                     &firmware_entry, &firmware_low, &firmware_high, NULL,
                     true, EM_PARISC, 0, 0);
 
-    /* Unfortunately, load_elf sign-extends reading elf32.  */
-    firmware_entry = (target_ureg)firmware_entry;
-    firmware_low = (target_ureg)firmware_low;
-    firmware_high = (target_ureg)firmware_high;
-
     if (size < 0) {
         error_report("could not load firmware '%s'", firmware_filename);
         exit(1);
@@ -401,7 +424,8 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus)
     qemu_log_mask(CPU_LOG_PAGE, "Firmware loaded at 0x%08" PRIx64
                   "-0x%08" PRIx64 ", entry at 0x%08" PRIx64 ".\n",
                   firmware_low, firmware_high, firmware_entry);
-    if (firmware_low < FIRMWARE_START || firmware_high >= FIRMWARE_END) {
+    if (firmware_low < translate(NULL, FIRMWARE_START) ||
+        firmware_high >= translate(NULL, FIRMWARE_END)) {
         error_report("Firmware overlaps with memory or IO space");
         exit(1);
     }
@@ -410,18 +434,16 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus)
     rom_region = g_new(MemoryRegion, 1);
     memory_region_init_ram(rom_region, NULL, "firmware",
                            (FIRMWARE_END - FIRMWARE_START), &error_fatal);
-    memory_region_add_subregion(addr_space, FIRMWARE_START, rom_region);
+    memory_region_add_subregion(addr_space,
+                                translate(NULL, FIRMWARE_START), rom_region);
 
     /* Load kernel */
     if (kernel_filename) {
-        size = load_elf(kernel_filename, NULL, &cpu_hppa_to_phys,
+        size = load_elf(kernel_filename, NULL, linux_kernel_virt_to_phys,
                         NULL, &kernel_entry, &kernel_low, &kernel_high, NULL,
                         true, EM_PARISC, 0, 0);
 
-        /* Unfortunately, load_elf sign-extends reading elf32.  */
-        kernel_entry = (target_ureg) cpu_hppa_to_phys(NULL, kernel_entry);
-        kernel_low = (target_ureg)kernel_low;
-        kernel_high = (target_ureg)kernel_high;
+        kernel_entry = linux_kernel_virt_to_phys(NULL, kernel_entry);
 
         if (size < 0) {
             error_report("could not load kernel '%s'", kernel_filename);
@@ -499,41 +521,48 @@ static void machine_HP_B160L_init(MachineState *machine)
 {
     DeviceState *dev, *dino_dev;
     MemoryRegion *addr_space = get_system_memory();
+    TranslateFn *translate;
     ISABus *isa_bus;
     PCIBus *pci_bus;
 
     /* Create CPUs and RAM.  */
-    machine_HP_common_init_cpus(machine);
+    translate = machine_HP_common_init_cpus(machine);
+
+    if (hppa_is_pa20(&cpu[0]->env)) {
+        error_report("The HP B160L workstation requires a 32-bit "
+                     "CPU. Use '-machine C3700' instead.");
+        exit(1);
+    }
 
     /* Init Lasi chip */
     lasi_dev = DEVICE(lasi_init());
-    memory_region_add_subregion(addr_space, LASI_HPA,
+    memory_region_add_subregion(addr_space, translate(NULL, LASI_HPA),
                                 sysbus_mmio_get_region(
                                     SYS_BUS_DEVICE(lasi_dev), 0));
 
     /* Init Dino (PCI host bus chip).  */
     dino_dev = DEVICE(dino_init(addr_space));
-    memory_region_add_subregion(addr_space, DINO_HPA,
+    memory_region_add_subregion(addr_space, translate(NULL, DINO_HPA),
                                 sysbus_mmio_get_region(
                                     SYS_BUS_DEVICE(dino_dev), 0));
     pci_bus = PCI_BUS(qdev_get_child_bus(dino_dev, "pci"));
     assert(pci_bus);
 
     /* Create ISA bus, needed for PS/2 kbd/mouse port emulation */
-    isa_bus = hppa_isa_bus();
+    isa_bus = hppa_isa_bus(translate(NULL, IDE_HPA));
     assert(isa_bus);
 
     /* Serial ports: Lasi and Dino use a 7.272727 MHz clock. */
-    serial_mm_init(addr_space, LASI_UART_HPA + 0x800, 0,
+    serial_mm_init(addr_space, translate(NULL, LASI_UART_HPA + 0x800), 0,
         qdev_get_gpio_in(lasi_dev, LASI_IRQ_UART_HPA), 7272727 / 16,
         serial_hd(0), DEVICE_BIG_ENDIAN);
 
-    serial_mm_init(addr_space, DINO_UART_HPA + 0x800, 0,
+    serial_mm_init(addr_space, translate(NULL, DINO_UART_HPA + 0x800), 0,
         qdev_get_gpio_in(dino_dev, DINO_IRQ_RS232INT), 7272727 / 16,
         serial_hd(1), DEVICE_BIG_ENDIAN);
 
     /* Parallel port */
-    parallel_mm_init(addr_space, LASI_LPT_HPA + 0x800, 0,
+    parallel_mm_init(addr_space, translate(NULL, LASI_LPT_HPA + 0x800), 0,
                      qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
                      parallel_hds[0]);
 
@@ -542,15 +571,17 @@ static void machine_HP_B160L_init(MachineState *machine)
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
                        qdev_get_gpio_in(lasi_dev, LASI_IRQ_PS2KBD_HPA));
-    memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA,
+    memory_region_add_subregion(addr_space,
+                                translate(NULL, LASI_PS2KBD_HPA),
                                 sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
                                                        0));
-    memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA + 0x100,
+    memory_region_add_subregion(addr_space,
+                                translate(NULL, LASI_PS2KBD_HPA + 0x100),
                                 sysbus_mmio_get_region(SYS_BUS_DEVICE(dev),
                                                        1));
 
     /* Add SCSI discs, NICs, graphics & load firmware */
-    machine_HP_common_init_tail(machine, pci_bus);
+    machine_HP_common_init_tail(machine, pci_bus, translate);
 }
 
 static AstroState *astro_init(void)
@@ -572,21 +603,28 @@ static void machine_HP_C3700_init(MachineState *machine)
     AstroState *astro;
     DeviceState *astro_dev;
     MemoryRegion *addr_space = get_system_memory();
+    TranslateFn *translate;
 
     /* Create CPUs and RAM.  */
-    machine_HP_common_init_cpus(machine);
+    translate = machine_HP_common_init_cpus(machine);
+
+    if (!hppa_is_pa20(&cpu[0]->env)) {
+        error_report("The HP C3000 workstation requires a 64-bit CPU. "
+                     "Use '-machine B160L' instead.");
+        exit(1);
+    }
 
     /* Init Astro and the Elroys (PCI host bus chips).  */
     astro = astro_init();
     astro_dev = DEVICE(astro);
-    memory_region_add_subregion(addr_space, ASTRO_HPA,
+    memory_region_add_subregion(addr_space, translate(NULL, ASTRO_HPA),
                                 sysbus_mmio_get_region(
                                     SYS_BUS_DEVICE(astro_dev), 0));
     pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(astro->elroy[0]), "pci"));
     assert(pci_bus);
 
     /* Add SCSI discs, NICs, graphics & load firmware */
-    machine_HP_common_init_tail(machine, pci_bus);
+    machine_HP_common_init_tail(machine, pci_bus, translate);
 }
 
 static void hppa_machine_reset(MachineState *ms, ShutdownCause reason)
@@ -608,10 +646,6 @@ static void hppa_machine_reset(MachineState *ms, ShutdownCause reason)
 
         cs->exception_index = -1;
         cs->halted = 0;
-
-        /* clear any existing TLB and BTLB entries */
-        memset(cpu[i]->env.tlb, 0, sizeof(cpu[i]->env.tlb));
-        cpu[i]->env.tlb_last = HPPA_BTLB_ENTRIES;
     }
 
     /* already initialized by machine_hppa_init()? */
@@ -637,6 +671,11 @@ static void hppa_nmi(NMIState *n, int cpu_index, Error **errp)
     }
 }
 
+static const char *HP_B160L_machine_valid_cpu_types[] = {
+    TYPE_HPPA_CPU,
+    NULL
+};
+
 static void HP_B160L_machine_init_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -644,6 +683,7 @@ static void HP_B160L_machine_init_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "HP B160L workstation";
     mc->default_cpu_type = TYPE_HPPA_CPU;
+    mc->valid_cpu_types = HP_B160L_machine_valid_cpu_types;
     mc->init = machine_HP_B160L_init;
     mc->reset = hppa_machine_reset;
     mc->block_default_type = IF_SCSI;
@@ -668,13 +708,19 @@ static const TypeInfo HP_B160L_machine_init_typeinfo = {
     },
 };
 
+static const char *HP_C3700_machine_valid_cpu_types[] = {
+    TYPE_HPPA64_CPU,
+    NULL
+};
+
 static void HP_C3700_machine_init_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
     NMIClass *nc = NMI_CLASS(oc);
 
     mc->desc = "HP C3700 workstation";
-    mc->default_cpu_type = TYPE_HPPA_CPU;
+    mc->default_cpu_type = TYPE_HPPA64_CPU;
+    mc->valid_cpu_types = HP_C3700_machine_valid_cpu_types;
     mc->init = machine_HP_C3700_init;
     mc->reset = hppa_machine_reset;
     mc->block_default_type = IF_SCSI;
diff --git a/hw/hyperv/Kconfig b/hw/hyperv/Kconfig
index fcf65903bd..41dd827c84 100644
--- a/hw/hyperv/Kconfig
+++ b/hw/hyperv/Kconfig
@@ -16,3 +16,13 @@ config SYNDBG
     bool
     default y
     depends on VMBUS
+
+config HV_BALLOON_SUPPORTED
+    bool
+
+config HV_BALLOON
+    bool
+    default y
+    depends on VMBUS
+    depends on HV_BALLOON_POSSIBLE
+    depends on HV_BALLOON_SUPPORTED
diff --git a/hw/hyperv/hv-balloon-internal.h b/hw/hyperv/hv-balloon-internal.h
new file mode 100644
index 0000000000..164c2e5825
--- /dev/null
+++ b/hw/hyperv/hv-balloon-internal.h
@@ -0,0 +1,33 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HV_BALLOON_INTERNAL_H
+#define HW_HYPERV_HV_BALLOON_INTERNAL_H
+
+#include "qemu/osdep.h"
+
+#define HV_BALLOON_PFN_SHIFT 12
+#define HV_BALLOON_PAGE_SIZE (1 << HV_BALLOON_PFN_SHIFT)
+
+#define SUM_OVERFLOW_U64(in1, in2) ((in1) > UINT64_MAX - (in2))
+#define SUM_SATURATE_U64(in1, in2)              \
+    ({                                          \
+        uint64_t _in1 = (in1), _in2 = (in2);    \
+        uint64_t _result;                       \
+                                                \
+        if (!SUM_OVERFLOW_U64(_in1, _in2)) {    \
+            _result = _in1 + _in2;              \
+        } else {                                \
+            _result = UINT64_MAX;               \
+        }                                       \
+                                                \
+        _result;                                \
+    })
+
+#endif
diff --git a/hw/hyperv/hv-balloon-our_range_memslots.c b/hw/hyperv/hv-balloon-our_range_memslots.c
new file mode 100644
index 0000000000..99bae870f3
--- /dev/null
+++ b/hw/hyperv/hv-balloon-our_range_memslots.c
@@ -0,0 +1,201 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hv-balloon-internal.h"
+#include "hv-balloon-our_range_memslots.h"
+#include "trace.h"
+
+/* OurRange */
+static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count)
+{
+    assert(count <= UINT64_MAX - start);
+    our_range->range.start = start;
+    our_range->range.count = count;
+
+    hvb_page_range_tree_init(&our_range->removed_guest);
+    hvb_page_range_tree_init(&our_range->removed_both);
+
+    /* initially no pages are hot-added and none are marked as unusable */
+    our_range->added = 0;
+    our_range->unusable_tail = 0;
+}
+
+static void our_range_destroy(OurRange *our_range)
+{
+    hvb_page_range_tree_destroy(&our_range->removed_guest);
+    hvb_page_range_tree_destroy(&our_range->removed_both);
+}
+
+void hvb_our_range_clear_removed_trees(OurRange *our_range)
+{
+    hvb_page_range_tree_destroy(&our_range->removed_guest);
+    hvb_page_range_tree_destroy(&our_range->removed_both);
+    hvb_page_range_tree_init(&our_range->removed_guest);
+    hvb_page_range_tree_init(&our_range->removed_both);
+}
+
+void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size)
+{
+    assert(additional_size <= UINT64_MAX - our_range->added);
+
+    our_range->added += additional_size;
+
+    assert(our_range->added <= UINT64_MAX - our_range->unusable_tail);
+    assert(our_range->added + our_range->unusable_tail <=
+           our_range->range.count);
+}
+
+/* OurRangeMemslots */
+static void our_range_memslots_init_slots(OurRangeMemslots *our_range,
+                                          MemoryRegion *backing_mr,
+                                          Object *memslot_owner)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    unsigned int idx;
+    uint64_t memslot_offset;
+
+    assert(memslots->count > 0);
+    memslots->slots = g_new0(MemoryRegion, memslots->count);
+
+    /* Initialize our memslots, but don't map them yet. */
+    assert(memslots->size_each > 0);
+    for (idx = 0, memslot_offset = 0; idx < memslots->count;
+         idx++, memslot_offset += memslots->size_each) {
+        uint64_t memslot_size;
+        g_autofree char *name = NULL;
+
+        /* The size of the last memslot might be smaller. */
+        if (idx == memslots->count - 1) {
+            uint64_t region_size;
+
+            assert(our_range->mr);
+            region_size = memory_region_size(our_range->mr);
+            memslot_size = region_size - memslot_offset;
+        } else {
+            memslot_size = memslots->size_each;
+        }
+
+        name = g_strdup_printf("memslot-%u", idx);
+        memory_region_init_alias(&memslots->slots[idx], memslot_owner, name,
+                                 backing_mr, memslot_offset, memslot_size);
+        /*
+         * We want to be able to atomically and efficiently activate/deactivate
+         * individual memslots without affecting adjacent memslots in memory
+         * notifiers.
+         */
+        memory_region_set_unmergeable(&memslots->slots[idx], true);
+    }
+
+    memslots->mapped_count = 0;
+}
+
+OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
+                                             MemoryRegion *parent_mr,
+                                             MemoryRegion *backing_mr,
+                                             Object *memslot_owner,
+                                             unsigned int memslot_count,
+                                             uint64_t memslot_size)
+{
+    OurRangeMemslots *our_range;
+
+    our_range = g_malloc(sizeof(*our_range));
+    our_range_init(&our_range->range,
+                   addr / HV_BALLOON_PAGE_SIZE,
+                   memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE);
+    our_range->slots.size_each = memslot_size;
+    our_range->slots.count = memslot_count;
+    our_range->mr = parent_mr;
+    our_range_memslots_init_slots(our_range, backing_mr, memslot_owner);
+
+    return our_range;
+}
+
+static void our_range_memslots_free_memslots(OurRangeMemslots *our_range)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    unsigned int idx;
+    uint64_t offset;
+
+    memory_region_transaction_begin();
+    for (idx = 0, offset = 0; idx < memslots->mapped_count;
+         idx++, offset += memslots->size_each) {
+        trace_hv_balloon_unmap_slot(idx, memslots->count, offset);
+        assert(memory_region_is_mapped(&memslots->slots[idx]));
+        memory_region_del_subregion(our_range->mr, &memslots->slots[idx]);
+    }
+    memory_region_transaction_commit();
+
+    for (idx = 0; idx < memslots->count; idx++) {
+        object_unparent(OBJECT(&memslots->slots[idx]));
+    }
+
+    g_clear_pointer(&our_range->slots.slots, g_free);
+}
+
+void hvb_our_range_memslots_free(OurRangeMemslots *our_range)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    MemoryRegion *hostmem_mr;
+    RAMBlock *rb;
+
+    assert(our_range->slots.count > 0);
+    assert(our_range->slots.slots);
+
+    hostmem_mr = memslots->slots[0].alias;
+    rb = hostmem_mr->ram_block;
+    ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
+
+    our_range_memslots_free_memslots(our_range);
+    our_range_destroy(&our_range->range);
+    g_free(our_range);
+}
+
+void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
+                                                     uint64_t additional_map_size)
+{
+    OurRangeMemslotsSlots *memslots = &our_range->slots;
+    uint64_t total_map_size;
+    unsigned int idx;
+    uint64_t offset;
+
+    total_map_size = (our_range->range.added + additional_map_size) *
+        HV_BALLOON_PAGE_SIZE;
+    idx = memslots->mapped_count;
+    assert(memslots->size_each > 0);
+    offset = idx * memslots->size_each;
+
+    /*
+     * Activate all memslots covered by the newly added region in a single
+     * transaction.
+     */
+    memory_region_transaction_begin();
+    for ( ; idx < memslots->count;
+          idx++, offset += memslots->size_each) {
+        /*
+         * If this memslot starts at or beyond the end of the range to map,
+         * so does every subsequent one.
+         */
+        if (offset >= total_map_size) {
+            break;
+        }
+
+        /*
+         * Instead of enabling/disabling memslots, we add/remove them. This
+         * should make address space updates faster, because we don't have to
+         * loop over many disabled subregions.
+         */
+        trace_hv_balloon_map_slot(idx, memslots->count, offset);
+        assert(!memory_region_is_mapped(&memslots->slots[idx]));
+        memory_region_add_subregion(our_range->mr, offset,
+                                    &memslots->slots[idx]);
+
+        memslots->mapped_count++;
+    }
+    memory_region_transaction_commit();
+}
diff --git a/hw/hyperv/hv-balloon-our_range_memslots.h b/hw/hyperv/hv-balloon-our_range_memslots.h
new file mode 100644
index 0000000000..b6f592d34b
--- /dev/null
+++ b/hw/hyperv/hv-balloon-our_range_memslots.h
@@ -0,0 +1,110 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
+#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
+
+#include "qemu/osdep.h"
+
+#include "exec/memory.h"
+#include "qom/object.h"
+#include "hv-balloon-page_range_tree.h"
+
+/* OurRange */
+#define OUR_RANGE(ptr) ((OurRange *)(ptr))
+
+/* "our range" means the memory range owned by this driver (for hot-adding) */
+typedef struct OurRange {
+    PageRange range;
+
+    /* How many pages were hot-added to the guest */
+    uint64_t added;
+
+    /* Pages at the end not currently usable */
+    uint64_t unusable_tail;
+
+    /* Memory removed from the guest */
+    PageRangeTree removed_guest, removed_both;
+} OurRange;
+
+static inline uint64_t our_range_get_remaining_start(OurRange *our_range)
+{
+    return our_range->range.start + our_range->added;
+}
+
+static inline uint64_t our_range_get_remaining_size(OurRange *our_range)
+{
+    return our_range->range.count - our_range->added - our_range->unusable_tail;
+}
+
+void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size);
+
+static inline void our_range_mark_remaining_unusable(OurRange *our_range)
+{
+    our_range->unusable_tail = our_range->range.count - our_range->added;
+}
+
+static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range,
+                                                       bool both)
+{
+    if (both) {
+        return our_range->removed_both;
+    } else {
+        return our_range->removed_guest;
+    }
+}
+
+static inline bool our_range_is_removed_tree_empty(OurRange *our_range,
+                                                   bool both)
+{
+    if (both) {
+        return page_range_tree_is_empty(our_range->removed_both);
+    } else {
+        return page_range_tree_is_empty(our_range->removed_guest);
+    }
+}
+
+void hvb_our_range_clear_removed_trees(OurRange *our_range);
+
+/* OurRangeMemslots */
+typedef struct OurRangeMemslotsSlots {
+    /* Nominal size of each memslot (the last one might be smaller) */
+    uint64_t size_each;
+
+    /* Slots array and its element count */
+    MemoryRegion *slots;
+    unsigned int count;
+
+    /* How many slots are currently mapped */
+    unsigned int mapped_count;
+} OurRangeMemslotsSlots;
+
+typedef struct OurRangeMemslots {
+    OurRange range;
+
+    /* Memslots covering our range */
+    OurRangeMemslotsSlots slots;
+
+    MemoryRegion *mr;
+} OurRangeMemslots;
+
+OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
+                                             MemoryRegion *parent_mr,
+                                             MemoryRegion *backing_mr,
+                                             Object *memslot_owner,
+                                             unsigned int memslot_count,
+                                             uint64_t memslot_size);
+void hvb_our_range_memslots_free(OurRangeMemslots *our_range);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free)
+
+void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
+                                                     uint64_t additional_map_size);
+
+#endif
diff --git a/hw/hyperv/hv-balloon-page_range_tree.c b/hw/hyperv/hv-balloon-page_range_tree.c
new file mode 100644
index 0000000000..e178d8b413
--- /dev/null
+++ b/hw/hyperv/hv-balloon-page_range_tree.c
@@ -0,0 +1,228 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hv-balloon-internal.h"
+#include "hv-balloon-page_range_tree.h"
+
+/*
+ * temporarily avoid warnings about enhanced GTree API usage requiring a
+ * too recent GLib version until GLIB_VERSION_MAX_ALLOWED finally reaches
+ * the GLib version with this API
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+/* PageRangeTree */
+static gint page_range_tree_key_compare(gconstpointer leftp,
+                                        gconstpointer rightp,
+                                        gpointer user_data)
+{
+    const uint64_t *left = leftp, *right = rightp;
+
+    if (*left < *right) {
+        return -1;
+    } else if (*left > *right) {
+        return 1;
+    } else { /* *left == *right */
+        return 0;
+    }
+}
+
+static GTreeNode *page_range_tree_insert_new(PageRangeTree tree,
+                                             uint64_t start, uint64_t count)
+{
+    uint64_t *key = g_malloc(sizeof(*key));
+    PageRange *range = g_malloc(sizeof(*range));
+
+    assert(count > 0);
+
+    *key = range->start = start;
+    range->count = count;
+
+    return g_tree_insert_node(tree.t, key, range);
+}
+
+void hvb_page_range_tree_insert(PageRangeTree tree,
+                                uint64_t start, uint64_t count,
+                                uint64_t *dupcount)
+{
+    GTreeNode *node;
+    bool joinable = false;
+    uint64_t intersection = 0;
+    PageRange *range;
+
+    assert(!SUM_OVERFLOW_U64(start, count));
+    if (count == 0) {
+        return;
+    }
+
+    node = g_tree_upper_bound(tree.t, &start);
+    if (node) {
+        node = g_tree_node_previous(node);
+    } else {
+        node = g_tree_node_last(tree.t);
+    }
+
+    if (node) {
+        range = g_tree_node_value(node);
+        assert(range);
+        intersection = page_range_intersection_size(range, start, count);
+        joinable = page_range_joinable_right(range, start, count);
+    }
+
+    if (!node ||
+        (!intersection && !joinable)) {
+        /*
+         * !node case: the tree is empty or the very first node in the tree
+         * already has a higher key (the start of its range).
+         * Otherwise: there is a gap in the tree between the previous range
+         * and the new one.
+         * Either way, just insert the new range into the tree.
+         */
+        node = page_range_tree_insert_new(tree, start, count);
+        assert(node);
+        range = g_tree_node_value(node);
+        assert(range);
+    } else {
+        /*
+         * the previous range in the tree either partially covers the new
+         * range or ends exactly at its beginning - extend it
+         */
+        if (dupcount) {
+            *dupcount += intersection;
+        }
+
+        count += start - range->start;
+        range->count = MAX(range->count, count);
+    }
+
+    /* merge in any following nodes that now overlap or adjoin the range */
+    for (node = g_tree_node_next(node); node; ) {
+        PageRange *rangecur;
+
+        rangecur = g_tree_node_value(node);
+        assert(rangecur);
+
+        intersection = page_range_intersection_size(rangecur,
+                                                    range->start, range->count);
+        joinable = page_range_joinable_left(rangecur,
+                                            range->start, range->count);
+        if (!intersection && !joinable) {
+            /* the current node is disjoint */
+            break;
+        }
+
+        if (dupcount) {
+            *dupcount += intersection;
+        }
+
+        count = rangecur->count + (rangecur->start - range->start);
+        range->count = MAX(range->count, count);
+
+        /* the current node was merged in: advance first, then remove it */
+        start = rangecur->start;
+        node = g_tree_node_next(node);
+        /* no hinted removal in GTree... */
+        g_tree_remove(tree.t, &start);
+    }
+}
+
+bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out,
+                             uint64_t maxcount)
+{
+    GTreeNode *node;
+    PageRange *range;
+
+    node = g_tree_node_last(tree.t);
+    if (!node) {
+        return false;
+    }
+
+    range = g_tree_node_value(node);
+    assert(range);
+
+    out->start = range->start;
+
+    /* range->start is the node key, so split pages off the range's end */
+    if (range->count > maxcount) {
+        out->start += range->count - maxcount;
+        out->count = maxcount;
+        range->count -= maxcount;
+    } else {
+        out->count = range->count;
+        /* no hinted removal in GTree, so remove the node by its key */
+        g_tree_remove(tree.t, &out->start);
+    }
+
+    return true;
+}
+
+bool hvb_page_range_tree_intree_any(PageRangeTree tree,
+                                    uint64_t start, uint64_t count)
+{
+    GTreeNode *node;
+
+    if (count == 0) {
+        return false;
+    }
+
+    /* find the first node that can possibly intersect our range */
+    node = g_tree_upper_bound(tree.t, &start);
+    if (node) {
+        /*
+         * a NULL result here means that the very first node in the tree
+         * already has a higher key (the start of its range).
+         */
+        node = g_tree_node_previous(node);
+    } else {
+        /* a NULL result here means that the tree is empty */
+        node = g_tree_node_last(tree.t);
+    }
+    /* if node is non-NULL here: node range start <= range start */
+
+    if (!node) {
+        /* any node range start > range start, so begin at the first node */
+        node = g_tree_node_first(tree.t);
+    }
+
+    for ( ; node; node = g_tree_node_next(node)) {
+        PageRange *range = g_tree_node_value(node);
+
+        assert(range);
+        /*
+         * if this node starts at or beyond the end of our range, so does
+         * every subsequent one
+         */
+        if (range->start >= start + count) {
+            break;
+        }
+
+        if (page_range_intersection_size(range, start, count) > 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void hvb_page_range_tree_init(PageRangeTree *tree)
+{
+    tree->t = g_tree_new_full(page_range_tree_key_compare, NULL,
+                              g_free, g_free);
+}
+
+void hvb_page_range_tree_destroy(PageRangeTree *tree)
+{
+    /* g_tree_destroy() is not NULL-safe */
+    if (!tree->t) {
+        return;
+    }
+
+    g_tree_destroy(tree->t);
+    tree->t = NULL;
+}
diff --git a/hw/hyperv/hv-balloon-page_range_tree.h b/hw/hyperv/hv-balloon-page_range_tree.h
new file mode 100644
index 0000000000..07a9ae0da6
--- /dev/null
+++ b/hw/hyperv/hv-balloon-page_range_tree.h
@@ -0,0 +1,118 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H
+#define HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H
+
+#include "qemu/osdep.h"
+
+/* PageRange */
+typedef struct PageRange {
+    uint64_t start;
+    uint64_t count;
+} PageRange;
+
+/* return just the part of range before (start) */
+static inline void page_range_part_before(const PageRange *range,
+                                          uint64_t start, PageRange *out)
+{
+    uint64_t endr = range->start + range->count;
+    uint64_t end = MIN(endr, start);
+
+    out->start = range->start;
+    if (end > out->start) {
+        out->count = end - out->start;
+    } else {
+        out->count = 0;
+    }
+}
+
+/* return just the part of range after (start, count) */
+static inline void page_range_part_after(const PageRange *range,
+                                         uint64_t start, uint64_t count,
+                                         PageRange *out)
+{
+    uint64_t end = range->start + range->count;
+    uint64_t ends = start + count;
+
+    out->start = MAX(range->start, ends);
+    if (end > out->start) {
+        out->count = end - out->start;
+    } else {
+        out->count = 0;
+    }
+}
+
+static inline void page_range_intersect(const PageRange *range,
+                                        uint64_t start, uint64_t count,
+                                        PageRange *out)
+{
+    uint64_t end1 = range->start + range->count;
+    uint64_t end2 = start + count;
+    uint64_t end = MIN(end1, end2);
+
+    out->start = MAX(range->start, start);
+    out->count = out->start < end ? end - out->start : 0;
+}
+
+static inline uint64_t page_range_intersection_size(const PageRange *range,
+                                                    uint64_t start, uint64_t count)
+{
+    PageRange trange;
+
+    page_range_intersect(range, start, count, &trange);
+    return trange.count;
+}
+
+static inline bool page_range_joinable_left(const PageRange *range,
+                                            uint64_t start, uint64_t count)
+{
+    return start + count == range->start;
+}
+
+static inline bool page_range_joinable_right(const PageRange *range,
+                                             uint64_t start, uint64_t count)
+{
+    return range->start + range->count == start;
+}
+
+static inline bool page_range_joinable(const PageRange *range,
+                                       uint64_t start, uint64_t count)
+{
+    return page_range_joinable_left(range, start, count) ||
+        page_range_joinable_right(range, start, count);
+}
+
+/* PageRangeTree */
+/* wrapping the GTree pointer in a struct provides type safety */
+typedef struct PageRangeTree {
+    GTree *t;
+} PageRangeTree;
+
+static inline bool page_range_tree_is_empty(PageRangeTree tree)
+{
+    guint nnodes = g_tree_nnodes(tree.t);
+
+    return nnodes == 0;
+}
+
+void hvb_page_range_tree_init(PageRangeTree *tree);
+void hvb_page_range_tree_destroy(PageRangeTree *tree);
+
+bool hvb_page_range_tree_intree_any(PageRangeTree tree,
+                                    uint64_t start, uint64_t count);
+
+bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out,
+                             uint64_t maxcount);
+
+void hvb_page_range_tree_insert(PageRangeTree tree,
+                                uint64_t start, uint64_t count,
+                                uint64_t *dupcount);
+
+#endif
diff --git a/hw/hyperv/hv-balloon-stub.c b/hw/hyperv/hv-balloon-stub.c
new file mode 100644
index 0000000000..a47412d4a8
--- /dev/null
+++ b/hw/hyperv/hv-balloon-stub.c
@@ -0,0 +1,19 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-types-machine.h"
+
+HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
+{
+    error_setg(errp, "hv-balloon device not enabled in this build");
+    return NULL;
+}
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
new file mode 100644
index 0000000000..66f297c1d7
--- /dev/null
+++ b/hw/hyperv/hv-balloon.c
@@ -0,0 +1,1769 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "hv-balloon-internal.h"
+
+#include "exec/address-spaces.h"
+#include "exec/cpu-common.h"
+#include "exec/ramblock.h"
+#include "hw/boards.h"
+#include "hw/hyperv/dynmem-proto.h"
+#include "hw/hyperv/hv-balloon.h"
+#include "hw/hyperv/vmbus.h"
+#include "hw/mem/memory-device.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "monitor/qdev.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-types-machine.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/visitor.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "qemu/timer.h"
+#include "sysemu/balloon.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/reset.h"
+#include "hv-balloon-our_range_memslots.h"
+#include "hv-balloon-page_range_tree.h"
+#include "trace.h"
+
+#define HV_BALLOON_ADDR_PROP "addr"
+#define HV_BALLOON_MEMDEV_PROP "memdev"
+#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502"
+
+/*
+ * Some Windows versions (at least Server 2019) will crash with various
+ * error codes when receiving DM protocol requests (at least
+ * DM_MEM_HOT_ADD_REQUEST) immediately after boot.
+ *
+ * It looks like Hyper-V from Server 2016 uses a 50-second after-boot
+ * delay, probably to work around this issue, so we'll use this value, too.
+ */
+#define HV_BALLOON_POST_INIT_WAIT (50 * 1000)
+
+#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB)
+#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE)
+
+#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB)
+
+#define HV_BALLOON_HR_CHUNK_PAGES 585728
+/*
+ *                                ^ that's the maximum number of pages
+ * that Windows returns in one hot remove response
+ *
+ * If the number requested is too high, Windows will no longer honor
+ * these requests
+ */
+
+/*
+ * Class structure of the hv-balloon device; it adds nothing to its parent
+ * class.
+ *
+ * Declared as a typedef (like HvBalloon itself) so that the trailing name
+ * introduces a type rather than a stray global variable.
+ */
+typedef struct HvBalloonClass {
+    VMBusDeviceClass parent_class;
+} HvBalloonClass;
+
+/*
+ * States of the device state machine (see HvBalloon.state).  The groups
+ * below follow the operation flows named in the comments.
+ */
+typedef enum State {
+    /* not a real state */
+    S_NO_CHANGE = 0,
+
+    S_WAIT_RESET,
+    S_POST_RESET_CLOSED,
+
+    /* init flow */
+    S_VERSION,
+    S_CAPS,
+    S_POST_INIT_WAIT,
+
+    S_IDLE,
+
+    /* balloon op flow */
+    S_BALLOON_POSTING,
+    S_BALLOON_RB_WAIT,
+    S_BALLOON_REPLY_WAIT,
+
+    /* unballoon + hot add ops flow */
+    S_UNBALLOON_POSTING,
+    S_UNBALLOON_RB_WAIT,
+    S_UNBALLOON_REPLY_WAIT,
+    S_HOT_ADD_SETUP,
+    S_HOT_ADD_RB_WAIT,
+    S_HOT_ADD_POSTING,
+    S_HOT_ADD_REPLY_WAIT,
+} State;
+
+/*
+ * A state together with its name as a string.  A desc whose state is
+ * S_NO_CHANGE has not been set yet (see HV_BALLOON_STATE_DESC_INIT).
+ */
+typedef struct StateDesc {
+    State state;
+    const char *desc;
+} StateDesc;
+
+/* Instance state of the hv-balloon device. */
+typedef struct HvBalloon {
+    VMBusDevice parent;
+    /* current state of the state machine */
+    State state;
+
+    union dm_version version;
+    union dm_caps caps;
+
+    QEMUTimer post_init_timer;
+
+    /* copied into hdr.trans_id of the requests posted to the guest */
+    unsigned int trans_id;
+
+    struct {
+        bool enabled;
+        bool received;
+        uint64_t committed;
+        uint64_t available;
+    } status_report;
+
+    /* Guest target size */
+    uint64_t target;
+    /* cleared by the idle state handler right before processing the target */
+    bool target_changed;
+
+    /*
+     * Current (un)balloon / hot-add operation parameters
+     *
+     * These members share storage: hot_add_range / ha_current_count overlap
+     * unballoon_diff / hot_add_diff, which in turn overlap balloon_diff.
+     */
+    union {
+        uint64_t balloon_diff;
+
+        struct {
+            uint64_t unballoon_diff;
+            uint64_t hot_add_diff;
+        };
+
+        struct {
+            PageRange hot_add_range;
+            uint64_t ha_current_count;
+        };
+    };
+
+    /* NULL until set up by hv_balloon_our_range_ensure() */
+    OurRangeMemslots *our_range;
+
+    /* Count of memslots covering our memory */
+    unsigned int memslot_count;
+
+    /* Nominal size of each memslot (the last one might be smaller) */
+    uint64_t memslot_size;
+
+    /* Non-ours removed memory */
+    PageRangeTree removed_guest, removed_both;
+
+    /* Grand totals of removed memory (both ours and non-ours) */
+    uint64_t removed_guest_ctr, removed_both_ctr;
+
+    /* MEMORY_DEVICE props */
+    uint64_t addr;
+    HostMemoryBackend *hostmem;
+    MemoryRegion *mr;
+} HvBalloon;
+
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \
+                                   { TYPE_MEMORY_DEVICE }, { })
+
+/*
+ * Switch @hvb to state @news immediately, passing the stringified state
+ * name along.  @news must not be S_NO_CHANGE.
+ */
+#define HV_BALLOON_SET_STATE(hvb, news)             \
+    do {                                            \
+        assert((news) != S_NO_CHANGE);              \
+        hv_balloon_state_set(hvb, news, # news);    \
+    } while (0)
+
+/*
+ * Record state @news, together with its stringified name, in the state
+ * descriptor @stdesc.
+ */
+#define HV_BALLOON_STATE_DESC_SET(stdesc, news)         \
+    _hv_balloon_state_desc_set(stdesc, news, # news)
+
+/* Initializer for a StateDesc that has no state set yet. */
+#define HV_BALLOON_STATE_DESC_INIT \
+    {                              \
+        .state = S_NO_CHANGE,      \
+    }
+
+/* Request type of this device: wraps a VMBus channel request, nothing else. */
+typedef struct HvBalloonReq {
+    VMBusChanReq vmreq;
+} HvBalloonReq;
+
+/*
+ * Total amount of our memory, counting the parts currently removed from the
+ * guest as well.  Returns 0 while our range has not been set up.
+ */
+static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon)
+{
+    return balloon->our_range ? balloon->our_range->range.added : 0;
+}
+
+/* TODO: unify the code below with virtio-balloon and cache the value */
+/*
+ * object_child_foreach() callback: prepend @obj to the GSList that @opaque
+ * points to if it is a realized PC-DIMM, then recurse into its children.
+ * Always returns 0.
+ */
+static int build_dimm_list(Object *obj, void *opaque)
+{
+    GSList **dimms = opaque;
+    DeviceState *dev;
+
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
+        dev = DEVICE(obj);
+        /* only realized DIMMs matter */
+        if (dev->realized) {
+            *dimms = g_slist_prepend(*dimms, dev);
+        }
+    }
+
+    object_child_foreach(obj, build_dimm_list, opaque);
+    return 0;
+}
+
+/*
+ * Returns the machine's ram_size plus the sizes of all realized devices
+ * found by build_dimm_list() whose exact type name is TYPE_PC_DIMM.
+ */
+static ram_addr_t get_current_ram_size(void)
+{
+    GSList *list = NULL, *item;
+    ram_addr_t size = current_machine->ram_size;
+
+    build_dimm_list(qdev_get_machine(), &list);
+    for (item = list; item; item = g_slist_next(item)) {
+        Object *obj = OBJECT(item->data);
+
+        /* exact type name match: derived types are not counted here */
+        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
+            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
+                                            &error_abort);
+        }
+    }
+    g_slist_free(list);
+
+    return size;
+}
+
+/*
+ * Total RAM in pages: the current RAM size plus our memory, saturating at
+ * the uint64_t maximum.  Memory currently removed from the guest is included.
+ */
+static uint64_t hv_balloon_total_ram(HvBalloon *balloon)
+{
+    uint64_t ram_pages = get_current_ram_size() >> HV_BALLOON_PFN_SHIFT;
+    uint64_t our_pages = hv_balloon_total_our_ram(balloon);
+
+    assert(ram_pages > 0);
+
+    return SUM_SATURATE_U64(ram_pages, our_pages);
+}
+
+/*
+ * Total number of removed pages, capped at @ram_size_pages.
+ *
+ * The RAM size is taken as a parameter because calculating it is a slow
+ * operation that should be avoided as much as possible.
+ */
+static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon,
+                                            uint64_t ram_size_pages)
+{
+    uint64_t removed = SUM_SATURATE_U64(balloon->removed_guest_ctr,
+                                        balloon->removed_both_ctr);
+
+    /* the sum can exceed RAM if the guest returns pages outside actual RAM */
+    return MIN(removed, ram_size_pages);
+}
+
+/*
+ * Move @balloon to state @newst and trace the change using @newststr.
+ * Nothing happens for S_NO_CHANGE or when @newst is already the current
+ * state.  Returns whether the state has actually changed.
+ */
+static bool hv_balloon_state_set(HvBalloon *balloon,
+                                 State newst, const char *newststr)
+{
+    bool changed = newst != S_NO_CHANGE && newst != balloon->state;
+
+    if (changed) {
+        balloon->state = newst;
+        trace_hv_balloon_state_change(newststr);
+    }
+
+    return changed;
+}
+
+/*
+ * Store state @newst and its name @newststr in @stdesc.  Use this through
+ * the HV_BALLOON_STATE_DESC_SET() macro, which supplies the name.
+ */
+static void _hv_balloon_state_desc_set(StateDesc *stdesc,
+                                       State newst, const char *newststr)
+{
+    /* a desc may be set only once, while it is still freshly initialized */
+    assert(stdesc->state == S_NO_CHANGE);
+
+    assert(newst != S_NO_CHANGE);
+
+    *stdesc = (StateDesc) {
+        .state = newst,
+        .desc = newststr,
+    };
+}
+
+/*
+ * Returns channel 0 of the device as reported by vmbus_device_channel().
+ * No check is done on the result here, hv_balloon_get_channel() is the
+ * variant that asserts it is not NULL.
+ */
+static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon)
+{
+    VMBusDevice *vdev = &balloon->parent;
+
+    return vmbus_device_channel(vdev, 0);
+}
+
+/* Returns channel 0 of the device, asserting that it is not NULL. */
+static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon)
+{
+    VMBusChannel *chan = hv_balloon_get_channel_maybe(balloon);
+
+    assert(chan != NULL);
+    return chan;
+}
+
+/*
+ * Send @msg (msg->hdr.size bytes) as an in-band data packet on @chan, using
+ * msg->hdr.trans_id as the transaction id.
+ *
+ * Returns the negative value from vmbus_channel_reserve() if reserving the
+ * space failed, the result of vmbus_channel_send() otherwise.
+ */
+static ssize_t hv_balloon_send_packet(VMBusChannel *chan,
+                                      struct dm_message *msg)
+{
+    int reserved = vmbus_channel_reserve(chan, 0, msg->hdr.size);
+
+    if (reserved >= 0) {
+        return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                                  NULL, 0, msg, msg->hdr.size, false,
+                                  msg->hdr.trans_id);
+    }
+
+    return reserved;
+}
+
+/*
+ * Pick the tree that the next unballoon request should take pages from.
+ *
+ * The trees are tried in this order: the global removed_guest tree, the
+ * global removed_both tree, then the removed-guest and removed-both trees
+ * of our range (if our range exists).
+ *
+ * On success stores the tree in @dtree, a pointer to the counter that
+ * accounts for it in @dctr, whether the tree belongs to our range in
+ * @is_our_range, and returns true.  Returns false, leaving the outputs
+ * untouched, if every candidate tree is empty.
+ */
+static bool hv_balloon_unballoon_get_source(HvBalloon *balloon,
+                                            PageRangeTree *dtree,
+                                            uint64_t **dctr,
+                                            bool *is_our_range)
+{
+    OurRange *our_range = OUR_RANGE(balloon->our_range);
+
+    /* Try the boot memory first */
+    if (g_tree_nnodes(balloon->removed_guest.t) > 0) {
+        *dtree = balloon->removed_guest;
+        *dctr = &balloon->removed_guest_ctr;
+        *is_our_range = false;
+        return true;
+    }
+
+    if (g_tree_nnodes(balloon->removed_both.t) > 0) {
+        *dtree = balloon->removed_both;
+        *dctr = &balloon->removed_both_ctr;
+        *is_our_range = false;
+        return true;
+    }
+
+    if (!our_range) {
+        return false;
+    }
+
+    if (!our_range_is_removed_tree_empty(our_range, false)) {
+        *dtree = our_range_get_removed_tree(our_range, false);
+        *dctr = &balloon->removed_guest_ctr;
+        *is_our_range = true;
+        return true;
+    }
+
+    if (!our_range_is_removed_tree_empty(our_range, true)) {
+        *dtree = our_range_get_removed_tree(our_range, true);
+        *dctr = &balloon->removed_both_ctr;
+        *is_our_range = true;
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * S_UNBALLOON_RB_WAIT handler: requests S_UNBALLOON_POSTING once the
+ * channel reservation for a single-range unballoon request succeeds;
+ * otherwise no state is requested.
+ */
+static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_unballoon_request *req;
+    size_t req_size = sizeof(*req) + sizeof(req->range_array[0]);
+
+    assert(balloon->state == S_UNBALLOON_RB_WAIT);
+
+    if (vmbus_channel_reserve(chan, 0, req_size) >= 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING);
+    }
+}
+
+/*
+ * S_UNBALLOON_POSTING handler: post one DM_UNBALLOON_REQUEST to the guest.
+ *
+ * Pops a single page range (no more than the outstanding unballoon_diff and
+ * at most HV_BALLOON_HA_CHUNK_PAGES pages) from the current source tree,
+ * reduces the matching counter and unballoon_diff by its size and sends the
+ * request.  The next state is S_UNBALLOON_RB_WAIT while more pages remain,
+ * S_UNBALLOON_REPLY_WAIT otherwise.  No state is requested if there is no
+ * source tree to take pages from.
+ *
+ * The request is heap-allocated (zero-filled) rather than placed on the
+ * stack with alloca(); g_autofree releases it on every return path.
+ */
+static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    PageRangeTree dtree;
+    uint64_t *dctr;
+    bool our_range;
+    g_autofree struct dm_unballoon_request *ur = NULL;
+    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);
+    PageRange range;
+    bool bret;
+    ssize_t ret;
+
+    assert(balloon->state == S_UNBALLOON_POSTING);
+    assert(balloon->unballoon_diff > 0);
+
+    if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) {
+        error_report("trying to unballoon but nothing seems to be ballooned");
+        /*
+         * there is little we can do as we might have already
+         * sent the guest a partial request we can't cancel
+         */
+        return;
+    }
+
+    assert(balloon->our_range || !our_range);
+    assert(dtree.t);
+    assert(dctr);
+
+    ur = g_malloc0(ur_size);
+    ur->hdr.type = DM_UNBALLOON_REQUEST;
+    ur->hdr.size = ur_size;
+    ur->hdr.trans_id = balloon->trans_id;
+
+    bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff,
+                                                      HV_BALLOON_HA_CHUNK_PAGES));
+    assert(bret);
+    /* TODO: madvise? */
+
+    *dctr -= range.count;
+    balloon->unballoon_diff -= range.count;
+
+    ur->range_count = 1;
+    ur->range_array[0].finfo.start_page = range.start;
+    ur->range_array[0].finfo.page_cnt = range.count;
+    ur->more_pages = balloon->unballoon_diff > 0;
+
+    trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id,
+                                        range.count, range.start,
+                                        balloon->unballoon_diff);
+
+    if (ur->more_pages) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
+    } else {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT);
+    }
+
+    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                             NULL, 0, ur, ur_size, false,
+                             ur->hdr.trans_id);
+    if (ret <= 0) {
+        error_report("error %zd when posting unballoon msg, expect problems",
+                     ret);
+    }
+}
+
+/*
+ * Make sure balloon->our_range exists, creating it from the memory backend
+ * on first use.
+ *
+ * Returns true if our range is available (already present or just created).
+ * Returns false if there is no memory backend, or if parts of the would-be
+ * range are already present in the removed_guest / removed_both trees; in
+ * the latter case an error is reported and the new range is dropped again.
+ */
+static bool hv_balloon_our_range_ensure(HvBalloon *balloon)
+{
+    uint64_t align;
+    MemoryRegion *hostmem_mr;
+    g_autoptr(OurRangeMemslots) our_range_memslots = NULL;
+    OurRange *our_range;
+
+    if (balloon->our_range) {
+        return true;
+    }
+
+    if (!balloon->hostmem) {
+        return false;
+    }
+
+    /* hot add alignment from the guest capabilities, in bytes */
+    align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB;
+    assert(QEMU_IS_ALIGNED(balloon->addr, align));
+
+    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
+
+    our_range_memslots = hvb_our_range_memslots_new(balloon->addr,
+                                                    balloon->mr, hostmem_mr,
+                                                    OBJECT(balloon),
+                                                    balloon->memslot_count,
+                                                    balloon->memslot_size);
+    our_range = OUR_RANGE(our_range_memslots);
+
+    if (hvb_page_range_tree_intree_any(balloon->removed_guest,
+                                       our_range->range.start,
+                                       our_range->range.count) ||
+        hvb_page_range_tree_intree_any(balloon->removed_both,
+                                       our_range->range.start,
+                                       our_range->range.count)) {
+        error_report("some parts of the memory backend were already returned by the guest. this should not happen, please reboot the guest and try again");
+        return false;
+    }
+
+    trace_hv_balloon_our_range_add(our_range->range.count,
+                                   our_range->range.start);
+
+    /* hand the range over so the g_autoptr cleanup does not release it */
+    balloon->our_range = g_steal_pointer(&our_range_memslots);
+    return true;
+}
+
+/*
+ * S_HOT_ADD_SETUP handler: choose the part of our range to hot add next.
+ *
+ * Fills balloon->hot_add_range with a range that starts at the remaining
+ * part of our range and whose size is the smaller of that remaining size and
+ * hot_add_diff, rounded down to the hot add alignment.  Requests
+ * S_HOT_ADD_RB_WAIT on success; requests S_IDLE if our range cannot be set
+ * up or the resulting count is zero.
+ */
+static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc)
+{
+    /* need to make copy since it is in union with hot_add_range */
+    uint64_t hot_add_diff = balloon->hot_add_diff;
+    PageRange *hot_add_range = &balloon->hot_add_range;
+    uint64_t align, our_range_remaining;
+    OurRange *our_range;
+
+    assert(balloon->state == S_HOT_ADD_SETUP);
+    assert(hot_add_diff > 0);
+
+    if (!hv_balloon_our_range_ensure(balloon)) {
+        goto ret_idle;
+    }
+
+    our_range = OUR_RANGE(balloon->our_range);
+
+    /* hot add alignment from the guest capabilities, in pages */
+    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
+        (MiB / HV_BALLOON_PAGE_SIZE);
+
+    /* Absolute GPA in pages */
+    hot_add_range->start = our_range_get_remaining_start(our_range);
+    assert(QEMU_IS_ALIGNED(hot_add_range->start, align));
+
+    our_range_remaining = our_range_get_remaining_size(our_range);
+    hot_add_range->count = MIN(our_range_remaining, hot_add_diff);
+    hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align);
+    if (hot_add_range->count == 0) {
+        goto ret_idle;
+    }
+
+    hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range,
+                                                    hot_add_range->count);
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
+    return;
+
+ret_idle:
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+}
+
+/*
+ * S_HOT_ADD_RB_WAIT handler: requests S_HOT_ADD_POSTING once the channel
+ * reservation for a hot add request succeeds; otherwise no state is
+ * requested.
+ */
+static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_hot_add *req;
+    size_t req_size = sizeof(*req) + sizeof(req->range);
+
+    assert(balloon->state == S_HOT_ADD_RB_WAIT);
+
+    if (vmbus_channel_reserve(chan, 0, req_size) >= 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING);
+    }
+}
+
+/*
+ * S_HOT_ADD_POSTING handler: post one DM_MEM_HOT_ADD_REQUEST to the guest.
+ *
+ * The request covers the start of balloon->hot_add_range, limited to one
+ * chunk (HV_BALLOON_HA_CHUNK_PAGES rounded down to the hot add alignment, or
+ * the alignment itself if that is larger).  The number of pages requested is
+ * stored in balloon->ha_current_count.  S_HOT_ADD_REPLY_WAIT is requested
+ * regardless of the send result.
+ *
+ * The request is heap-allocated (zero-filled) rather than placed on the
+ * stack with alloca(); g_autofree releases it on return.
+ */
+static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc)
+{
+    PageRange *hot_add_range = &balloon->hot_add_range;
+    uint64_t *current_count = &balloon->ha_current_count;
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    g_autofree struct dm_hot_add *ha = NULL;
+    size_t ha_size = sizeof(*ha) + sizeof(ha->range);
+    union dm_mem_page_range *ha_region;
+    uint64_t align, chunk_max_size;
+    ssize_t ret;
+
+    assert(balloon->state == S_HOT_ADD_POSTING);
+    assert(hot_add_range->count > 0);
+
+    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
+        (MiB / HV_BALLOON_PAGE_SIZE);
+    if (align >= HV_BALLOON_HA_CHUNK_PAGES) {
+        /*
+         * If the required alignment is higher than the chunk size we let it
+         * override that size.
+         */
+        chunk_max_size = align;
+    } else {
+        chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align);
+    }
+
+    /*
+     * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(),
+     * then it is either reduced by subtracting aligned current_count or
+     * further hot-adds are prevented by marking the whole remaining our range
+     * as unusable in hv_balloon_handle_hot_add_response().
+     */
+    *current_count = MIN(hot_add_range->count, chunk_max_size);
+
+    ha = g_malloc0(ha_size);
+    /* the region descriptor sits right behind ha->range in the message */
+    ha_region = &(&ha->range)[1];
+    ha->hdr.type = DM_MEM_HOT_ADD_REQUEST;
+    ha->hdr.size = ha_size;
+    ha->hdr.trans_id = balloon->trans_id;
+
+    ha->range.finfo.start_page = hot_add_range->start;
+    ha->range.finfo.page_cnt = *current_count;
+    ha_region->finfo.start_page = hot_add_range->start;
+    ha_region->finfo.page_cnt = ha->range.finfo.page_cnt;
+
+    trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id,
+                                      *current_count, hot_add_range->start);
+
+    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                             NULL, 0, ha, ha_size, false,
+                             ha->hdr.trans_id);
+    if (ret <= 0) {
+        error_report("error %zd when posting hot add msg, expect problems",
+                     ret);
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT);
+}
+
+/*
+ * S_BALLOON_RB_WAIT handler: requests S_BALLOON_POSTING once the channel
+ * reservation for a balloon request succeeds; otherwise no state is
+ * requested.
+ */
+static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    size_t req_size = sizeof(struct dm_balloon);
+
+    assert(balloon->state == S_BALLOON_RB_WAIT);
+
+    if (vmbus_channel_reserve(chan, 0, req_size) >= 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING);
+    }
+}
+
+/*
+ * S_BALLOON_POSTING handler: post one DM_BALLOON_REQUEST asking for the
+ * outstanding balloon_diff, limited to HV_BALLOON_HR_CHUNK_PAGES pages.
+ * S_BALLOON_REPLY_WAIT is requested regardless of the send result.
+ */
+static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan = hv_balloon_get_channel(balloon);
+    struct dm_balloon req;
+    ssize_t sent;
+
+    assert(balloon->state == S_BALLOON_POSTING);
+    assert(balloon->balloon_diff > 0);
+
+    /* clear the whole message, not only the fields assigned below */
+    memset(&req, 0, sizeof(req));
+    req.hdr.type = DM_BALLOON_REQUEST;
+    req.hdr.size = sizeof(req);
+    req.hdr.trans_id = balloon->trans_id;
+    req.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES);
+
+    trace_hv_balloon_outgoing_balloon(req.hdr.trans_id, req.num_pages,
+                                      balloon->balloon_diff);
+
+    sent = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
+                              NULL, 0, &req, sizeof(req), false,
+                              req.hdr.trans_id);
+    if (sent <= 0) {
+        error_report("error %zd when posting balloon msg, expect problems",
+                     sent);
+    }
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT);
+}
+
+/*
+ * Compare the balloon target with the current size (total RAM minus removed
+ * pages) and start the matching adjustment, if any:
+ *  - target above current: unballoon as much as was removed and, if the
+ *    guest can hot add, hot add the rest;
+ *  - target below current and the guest can balloon: balloon the difference.
+ * No state is requested when there is nothing to do.
+ */
+static void hv_balloon_idle_state_process_target(HvBalloon *balloon,
+                                                 StateDesc *stdesc)
+{
+    bool can_balloon = balloon->caps.cap_bits.balloon;
+    uint64_t ram_size_pages = hv_balloon_total_ram(balloon);
+    uint64_t total_removed = hv_balloon_total_removed_rs(balloon,
+                                                         ram_size_pages);
+    uint64_t current_pages = ram_size_pages - total_removed;
+
+    /*
+     * we need to cache the values computed from the balloon target value when
+     * starting the adjustment procedure in case someone changes the target when
+     * the procedure is in progress
+     */
+    if (balloon->target > current_pages) {
+        uint64_t target_diff = balloon->target - current_pages;
+
+        balloon->unballoon_diff = MIN(target_diff, total_removed);
+        balloon->hot_add_diff = balloon->caps.cap_bits.hot_add ?
+            target_diff - balloon->unballoon_diff : 0;
+
+        if (balloon->unballoon_diff > 0) {
+            assert(can_balloon);
+            HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
+        } else if (balloon->hot_add_diff > 0) {
+            HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
+        }
+
+        return;
+    }
+
+    if (can_balloon && balloon->target < current_pages) {
+        balloon->balloon_diff = current_pages - balloon->target;
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
+    }
+}
+
+/*
+ * S_IDLE handler: if the target has changed, clear the flag and process the
+ * new target; otherwise do nothing.
+ */
+static void hv_balloon_idle_state(HvBalloon *balloon,
+                                  StateDesc *stdesc)
+{
+    assert(balloon->state == S_IDLE);
+
+    if (!balloon->target_changed) {
+        return;
+    }
+
+    balloon->target_changed = false;
+    hv_balloon_idle_state_process_target(balloon, stdesc);
+}
+
+/*
+ * Per-state handlers, indexed by State.  States without an entry here have
+ * a NULL handler, for which hv_balloon_handle_state() does nothing.
+ */
+static const struct {
+    void (*handler)(HvBalloon *balloon, StateDesc *stdesc);
+} state_handlers[] = {
+    [S_IDLE].handler = hv_balloon_idle_state,
+    [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting,
+    [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait,
+    [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting,
+    [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait,
+    [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup,
+    [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait,
+    [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting,
+};
+
+/*
+ * Run the handler registered for the current state, if there is one.
+ * States beyond the handler table or without a handler are ignored.
+ */
+static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc)
+{
+    void (*handler)(HvBalloon *balloon, StateDesc *stdesc) = NULL;
+
+    if (balloon->state < ARRAY_SIZE(state_handlers)) {
+        handler = state_handlers[balloon->state].handler;
+    }
+
+    if (handler) {
+        handler(balloon, stdesc);
+    }
+}
+
+/*
+ * Insert @range into @tree and add the number of effectively inserted pages
+ * (the range size minus the duplicate count reported by the tree insert) to
+ * *@ctr1, *@ctr2 and, if it is not NULL, *@ctr3.  An empty range is ignored.
+ */
+static void hv_balloon_remove_response_insert_range(PageRangeTree tree,
+                                                    const PageRange *range,
+                                                    uint64_t *ctr1,
+                                                    uint64_t *ctr2,
+                                                    uint64_t *ctr3)
+{
+    uint64_t dups = 0;
+    uint64_t inserted;
+
+    if (range->count == 0) {
+        return;
+    }
+
+    hvb_page_range_tree_insert(tree, range->start, range->count, &dups);
+    assert(dups <= range->count);
+
+    inserted = range->count - dups;
+    *ctr1 += inserted;
+    *ctr2 += inserted;
+    if (ctr3) {
+        *ctr3 += inserted;
+    }
+}
+
+/*
+ * Record the removed page range @range in the appropriate tree(s).
+ *
+ * The part of @range that intersects our range (if there is one) goes into
+ * the removed tree of our range; whatever lies before or after it goes into
+ * the global removed_both or removed_guest tree, as selected by @both.  The
+ * matching global counter and *@removedctr are increased by the number of
+ * effectively inserted pages.
+ *
+ * @range is consumed: its count is 0 when this function returns.
+ */
+static void hv_balloon_remove_response_handle_range(HvBalloon *balloon,
+                                                    PageRange *range,
+                                                    bool both,
+                                                    uint64_t *removedctr)
+{
+    OurRange *our_range = OUR_RANGE(balloon->our_range);
+    PageRangeTree globaltree =
+        both ? balloon->removed_both : balloon->removed_guest;
+    uint64_t *globalctr =
+        both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr;
+    PageRange rangeeff;
+
+    if (range->count == 0) {
+        return;
+    }
+
+    trace_hv_balloon_remove_response(range->count, range->start, both);
+
+    if (our_range) {
+        /* Includes the not-yet-hot-added and unusable parts. */
+        rangeeff = our_range->range;
+    } else {
+        /* no our range: use an empty range for the intersection test below */
+        rangeeff.start = rangeeff.count = 0;
+    }
+
+    if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) {
+        PageRangeTree ourtree = our_range_get_removed_tree(our_range, both);
+        PageRange rangehole, rangecommon;
+        uint64_t ourremoved = 0;
+
+        /* process the hole before our range, if it exists */
+        page_range_part_before(range, rangeeff.start, &rangehole);
+        hv_balloon_remove_response_insert_range(globaltree, &rangehole,
+                                                globalctr, removedctr, NULL);
+        if (rangehole.count > 0) {
+            trace_hv_balloon_remove_response_hole(rangehole.count,
+                                                  rangehole.start,
+                                                  range->count, range->start,
+                                                  rangeeff.start, both);
+        }
+
+        /* process our part */
+        page_range_intersect(range, rangeeff.start, rangeeff.count,
+                             &rangecommon);
+        hv_balloon_remove_response_insert_range(ourtree, &rangecommon,
+                                                globalctr, removedctr,
+                                                &ourremoved);
+        if (rangecommon.count > 0) {
+            trace_hv_balloon_remove_response_common(rangecommon.count,
+                                                    rangecommon.start,
+                                                    range->count, range->start,
+                                                    rangeeff.count,
+                                                    rangeeff.start, ourremoved,
+                                                    both);
+        }
+
+        /* calculate what's left after our range */
+        /* rangecommon is reused as a copy of the input, range gets the result */
+        rangecommon = *range;
+        page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count,
+                              range);
+    }
+
+    /* process the remainder of the range that lies after our range */
+    if (range->count > 0) {
+        hv_balloon_remove_response_insert_range(globaltree, range,
+                                                globalctr, removedctr, NULL);
+        trace_hv_balloon_remove_response_remainder(range->count, range->start,
+                                                   both);
+        range->count = 0;
+    }
+}
+
+/*
+ * Add the pages described by @start / @count to the pending @range.
+ *
+ * If @range is not empty and the new pages cannot be joined to it, @range is
+ * first handed to hv_balloon_remove_response_handle_range(); the new pages
+ * then either start a fresh range or extend the existing one on its left or
+ * right side.
+ */
+static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon,
+                                                    PageRange *range,
+                                                    uint64_t start,
+                                                    uint64_t count,
+                                                    bool both,
+                                                    uint64_t *removedctr)
+{
+    assert(count > 0);
+
+    /*
+     * if there is an existing range that the new range can't be joined to
+     * dump it into tree(s)
+     */
+    if (range->count > 0 && !page_range_joinable(range, start, count)) {
+        hv_balloon_remove_response_handle_range(balloon, range, both,
+                                                removedctr);
+    }
+
+    if (range->count == 0) {
+        /* nothing pending: the new pages become the range */
+        range->start = start;
+        range->count = count;
+        return;
+    }
+
+    /* joining on the left moves the start, joining on the right does not */
+    if (page_range_joinable_left(range, start, count)) {
+        range->start = start;
+    }
+    range->count += count;
+}
+
+/*
+ * g_tree_foreach() callback: discard the host memory behind one range of a
+ * host address tree.
+ *
+ * @value is a PageRange whose start and count are host addresses expressed
+ * in units of HV_BALLOON_PAGE_SIZE.  @key and @data are not used.
+ *
+ * Pages whose RAM block page size differs from HV_BALLOON_PAGE_SIZE are
+ * skipped with a warning; a failed discard only produces a warning, too.
+ * Always returns false, so the traversal is never stopped early.
+ */
+static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key,
+                                                        gpointer value,
+                                                        gpointer data)
+{
+    PageRange *range = value;
+    uint64_t pageoff;
+
+    for (pageoff = 0; pageoff < range->count; ) {
+        uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE;
+        void *addr;
+        RAMBlock *rb;
+        ram_addr_t rb_offset;
+        size_t rb_page_size;
+        size_t discard_size;
+
+        assert(addr_64 <= UINTPTR_MAX);
+        addr = (void *)((uintptr_t)addr_64);
+        rb = qemu_ram_block_from_host(addr, false, &rb_offset);
+        rb_page_size = qemu_ram_pagesize(rb);
+
+        if (rb_page_size != HV_BALLOON_PAGE_SIZE) {
+            /* TODO: these should end in "removed_guest" */
+            warn_report("guest reported removed page backed by unsupported page size %zu",
+                        rb_page_size);
+            pageoff++;
+            continue;
+        }
+
+        /* discard_size is in pages: do not run past the end of the block */
+        discard_size = MIN(range->count - pageoff,
+                           (rb->max_length - rb_offset) /
+                           HV_BALLOON_PAGE_SIZE);
+        /* always advance by at least one page so the loop makes progress */
+        discard_size = MAX(discard_size, 1);
+
+        if (ram_block_discard_range(rb, rb_offset, discard_size *
+                                    HV_BALLOON_PAGE_SIZE) != 0) {
+            warn_report("guest reported removed page failed discard");
+        }
+
+        pageoff += discard_size;
+    }
+
+    return false;
+}
+
+/*
+ * Run hv_balloon_handle_remove_host_addr_node() on every node of the host
+ * address tree @tree.
+ */
+static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree)
+{
+    g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, NULL);
+}
+
+/*
+ * Insert @count pages starting at the host address that backs @section into
+ * the host address tree @tree (in units of HV_BALLOON_PAGE_SIZE).
+ *
+ * Returns 0 on success, -EINVAL (after a warning) if the host address is
+ * not aligned to HV_BALLOON_PAGE_SIZE.
+ */
+static int hv_balloon_handle_remove_section(PageRangeTree tree,
+                                            const MemoryRegionSection *section,
+                                            uint64_t count)
+{
+    void *host = memory_region_get_ram_ptr(section->mr) +
+        section->offset_within_region;
+    uintptr_t host_int = (uintptr_t)host;
+
+    assert(count > 0);
+
+    if (host_int % HV_BALLOON_PAGE_SIZE != 0) {
+        warn_report("guest reported removed pages at an unaligned host addr %p",
+                    host);
+        return -EINVAL;
+    }
+
+    hvb_page_range_tree_insert(tree, host_int / HV_BALLOON_PAGE_SIZE, count,
+                               NULL);
+    return 0;
+}
+
+/*
+ * Process @count page ranges that the guest reported as removed.
+ *
+ * Each range is walked section by section.  Pages found in ordinary RAM at
+ * a page-aligned host address are collected in a temporary host address
+ * tree and accounted as removed "both"; all other pages (not found, not
+ * ordinary RAM, unaligned, ...) are only accounted as removed "guest".
+ * Consecutive pages of the same kind are merged into one range before being
+ * recorded.  Afterwards the host memory collected in the temporary tree is
+ * discarded and balloon_diff is reduced by the number of newly removed pages
+ * (clamping at 0, with a warning, if the guest removed more than pending).
+ */
+static void hv_balloon_handle_remove_ranges(HvBalloon *balloon,
+                                            union dm_mem_page_range ranges[],
+                                            uint32_t count)
+{
+    uint64_t removedcnt;
+    PageRangeTree removed_host_addr;
+    PageRange range_guest, range_both;
+
+    hvb_page_range_tree_init(&removed_host_addr);
+    range_guest.count = range_both.count = removedcnt = 0;
+    for (unsigned int ctr = 0; ctr < count; ctr++) {
+        union dm_mem_page_range *mr = &ranges[ctr];
+        hwaddr pa;
+        MemoryRegionSection section;
+
+        for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) {
+            int ret;
+            uint64_t pageno = mr->finfo.start_page + offset;
+            /* default step: a single page, used when no section is found */
+            uint64_t pagecnt = 1;
+
+            pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT;
+            section = memory_region_find(get_system_memory(), pa,
+                                         (mr->finfo.page_cnt - offset) *
+                                         HV_BALLOON_PAGE_SIZE);
+            if (!section.mr) {
+                warn_report("guest reported removed page %"PRIu64" not found in RAM",
+                            pageno);
+                ret = -EINVAL;
+                goto finish_page;
+            }
+
+            pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE;
+            if (pagecnt <= 0) {
+                warn_report("guest reported removed page %"PRIu64" in a section smaller than page size",
+                            pageno);
+                pagecnt = 1; /* skip the whole page */
+                ret = -EINVAL;
+                goto finish_page;
+            }
+
+            if (!memory_region_is_ram(section.mr) ||
+                memory_region_is_rom(section.mr) ||
+                memory_region_is_romd(section.mr)) {
+                warn_report("guest reported removed page %"PRIu64" in a section that is not an ordinary RAM",
+                            pageno);
+                ret = -EINVAL;
+                goto finish_page;
+            }
+
+            ret = hv_balloon_handle_remove_section(removed_host_addr, &section,
+                                                   pagecnt);
+
+        finish_page:
+            /* ret == 0: pages go to the "both" range, otherwise to "guest" */
+            if (ret == 0) {
+                hv_balloon_remove_response_handle_pages(balloon,
+                                                        &range_both,
+                                                        pageno, pagecnt,
+                                                        true, &removedcnt);
+            } else {
+                hv_balloon_remove_response_handle_pages(balloon,
+                                                        &range_guest,
+                                                        pageno, pagecnt,
+                                                        false, &removedcnt);
+            }
+
+            /* drop the reference taken by memory_region_find() */
+            if (section.mr) {
+                memory_region_unref(section.mr);
+            }
+
+            offset += pagecnt;
+        }
+    }
+
+    /* flush whatever is still pending in the two accumulated ranges */
+    hv_balloon_remove_response_handle_range(balloon, &range_both, true,
+                                            &removedcnt);
+    hv_balloon_remove_response_handle_range(balloon, &range_guest, false,
+                                            &removedcnt);
+
+    hv_balloon_handle_remove_host_addr_tree(removed_host_addr);
+    hvb_page_range_tree_destroy(&removed_host_addr);
+
+    if (removedcnt > balloon->balloon_diff) {
+        warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")",
+                    removedcnt, balloon->balloon_diff);
+        balloon->balloon_diff = 0;
+    } else {
+        balloon->balloon_diff -= removedcnt;
+    }
+}
+
+static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize,
+                                       const char *msgname)
+{
+    /* Length of the received message, as recorded in the VMBus request. */
+    uint32_t got = req->vmreq.msglen;
+
+    if (got < minsize) {
+        /* Too short for the expected structure: warn and tell caller. */
+        warn_report("%s message too short (%u vs %zu), ignoring", msgname,
+                    (unsigned int)got, minsize);
+        return false;
+    }
+    return true;
+}
+
+static void hv_balloon_handle_version_request(HvBalloon *balloon,
+                                              HvBalloonReq *req,
+                                              StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_version_request *msg = vmreq->msg;
+    struct dm_version_response resp;
+
+    /* The version handshake is only expected in the S_VERSION state. */
+    if (balloon->state != S_VERSION) {
+        warn_report("unexpected DM_VERSION_REQUEST in %d state",
+                    balloon->state);
+        return;
+    }
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msg),
+                                    "DM_VERSION_REQUEST")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_version(msg->version.major_version,
+                                      msg->version.minor_version);
+    /* Zeroed reply echoing the trans_id; VERSION_1.._3 are accepted. */
+    memset(&resp, 0, sizeof(resp));
+    resp.hdr.type = DM_VERSION_RESPONSE;
+    resp.hdr.size = sizeof(resp);
+    resp.hdr.trans_id = msg->hdr.trans_id;
+    resp.is_accepted = msg->version.version >= DYNMEM_PROTOCOL_VERSION_1 &&
+        msg->version.version <= DYNMEM_PROTOCOL_VERSION_3;
+    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&resp);
+
+    /* Only an accepted version advances the state machine to S_CAPS. */
+    if (resp.is_accepted) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS);
+    }
+}
+
+static void hv_balloon_handle_caps_report(HvBalloon *balloon,
+                                          HvBalloonReq *req,
+                                          StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_capabilities *msg = vmreq->msg;
+    struct dm_capabilities_resp_msg resp;
+
+    if (balloon->state != S_CAPS) {
+        warn_report("unexpected DM_CAPABILITIES_REPORT in %d state",
+                    balloon->state);
+        return;
+    }
+    /* Reject reports too short to hold struct dm_capabilities. */
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msg),
+                                    "DM_CAPABILITIES_REPORT")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_caps(msg->caps.caps);
+    balloon->caps = msg->caps;
+    /* Always accept; pressure reports are suppressed unless enabled. */
+    memset(&resp, 0, sizeof(resp));
+    resp.hdr.type = DM_CAPABILITIES_RESPONSE;
+    resp.hdr.size = sizeof(resp);
+    resp.hdr.trans_id = msg->hdr.trans_id;
+    resp.is_accepted = 1;
+    resp.hot_remove = 1;
+    resp.suppress_pressure_reports = !balloon->status_report.enabled;
+    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&resp);
+    /* Arm the post-init timer that ends the S_POST_INIT_WAIT state. */
+    timer_mod(&balloon->post_init_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+              HV_BALLOON_POST_INIT_WAIT);
+
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT);
+}
+
+static void hv_balloon_handle_status_report(HvBalloon *balloon,
+                                            HvBalloonReq *req)
+{
+    struct dm_status *report = req->vmreq.msg;
+
+    /* Drop undersized messages (with a warning) before reading them. */
+    if (!hv_balloon_handle_msg_size(req, sizeof(*report),
+                                    "DM_STATUS_REPORT")) {
+        return;
+    }
+    /* Reports are ignored unless the "status-report" property is set. */
+    if (!balloon->status_report.enabled) {
+        return;
+    }
+
+    /* Store the guest's counts, scaled in place by HV_BALLOON_PAGE_SIZE. */
+    balloon->status_report.committed = report->num_committed;
+    balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE;
+    balloon->status_report.available = report->num_avail;
+    balloon->status_report.available *= HV_BALLOON_PAGE_SIZE;
+    balloon->status_report.received = true;
+    qapi_event_send_hv_balloon_status_report(balloon->status_report.committed,
+                                             balloon->status_report.available);
+}
+
+HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
+{
+    HvBalloonInfo *result;
+    HvBalloon *dev;
+
+    dev = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL));
+    if (dev == NULL) {
+        error_setg(errp, "no %s device present", TYPE_HV_BALLOON);
+        return NULL;
+    }
+    /* Status reporting has to be switched on for the device... */
+    if (!dev->status_report.enabled) {
+        error_setg(errp, "guest memory status reporting not enabled");
+        return NULL;
+    }
+    /* ...and the guest must have delivered at least one report. */
+    if (!dev->status_report.received) {
+        error_setg(errp, "no guest memory status report received yet");
+        return NULL;
+    }
+
+    result = g_new0(HvBalloonInfo, 1);
+    result->committed = dev->status_report.committed;
+    result->available = dev->status_report.available;
+    return result;
+}
+
+static void hv_balloon_handle_unballoon_response(HvBalloon *balloon,
+                                                 HvBalloonReq *req,
+                                                 StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_unballoon_response *msgUrR = vmreq->msg;
+    /* Handle the guest's reply to an unballoon request we are waiting on. */
+    if (balloon->state != S_UNBALLOON_REPLY_WAIT) {
+        warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state",
+                    balloon->state);
+        return;
+    }
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR),
+                                    "DM_UNBALLOON_RESPONSE")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id);
+    balloon->trans_id++;
+    /* Continue with a pending hot add, otherwise go back to idle. */
+    if (balloon->hot_add_diff > 0) {
+        bool can_hot_add = balloon->caps.cap_bits.hot_add;
+
+        assert(can_hot_add);
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
+    } else {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+    }
+}
+
+static void hv_balloon_handle_hot_add_response(HvBalloon *balloon,
+                                               HvBalloonReq *req,
+                                               StateDesc *stdesc)
+{
+    PageRange *hot_add_range = &balloon->hot_add_range;
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_hot_add_response *msgHaR = vmreq->msg;
+    OurRange *our_range;
+    /* Handle the guest's reply to a hot-add request we are waiting on. */
+    if (balloon->state != S_HOT_ADD_REPLY_WAIT) {
+        warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state",
+                    balloon->state);
+        return;
+    }
+    /* A hot add can only be in flight once our range has been set up. */
+    assert(balloon->our_range);
+    our_range = OUR_RANGE(balloon->our_range);
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR),
+                                    "DM_HOT_ADD_RESPONSE")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result,
+                                      msgHaR->page_count);
+    balloon->trans_id++;
+    /* Non-zero result: account the pages the guest reports as added. */
+    if (msgHaR->result) {
+        if (msgHaR->page_count > balloon->ha_current_count) {
+            warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")",
+                        msgHaR->page_count, balloon->ha_current_count);
+            msgHaR->page_count = balloon->ha_current_count;
+        }
+        /* Move the planned range past the pages that were just added. */
+        hvb_our_range_mark_added(our_range, msgHaR->page_count);
+        hot_add_range->start += msgHaR->page_count;
+        hot_add_range->count -= msgHaR->page_count;
+    }
+
+    if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) {
+        /*
+         * the current planned range was only partially hot-added, take note
+         * how much of it remains and don't attempt any further hot adds
+         */
+        our_range_mark_remaining_unusable(our_range);
+
+        goto ret_idle;
+    }
+
+    /* any pages remaining to hot-add in our range? */
+    if (hot_add_range->count > 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
+        return;
+    }
+
+ret_idle:
+    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+}
+
+static void hv_balloon_handle_balloon_response(HvBalloon *balloon,
+                                               HvBalloonReq *req,
+                                               StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_balloon_response *msgBR = vmreq->msg;
+    /* Handle one (possibly partial) guest reply to our balloon request. */
+    if (balloon->state != S_BALLOON_REPLY_WAIT) {
+        warn_report("unexpected DM_BALLOON_RESPONSE in %d state",
+                    balloon->state);
+        return;
+    }
+
+    if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR),
+                                    "DM_BALLOON_RESPONSE")) {
+        return;
+    }
+
+    trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count,
+                                      msgBR->more_pages);
+    if (vmreq->msglen < sizeof(*msgBR) +
+        (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) {
+        warn_report("DM_BALLOON_RESPONSE too short for the range count");
+        return;
+    }
+
+    if (msgBR->range_count == 0) {
+        /* The guest is already at its minimum size */
+        balloon->balloon_diff = 0;
+        goto ret_end_trans;
+    } else {
+        hv_balloon_handle_remove_ranges(balloon,
+                                        msgBR->range_array,
+                                        msgBR->range_count);
+    }
+
+    /* More responses expected? */
+    if (msgBR->more_pages) {
+        return;
+    }
+
+ret_end_trans:
+    balloon->trans_id++;
+    /* Keep ballooning while part of the requested amount is pending. */
+    if (balloon->balloon_diff > 0) {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
+    } else {
+        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
+    }
+}
+
+static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req,
+                                     StateDesc *stdesc)
+{
+    VMBusChanReq *vmreq = &req->vmreq;
+    struct dm_message *msg = vmreq->msg;
+    /* Messages too short to carry a header are dropped without a warning. */
+    if (vmreq->msglen < sizeof(msg->hdr)) {
+        return;
+    }
+    /* Dispatch on the type; every handler checks the full size itself. */
+    switch (msg->hdr.type) {
+    case DM_VERSION_REQUEST:
+        hv_balloon_handle_version_request(balloon, req, stdesc);
+        break;
+
+    case DM_CAPABILITIES_REPORT:
+        hv_balloon_handle_caps_report(balloon, req, stdesc);
+        break;
+
+    case DM_STATUS_REPORT:
+        hv_balloon_handle_status_report(balloon, req);
+        break;
+
+    case DM_MEM_HOT_ADD_RESPONSE:
+        hv_balloon_handle_hot_add_response(balloon, req, stdesc);
+        break;
+
+    case DM_UNBALLOON_RESPONSE:
+        hv_balloon_handle_unballoon_response(balloon, req, stdesc);
+        break;
+
+    case DM_BALLOON_RESPONSE:
+        hv_balloon_handle_balloon_response(balloon, req, stdesc);
+        break;
+
+    default:
+        warn_report("unknown DM message %u", msg->hdr.type);
+        break;
+    }
+}
+
+static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc)
+{
+    VMBusChannel *chan;
+    HvBalloonReq *req;
+    /* Nothing is received in the two reset-related states. */
+    if (balloon->state == S_WAIT_RESET ||
+        balloon->state == S_POST_RESET_CLOSED) {
+        return false;
+    }
+
+    chan = hv_balloon_get_channel(balloon);
+    if (vmbus_channel_recv_start(chan)) {
+        return false;
+    }
+    /* Handle packets one by one until a handler requests a state change. */
+    while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
+        hv_balloon_handle_packet(balloon, req, stdesc);
+        vmbus_free_req(req);
+        vmbus_channel_recv_pop(chan);
+        /* Stop so the caller can apply the requested state change first. */
+        if (stdesc->state != S_NO_CHANGE) {
+            break;
+        }
+    }
+
+    return vmbus_channel_recv_done(chan) > 0;
+}
+
+/* old state handler -> new state transition (potential) */
+static bool hv_balloon_event_loop_state(HvBalloon *balloon)
+{
+    StateDesc next = HV_BALLOON_STATE_DESC_INIT;
+    /* Let the state handler run, then apply whatever it asked for. */
+    hv_balloon_handle_state(balloon, &next);
+    return hv_balloon_state_set(balloon, next.state, next.desc);
+}
+
+/* VMBus message -> new state transition (potential) */
+static bool hv_balloon_event_loop_recv(HvBalloon *balloon)
+{
+    StateDesc next = HV_BALLOON_STATE_DESC_INIT;
+    bool received, changed;
+
+    /* Process pending packets; a handler may request a new state. */
+    received = hv_balloon_recv_channel(balloon, &next);
+    changed = hv_balloon_state_set(balloon, next.state, next.desc);
+    /* Ask the caller to iterate again if either step made progress. */
+    return received || changed;
+}
+
+static void hv_balloon_event_loop(HvBalloon *balloon)
+{
+    bool again;
+    /* Both steps run on every pass; loop until neither made progress. */
+    do {
+        again = hv_balloon_event_loop_state(balloon);
+        again |= hv_balloon_event_loop_recv(balloon);
+    } while (again);
+}
+
+static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan)
+{
+    /* A channel notification just kicks the owning device's event loop. */
+    HvBalloon *owner = HV_BALLOON(vmbus_channel_device(chan));
+    hv_balloon_event_loop(owner);
+}
+
+static void hv_balloon_stat(void *opaque, BalloonInfo *info)
+{
+    HvBalloon *balloon = opaque; /* the device registered in realize */
+    info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr)
+        << HV_BALLOON_PFN_SHIFT; /* total minus removed, PFN-shifted */
+}
+
+static void hv_balloon_to_target(void *opaque, ram_addr_t target)
+{
+    uint64_t pages = target >> HV_BALLOON_PFN_SHIFT;
+    HvBalloon *dev = opaque;
+
+    /* A target that rounds down to zero pages is ignored. */
+    if (pages == 0) {
+        return;
+    }
+
+    /*
+     * Flag the target as changed even if it equals the previous one: the
+     * user may simply want another attempt at reaching it.
+     */
+    dev->target_changed = true;
+    dev->target = pages;
+    hv_balloon_event_loop(dev);
+}
+
+static int hv_balloon_vmdev_open_channel(VMBusChannel *chan)
+{
+    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
+
+    /* Opening is only legal from the closed state entered by a reset. */
+    if (balloon->state == S_POST_RESET_CLOSED) {
+        HV_BALLOON_SET_STATE(balloon, S_VERSION);
+        hv_balloon_event_loop(balloon);
+        return 0;
+    }
+
+    warn_report("guest trying to open a DM channel in invalid %d state",
+                balloon->state);
+    return -EINVAL;
+}
+
+static void hv_balloon_vmdev_close_channel(VMBusChannel *chan)
+{
+    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
+    /* Cancel the post-init timer in case it is still pending. */
+    timer_del(&balloon->post_init_timer);
+
+    /* Don't report stale data */
+    balloon->status_report.received = false;
+    /* Park in S_WAIT_RESET; no packets are received in that state. */
+    HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET);
+    hv_balloon_event_loop(balloon);
+}
+
+static void hv_balloon_post_init_timer(void *opaque)
+{
+    HvBalloon *balloon = opaque;
+
+    /* Expirations arriving after the state has moved on are ignored. */
+    if (balloon->state == S_POST_INIT_WAIT) {
+        /* The post-init wait is over: the device becomes idle. */
+        HV_BALLOON_SET_STATE(balloon, S_IDLE);
+        hv_balloon_event_loop(balloon);
+    }
+}
+
+static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon)
+{ /* shared by system reset and unrealize: free our_range, set it to NULL */
+    g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free);
+}
+
+static void hv_balloon_system_reset(void *opaque)
+{
+    HvBalloon *balloon = HV_BALLOON(opaque);
+    /* Reset handler registered in realize via qemu_register_reset(). */
+    hv_balloon_system_reset_unrealize_common(balloon);
+}
+
+static void hv_balloon_ensure_mr(HvBalloon *balloon)
+{
+    MemoryRegion *hostmem_mr;
+    /* Create balloon->mr on first use; a memory backend is required. */
+    assert(balloon->hostmem);
+
+    if (balloon->mr) {
+        return;
+    }
+    /* The new region gets the same size as the backend's memory region. */
+    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
+
+    balloon->mr = g_new0(MemoryRegion, 1);
+    memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
+                       memory_region_size(hostmem_mr));
+
+    /*
+     * The VM can indicate an alignment up to 32 GiB. Memory device core can
+     * usually only handle/guarantee 1 GiB alignment. The user will have to
+     * specify a larger maxmem eventually.
+     *
+     * The memory device core will warn the user in case maxmem might have to be
+     * increased and will fail plugging the device if there is not sufficient
+     * space after alignment.
+     *
+     * TODO: we could do the alignment ourselves in a slightly bigger region.
+     * But this feels better, although the warning might be annoying. Maybe
+     * we can optimize that in the future (e.g., with such a device on the
+     * cmdline place/size the device memory region differently).
+     */
+    balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
+}
+
+static void hv_balloon_free_mr(HvBalloon *balloon)
+{
+    /* No-op when the region was never created or is already gone. */
+    if (balloon->mr) {
+        /* Detach the region object before releasing its storage. */
+        object_unparent(OBJECT(balloon->mr));
+        g_clear_pointer(&balloon->mr, g_free);
+    }
+}
+
+static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
+{
+    ERRP_GUARD();
+    HvBalloon *balloon = HV_BALLOON(vdev);
+    int ret;
+
+    balloon->state = S_WAIT_RESET;
+    /* Registering as the balloon handler fails if one is already set. */
+    ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat,
+                                   balloon);
+    if (ret < 0) {
+        /* This also protects against having multiple hv-balloon instances */
+        error_setg(errp, "Only one balloon device is supported");
+        return;
+    }
+    /* A memory backend is optional; validate and claim it when given. */
+    if (balloon->hostmem) {
+        if (host_memory_backend_is_mapped(balloon->hostmem)) {
+            Object *obj = OBJECT(balloon->hostmem);
+
+            error_setg(errp, "'%s' property specifies a busy memdev: %s",
+                       HV_BALLOON_MEMDEV_PROP,
+                       object_get_canonical_path_component(obj));
+            goto out_balloon_handler;
+        }
+
+        hv_balloon_ensure_mr(balloon);
+
+        /* This is rather unlikely to happen, but let's still check for it. */
+        if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr),
+                             HV_BALLOON_PAGE_SIZE)) {
+            error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64,
+                       HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE);
+            goto out_balloon_handler;
+        }
+        /* Mark the backend as in use and register its RAM with vmstate. */
+        host_memory_backend_set_mapped(balloon->hostmem, true);
+        vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem),
+                             DEVICE(balloon));
+    } else if (balloon->addr) {
+        error_setg(errp, "'%s' property must not be set without a memdev",
+                   HV_BALLOON_ADDR_PROP);
+        goto out_balloon_handler;
+    }
+
+    timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL,
+                  hv_balloon_post_init_timer, balloon);
+
+    qemu_register_reset(hv_balloon_system_reset, balloon);
+
+    return;
+
+out_balloon_handler:
+    qemu_remove_balloon_handler(balloon);
+}
+
+/*
+ * VMBus device reset has to be implemented in case the guest decides to
+ * disconnect and reconnect to the VMBus without rebooting the whole system.
+ *
+ * However, the hot-added memory can't be removed here as Windows keeps on using
+ * it until the system is restarted, even after disconnecting from the VMBus.
+ */
+static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
+{
+    HvBalloon *balloon = HV_BALLOON(vdev);
+
+    if (balloon->state == S_POST_RESET_CLOSED) {
+        return;
+    }
+    /* Drop removed-page tracking: our range's trees, then the global ones. */
+    if (balloon->our_range) {
+        hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range));
+    }
+
+    hvb_page_range_tree_destroy(&balloon->removed_guest);
+    hvb_page_range_tree_destroy(&balloon->removed_both);
+    hvb_page_range_tree_init(&balloon->removed_guest);
+    hvb_page_range_tree_init(&balloon->removed_both);
+
+    balloon->trans_id = 0;
+    balloon->removed_guest_ctr = 0;
+    balloon->removed_both_ctr = 0;
+    /* S_POST_RESET_CLOSED is the only state that allows a channel open. */
+    HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED);
+    hv_balloon_event_loop(balloon);
+}
+
+/*
+ * Clean up things that were (possibly) allocated pre-realization, for example
+ * from memory_device_pre_plug(), so we don't leak them if the device doesn't
+ * actually get realized in the end.
+ */
+static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
+{
+    hv_balloon_free_mr(balloon);
+    balloon->addr = 0;
+    /* hv_balloon_decide_memslots() asserts that this count is zero. */
+    balloon->memslot_count = 0;
+}
+
+static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev)
+{
+    HvBalloon *balloon = HV_BALLOON(vdev);
+    /* Undo realize: reset hook, balloon handler, then the backend claims. */
+    qemu_unregister_reset(hv_balloon_system_reset, balloon);
+
+    hv_balloon_system_reset_unrealize_common(balloon);
+
+    qemu_remove_balloon_handler(balloon);
+
+    if (balloon->hostmem) {
+        vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem),
+                               DEVICE(balloon));
+        host_memory_backend_set_mapped(balloon->hostmem, false);
+    }
+    /* Release the trees tracking pages the guest reported as removed. */
+    hvb_page_range_tree_destroy(&balloon->removed_guest);
+    hvb_page_range_tree_destroy(&balloon->removed_both);
+
+    hv_balloon_unrealize_finalize_common(balloon);
+}
+
+static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md)
+{ /* get_addr hook: reads the address property; a failure aborts */
+    return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP,
+                                    &error_abort);
+}
+
+static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                   Error **errp)
+{ /* set_addr hook: writes the address property, errors go to errp */
+    object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp);
+}
+
+static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
+                                                     Error **errp)
+{
+    HvBalloon *balloon = HV_BALLOON(md);
+
+    /* Only a device with a backend has a region; errp is never set here. */
+    if (balloon->hostmem) {
+        hv_balloon_ensure_mr(balloon);
+        return balloon->mr;
+    }
+
+    return NULL;
+}
+
+static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
+                                           MemoryDeviceInfo *info)
+{
+    const HvBalloon *balloon = HV_BALLOON(md);
+    const char *dev_id = DEVICE(md)->id;
+    HvBalloonDeviceInfo *devinfo = g_new0(HvBalloonDeviceInfo, 1);
+
+    /* The id is optional; it stays NULL for a device without one. */
+    if (dev_id) {
+        devinfo->id = g_strdup(dev_id);
+    }
+    if (!balloon->hostmem) {
+        devinfo->max_size = 0;
+    } else {
+        /* TODO: expose current provided size or something else? */
+        devinfo->memdev = object_get_canonical_path(OBJECT(balloon->hostmem));
+        devinfo->memaddr = balloon->addr;
+        devinfo->has_memaddr = true;
+        devinfo->max_size = memory_region_size(balloon->mr);
+    }
+    /* Hand the filled structure over as the hv-balloon union member. */
+    info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON;
+    info->u.hv_balloon.data = devinfo;
+}
+
+static void hv_balloon_decide_memslots(MemoryDeviceState *md,
+                                       unsigned int limit)
+{
+    HvBalloon *balloon = HV_BALLOON(md);
+    MemoryRegion *backend_mr;
+    uint64_t total, slot_size, slot_count;
+
+    /* Called exactly once before realize, so nothing is decided yet. */
+    assert(!balloon->memslot_count);
+    /* There is nothing to split up without a memory backend. */
+    assert(balloon->hostmem);
+
+    backend_mr = host_memory_backend_get_memory(balloon->hostmem);
+    total = memory_region_size(backend_mr);
+    assert(total > 0);
+
+    /*
+     * Derive the memslot size from the suggested limit, rounded up to
+     * HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN, then count the slots needed.
+     * NOTE(review): "limit" is assumed to be non-zero - confirm with caller.
+     */
+    slot_size = QEMU_ALIGN_UP(total / limit, HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN);
+    slot_count = QEMU_ALIGN_UP(total, slot_size) / slot_size;
+
+    /* A single memslot simply covers the whole region. */
+    balloon->memslot_size = slot_count > 1 ? slot_size : total;
+
+    assert(slot_count <= UINT_MAX);
+    balloon->memslot_count = slot_count;
+}
+
+static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md)
+{
+    const HvBalloon *dev = HV_BALLOON(md);
+
+    /* hv_balloon_decide_memslots() must have set the count already. */
+    assert(dev->memslot_count > 0);
+
+    return dev->memslot_count;
+}
+
+static void hv_balloon_init(Object *obj)
+{ /* intentionally empty */
+}
+
+static void hv_balloon_finalize(Object *obj)
+{
+    HvBalloon *balloon = HV_BALLOON(obj);
+    /* Frees pre-realize allocations even if realize never happened. */
+    hv_balloon_unrealize_finalize_common(balloon);
+}
+
+static Property hv_balloon_properties[] = {
+    DEFINE_PROP_BOOL("status-report", HvBalloon,
+                     status_report.enabled, false), /* off by default */
+
+    /* MEMORY_DEVICE props: optional backing memdev and its address */
+    DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0),
+
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void hv_balloon_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
+    /* Properties, the VMBus class id and the device category. */
+    device_class_set_props(dc, hv_balloon_properties);
+    qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    /* VMBus device callbacks. */
+    vdc->vmdev_realize = hv_balloon_vmdev_realize;
+    vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize;
+    vdc->vmdev_reset = hv_balloon_vmdev_reset;
+    vdc->open_channel = hv_balloon_vmdev_open_channel;
+    vdc->close_channel = hv_balloon_vmdev_close_channel;
+    vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify;
+    /* Memory device class callbacks. */
+    mdc->get_addr = hv_balloon_md_get_addr;
+    mdc->set_addr = hv_balloon_md_set_addr;
+    mdc->get_plugged_size = memory_device_get_region_size;
+    mdc->get_memory_region = hv_balloon_md_get_memory_region;
+    mdc->decide_memslots = hv_balloon_decide_memslots;
+    mdc->get_memslots = hv_balloon_get_memslots;
+    mdc->fill_device_info = hv_balloon_md_fill_device_info;
+}
diff --git a/hw/hyperv/meson.build b/hw/hyperv/meson.build
index b43f119ea5..d3d2668c71 100644
--- a/hw/hyperv/meson.build
+++ b/hw/hyperv/meson.build
@@ -2,3 +2,4 @@ specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'))
 specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c'))
 specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c'))
 specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c'))
+specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c'))
diff --git a/hw/hyperv/trace-events b/hw/hyperv/trace-events
index b4c35ca8e3..7963c215b1 100644
--- a/hw/hyperv/trace-events
+++ b/hw/hyperv/trace-events
@@ -16,3 +16,21 @@ vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d"
 vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d"
 vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d"
 vmbus_close_channel(uint32_t chan_id) "channel #%d"
+
+# hv-balloon
+hv_balloon_state_change(const char *tostr) "-> %s"
+hv_balloon_incoming_version(uint16_t major, uint16_t minor) "incoming proto version %u.%u"
+hv_balloon_incoming_caps(uint32_t caps) "incoming caps 0x%x"
+hv_balloon_outgoing_unballoon(uint32_t trans_id, uint64_t count, uint64_t start, uint64_t rempages) "posting unballoon %"PRIu32" for %"PRIu64" @ 0x%"PRIx64", remaining %"PRIu64
+hv_balloon_incoming_unballoon(uint32_t trans_id) "incoming unballoon response %"PRIu32
+hv_balloon_outgoing_hot_add(uint32_t trans_id, uint64_t count, uint64_t start) "posting hot add %"PRIu32" for %"PRIu64" @ 0x%"PRIx64
+hv_balloon_incoming_hot_add(uint32_t trans_id, uint32_t result, uint32_t count) "incoming hot add response %"PRIu32", result %"PRIu32", count %"PRIu32
+hv_balloon_outgoing_balloon(uint32_t trans_id, uint64_t count, uint64_t rempages) "posting balloon %"PRIu32" for %"PRIu64", remaining %"PRIu64
+hv_balloon_incoming_balloon(uint32_t trans_id, uint32_t range_count, uint32_t more_pages) "incoming balloon response %"PRIu32", ranges %"PRIu32", more %"PRIu32
+hv_balloon_our_range_add(uint64_t count, uint64_t start) "adding our range %"PRIu64" @ 0x%"PRIx64
+hv_balloon_remove_response(uint64_t count, uint64_t start, unsigned int both) "processing remove response range %"PRIu64" @ 0x%"PRIx64", both %u"
+hv_balloon_remove_response_hole(uint64_t counthole, uint64_t starthole, uint64_t countrange, uint64_t startrange, uint64_t starthpr, unsigned int both) "response range hole %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64", before our start 0x%"PRIx64", both %u"
+hv_balloon_remove_response_common(uint64_t countcommon, uint64_t startcommon, uint64_t countrange, uint64_t startrange, uint64_t counthpr, uint64_t starthpr, uint64_t removed, unsigned int both) "response common range %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64" with our %"PRIu64" @ 0x%"PRIx64", removed %"PRIu64", both %u"
+hv_balloon_remove_response_remainder(uint64_t count, uint64_t start, unsigned int both) "remove response remaining range %"PRIu64" @ 0x%"PRIx64", both %u"
+hv_balloon_map_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "mapping memslot %u / %u @ 0x%"PRIx64
+hv_balloon_unmap_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "unmapping memslot %u / %u @ 0x%"PRIx64
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 271289f902..c64eaa5a46 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -2271,7 +2271,7 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp)
     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
     BusChild *child;
     Error *err = NULL;
-    char idstr[UUID_FMT_LEN + 1];
+    char idstr[UUID_STR_LEN];
 
     assert(!qemu_uuid_is_null(&vdev->instanceid));
 
@@ -2467,7 +2467,7 @@ static char *vmbus_get_dev_path(DeviceState *dev)
 static char *vmbus_get_fw_dev_path(DeviceState *dev)
 {
     VMBusDevice *vdev = VMBUS_DEVICE(dev);
-    char uuid[UUID_FMT_LEN + 1];
+    char uuid[UUID_STR_LEN];
 
     qemu_uuid_unparse(&vdev->instanceid, uuid);
     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 94772c726b..55850791df 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -45,6 +45,7 @@ config PC
     select ACPI_VMGENID
     select VIRTIO_PMEM_SUPPORTED
     select VIRTIO_MEM_SUPPORTED
+    select HV_BALLOON_SUPPORTED
 
 config PC_PCI
     bool
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 7965415b47..4203144da9 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1450,6 +1450,10 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &iommu_as[devfn]->as;
 }
 
+static const PCIIOMMUOps amdvi_iommu_ops = { /* passed to pci_setup_iommu() */
+    .get_address_space = amdvi_host_dma_iommu,
+};
+
 static const MemoryRegionOps mmio_mem_ops = {
     .read = amdvi_mmio_read,
     .write = amdvi_mmio_write,
@@ -1581,7 +1585,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
                           AMDVI_MMIO_SIZE);
     memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
                                 &s->mmio);
-    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+    pci_setup_iommu(bus, &amdvi_iommu_ops, s);
     amdvi_init(s);
 }
 
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1c6c18622f..5085a6fee3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1045,18 +1045,35 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
  * Rsvd field masks for spte:
  *     vtd_spte_rsvd 4k pages
  *     vtd_spte_rsvd_large large pages
+ *
+ * We support only 3-level and 4-level page tables (see vtd_init() which
+ * sets only VTD_CAP_SAGAW_39bit and maybe VTD_CAP_SAGAW_48bit bits in s->cap).
  */
-static uint64_t vtd_spte_rsvd[5];
-static uint64_t vtd_spte_rsvd_large[5];
+#define VTD_SPTE_RSVD_LEN 5
+static uint64_t vtd_spte_rsvd[VTD_SPTE_RSVD_LEN];
+static uint64_t vtd_spte_rsvd_large[VTD_SPTE_RSVD_LEN];
 
 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
 {
-    uint64_t rsvd_mask = vtd_spte_rsvd[level];
+    uint64_t rsvd_mask;
+
+    /*
+     * We should have caught a guest-mis-programmed level earlier,
+     * via vtd_is_level_supported.
+     */
+    assert(level < VTD_SPTE_RSVD_LEN);
+    /*
+     * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and
+     * checked by vtd_is_last_slpte().
+     */
+    assert(level);
 
     if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) &&
         (slpte & VTD_SL_PT_PAGE_SIZE_MASK)) {
         /* large page */
         rsvd_mask = vtd_spte_rsvd_large[level];
+    } else {
+        rsvd_mask = vtd_spte_rsvd[level];
     }
 
     return slpte & rsvd_mask;
@@ -4088,6 +4105,10 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &vtd_as->as;
 }
 
+static const PCIIOMMUOps vtd_iommu_ops = {
+    .get_address_space = vtd_host_dma_iommu, /* AddressSpace per bus/devfn */
+};
+
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
 {
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -4210,7 +4231,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
                                       g_free, g_free);
     vtd_init(s);
-    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+    pci_setup_iommu(bus, &vtd_iommu_ops, dev);
     /* Pseudo address space under root PCI bus. */
     x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
     qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build
index ab143d6474..a4a2e23c06 100644
--- a/hw/i386/kvm/meson.build
+++ b/hw/i386/kvm/meson.build
@@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files(
   'xen_evtchn.c',
   'xen_gnttab.c',
   'xen_xenstore.c',
+  'xen_primary_console.c',
   'xenstore_impl.c',
   ))
 
diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
index e4c82de6f3..67bf7f174e 100644
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -18,3 +18,5 @@ xenstore_watch(const char *path, const char *token) "path %s token %s"
 xenstore_unwatch(const char *path, const char *token) "path %s token %s"
 xenstore_reset_watches(void) ""
 xenstore_watch_event(const char *path, const char *token) "path %s token %s"
+xen_primary_console_create(void) ""
+xen_primary_console_reset(unsigned int port) "port %u"
diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c
index ae406e0b02..d03131e686 100644
--- a/hw/i386/kvm/xen-stubs.c
+++ b/hw/i386/kvm/xen-stubs.c
@@ -15,6 +15,7 @@
 #include "qapi/qapi-commands-misc-target.h"
 
 #include "xen_evtchn.h"
+#include "xen_primary_console.h"
 
 void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
                           uint64_t addr, uint32_t data, bool is_masked)
@@ -30,6 +31,13 @@ bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
     return false;
 }
 
+void xen_primary_console_create(void)
+{
+}
+
+void xen_primary_console_set_be_port(uint16_t port)
+{
+}
 #ifdef TARGET_I386
 EvtchnInfoList *qmp_xen_event_list(Error **errp)
 {
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index a731738411..02b8cbf8df 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -58,7 +58,15 @@ OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
 typedef struct XenEvtchnPort {
     uint32_t vcpu;      /* Xen/ACPI vcpu_id */
     uint16_t type;      /* EVTCHNSTAT_xxxx */
-    uint16_t type_val;  /* pirq# / virq# / remote port according to type */
+    union {
+        uint16_t val;  /* raw value for serialization etc. */
+        uint16_t pirq;
+        uint16_t virq;
+        struct {
+            uint16_t port:15;
+            uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
+        } interdomain;
+    } u;
 } XenEvtchnPort;
 
 /* 32-bit compatibility definitions, also used natively in 32-bit build */
@@ -106,14 +114,6 @@ struct xenevtchn_handle {
 };
 
 /*
- * For unbound/interdomain ports there are only two possible remote
- * domains; self and QEMU. Use a single high bit in type_val for that,
- * and the low bits for the remote port number (or 0 for unbound).
- */
-#define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
-#define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
-
-/*
  * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
  * insane enough to think about guest-transparent live migration from actual
  * Xen to QEMU, and ensuring that we can convert/consume the stream.
@@ -210,16 +210,16 @@ static int xen_evtchn_post_load(void *opaque, int version_id)
         XenEvtchnPort *p = &s->port_table[i];
 
         if (p->type == EVTCHNSTAT_pirq) {
-            assert(p->type_val);
-            assert(p->type_val < s->nr_pirqs);
+            assert(p->u.pirq);
+            assert(p->u.pirq < s->nr_pirqs);
 
             /*
              * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
              * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
              * catches up with it.
              */
-            s->pirq[p->type_val].gsi = IRQ_UNBOUND;
-            s->pirq[p->type_val].port = i;
+            s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
+            s->pirq[p->u.pirq].port = i;
         }
     }
     /* Rebuild s->pirq[].gsi mapping */
@@ -243,7 +243,7 @@ static const VMStateDescription xen_evtchn_port_vmstate = {
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(vcpu, XenEvtchnPort),
         VMSTATE_UINT16(type, XenEvtchnPort),
-        VMSTATE_UINT16(type_val, XenEvtchnPort),
+        VMSTATE_UINT16(u.val, XenEvtchnPort),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -490,6 +490,12 @@ int xen_evtchn_set_callback_param(uint64_t param)
         break;
     }
 
+    /* If the guest has set a per-vCPU callback vector, prefer that. */
+    if (gsi && kvm_xen_has_vcpu_callback_vector()) {
+        in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
+        gsi = 0;
+    }
+
     if (!ret) {
         /* If vector delivery was turned *off* then tell the kernel */
         if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
@@ -599,14 +605,13 @@ static void unbind_backend_ports(XenEvtchnState *s)
 
     for (i = 1; i < s->nr_ports; i++) {
         p = &s->port_table[i];
-        if (p->type == EVTCHNSTAT_interdomain &&
-            (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
-            evtchn_port_t be_port = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
+        if (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu) {
+            evtchn_port_t be_port = p->u.interdomain.port;
 
             if (s->be_handles[be_port]) {
                 /* This part will be overwritten on the load anyway. */
                 p->type = EVTCHNSTAT_unbound;
-                p->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
+                p->u.interdomain.port = 0;
 
                 /* Leave the backend port open and unbound too. */
                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
@@ -644,30 +649,22 @@ int xen_evtchn_status_op(struct evtchn_status *status)
 
     switch (p->type) {
     case EVTCHNSTAT_unbound:
-        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
-            status->u.unbound.dom = DOMID_QEMU;
-        } else {
-            status->u.unbound.dom = xen_domid;
-        }
+        status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
+                                                         : xen_domid;
         break;
 
     case EVTCHNSTAT_interdomain:
-        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
-            status->u.interdomain.dom = DOMID_QEMU;
-        } else {
-            status->u.interdomain.dom = xen_domid;
-        }
-
-        status->u.interdomain.port = p->type_val &
-            PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
+        status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
+                                                             : xen_domid;
+        status->u.interdomain.port = p->u.interdomain.port;
         break;
 
     case EVTCHNSTAT_pirq:
-        status->u.pirq = p->type_val;
+        status->u.pirq = p->u.pirq;
         break;
 
     case EVTCHNSTAT_virq:
-        status->u.virq = p->type_val;
+        status->u.virq = p->u.virq;
         break;
     }
 
@@ -983,7 +980,7 @@ static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
 static void free_port(XenEvtchnState *s, evtchn_port_t port)
 {
     s->port_table[port].type = EVTCHNSTAT_closed;
-    s->port_table[port].type_val = 0;
+    s->port_table[port].u.val = 0;
     s->port_table[port].vcpu = 0;
 
     if (s->nr_ports == port + 1) {
@@ -1006,7 +1003,7 @@ static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
         if (s->port_table[p].type == EVTCHNSTAT_closed) {
             s->port_table[p].vcpu = vcpu;
             s->port_table[p].type = type;
-            s->port_table[p].type_val = val;
+            s->port_table[p].u.val = val;
 
             *port = p;
 
@@ -1047,15 +1044,15 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port,
         return -ENOENT;
 
     case EVTCHNSTAT_pirq:
-        s->pirq[p->type_val].port = 0;
-        if (s->pirq[p->type_val].is_translated) {
+        s->pirq[p->u.pirq].port = 0;
+        if (s->pirq[p->u.pirq].is_translated) {
             *flush_kvm_routes = true;
         }
         break;
 
     case EVTCHNSTAT_virq:
-        kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
-                              p->type_val, 0);
+        kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
+                              p->u.virq, 0);
         break;
 
     case EVTCHNSTAT_ipi:
@@ -1065,8 +1062,8 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port,
         break;
 
     case EVTCHNSTAT_interdomain:
-        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
-            uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
+        if (p->u.interdomain.to_qemu) {
+            uint16_t be_port = p->u.interdomain.port;
             struct xenevtchn_handle *xc = s->be_handles[be_port];
             if (xc) {
                 if (kvm_xen_has_cap(EVTCHN_SEND)) {
@@ -1076,14 +1073,15 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port,
             }
         } else {
             /* Loopback interdomain */
-            XenEvtchnPort *rp = &s->port_table[p->type_val];
-            if (!valid_port(p->type_val) || rp->type_val != port ||
+            XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
+            if (!valid_port(p->u.interdomain.port) ||
+                rp->u.interdomain.port != port ||
                 rp->type != EVTCHNSTAT_interdomain) {
                 error_report("Inconsistent state for interdomain unbind");
             } else {
                 /* Set the other end back to unbound */
                 rp->type = EVTCHNSTAT_unbound;
-                rp->type_val = 0;
+                rp->u.interdomain.port = 0;
             }
         }
         break;
@@ -1129,6 +1127,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset)
         return -ESRCH;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     return xen_evtchn_soft_reset();
 }
 
@@ -1207,7 +1206,7 @@ int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
     if (p->type == EVTCHNSTAT_interdomain ||
         p->type == EVTCHNSTAT_unbound ||
         p->type == EVTCHNSTAT_pirq ||
-        (p->type == EVTCHNSTAT_virq && virq_is_global(p->type_val))) {
+        (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
         /*
          * unmask_port() with do_unmask==false will just raise the event
          * on the new vCPU if the port was already pending.
@@ -1352,19 +1351,15 @@ int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
 int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
-    uint16_t type_val;
     int ret;
 
     if (!s) {
         return -ENOTSUP;
     }
 
-    if (interdomain->remote_dom == DOMID_QEMU) {
-        type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
-    } else if (interdomain->remote_dom == DOMID_SELF ||
-               interdomain->remote_dom == xen_domid) {
-        type_val = 0;
-    } else {
+    if (interdomain->remote_dom != DOMID_QEMU &&
+        interdomain->remote_dom != DOMID_SELF &&
+        interdomain->remote_dom != xen_domid) {
         return -ESRCH;
     }
 
@@ -1375,8 +1370,8 @@ int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
     qemu_mutex_lock(&s->port_lock);
 
     /* The newly allocated port starts out as unbound */
-    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val,
-                        &interdomain->local_port);
+    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
+
     if (ret) {
         goto out;
     }
@@ -1401,7 +1396,8 @@ int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
             assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
         }
         lp->type = EVTCHNSTAT_interdomain;
-        lp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU | interdomain->remote_port;
+        lp->u.interdomain.to_qemu = 1;
+        lp->u.interdomain.port = interdomain->remote_port;
         ret = 0;
     } else {
         /* Loopback */
@@ -1409,19 +1405,18 @@ int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
         XenEvtchnPort *lp = &s->port_table[interdomain->local_port];
 
         /*
-         * The 'remote' port for loopback must be an unbound port allocated for
-         * communication with the local domain (as indicated by rp->type_val
-         * being zero, not PORT_INFO_TYPEVAL_REMOTE_QEMU), and must *not* be
-         * the port that was just allocated for the local end.
+         * The 'remote' port for loopback must be an unbound port allocated
+         * for communication with the local domain, and must *not* be the
+         * port that was just allocated for the local end.
          */
         if (interdomain->local_port != interdomain->remote_port &&
-            rp->type == EVTCHNSTAT_unbound && rp->type_val == 0) {
+            rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {
 
             rp->type = EVTCHNSTAT_interdomain;
-            rp->type_val = interdomain->local_port;
+            rp->u.interdomain.port = interdomain->local_port;
 
             lp->type = EVTCHNSTAT_interdomain;
-            lp->type_val = interdomain->remote_port;
+            lp->u.interdomain.port = interdomain->remote_port;
         } else {
             ret = -EINVAL;
         }
@@ -1440,7 +1435,6 @@ int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
-    uint16_t type_val;
     int ret;
 
     if (!s) {
@@ -1451,18 +1445,20 @@ int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
         return -ESRCH;
     }
 
-    if (alloc->remote_dom == DOMID_QEMU) {
-        type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
-    } else if (alloc->remote_dom == DOMID_SELF ||
-               alloc->remote_dom == xen_domid) {
-        type_val = 0;
-    } else {
+    if (alloc->remote_dom != DOMID_QEMU &&
+        alloc->remote_dom != DOMID_SELF &&
+        alloc->remote_dom != xen_domid) {
         return -EPERM;
     }
 
     qemu_mutex_lock(&s->port_lock);
 
-    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, type_val, &alloc->port);
+    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);
+
+    if (!ret && alloc->remote_dom == DOMID_QEMU) {
+        XenEvtchnPort *p = &s->port_table[alloc->port];
+        p->u.interdomain.to_qemu = 1;
+    }
 
     qemu_mutex_unlock(&s->port_lock);
 
@@ -1489,12 +1485,12 @@ int xen_evtchn_send_op(struct evtchn_send *send)
 
     switch (p->type) {
     case EVTCHNSTAT_interdomain:
-        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
+        if (p->u.interdomain.to_qemu) {
             /*
              * This is an event from the guest to qemu itself, which is
              * serving as the driver domain.
              */
-            uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
+            uint16_t be_port = p->u.interdomain.port;
             struct xenevtchn_handle *xc = s->be_handles[be_port];
             if (xc) {
                 eventfd_write(xc->fd, 1);
@@ -1504,7 +1500,7 @@ int xen_evtchn_send_op(struct evtchn_send *send)
             }
         } else {
             /* Loopback interdomain ports; just a complex IPI */
-            set_port_pending(s, p->type_val);
+            set_port_pending(s, p->u.interdomain.port);
         }
         break;
 
@@ -1546,8 +1542,7 @@ int xen_evtchn_set_port(uint16_t port)
 
     /* QEMU has no business sending to anything but these */
     if (p->type == EVTCHNSTAT_virq ||
-        (p->type == EVTCHNSTAT_interdomain &&
-         (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU))) {
+        (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
         set_port_pending(s, port);
         ret = 0;
     }
@@ -2057,7 +2052,7 @@ int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
     switch (gp->type) {
     case EVTCHNSTAT_interdomain:
         /* Allow rebinding after migration, preserve port # if possible */
-        be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
+        be_port = gp->u.interdomain.port;
         assert(be_port != 0);
         if (!s->be_handles[be_port]) {
             s->be_handles[be_port] = xc;
@@ -2078,7 +2073,8 @@ int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
         }
 
         gp->type = EVTCHNSTAT_interdomain;
-        gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
+        gp->u.interdomain.to_qemu = 1;
+        gp->u.interdomain.port = be_port;
         xc->guest_port = guest_port;
         if (kvm_xen_has_cap(EVTCHN_SEND)) {
             assign_kernel_eventfd(gp->type, guest_port, xc->fd);
@@ -2123,7 +2119,7 @@ int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
         /* This should never *not* be true */
         if (gp->type == EVTCHNSTAT_interdomain) {
             gp->type = EVTCHNSTAT_unbound;
-            gp->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
+            gp->u.interdomain.port = 0;
         }
 
         if (kvm_xen_has_cap(EVTCHN_SEND)) {
@@ -2277,11 +2273,11 @@ EvtchnInfoList *qmp_xen_event_list(Error **errp)
 
         info->type = p->type;
         if (p->type == EVTCHNSTAT_interdomain) {
-            info->remote_domain = g_strdup((p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) ?
+            info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
                                            "qemu" : "loopback");
-            info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
+            info->target = p->u.interdomain.port;
         } else {
-            info->target = p->type_val;
+            info->target = p->u.val; /* pirq# or virq# */
         }
         info->vcpu = p->vcpu;
         info->pending = test_bit(i, pending);
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 21c30e3659..0a24f53f20 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -25,6 +25,7 @@
 #include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_gnttab.h"
+#include "xen_primary_console.h"
 
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_xen.h"
@@ -537,11 +538,13 @@ int xen_gnttab_reset(void)
     s->nr_frames = 0;
 
     memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
-
     s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
     s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
 
-    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
+    if (xen_primary_console_get_pfn()) {
+        s->entries.v1[GNTTAB_RESERVED_CONSOLE].flags = GTF_permit_access;
+        s->entries.v1[GNTTAB_RESERVED_CONSOLE].frame = XEN_SPECIAL_PFN(CONSOLE);
+    }
 
     return 0;
 }
diff --git a/hw/i386/kvm/xen_primary_console.c b/hw/i386/kvm/xen_primary_console.c
new file mode 100644
index 0000000000..abe79f565b
--- /dev/null
+++ b/hw/i386/kvm/xen_primary_console.c
@@ -0,0 +1,193 @@
+/*
+ * QEMU Xen emulation: Primary console support
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+
+#include "hw/sysbus.h"
+#include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
+#include "xen_evtchn.h"
+#include "xen_overlay.h"
+#include "xen_primary_console.h"
+
+#include "sysemu/kvm.h"
+#include "sysemu/kvm_xen.h"
+
+#include "trace.h"
+
+#include "hw/xen/interface/event_channel.h"
+#include "hw/xen/interface/grant_table.h"
+
+#define TYPE_XEN_PRIMARY_CONSOLE "xen-primary-console"
+OBJECT_DECLARE_SIMPLE_TYPE(XenPrimaryConsoleState, XEN_PRIMARY_CONSOLE)
+
+struct XenPrimaryConsoleState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    MemoryRegion console_page; /* XEN_PAGE_SIZE of RAM, set up in realize */
+    void *cp;                  /* host pointer to console_page's RAM */
+
+    evtchn_port_t guest_port;  /* port obtained by alloc/rebind at reset */
+    evtchn_port_t be_port;     /* port to rebind to at reset; 0 if none */
+
+    struct xengntdev_handle *gt; /* from qemu_xen_gnttab_open() at reset */
+    void *granted_xs;          /* map of grant ref GNTTAB_RESERVED_CONSOLE */
+};
+
+struct XenPrimaryConsoleState *xen_primary_console_singleton;
+
+static void xen_primary_console_realize(DeviceState *dev, Error **errp)
+{
+    XenPrimaryConsoleState *s = XEN_PRIMARY_CONSOLE(dev);
+
+    if (xen_mode != XEN_EMULATE) {
+        error_setg(errp, "Xen primary console support is for Xen emulation");
+        return;
+    }
+    /* Back the console with one zeroed XEN_PAGE_SIZE page of RAM. */
+    memory_region_init_ram(&s->console_page, OBJECT(dev), "xen:console_page",
+                           XEN_PAGE_SIZE, &error_abort);
+    memory_region_set_enabled(&s->console_page, true);
+    s->cp = memory_region_get_ram_ptr(&s->console_page);
+    memset(s->cp, 0, XEN_PAGE_SIZE);
+
+    /* We can't map it this early as KVM isn't ready */
+    xen_primary_console_singleton = s;
+}
+
+static void xen_primary_console_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    /* The only class hook installed here is realize. */
+    dc->realize = xen_primary_console_realize;
+}
+
+static const TypeInfo xen_primary_console_info = {
+    .name          = TYPE_XEN_PRIMARY_CONSOLE,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XenPrimaryConsoleState),
+    .class_init    = xen_primary_console_class_init, /* installs realize */
+};
+
+
+void xen_primary_console_create(void)
+{
+    DeviceState *dev = sysbus_create_simple(TYPE_XEN_PRIMARY_CONSOLE, -1, NULL);
+    /* Device created above; trace it, then record it as the singleton. */
+    trace_xen_primary_console_create();
+
+    xen_primary_console_singleton = XEN_PRIMARY_CONSOLE(dev);
+
+    /*
+     * Defer the init (xen_primary_console_reset()) until KVM is set up and the
+     * overlay page can be mapped.
+     */
+}
+
+static void xen_primary_console_register_types(void)
+{
+    type_register_static(&xen_primary_console_info); /* the TypeInfo above */
+}
+
+type_init(xen_primary_console_register_types)
+
+uint16_t xen_primary_console_get_port(void)
+{
+    const XenPrimaryConsoleState *console = xen_primary_console_singleton;
+    /*
+     * Report 0 when no console device exists, else its guest-side port.
+     */
+    return console ? console->guest_port : 0;
+}
+
+void xen_primary_console_set_be_port(uint16_t port)
+{
+    XenPrimaryConsoleState *console = xen_primary_console_singleton;
+    if (console != NULL) {
+        console->be_port = port; /* picked up by the next reset */
+    }
+}
+
+uint64_t xen_primary_console_get_pfn(void)
+{
+    /*
+     * The console page has a fixed special PFN, but it is only reported
+     * (as non-zero) once the console device exists.
+     */
+    return xen_primary_console_singleton ? XEN_SPECIAL_PFN(CONSOLE) : 0;
+}
+
+void *xen_primary_console_get_map(void)
+{
+    XenPrimaryConsoleState *s = xen_primary_console_singleton;
+    if (!s) {
+        return NULL; /* singleton not set: there is no console page */
+    }
+    return s->cp; /* host pointer to the console page's RAM */
+}
+
+static void alloc_guest_port(XenPrimaryConsoleState *s)
+{
+    /* Request an unbound port (dom SELF) whose remote end is QEMU. */
+    struct evtchn_alloc_unbound request = {
+        .dom = DOMID_SELF,
+        .remote_dom = DOMID_QEMU,
+    };
+    if (xen_evtchn_alloc_unbound_op(&request) == 0) {
+        s->guest_port = request.port; /* left untouched if the op failed */
+    }
+}
+
+static void rebind_guest_port(XenPrimaryConsoleState *s)
+{
+    /* Bind a new local port to the backend port remembered in be_port. */
+    struct evtchn_bind_interdomain req = {
+        .remote_dom = DOMID_QEMU,
+        .remote_port = s->be_port,
+    };
+    int rc = xen_evtchn_bind_interdomain_op(&req);
+    s->be_port = 0; /* one-shot: cleared whether or not the bind worked */
+    if (rc == 0) {
+        s->guest_port = req.local_port;
+    }
+}
+
+int xen_primary_console_reset(void)
+{
+    XenPrimaryConsoleState *s = xen_primary_console_singleton;
+    if (!s) {
+        return 0; /* no console device: nothing to reset */
+    }
+    /* Map the console page at its special GPA unless already mapped. */
+    if (!memory_region_is_mapped(&s->console_page)) {
+        uint64_t gpa = XEN_SPECIAL_PFN(CONSOLE) << TARGET_PAGE_BITS;
+        xen_overlay_do_map_page(&s->console_page, gpa);
+    }
+    /* Rebind to a backend port if one was handed over, else allocate. */
+    if (s->be_port) {
+        rebind_guest_port(s);
+    } else {
+        alloc_guest_port(s);
+    }
+
+    trace_xen_primary_console_reset(s->guest_port);
+    /* NOTE(review): s->gt is reopened on each call; confirm no leak. */
+    s->gt = qemu_xen_gnttab_open();
+    uint32_t xs_gntref = GNTTAB_RESERVED_CONSOLE;
+    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
+                                             PROT_READ | PROT_WRITE);
+
+    return 0;
+}
diff --git a/hw/i386/kvm/xen_primary_console.h b/hw/i386/kvm/xen_primary_console.h
new file mode 100644
index 0000000000..7e2989ea0d
--- /dev/null
+++ b/hw/i386/kvm/xen_primary_console.h
@@ -0,0 +1,23 @@
+/*
+ * QEMU Xen emulation: Primary console support
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_XEN_PRIMARY_CONSOLE_H
+#define QEMU_XEN_PRIMARY_CONSOLE_H
+
+void xen_primary_console_create(void);
+int xen_primary_console_reset(void);
+
+uint16_t xen_primary_console_get_port(void);
+void xen_primary_console_set_be_port(uint16_t port);
+uint64_t xen_primary_console_get_pfn(void);
+void *xen_primary_console_get_map(void);
+
+#endif /* QEMU_XEN_PRIMARY_CONSOLE_H */
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 660d0b72f9..6e651960b3 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -25,6 +25,7 @@
 #include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_evtchn.h"
+#include "xen_primary_console.h"
 #include "xen_xenstore.h"
 
 #include "sysemu/kvm.h"
@@ -331,7 +332,7 @@ static void xs_error(XenXenstoreState *s, unsigned int id,
     const char *errstr = NULL;
 
     for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
-        struct xsd_errors *xsd_error = &xsd_errors[i];
+        const struct xsd_errors *xsd_error = &xsd_errors[i];
 
         if (xsd_error->errnum == errnum) {
             errstr = xsd_error->errstring;
@@ -1357,10 +1358,12 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token)
     } else {
         deliver_watch(s, path, token);
         /*
-         * If the message was queued because there was already ring activity,
-         * no need to wake the guest. But if not, we need to send the evtchn.
+         * Attempt to queue the message into the actual ring, and send
+         * the event channel notification if any bytes are copied.
          */
-        xen_be_evtchn_notify(s->eh, s->be_port);
+        if (s->rsp_pending && put_rsp(s) > 0) {
+            xen_be_evtchn_notify(s->eh, s->be_port);
+        }
     }
 }
 
@@ -1432,6 +1435,8 @@ static void alloc_guest_port(XenXenstoreState *s)
 int xen_xenstore_reset(void)
 {
     XenXenstoreState *s = xen_xenstore_singleton;
+    int console_port;
+    GList *perms;
     int err;
 
     if (!s) {
@@ -1459,6 +1464,24 @@ int xen_xenstore_reset(void)
     }
     s->be_port = err;
 
+    /* Create frontend store nodes */
+    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
+    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
+
+    relpath_printf(s, perms, "store/port", "%u", s->guest_port);
+    relpath_printf(s, perms, "store/ring-ref", "%lu",
+                   XEN_SPECIAL_PFN(XENSTORE));
+
+    console_port = xen_primary_console_get_port();
+    if (console_port) {
+        relpath_printf(s, perms, "console/ring-ref", "%lu",
+                       XEN_SPECIAL_PFN(CONSOLE));
+        relpath_printf(s, perms, "console/port", "%u", console_port);
+        relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
+    }
+
+    g_list_free_full(perms, g_free);
+
     /*
      * We don't actually access the guest's page through the grant, because
      * this isn't real Xen, and we can just use the page we gave it in the
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6031234a73..188bc9d0f8 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -27,6 +27,7 @@
 #include "hw/i386/pc.h"
 #include "hw/char/serial.h"
 #include "hw/char/parallel.h"
+#include "hw/hyperv/hv-balloon.h"
 #include "hw/i386/fw_cfg.h"
 #include "hw/i386/vmport.h"
 #include "sysemu/cpus.h"
@@ -57,6 +58,7 @@
 #include "hw/i386/kvm/xen_evtchn.h"
 #include "hw/i386/kvm/xen_gnttab.h"
 #include "hw/i386/kvm/xen_xenstore.h"
+#include "hw/mem/memory-device.h"
 #include "e820_memory_layout.h"
 #include "trace.h"
 #include CONFIG_DEVICES
@@ -1259,7 +1261,7 @@ void pc_basic_device_init(struct PCMachineState *pcms,
         if (pcms->bus) {
             pci_create_simple(pcms->bus, -1, "xen-platform");
         }
-        xen_bus_init();
+        pcms->xenbus = xen_bus_init();
         xen_be_init();
     }
 #endif
@@ -1287,7 +1289,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
                     pcms->vmport != ON_OFF_AUTO_ON);
 }
 
-void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
+void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus,
+                 BusState *xen_bus)
 {
     MachineClass *mc = MACHINE_CLASS(pcmc);
     int i;
@@ -1297,7 +1300,11 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
         NICInfo *nd = &nd_table[i];
         const char *model = nd->model ? nd->model : mc->default_nic;
 
-        if (g_str_equal(model, "ne2k_isa")) {
+        if (xen_bus && (!nd->model || g_str_equal(model, "xen-net-device"))) {
+            DeviceState *dev = qdev_new("xen-net-device");
+            qdev_set_nic_properties(dev, nd);
+            qdev_realize_and_unref(dev, xen_bus, &error_fatal);
+        } else if (g_str_equal(model, "ne2k_isa")) {
             pc_init_ne2k_isa(isa_bus, nd);
         } else {
             pci_nic_init_nofail(nd, pci_bus, model, NULL);
@@ -1422,6 +1429,21 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev,
     error_propagate(errp, local_err);
 }
 
+static void pc_hv_balloon_pre_plug(HotplugHandler *hotplug_dev,
+                                   DeviceState *dev, Error **errp)
+{
+    /* VMBus has no hotplug handler, so a hotplugged dev never gets here. */
+    g_assert(!dev->hotplugged);
+    memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
+                           errp);
+}
+
+static void pc_hv_balloon_plug(HotplugHandler *hotplug_dev,
+                               DeviceState *dev, Error **errp)
+{
+    memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
+}
+
 static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                                           DeviceState *dev, Error **errp)
 {
@@ -1452,6 +1474,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
             return;
         }
         pcms->iommu = dev;
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
+        pc_hv_balloon_pre_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -1464,6 +1488,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
         x86_cpu_plug(hotplug_dev, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
         virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
+        pc_hv_balloon_plug(hotplug_dev, dev, errp);
     }
 }
 
@@ -1505,6 +1531,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine,
         object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
         object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) ||
         object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON) ||
         object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) {
         return HOTPLUG_HANDLER(machine);
     }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 26e161beb9..eace854335 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -342,7 +342,7 @@ static void pc_init1(MachineState *machine,
     pc_basic_device_init(pcms, isa_bus, x86ms->gsi, rtc_state, true,
                          0x4);
 
-    pc_nic_init(pcmc, isa_bus, pci_bus);
+    pc_nic_init(pcmc, isa_bus, pci_bus, pcms->xenbus);
 
     if (pcmc->pci_enabled) {
         pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 597943ff1b..4f3e5412f6 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -340,7 +340,7 @@ static void pc_q35_init(MachineState *machine)
 
     /* the rest devices to which pci devfn is automatically assigned */
     pc_vga_init(isa_bus, host_bus);
-    pc_nic_init(pcmc, isa_bus, host_bus);
+    pc_nic_init(pcmc, isa_bus, host_bus, pcms->xenbus);
 
     if (machine->nvdimms_state->is_enabled) {
         nvdimm_init_acpi_state(machine->nvdimms_state, system_io,
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 17457ff3de..ef7d3fc05f 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -140,9 +140,14 @@ static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
 /* Remove the peer of the NIC device. Normally, this would be a tap device. */
 static void del_nic_peer(NICState *nic, void *opaque)
 {
-    NetClientState *nc;
+    NetClientState *nc = qemu_get_queue(nic);
+    ObjectClass *klass = module_object_class_by_name(nc->model);
+
+    /* Only delete peers of PCI NICs that we're about to delete */
+    if (!klass || !object_class_dynamic_cast(klass, TYPE_PCI_DEVICE)) {
+        return;
+    }
 
-    nc = qemu_get_queue(nic);
     if (nc->peer)
         qemu_del_net_client(nc->peer);
 }
@@ -164,39 +169,60 @@ static void pci_unplug_nics(PCIBus *bus)
  *
  * [1] https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
  */
-static void pci_xen_ide_unplug(PCIDevice *d, bool aux)
+struct ide_unplug_state {
+    bool aux;
+    int nr_unplugged;
+};
+
+static int ide_dev_unplug(DeviceState *dev, void *_st)
 {
-    DeviceState *dev = DEVICE(d);
-    PCIIDEState *pci_ide;
-    int i;
+    struct ide_unplug_state *st = _st;
     IDEDevice *idedev;
     IDEBus *idebus;
     BlockBackend *blk;
+    int unit;
 
-    pci_ide = PCI_IDE(dev);
+    idedev = IDE_DEVICE(object_dynamic_cast(OBJECT(dev), "ide-hd"));
+    if (!idedev) {
+        return 0;
+    }
 
-    for (i = aux ? 1 : 0; i < 4; i++) {
-        idebus = &pci_ide->bus[i / 2];
-        blk = idebus->ifs[i % 2].blk;
+    idebus = IDE_BUS(qdev_get_parent_bus(dev));
 
-        if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
-            if (!(i % 2)) {
-                idedev = idebus->master;
-            } else {
-                idedev = idebus->slave;
-            }
+    unit = (idedev == idebus->slave);
+    assert(unit || idedev == idebus->master);
 
-            blk_drain(blk);
-            blk_flush(blk);
+    if (st->aux && !unit && !strcmp(BUS(idebus)->name, "ide.0")) {
+        return 0;
+    }
 
-            blk_detach_dev(blk, DEVICE(idedev));
-            idebus->ifs[i % 2].blk = NULL;
-            idedev->conf.blk = NULL;
-            monitor_remove_blk(blk);
-            blk_unref(blk);
-        }
+    blk = idebus->ifs[unit].blk;
+    if (blk) {
+        blk_drain(blk);
+        blk_flush(blk);
+
+        blk_detach_dev(blk, DEVICE(idedev));
+        idebus->ifs[unit].blk = NULL;
+        idedev->conf.blk = NULL;
+        monitor_remove_blk(blk);
+        blk_unref(blk);
+    }
+
+    object_unparent(OBJECT(dev));
+    st->nr_unplugged++;
+
+    return 0;
+}
+
+static void pci_xen_ide_unplug(PCIDevice *d, bool aux)
+{
+    struct ide_unplug_state st = { aux, 0 };
+    DeviceState *dev = DEVICE(d);
+
+    qdev_walk_children(dev, NULL, NULL, ide_dev_unplug, NULL, &st);
+    if (st.nr_unplugged) {
+        pci_device_reset(d);
     }
-    pci_device_reset(d);
 }
 
 static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
@@ -211,6 +237,7 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
 
     switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) {
     case PCI_CLASS_STORAGE_IDE:
+    case PCI_CLASS_STORAGE_SATA:
         pci_xen_ide_unplug(d, aux);
         break;
 
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index ae38f48f16..e0704b8dc3 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -20,6 +20,22 @@
 #include "exec/address-spaces.h"
 #include "trace.h"
 
+static bool memory_device_is_empty(const MemoryDeviceState *md)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    Error *local_err = NULL;
+    MemoryRegion *mr;
+
+    /* dropping const here is fine as we don't touch the memory region */
+    mr = mdc->get_memory_region((MemoryDeviceState *)md, &local_err);
+    if (local_err) {
+        /* Not empty, we'll report errors later when obtaining the MR again. */
+        error_free(local_err);
+        return false;
+    }
+    return !mr;
+}
+
 static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
 {
     const MemoryDeviceState *md_a = MEMORY_DEVICE(a);
@@ -220,12 +236,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
         return 0;
     }
 
-    if (!QEMU_IS_ALIGNED(size, align)) {
-        error_setg(errp, "backend memory size must be multiple of 0x%"
-                   PRIx64, align);
-        return 0;
-    }
-
     if (hint) {
         if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) {
             error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
@@ -249,6 +259,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
         uint64_t next_addr;
         Range tmp;
 
+        if (memory_device_is_empty(md)) {
+            continue;
+        }
+
         range_init_nofail(&tmp, mdc->get_addr(md),
                           memory_device_get_region_size(md, &error_abort));
 
@@ -292,6 +306,7 @@ MemoryDeviceInfoList *qmp_memory_device_list(void)
         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data);
         MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
 
+        /* Let's query information even for empty memory devices. */
         mdc->fill_device_info(md, info);
 
         QAPI_LIST_APPEND(tail, info);
@@ -311,7 +326,7 @@ static int memory_device_plugged_size(Object *obj, void *opaque)
         const MemoryDeviceState *md = MEMORY_DEVICE(obj);
         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
 
-        if (dev->realized) {
+        if (dev->realized && !memory_device_is_empty(md)) {
             *size += mdc->get_plugged_size(md, &error_abort);
         }
     }
@@ -337,6 +352,11 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
     uint64_t addr, align = 0;
     MemoryRegion *mr;
 
+    /* We support empty memory devices even without device memory. */
+    if (memory_device_is_empty(md)) {
+        return;
+    }
+
     if (!ms->device_memory) {
         error_setg(errp, "the configuration is not prepared for memory devices"
                          " (e.g., for memory hotplug), consider specifying the"
@@ -380,10 +400,17 @@ out:
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 {
     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
-    const unsigned int memslots = memory_device_get_memslots(md);
-    const uint64_t addr = mdc->get_addr(md);
+    unsigned int memslots;
+    uint64_t addr;
     MemoryRegion *mr;
 
+    if (memory_device_is_empty(md)) {
+        return;
+    }
+
+    memslots = memory_device_get_memslots(md);
+    addr = mdc->get_addr(md);
+
     /*
      * We expect that a previous call to memory_device_pre_plug() succeeded, so
      * it can't fail at this point.
@@ -408,6 +435,10 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
     const unsigned int memslots = memory_device_get_memslots(md);
     MemoryRegion *mr;
 
+    if (memory_device_is_empty(md)) {
+        return;
+    }
+
     /*
      * We expect that a previous call to memory_device_pre_plug() succeeded, so
      * it can't fail at this point.
diff --git a/hw/net/meson.build b/hw/net/meson.build
index 2632634df3..f64651c467 100644
--- a/hw/net/meson.build
+++ b/hw/net/meson.build
@@ -1,5 +1,5 @@
 system_ss.add(when: 'CONFIG_DP8393X', if_true: files('dp8393x.c'))
-system_ss.add(when: 'CONFIG_XEN', if_true: files('xen_nic.c'))
+system_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_nic.c'))
 system_ss.add(when: 'CONFIG_NE2000_COMMON', if_true: files('ne2000.c'))
 
 # PCI network cards
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 3abfd65e5b..3097742cc0 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -482,3 +482,14 @@ dp8393x_receive_oversize(int size) "oversize packet, pkt_size is %d"
 dp8393x_receive_not_netcard(void) "packet not for netcard"
 dp8393x_receive_packet(int crba) "Receive packet at 0x%"PRIx32
 dp8393x_receive_write_status(int crba) "Write status at 0x%"PRIx32
+
+# xen_nic.c
+xen_netdev_realize(int dev, const char *info, const char *peer) "vif%u info '%s' peer '%s'"
+xen_netdev_unrealize(int dev) "vif%u"
+xen_netdev_create(int dev) "vif%u"
+xen_netdev_destroy(int dev) "vif%u"
+xen_netdev_disconnect(int dev) "vif%u"
+xen_netdev_connect(int dev, unsigned int tx, unsigned int rx, int port) "vif%u tx %u rx %u port %u"
+xen_netdev_frontend_changed(const char *dev, int state) "vif%s state %d"
+xen_netdev_tx(int dev, int ref, int off, int len, unsigned int flags, const char *c, const char *d, const char *m, const char *e) "vif%u ref %u off %u len %u flags 0x%x%s%s%s%s"
+xen_netdev_rx(int dev, int idx, int status, int flags) "vif%u idx %d status %d flags 0x%x"
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 9bbf6599fc..af4ba3f1e6 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -20,6 +20,13 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "qemu/qemu-print.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #include <sys/wait.h>
@@ -27,18 +34,26 @@
 #include "net/net.h"
 #include "net/checksum.h"
 #include "net/util.h"
-#include "hw/xen/xen-legacy-backend.h"
+
+#include "hw/xen/xen-backend.h"
+#include "hw/xen/xen-bus-helper.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 
 #include "hw/xen/interface/io/netif.h"
+#include "hw/xen/interface/io/xs_wire.h"
+
+#include "trace.h"
 
 /* ------------------------------------------------------------- */
 
 struct XenNetDev {
-    struct XenLegacyDevice      xendev;  /* must be first */
-    char                  *mac;
+    struct XenDevice      xendev;  /* must be first */
+    XenEventChannel       *event_channel;
+    int                   dev;
     int                   tx_work;
-    int                   tx_ring_ref;
-    int                   rx_ring_ref;
+    unsigned int          tx_ring_ref;
+    unsigned int          rx_ring_ref;
     struct netif_tx_sring *txs;
     struct netif_rx_sring *rxs;
     netif_tx_back_ring_t  tx_ring;
@@ -47,6 +62,11 @@ struct XenNetDev {
     NICState              *nic;
 };
 
+typedef struct XenNetDev XenNetDev;
+
+#define TYPE_XEN_NET_DEVICE "xen-net-device"
+OBJECT_DECLARE_SIMPLE_TYPE(XenNetDev, XEN_NET_DEVICE)
+
 /* ------------------------------------------------------------- */
 
 static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, int8_t st)
@@ -68,7 +88,8 @@ static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, i
     netdev->tx_ring.rsp_prod_pvt = ++i;
     RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
     if (notify) {
-        xen_pv_send_notify(&netdev->xendev);
+        xen_device_notify_event_channel(XEN_DEVICE(netdev),
+                                        netdev->event_channel, NULL);
     }
 
     if (i == netdev->tx_ring.req_cons) {
@@ -104,13 +125,16 @@ static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING
 #endif
 }
 
-static void net_tx_packets(struct XenNetDev *netdev)
+static bool net_tx_packets(struct XenNetDev *netdev)
 {
+    bool done_something = false;
     netif_tx_request_t txreq;
     RING_IDX rc, rp;
     void *page;
     void *tmpbuf = NULL;
 
+    assert(qemu_mutex_iothread_locked());
+
     for (;;) {
         rc = netdev->tx_ring.req_cons;
         rp = netdev->tx_ring.sring->req_prod;
@@ -122,49 +146,52 @@ static void net_tx_packets(struct XenNetDev *netdev)
             }
             memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq));
             netdev->tx_ring.req_cons = ++rc;
+            done_something = true;
 
 #if 1
             /* should not happen in theory, we don't announce the *
              * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
             if (txreq.flags & NETTXF_extra_info) {
-                xen_pv_printf(&netdev->xendev, 0, "FIXME: extra info flag\n");
+                qemu_log_mask(LOG_UNIMP, "vif%u: FIXME: extra info flag\n",
+                              netdev->dev);
                 net_tx_error(netdev, &txreq, rc);
                 continue;
             }
             if (txreq.flags & NETTXF_more_data) {
-                xen_pv_printf(&netdev->xendev, 0, "FIXME: more data flag\n");
+                qemu_log_mask(LOG_UNIMP, "vif%u: FIXME: more data flag\n",
+                              netdev->dev);
                 net_tx_error(netdev, &txreq, rc);
                 continue;
             }
 #endif
 
             if (txreq.size < 14) {
-                xen_pv_printf(&netdev->xendev, 0, "bad packet size: %d\n",
-                              txreq.size);
+                qemu_log_mask(LOG_GUEST_ERROR, "vif%u: bad packet size: %d\n",
+                              netdev->dev, txreq.size);
                 net_tx_error(netdev, &txreq, rc);
                 continue;
             }
 
             if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
-                xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n");
+                qemu_log_mask(LOG_GUEST_ERROR, "vif%u: error: page crossing\n",
+                              netdev->dev);
                 net_tx_error(netdev, &txreq, rc);
                 continue;
             }
 
-            xen_pv_printf(&netdev->xendev, 3,
-                          "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n",
-                          txreq.gref, txreq.offset, txreq.size, txreq.flags,
-                          (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
-                          (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
-                          (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
-                          (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");
+            trace_xen_netdev_tx(netdev->dev, txreq.gref, txreq.offset,
+                                txreq.size, txreq.flags,
+                                (txreq.flags & NETTXF_csum_blank)     ? " csum_blank"     : "",
+                                (txreq.flags & NETTXF_data_validated) ? " data_validated" : "",
+                                (txreq.flags & NETTXF_more_data)      ? " more_data"      : "",
+                                (txreq.flags & NETTXF_extra_info)     ? " extra_info"     : "");
 
-            page = xen_be_map_grant_ref(&netdev->xendev, txreq.gref,
-                                        PROT_READ);
+            page = xen_device_map_grant_refs(&netdev->xendev, &txreq.gref, 1,
+                                             PROT_READ, NULL);
             if (page == NULL) {
-                xen_pv_printf(&netdev->xendev, 0,
-                              "error: tx gref dereference failed (%d)\n",
-                             txreq.gref);
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "vif%u: tx gref dereference failed (%d)\n",
+                              netdev->dev, txreq.gref);
                 net_tx_error(netdev, &txreq, rc);
                 continue;
             }
@@ -181,7 +208,8 @@ static void net_tx_packets(struct XenNetDev *netdev)
                 qemu_send_packet(qemu_get_queue(netdev->nic),
                                  page + txreq.offset, txreq.size);
             }
-            xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref);
+            xen_device_unmap_grant_refs(&netdev->xendev, page, &txreq.gref, 1,
+                                        NULL);
             net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
         }
         if (!netdev->tx_work) {
@@ -190,6 +218,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
         netdev->tx_work = 0;
     }
     g_free(tmpbuf);
+    return done_something;
 }
 
 /* ------------------------------------------------------------- */
@@ -212,14 +241,13 @@ static void net_rx_response(struct XenNetDev *netdev,
         resp->status = (int16_t)st;
     }
 
-    xen_pv_printf(&netdev->xendev, 3,
-                  "rx response: idx %d, status %d, flags 0x%x\n",
-                  i, resp->status, resp->flags);
+    trace_xen_netdev_rx(netdev->dev, i, resp->status, resp->flags);
 
     netdev->rx_ring.rsp_prod_pvt = ++i;
     RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
     if (notify) {
-        xen_pv_send_notify(&netdev->xendev);
+        xen_device_notify_event_channel(XEN_DEVICE(netdev),
+                                        netdev->event_channel, NULL);
     }
 }
 
@@ -232,7 +260,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
     RING_IDX rc, rp;
     void *page;
 
-    if (netdev->xendev.be_state != XenbusStateConnected) {
+    assert(qemu_mutex_iothread_locked());
+
+    if (xen_device_backend_get_state(&netdev->xendev) != XenbusStateConnected) {
         return -1;
     }
 
@@ -244,24 +274,26 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
         return 0;
     }
     if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) {
-        xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
-                      (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN);
+        qemu_log_mask(LOG_GUEST_ERROR, "vif%u: packet too big (%lu > %ld)",
+                      netdev->dev, (unsigned long)size,
+                      XEN_PAGE_SIZE - NET_IP_ALIGN);
         return -1;
     }
 
     memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
     netdev->rx_ring.req_cons = ++rc;
 
-    page = xen_be_map_grant_ref(&netdev->xendev, rxreq.gref, PROT_WRITE);
+    page = xen_device_map_grant_refs(&netdev->xendev, &rxreq.gref, 1,
+                                     PROT_WRITE, NULL);
     if (page == NULL) {
-        xen_pv_printf(&netdev->xendev, 0,
-                      "error: rx gref dereference failed (%d)\n",
-                      rxreq.gref);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "vif%u: rx gref dereference failed (%d)\n",
+                      netdev->dev, rxreq.gref);
         net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
         return -1;
     }
     memcpy(page + NET_IP_ALIGN, buf, size);
-    xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref);
+    xen_device_unmap_grant_refs(&netdev->xendev, page, &rxreq.gref, 1, NULL);
     net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
 
     return size;
@@ -275,139 +307,361 @@ static NetClientInfo net_xen_info = {
     .receive = net_rx_packet,
 };
 
-static int net_init(struct XenLegacyDevice *xendev)
+static void xen_netdev_realize(XenDevice *xendev, Error **errp)
 {
-    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+    ERRP_GUARD();
+    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
+    NetClientState *nc;
 
-    /* read xenstore entries */
-    if (netdev->mac == NULL) {
-        netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac");
-    }
-
-    /* do we have all we need? */
-    if (netdev->mac == NULL) {
-        return -1;
-    }
+    qemu_macaddr_default_if_unset(&netdev->conf.macaddr);
 
-    if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) {
-        return -1;
-    }
+    xen_device_frontend_printf(xendev, "mac", "%02x:%02x:%02x:%02x:%02x:%02x",
+                               netdev->conf.macaddr.a[0],
+                               netdev->conf.macaddr.a[1],
+                               netdev->conf.macaddr.a[2],
+                               netdev->conf.macaddr.a[3],
+                               netdev->conf.macaddr.a[4],
+                               netdev->conf.macaddr.a[5]);
 
     netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
-                               "xen", NULL, netdev);
+                               object_get_typename(OBJECT(xendev)),
+                               DEVICE(xendev)->id, netdev);
 
-    qemu_set_info_str(qemu_get_queue(netdev->nic),
-                      "nic: xenbus vif macaddr=%s", netdev->mac);
+    nc = qemu_get_queue(netdev->nic);
+    qemu_format_nic_info_str(nc, netdev->conf.macaddr.a);
 
     /* fill info */
-    xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
-    xenstore_write_be_int(&netdev->xendev, "feature-rx-flip", 0);
+    xen_device_backend_printf(xendev, "feature-rx-copy", "%u", 1);
+    xen_device_backend_printf(xendev, "feature-rx-flip", "%u", 0);
 
-    return 0;
+    trace_xen_netdev_realize(netdev->dev, nc->info_str, nc->peer ?
+                             nc->peer->name : "(none)");
 }
 
-static int net_connect(struct XenLegacyDevice *xendev)
+static bool net_event(void *_xendev)
 {
-    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
-    int rx_copy;
+    XenNetDev *netdev = XEN_NET_DEVICE(_xendev);
+    bool done_something;
 
-    if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref",
-                             &netdev->tx_ring_ref) == -1) {
-        return -1;
+    done_something = net_tx_packets(netdev);
+    qemu_flush_queued_packets(qemu_get_queue(netdev->nic));
+    return done_something;
+}
+
+static bool xen_netdev_connect(XenDevice *xendev, Error **errp)
+{
+    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
+    unsigned int port, rx_copy;
+
+    assert(qemu_mutex_iothread_locked());
+
+    if (xen_device_frontend_scanf(xendev, "tx-ring-ref", "%u",
+                                  &netdev->tx_ring_ref) != 1) {
+        error_setg(errp, "failed to read tx-ring-ref");
+        return false;
     }
-    if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref",
-                             &netdev->rx_ring_ref) == -1) {
-        return 1;
+
+    if (xen_device_frontend_scanf(xendev, "rx-ring-ref", "%u",
+                                  &netdev->rx_ring_ref) != 1) {
+        error_setg(errp, "failed to read rx-ring-ref");
+        return false;
     }
-    if (xenstore_read_fe_int(&netdev->xendev, "event-channel",
-                             &netdev->xendev.remote_port) == -1) {
-        return -1;
+
+    if (xen_device_frontend_scanf(xendev, "event-channel", "%u",
+                                  &port) != 1) {
+        error_setg(errp, "failed to read event-channel");
+        return false;
     }
 
-    if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) {
+    if (xen_device_frontend_scanf(xendev, "request-rx-copy", "%u",
+                                  &rx_copy) != 1) {
         rx_copy = 0;
     }
     if (rx_copy == 0) {
-        xen_pv_printf(&netdev->xendev, 0,
-                      "frontend doesn't support rx-copy.\n");
-        return -1;
+        error_setg(errp, "frontend doesn't support rx-copy");
+        return false;
     }
 
-    netdev->txs = xen_be_map_grant_ref(&netdev->xendev,
-                                       netdev->tx_ring_ref,
-                                       PROT_READ | PROT_WRITE);
+    netdev->txs = xen_device_map_grant_refs(xendev,
+                                            &netdev->tx_ring_ref, 1,
+                                            PROT_READ | PROT_WRITE,
+                                            errp);
     if (!netdev->txs) {
-        return -1;
+        error_prepend(errp, "failed to map tx grant ref: ");
+        return false;
     }
-    netdev->rxs = xen_be_map_grant_ref(&netdev->xendev,
-                                       netdev->rx_ring_ref,
-                                       PROT_READ | PROT_WRITE);
+
+    netdev->rxs = xen_device_map_grant_refs(xendev,
+                                            &netdev->rx_ring_ref, 1,
+                                            PROT_READ | PROT_WRITE,
+                                            errp);
     if (!netdev->rxs) {
-        xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
-                               netdev->tx_ring_ref);
-        netdev->txs = NULL;
-        return -1;
+        error_prepend(errp, "failed to map rx grant ref: ");
+        return false;
     }
+
     BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE);
     BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE);
 
-    xen_be_bind_evtchn(&netdev->xendev);
+    netdev->event_channel = xen_device_bind_event_channel(xendev, port,
+                                                          net_event,
+                                                          netdev,
+                                                          errp);
+    if (!netdev->event_channel) {
+        return false;
+    }
 
-    xen_pv_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, "
-                  "remote port %d, local port %d\n",
-                  netdev->tx_ring_ref, netdev->rx_ring_ref,
-                  netdev->xendev.remote_port, netdev->xendev.local_port);
+    trace_xen_netdev_connect(netdev->dev, netdev->tx_ring_ref,
+                             netdev->rx_ring_ref, port);
 
     net_tx_packets(netdev);
-    return 0;
+    return true;
 }
 
-static void net_disconnect(struct XenLegacyDevice *xendev)
+static void xen_netdev_disconnect(XenDevice *xendev, Error **errp)
 {
-    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
+
+    trace_xen_netdev_disconnect(netdev->dev);
+
+    assert(qemu_mutex_iothread_locked());
 
-    xen_pv_unbind_evtchn(&netdev->xendev);
+    netdev->tx_ring.sring = NULL;
+    netdev->rx_ring.sring = NULL;
 
+    if (netdev->event_channel) {
+        xen_device_unbind_event_channel(xendev, netdev->event_channel,
+                                        errp);
+        netdev->event_channel = NULL;
+    }
     if (netdev->txs) {
-        xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
-                               netdev->tx_ring_ref);
+        xen_device_unmap_grant_refs(xendev, netdev->txs,
+                                    &netdev->tx_ring_ref, 1, errp);
         netdev->txs = NULL;
     }
     if (netdev->rxs) {
-        xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs,
-                               netdev->rx_ring_ref);
+        xen_device_unmap_grant_refs(xendev, netdev->rxs,
+                                    &netdev->rx_ring_ref, 1, errp);
         netdev->rxs = NULL;
     }
 }
 
-static void net_event(struct XenLegacyDevice *xendev)
+/* -------------------------------------------------------------------- */
+
+
+static void xen_netdev_frontend_changed(XenDevice *xendev,
+                                       enum xenbus_state frontend_state,
+                                       Error **errp)
 {
-    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
-    net_tx_packets(netdev);
-    qemu_flush_queued_packets(qemu_get_queue(netdev->nic));
+    ERRP_GUARD();
+    enum xenbus_state backend_state = xen_device_backend_get_state(xendev);
+
+    trace_xen_netdev_frontend_changed(xendev->name, frontend_state);
+
+    switch (frontend_state) {
+    case XenbusStateConnected:
+        if (backend_state == XenbusStateConnected) {
+            break;
+        }
+
+        xen_netdev_disconnect(xendev, errp);
+        if (*errp) {
+            break;
+        }
+
+        if (!xen_netdev_connect(xendev, errp)) {
+            xen_netdev_disconnect(xendev, NULL);
+            xen_device_backend_set_state(xendev, XenbusStateClosing);
+            break;
+        }
+
+        xen_device_backend_set_state(xendev, XenbusStateConnected);
+        break;
+
+    case XenbusStateClosing:
+        xen_device_backend_set_state(xendev, XenbusStateClosing);
+        break;
+
+    case XenbusStateClosed:
+    case XenbusStateUnknown:
+        xen_netdev_disconnect(xendev, errp);
+        if (*errp) {
+            break;
+        }
+
+        xen_device_backend_set_state(xendev, XenbusStateClosed);
+        break;
+
+    case XenbusStateInitialised:
+        /*
+         * Linux netback does nothing on the frontend going (back) to
+         * XenbusStateInitialised, so do the same here.
+         */
+    default:
+        break;
+    }
 }
 
-static int net_free(struct XenLegacyDevice *xendev)
+static char *xen_netdev_get_name(XenDevice *xendev, Error **errp)
 {
-    struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
+    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
+
+    if (netdev->dev == -1) {
+        XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
+        char fe_path[XENSTORE_ABS_PATH_MAX + 1];
+        int idx = (xen_mode == XEN_EMULATE) ? 0 : 1;
+        char *value;
+
+        /* Theoretically we could go up to INT_MAX here but that's overkill */
+        while (idx < 100) {
+            snprintf(fe_path, sizeof(fe_path),
+                     "/local/domain/%u/device/vif/%u",
+                     xendev->frontend_id, idx);
+            value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL);
+            if (!value) {
+                if (errno == ENOENT) {
+                    netdev->dev = idx;
+                    goto found;
+                }
+                error_setg(errp, "cannot read %s: %s", fe_path,
+                           strerror(errno));
+                return NULL;
+            }
+            free(value);
+            idx++;
+        }
+        error_setg(errp, "cannot find device index for netdev device");
+        return NULL;
+    }
+ found:
+    return g_strdup_printf("%u", netdev->dev);
+}
+
+static void xen_netdev_unrealize(XenDevice *xendev)
+{
+    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
+
+    trace_xen_netdev_unrealize(netdev->dev);
+
+    /* Disconnect from the frontend in case this has not already happened */
+    xen_netdev_disconnect(xendev, NULL); /* NULL errp: no error is reported */
 
     if (netdev->nic) {
         qemu_del_nic(netdev->nic);
-        netdev->nic = NULL;
     }
-    g_free(netdev->mac);
-    netdev->mac = NULL;
-    return 0;
 }
 
 /* ------------------------------------------------------------- */
 
-struct XenDevOps xen_netdev_ops = {
-    .size       = sizeof(struct XenNetDev),
-    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
-    .init       = net_init,
-    .initialise    = net_connect,
-    .event      = net_event,
-    .disconnect = net_disconnect,
-    .free       = net_free,
+static Property xen_netdev_properties[] = {
+    DEFINE_NIC_PROPERTIES(XenNetDev, conf),
+    DEFINE_PROP_INT32("idx", XenNetDev, dev, -1), /* -1: probed in get_name */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xen_netdev_class_init(ObjectClass *class, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(class);
+    XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
+
+    xendev_class->backend = "qnic"; /* same as xen_net_backend_info.type */
+    xendev_class->device = "vif"; /* as in the ".../device/vif/N" path */
+    xendev_class->get_name = xen_netdev_get_name;
+    xendev_class->realize = xen_netdev_realize;
+    xendev_class->frontend_changed = xen_netdev_frontend_changed;
+    xendev_class->unrealize = xen_netdev_unrealize;
+    set_bit(DEVICE_CATEGORY_NETWORK, dev_class->categories);
+    dev_class->user_creatable = true;
+
+    device_class_set_props(dev_class, xen_netdev_properties);
+}
+
+static const TypeInfo xen_net_type_info = {
+    .name = TYPE_XEN_NET_DEVICE,
+    .parent = TYPE_XEN_DEVICE,
+    .instance_size = sizeof(XenNetDev),
+    .class_init = xen_netdev_class_init,
+}; /* registered by xen_net_register_types() */
+
+static void xen_net_register_types(void)
+{
+    type_register_static(&xen_net_type_info); /* the XenNetDev QOM type */
+}
+
+type_init(xen_net_register_types)
+
+/* Called to instantiate a XenNetDev when the backend is detected. */
+static void xen_net_device_create(XenBackendInstance *backend,
+                                  QDict *opts, Error **errp)
+{
+    ERRP_GUARD();
+    XenBus *xenbus = xen_backend_get_bus(backend);
+    const char *name = xen_backend_get_name(backend);
+    XenDevice *xendev = NULL;
+    unsigned long number;
+    const char *macstr;
+    XenNetDev *net;
+    MACAddr mac;
+
+    if (qemu_strtoul(name, NULL, 10, &number) || number >= INT_MAX) {
+        error_setg(errp, "failed to parse name '%s'", name);
+        goto fail;
+    }
+
+    trace_xen_netdev_create(number);
+
+    macstr = qdict_get_try_str(opts, "mac"); /* "mac" option is mandatory */
+    if (macstr == NULL) {
+        error_setg(errp, "no MAC address found");
+        goto fail;
+    }
+
+    if (net_parse_macaddr(mac.a, macstr) < 0) {
+        error_setg(errp, "failed to parse MAC address");
+        goto fail;
+    }
+
+    xendev = XEN_DEVICE(qdev_new(TYPE_XEN_NET_DEVICE));
+    net = XEN_NET_DEVICE(xendev);
+
+    net->dev = number; /* below INT_MAX, checked when parsing the name */
+    memcpy(&net->conf.macaddr, &mac, sizeof(mac));
+
+    if (qdev_realize_and_unref(DEVICE(xendev), BUS(xenbus), errp)) {
+        xen_backend_set_device(backend, xendev);
+        return; /* success: the backend instance now tracks the device */
+    }
+
+    error_prepend(errp, "realization of net device %lu failed: ",
+                  number);
+
+ fail:
+    if (xendev) { /* non-NULL only once qdev_new() has run */
+        object_unparent(OBJECT(xendev));
+    }
+}
+
+static void xen_net_device_destroy(XenBackendInstance *backend,
+                                   Error **errp)
+{
+    ERRP_GUARD();
+    XenDevice *xendev = xen_backend_get_device(backend);
+    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
+
+    trace_xen_netdev_destroy(netdev->dev);
+
+    object_unparent(OBJECT(xendev)); /* errp is never set on this path */
+}
+
+static const XenBackendInfo xen_net_backend_info = {
+    .type = "qnic", /* same string as xendev_class->backend */
+    .create = xen_net_device_create,
+    .destroy = xen_net_device_destroy,
 };
+
+static void xen_net_register_backend(void)
+{
+    xen_backend_register(&xen_net_backend_info); /* the "qnic" backend */
+}
+
+xen_backend_init(xen_net_register_backend);
diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c
index 4b2d7caf2d..bd226581af 100644
--- a/hw/pci-host/astro.c
+++ b/hw/pci-host/astro.c
@@ -19,6 +19,8 @@
 
 #define TYPE_ASTRO_IOMMU_MEMORY_REGION "astro-iommu-memory-region"
 
+#define F_EXTEND(addr) ((addr) | MAKE_64BIT_MASK(32, 32))
+
 #include "qemu/osdep.h"
 #include "qemu/module.h"
 #include "qemu/units.h"
@@ -345,6 +347,10 @@ static AddressSpace *elroy_pcihost_set_iommu(PCIBus *bus, void *opaque,
     return &s->astro->iommu_as;
 }
 
+static const PCIIOMMUOps elroy_pcihost_iommu_ops = {
+    .get_address_space = elroy_pcihost_set_iommu,
+}; /* handed to pci_setup_iommu() in astro_realize() */
+
 /*
  * Encoding in IOSAPIC:
  * base_addr == 0xfffa0000, we want to get 0xa0ff0000.
@@ -382,7 +388,7 @@ static void elroy_set_irq(void *opaque, int irq, int level)
         uint32_t ena = bit & ~old_ilr;
         s->ilr = old_ilr | bit;
         if (ena != 0) {
-            stl_be_phys(&address_space_memory, cpu_hpa, val & 63);
+            stl_be_phys(&address_space_memory, F_EXTEND(cpu_hpa), val & 63);
         }
     } else {
         s->ilr = old_ilr & ~bit;
@@ -821,20 +827,21 @@ static void astro_realize(DeviceState *obj, Error **errp)
 
         /* map elroys mmio */
         map_size = LMMIO_DIST_BASE_SIZE / ROPES_PER_IOC;
-        map_addr = (uint32_t) (LMMIO_DIST_BASE_ADDR + rope * map_size);
+        map_addr = F_EXTEND(LMMIO_DIST_BASE_ADDR + rope * map_size);
         memory_region_init_alias(&elroy->pci_mmio_alias, OBJECT(elroy),
                                  "pci-mmio-alias",
-                                 &elroy->pci_mmio, map_addr, map_size);
+                                 &elroy->pci_mmio, (uint32_t) map_addr, map_size);
         memory_region_add_subregion(get_system_memory(), map_addr,
                                  &elroy->pci_mmio_alias);
 
+        /* map elroys io */
         map_size = IOS_DIST_BASE_SIZE / ROPES_PER_IOC;
-        map_addr = (uint32_t) (IOS_DIST_BASE_ADDR + rope * map_size);
+        map_addr = F_EXTEND(IOS_DIST_BASE_ADDR + rope * map_size);
         memory_region_add_subregion(get_system_memory(), map_addr,
                                  &elroy->pci_io);
 
         /* Host memory as seen from the PCI side, via the IOMMU.  */
-        pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, elroy_pcihost_set_iommu,
+        pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, &elroy_pcihost_iommu_ops,
                                  elroy);
     }
 }
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index 6f5442f108..f477f97847 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -663,6 +663,10 @@ static AddressSpace *designware_pcie_host_set_iommu(PCIBus *bus, void *opaque,
     return &s->pci.address_space;
 }
 
+static const PCIIOMMUOps designware_iommu_ops = {
+    .get_address_space = designware_pcie_host_set_iommu,
+}; /* handed to pci_setup_iommu() in designware_pcie_host_realize() */
+
 static void designware_pcie_host_realize(DeviceState *dev, Error **errp)
 {
     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
@@ -705,7 +709,7 @@ static void designware_pcie_host_realize(DeviceState *dev, Error **errp)
     address_space_init(&s->pci.address_space,
                        &s->pci.address_space_root,
                        "pcie-bus-address-space");
-    pci_setup_iommu(pci->bus, designware_pcie_host_set_iommu, s);
+    pci_setup_iommu(pci->bus, &designware_iommu_ops, s);
 
     qdev_realize(DEVICE(&s->root), BUS(pci->bus), &error_fatal);
 }
diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c
index 82503229fa..5b0947a16c 100644
--- a/hw/pci-host/dino.c
+++ b/hw/pci-host/dino.c
@@ -354,6 +354,10 @@ static AddressSpace *dino_pcihost_set_iommu(PCIBus *bus, void *opaque,
     return &s->bm_as;
 }
 
+static const PCIIOMMUOps dino_iommu_ops = {
+    .get_address_space = dino_pcihost_set_iommu,
+}; /* handed to pci_setup_iommu() in dino_pcihost_init() */
+
 /*
  * Dino interrupts are connected as shown on Page 78, Table 23
  * (Little-endian bit numbers)
@@ -481,7 +485,7 @@ static void dino_pcihost_init(Object *obj)
         g_free(name);
     }
 
-    pci_setup_iommu(phb->bus, dino_pcihost_set_iommu, s);
+    pci_setup_iommu(phb->bus, &dino_iommu_ops, s);
 
     sysbus_init_mmio(sbd, &s->this_mem);
 
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index c5e58f4086..2a74dbe45f 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -968,6 +968,10 @@ static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &ds->dma_as;
 }
 
+static const PCIIOMMUOps pnv_phb3_iommu_ops = {
+    .get_address_space = pnv_phb3_dma_iommu,
+}; /* read-only; handed to pci_setup_iommu() in pnv_phb3_bus_init() */
+
 static void pnv_phb3_instance_init(Object *obj)
 {
     PnvPHB3 *phb = PNV_PHB3(obj);
@@ -1012,7 +1016,7 @@ void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb)
     object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id,
                             &error_abort);
 
-    pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+    pci_setup_iommu(pci->bus, &pnv_phb3_iommu_ops, phb);
 }
 
 static void pnv_phb3_realize(DeviceState *dev, Error **errp)
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 29cb11a5d9..37c7afc18c 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1518,6 +1518,10 @@ static void pnv_phb4_xscom_realize(PnvPHB4 *phb)
                             &phb->phb_regs_mr);
 }
 
+static const PCIIOMMUOps pnv_phb4_iommu_ops = {
+    .get_address_space = pnv_phb4_dma_iommu,
+}; /* read-only; handed to pci_setup_iommu() in pnv_phb4_bus_init() */
+
 static void pnv_phb4_instance_init(Object *obj)
 {
     PnvPHB4 *phb = PNV_PHB4(obj);
@@ -1557,7 +1561,7 @@ void pnv_phb4_bus_init(DeviceState *dev, PnvPHB4 *phb)
     object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id,
                             &error_abort);
 
-    pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
+    pci_setup_iommu(pci->bus, &pnv_phb4_iommu_ops, phb);
     pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
 }
 
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index 38814247f2..453a4e6ed3 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -435,6 +435,10 @@ static AddressSpace *e500_pcihost_set_iommu(PCIBus *bus, void *opaque,
     return &s->bm_as;
 }
 
+static const PCIIOMMUOps ppce500_iommu_ops = {
+    .get_address_space = e500_pcihost_set_iommu,
+}; /* handed to pci_setup_iommu() in e500_pcihost_realize() */
+
 static void e500_pcihost_realize(DeviceState *dev, Error **errp)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
@@ -469,7 +473,7 @@ static void e500_pcihost_realize(DeviceState *dev, Error **errp)
     memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX);
     memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
     address_space_init(&s->bm_as, &s->bm, "pci-bm");
-    pci_setup_iommu(b, e500_pcihost_set_iommu, s);
+    pci_setup_iommu(b, &ppce500_iommu_ops, s);
 
     pci_create_simple(b, 0, "e500-host-bridge");
 
diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c
index 9a11ac4b2b..86c3a49087 100644
--- a/hw/pci-host/raven.c
+++ b/hw/pci-host/raven.c
@@ -223,6 +223,10 @@ static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque,
     return &s->bm_as;
 }
 
+static const PCIIOMMUOps raven_iommu_ops = {
+    .get_address_space = raven_pcihost_set_iommu,
+}; /* handed to pci_setup_iommu() in raven_pcihost_initfn() */
+
 static void raven_change_gpio(void *opaque, int n, int level)
 {
     PREPPCIState *s = opaque;
@@ -320,7 +324,7 @@ static void raven_pcihost_initfn(Object *obj)
     memory_region_add_subregion(&s->bm, 0         , &s->bm_pci_memory_alias);
     memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias);
     address_space_init(&s->bm_as, &s->bm, "raven-bm");
-    pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s);
+    pci_setup_iommu(&s->pci_bus, &raven_iommu_ops, s);
 
     h->bus = &s->pci_bus;
 
diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c
index dcb2e230b6..d0851b48b0 100644
--- a/hw/pci-host/sabre.c
+++ b/hw/pci-host/sabre.c
@@ -112,6 +112,10 @@ static AddressSpace *sabre_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &is->iommu_as;
 }
 
+static const PCIIOMMUOps sabre_iommu_ops = {
+    .get_address_space = sabre_pci_dma_iommu,
+}; /* handed to pci_setup_iommu() in sabre_realize() */
+
 static void sabre_config_write(void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
 {
@@ -384,7 +388,7 @@ static void sabre_realize(DeviceState *dev, Error **errp)
     /* IOMMU */
     memory_region_add_subregion_overlap(&s->sabre_config, 0x200,
                     sysbus_mmio_get_region(SYS_BUS_DEVICE(s->iommu), 0), 1);
-    pci_setup_iommu(phb->bus, sabre_pci_dma_iommu, s->iommu);
+    pci_setup_iommu(phb->bus, &sabre_iommu_ops, s->iommu);
 
     /* APB secondary busses */
     pci_dev = pci_new_multifunction(PCI_DEVFN(1, 0), TYPE_SIMBA_PCI_BRIDGE);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 885c04b6f5..c49417abb2 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2678,7 +2678,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
     PCIBus *iommu_bus = bus;
     uint8_t devfn = dev->devfn;
 
-    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
+    while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {
         PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
 
         /*
@@ -2717,15 +2717,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
 
         iommu_bus = parent_bus;
     }
-    if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
-        return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
+    if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+        return iommu_bus->iommu_ops->get_address_space(bus,
+                                 iommu_bus->iommu_opaque, devfn);
     }
     return &address_space_memory;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
-    bus->iommu_fn = fn;
+    /*
+     * Callers must pass an ops table that provides at least the
+     * get_address_space() callback; both are asserted below.
+     */
+    assert(ops);
+    assert(ops->get_address_space);
+
+    bus->iommu_ops = ops;
     bus->iommu_opaque = opaque;
 }
 
diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
index 672090de94..df4ee374d0 100644
--- a/hw/ppc/ppc440_pcix.c
+++ b/hw/ppc/ppc440_pcix.c
@@ -449,6 +449,10 @@ static AddressSpace *ppc440_pcix_set_iommu(PCIBus *b, void *opaque, int devfn)
     return &s->bm_as;
 }
 
+static const PCIIOMMUOps ppc440_iommu_ops = {
+    .get_address_space = ppc440_pcix_set_iommu,
+}; /* handed to pci_setup_iommu() in ppc440_pcix_realize() */
+
 /*
  * Some guests on sam460ex write all kinds of garbage here such as
  * missing enable bit and low bits set and still expect this to work
@@ -503,7 +507,7 @@ static void ppc440_pcix_realize(DeviceState *dev, Error **errp)
     memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX);
     memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
     address_space_init(&s->bm_as, &s->bm, "pci-bm");
-    pci_setup_iommu(h->bus, ppc440_pcix_set_iommu, s);
+    pci_setup_iommu(h->bus, &ppc440_iommu_ops, s);
 
     memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE);
     memory_region_init_io(&h->conf_mem, OBJECT(s), &ppc440_pcix_host_conf_ops,
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 370c5a90f2..a27024e45a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -780,6 +780,10 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &phb->iommu_as;
 }
 
+static const PCIIOMMUOps spapr_iommu_ops = {
+    .get_address_space = spapr_pci_dma_iommu,
+}; /* handed to pci_setup_iommu() in spapr_phb_realize() */
+
 static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb,  PCIDevice *pdev)
 {
     g_autofree char *path = NULL;
@@ -1978,7 +1982,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
                                 &sphb->msiwindow);
 
-    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
+    pci_setup_iommu(bus, &spapr_iommu_ops, sphb);
 
     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
 
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 9016720547..f283f7e38d 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -18,14 +18,112 @@
  */
 
 #include "qemu/osdep.h"
+#include <sys/ioctl.h>
 #include <linux/vfio.h>
 #include "hw/ppc/spapr.h"
 #include "hw/pci-host/spapr.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci_device.h"
-#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
 #include "qemu/error-report.h"
 
+/*
+ * Interfaces for IBM EEH (Enhanced Error Handling)
+ */
+static bool vfio_eeh_container_ok(VFIOContainer *container)
+{
+    /*
+     * Only a container holding exactly one group is accepted.
+     *
+     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
+     * implementation is broken if there are multiple groups in a
+     * container.  The hardware works in units of Partitionable
+     * Endpoints (== IOMMU groups) and the EEH operations naively
+     * iterate across all groups in the container, without any logic
+     * to make sure the groups have their state synchronized.  For
+     * certain operations (ENABLE) that might be ok, until an error
+     * occurs, but for others (GET_STATE) it's clearly broken.
+     *
+     * XXX Once fixed kernels exist, test for them here
+     */
+
+    bool has_group = !QLIST_EMPTY(&container->group_list);
+
+    if (!has_group) {
+        return false;
+    }
+
+    /* Exactly one group means the first entry has no successor. */
+    return !QLIST_NEXT(QLIST_FIRST(&container->group_list),
+                       container_next);
+}
+
+/* Run VFIO_EEH_PE_OP @op on @container: ioctl result, -EPERM or -errno */
+static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+{
+    struct vfio_eeh_pe_op pe_op = {
+        .argsz = sizeof(pe_op),
+        .op = op,
+    };
+    int ret;
+
+    if (!vfio_eeh_container_ok(container)) {
+        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
+                     "kernel requires a container with exactly one group", op);
+        return -EPERM;
+    }
+
+    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
+    if (ret < 0) {
+        ret = -errno; /* save before error_report() can clobber errno */
+        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
+    }
+    return ret;
+}
+
+/*
+ * Look up the single VFIO container attached to @as.
+ *
+ * Returns NULL when the address space has no containers to act on,
+ * and also when it has more than one: we don't yet have logic to
+ * synchronize EEH state across multiple containers.
+ */
+static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
+{
+    VFIOAddressSpace *space = vfio_get_address_space(as);
+    VFIOContainer *first = QLIST_FIRST(&space->containers);
+    VFIOContainer *result = NULL;
+
+    /* QLIST_FIRST() yields NULL for an empty list. */
+    if (first && !QLIST_NEXT(first, next)) {
+        /* Exactly one container: that is the one to use. */
+        result = first;
+    }
+
+    /* Pairs with vfio_get_address_space() above. */
+    vfio_put_address_space(space);
+
+    /* The pointer is returned after the put, as before. */
+    return result;
+}
+
+/* True if @as has exactly one container and it passes the group check */
+static bool vfio_eeh_as_ok(AddressSpace *as)
+{
+    VFIOContainer *c = vfio_eeh_as_container(as);
+    return c ? vfio_eeh_container_ok(c) : false;
+}
+
+/*
+ * Run EEH operation @op on the container behind @as.
+ * Returns -ENODEV when vfio_eeh_as_container() finds none to use.
+ */
+static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
+{
+    VFIOContainer *c = vfio_eeh_as_container(as);
+    return c ? vfio_eeh_container_op(c, op) : -ENODEV;
+}
+
 bool spapr_phb_eeh_available(SpaprPhbState *sphb)
 {
     return vfio_eeh_as_ok(&sphb->iommu_as);
diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
index 1391dd712c..7c56aad0fc 100644
--- a/hw/remote/iommu.c
+++ b/hw/remote/iommu.c
@@ -100,6 +100,10 @@ static void remote_iommu_finalize(Object *obj)
     iommu->elem_by_devfn = NULL;
 }
 
+static const PCIIOMMUOps remote_iommu_ops = {
+    .get_address_space = remote_iommu_find_add_as,
+}; /* handed to pci_setup_iommu() in remote_iommu_setup() */
+
 void remote_iommu_setup(PCIBus *pci_bus)
 {
     RemoteIommu *iommu = NULL;
@@ -108,7 +112,7 @@ void remote_iommu_setup(PCIBus *pci_bus)
 
     iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU));
 
-    pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu);
+    pci_setup_iommu(pci_bus, &remote_iommu_ops, iommu);
 
     object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu));
 
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 52bf8e67de..0ffca05189 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -414,7 +414,7 @@ void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts
         reset_vec[4] = 0x0182b283;   /*     ld     t0, 24(t0) */
     }
 
-    if (!harts->harts[0].cfg.ext_icsr) {
+    if (!harts->harts[0].cfg.ext_zicsr) {
         /*
          * The Zicsr extension has been disabled, so let's ensure we don't
          * run the CSR instruction. Let's fill the address with a non
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 9de578c756..c7fc97e273 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -263,12 +263,12 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
         qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
         g_free(name);
 
-        if (cpu_ptr->cfg.ext_icbom) {
+        if (cpu_ptr->cfg.ext_zicbom) {
             qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
                                   cpu_ptr->cfg.cbom_blocksize);
         }
 
-        if (cpu_ptr->cfg.ext_icboz) {
+        if (cpu_ptr->cfg.ext_zicboz) {
             qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size",
                                   cpu_ptr->cfg.cboz_blocksize);
         }
@@ -722,7 +722,7 @@ static void create_fdt_pmu(RISCVVirtState *s)
     pmu_name = g_strdup_printf("/pmu");
     qemu_fdt_add_subnode(ms->fdt, pmu_name);
     qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu");
-    riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name);
+    riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
 
     g_free(pmu_name);
 }
diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index c27c362db9..2d391a8396 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -599,7 +599,7 @@ static void rtc_get_time(MC146818RtcState *s, struct tm *tm)
 
 static void rtc_set_time(MC146818RtcState *s)
 {
-    struct tm tm;
+    struct tm tm = {};
     g_autofree const char *qom_path = object_get_canonical_path(OBJECT(s));
 
     rtc_get_time(s, &tm);
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 2ca36f9f3b..347580ebac 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -652,6 +652,10 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &iommu->as;
 }
 
+static const PCIIOMMUOps s390_iommu_ops = {
+    .get_address_space = s390_pci_dma_iommu,
+}; /* used by s390_pcihost_realize() and s390_pcihost_plug() */
+
 static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
 {
     uint8_t expected, actual;
@@ -839,7 +843,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
     b = pci_register_root_bus(dev, NULL, s390_pci_set_irq, s390_pci_map_irq,
                               NULL, get_system_memory(), get_system_io(), 0,
                               64, TYPE_PCI_BUS);
-    pci_setup_iommu(b, s390_pci_dma_iommu, s);
+    pci_setup_iommu(b, &s390_iommu_ops, s);
 
     bus = BUS(b);
     qbus_set_hotplug_handler(bus, OBJECT(dev));
@@ -1058,7 +1062,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
         pdev = PCI_DEVICE(dev);
         pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq);
-        pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s);
+        pci_setup_iommu(&pb->sec_bus, &s390_iommu_ops, s);
 
         qbus_set_hotplug_handler(BUS(&pb->sec_bus), OBJECT(s));
 
diff --git a/hw/ssi/ibex_spi_host.c b/hw/ssi/ibex_spi_host.c
index 1ee7d88c22..c300ec294d 100644
--- a/hw/ssi/ibex_spi_host.c
+++ b/hw/ssi/ibex_spi_host.c
@@ -205,9 +205,10 @@ static void ibex_spi_host_irq(IbexSPIHostState *s)
         if (err_irq) {
             s->regs[IBEX_SPI_HOST_INTR_STATE] |= R_INTR_STATE_ERROR_MASK;
         }
-        qemu_set_irq(s->host_err, err_irq);
     }
 
+    qemu_set_irq(s->host_err, err_irq);
+
     /* Event IRQ Enabled and Event IRQ Cleared */
     if (event_en && !status_pending) {
         if (FIELD_EX32(intr_test_reg, INTR_STATE,  SPI_EVENT)) {
@@ -229,8 +230,9 @@ static void ibex_spi_host_irq(IbexSPIHostState *s)
         if (event_irq) {
             s->regs[IBEX_SPI_HOST_INTR_STATE] |= R_INTR_STATE_SPI_EVENT_MASK;
         }
-        qemu_set_irq(s->event, event_irq);
     }
+
+    qemu_set_irq(s->event, event_irq);
 }
 
 static void ibex_spi_host_transfer(IbexSPIHostState *s)
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 5f257bffb9..bbf69ff55a 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -14,7 +14,6 @@
 #include <linux/vfio.h>
 #include <sys/ioctl.h>
 #include "qapi/error.h"
-#include "hw/vfio/vfio.h"
 #include "hw/vfio/vfio-common.h"
 #include "hw/s390x/ap-device.h"
 #include "qemu/error-report.h"
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 6623ae237b..d857bb8d0f 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -20,7 +20,6 @@
 #include <sys/ioctl.h>
 
 #include "qapi/error.h"
-#include "hw/vfio/vfio.h"
 #include "hw/vfio/vfio-common.h"
 #include "hw/s390x/s390-ccw.h"
 #include "hw/s390x/vfio-ccw.h"
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index d806057b40..e70fdf5e0c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -26,7 +26,6 @@
 #include <linux/vfio.h>
 
 #include "hw/vfio/vfio-common.h"
-#include "hw/vfio/vfio.h"
 #include "hw/vfio/pci.h"
 #include "exec/address-spaces.h"
 #include "exec/memory.h"
@@ -246,44 +245,6 @@ bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
     return true;
 }
 
-void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
-                       hwaddr max_iova, uint64_t iova_pgsizes)
-{
-    VFIOHostDMAWindow *hostwin;
-
-    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
-        if (ranges_overlap(hostwin->min_iova,
-                           hostwin->max_iova - hostwin->min_iova + 1,
-                           min_iova,
-                           max_iova - min_iova + 1)) {
-            hw_error("%s: Overlapped IOMMU are not enabled", __func__);
-        }
-    }
-
-    hostwin = g_malloc0(sizeof(*hostwin));
-
-    hostwin->min_iova = min_iova;
-    hostwin->max_iova = max_iova;
-    hostwin->iova_pgsizes = iova_pgsizes;
-    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
-}
-
-int vfio_host_win_del(VFIOContainer *container,
-                      hwaddr min_iova, hwaddr max_iova)
-{
-    VFIOHostDMAWindow *hostwin;
-
-    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
-        if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
-            QLIST_REMOVE(hostwin, hostwin_next);
-            g_free(hostwin);
-            return 0;
-        }
-    }
-
-    return -1;
-}
-
 static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
     return (!memory_region_is_ram(section->mr) &&
@@ -532,22 +493,6 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
     g_free(vrdl);
 }
 
-static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
-                                            hwaddr iova, hwaddr end)
-{
-    VFIOHostDMAWindow *hostwin;
-    bool hostwin_found = false;
-
-    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
-        if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
-            hostwin_found = true;
-            break;
-        }
-    }
-
-    return hostwin_found ? hostwin : NULL;
-}
-
 static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
 {
     MemoryRegion *mr = section->mr;
@@ -626,7 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
     Int128 llend, llsize;
     void *vaddr;
     int ret;
-    VFIOHostDMAWindow *hostwin;
     Error *err = NULL;
 
     if (!vfio_listener_valid_section(section, "region_add")) {
@@ -648,13 +592,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
         goto fail;
     }
 
-    hostwin = vfio_find_hostwin(container, iova, end);
-    if (!hostwin) {
-        error_setg(&err, "Container %p can't map guest IOVA region"
-                   " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
-        goto fail;
-    }
-
     memory_region_ref(section->mr);
 
     if (memory_region_is_iommu(section->mr)) {
@@ -693,6 +630,15 @@ static void vfio_listener_region_add(MemoryListener *listener,
             goto fail;
         }
 
+        if (container->iova_ranges) {
+            ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
+                    container->iova_ranges, &err);
+            if (ret) {
+                g_free(giommu);
+                goto fail;
+            }
+        }
+
         ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
                                                     &err);
         if (ret) {
@@ -726,7 +672,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
     llsize = int128_sub(llend, int128_make64(iova));
 
     if (memory_region_is_ram_device(section->mr)) {
-        hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+        hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
 
         if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
             trace_vfio_listener_region_add_no_dma_map(
@@ -825,12 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
 
     if (memory_region_is_ram_device(section->mr)) {
         hwaddr pgmask;
-        VFIOHostDMAWindow *hostwin;
-
-        hostwin = vfio_find_hostwin(container, iova, end);
-        assert(hostwin); /* or region_add() would have failed */
 
-        pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+        pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
         try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
     } else if (memory_region_has_ram_discard_manager(section->mr)) {
         vfio_unregister_ram_discard_listener(container, section);
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index adc467210f..242010036a 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -20,20 +20,15 @@
 
 #include "qemu/osdep.h"
 #include <sys/ioctl.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm.h>
-#endif
 #include <linux/vfio.h>
 
 #include "hw/vfio/vfio-common.h"
-#include "hw/vfio/vfio.h"
 #include "exec/address-spaces.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
 #include "hw/hw.h"
 #include "qemu/error-report.h"
 #include "qemu/range.h"
-#include "sysemu/kvm.h"
 #include "sysemu/reset.h"
 #include "trace.h"
 #include "qapi/error.h"
@@ -205,92 +200,6 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova,
     return -errno;
 }
 
-int vfio_container_add_section_window(VFIOContainer *container,
-                                      MemoryRegionSection *section,
-                                      Error **errp)
-{
-    VFIOHostDMAWindow *hostwin;
-    hwaddr pgsize = 0;
-    int ret;
-
-    if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
-        return 0;
-    }
-
-    /* For now intersections are not allowed, we may relax this later */
-    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
-        if (ranges_overlap(hostwin->min_iova,
-                           hostwin->max_iova - hostwin->min_iova + 1,
-                           section->offset_within_address_space,
-                           int128_get64(section->size))) {
-            error_setg(errp,
-                "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing"
-                "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
-                section->offset_within_address_space,
-                section->offset_within_address_space +
-                    int128_get64(section->size) - 1,
-                hostwin->min_iova, hostwin->max_iova);
-            return -EINVAL;
-        }
-    }
-
-    ret = vfio_spapr_create_window(container, section, &pgsize);
-    if (ret) {
-        error_setg_errno(errp, -ret, "Failed to create SPAPR window");
-        return ret;
-    }
-
-    vfio_host_win_add(container, section->offset_within_address_space,
-                      section->offset_within_address_space +
-                      int128_get64(section->size) - 1, pgsize);
-#ifdef CONFIG_KVM
-    if (kvm_enabled()) {
-        VFIOGroup *group;
-        IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
-        struct kvm_vfio_spapr_tce param;
-        struct kvm_device_attr attr = {
-            .group = KVM_DEV_VFIO_GROUP,
-            .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
-            .addr = (uint64_t)(unsigned long)&param,
-        };
-
-        if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
-                                          &param.tablefd)) {
-            QLIST_FOREACH(group, &container->group_list, container_next) {
-                param.groupfd = group->fd;
-                if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
-                    error_setg_errno(errp, errno,
-                                     "vfio: failed GROUP_SET_SPAPR_TCE for "
-                                     "KVM VFIO device %d and group fd %d",
-                                     param.tablefd, param.groupfd);
-                    return -errno;
-                }
-                trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
-            }
-        }
-    }
-#endif
-    return 0;
-}
-
-void vfio_container_del_section_window(VFIOContainer *container,
-                                       MemoryRegionSection *section)
-{
-    if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
-        return;
-    }
-
-    vfio_spapr_remove_window(container,
-                             section->offset_within_address_space);
-    if (vfio_host_win_del(container,
-                          section->offset_within_address_space,
-                          section->offset_within_address_space +
-                          int128_get64(section->size) - 1) < 0) {
-        hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
-                 __func__, section->offset_within_address_space);
-    }
-}
-
 int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
 {
     int ret;
@@ -355,14 +264,6 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
     return ret;
 }
 
-static void vfio_listener_release(VFIOContainer *container)
-{
-    memory_listener_unregister(&container->listener);
-    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
-        memory_listener_unregister(&container->prereg_listener);
-    }
-}
-
 static struct vfio_info_cap_header *
 vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
 {
@@ -382,7 +283,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
     /* If the capability cannot be found, assume no DMA limiting */
     hdr = vfio_get_iommu_type1_info_cap(info,
                                         VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL);
-    if (hdr == NULL) {
+    if (!hdr) {
         return false;
     }
 
@@ -394,6 +295,32 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
     return true;
 }
 
+static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
+                                     VFIOContainer *container)
+{
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_iova_range *cap;
+    /* Collect the ranges listed in the IOVA_RANGE capability, if present. */
+    hdr = vfio_get_iommu_type1_info_cap(info,
+                                        VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+    if (!hdr) {
+        return false; /* capability absent: iova_ranges is left untouched */
+    }
+
+    cap = (void *)hdr; /* reinterpret the header as the full capability */
+
+    for (int i = 0; i < cap->nr_iovas; i++) {
+        Range *range = g_new(Range, 1); /* handed over to the list below */
+
+        range_set_bounds(range, cap->iova_ranges[i].start,
+                         cap->iova_ranges[i].end);
+        container->iova_ranges =
+            range_list_insert(container->iova_ranges, range);
+    }
+
+    return true; /* true even when the capability lists zero ranges */
+}
+
 static void vfio_kvm_device_add_group(VFIOGroup *group)
 {
     Error *err = NULL;
@@ -535,6 +462,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
     }
 }
 
+static void vfio_free_container(VFIOContainer *container)
+{
+    g_list_free_full(container->iova_ranges, g_free); /* list + each entry */
+    g_free(container);
+}
+
 static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
                                   Error **errp)
 {
@@ -616,8 +549,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
     container->error = NULL;
     container->dirty_pages_supported = false;
     container->dma_max_mappings = 0;
+    container->iova_ranges = NULL;
     QLIST_INIT(&container->giommu_list);
-    QLIST_INIT(&container->hostwin_list);
     QLIST_INIT(&container->vrdl_list);
 
     ret = vfio_init_container(container, group->fd, errp);
@@ -652,84 +585,21 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
         if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
             container->dma_max_mappings = 65535;
         }
-        vfio_get_iommu_info_migration(container, info);
-        g_free(info);
 
-        /*
-         * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
-         * information to get the actual window extent rather than assume
-         * a 64-bit IOVA address space.
-         */
-        vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes);
+        vfio_get_info_iova_range(info, container);
 
+        vfio_get_iommu_info_migration(container, info);
+        g_free(info);
         break;
     }
     case VFIO_SPAPR_TCE_v2_IOMMU:
     case VFIO_SPAPR_TCE_IOMMU:
     {
-        struct vfio_iommu_spapr_tce_info info;
-        bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
-
-        /*
-         * The host kernel code implementing VFIO_IOMMU_DISABLE is called
-         * when container fd is closed so we do not call it explicitly
-         * in this file.
-         */
-        if (!v2) {
-            ret = ioctl(fd, VFIO_IOMMU_ENABLE);
-            if (ret) {
-                error_setg_errno(errp, errno, "failed to enable container");
-                ret = -errno;
-                goto enable_discards_exit;
-            }
-        } else {
-            container->prereg_listener = vfio_prereg_listener;
-
-            memory_listener_register(&container->prereg_listener,
-                                     &address_space_memory);
-            if (container->error) {
-                memory_listener_unregister(&container->prereg_listener);
-                ret = -1;
-                error_propagate_prepend(errp, container->error,
-                    "RAM memory listener initialization failed: ");
-                goto enable_discards_exit;
-            }
-        }
-
-        info.argsz = sizeof(info);
-        ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+        ret = vfio_spapr_container_init(container, errp);
         if (ret) {
-            error_setg_errno(errp, errno,
-                             "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed");
-            ret = -errno;
-            if (v2) {
-                memory_listener_unregister(&container->prereg_listener);
-            }
             goto enable_discards_exit;
         }
-
-        if (v2) {
-            container->pgsizes = info.ddw.pgsizes;
-            /*
-             * There is a default window in just created container.
-             * To make region_add/del simpler, we better remove this
-             * window now and let those iommu_listener callbacks
-             * create/remove them when needed.
-             */
-            ret = vfio_spapr_remove_window(container, info.dma32_window_start);
-            if (ret) {
-                error_setg_errno(errp, -ret,
-                                 "failed to remove existing window");
-                goto enable_discards_exit;
-            }
-        } else {
-            /* The default table uses 4K pages */
-            container->pgsizes = 0x1000;
-            vfio_host_win_add(container, info.dma32_window_start,
-                              info.dma32_window_start +
-                              info.dma32_window_size - 1,
-                              0x1000);
-        }
+        break;
     }
     }
 
@@ -759,13 +629,17 @@ listener_release_exit:
     QLIST_REMOVE(group, container_next);
     QLIST_REMOVE(container, next);
     vfio_kvm_device_del_group(group);
-    vfio_listener_release(container);
+    memory_listener_unregister(&container->listener);
+    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
+        container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
+        vfio_spapr_container_deinit(container);
+    }
 
 enable_discards_exit:
     vfio_ram_block_discard_disable(container, false);
 
 free_container_exit:
-    g_free(container);
+    vfio_free_container(container);
 
 close_fd_exit:
     close(fd);
@@ -789,7 +663,11 @@ static void vfio_disconnect_container(VFIOGroup *group)
      * group.
      */
     if (QLIST_EMPTY(&container->group_list)) {
-        vfio_listener_release(container);
+        memory_listener_unregister(&container->listener);
+        if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
+            container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
+            vfio_spapr_container_deinit(container);
+        }
     }
 
     if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
@@ -800,7 +678,6 @@ static void vfio_disconnect_container(VFIOGroup *group)
     if (QLIST_EMPTY(&container->group_list)) {
         VFIOAddressSpace *space = container->space;
         VFIOGuestIOMMU *giommu, *tmp;
-        VFIOHostDMAWindow *hostwin, *next;
 
         QLIST_REMOVE(container, next);
 
@@ -811,15 +688,9 @@ static void vfio_disconnect_container(VFIOGroup *group)
             g_free(giommu);
         }
 
-        QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
-                           next) {
-            QLIST_REMOVE(hostwin, hostwin_next);
-            g_free(hostwin);
-        }
-
         trace_vfio_disconnect_container(container->fd);
         close(container->fd);
-        g_free(container);
+        vfio_free_container(container);
 
         vfio_put_address_space(space);
     }
@@ -975,103 +846,6 @@ static void vfio_put_base_device(VFIODevice *vbasedev)
     close(vbasedev->fd);
 }
 
-/*
- * Interfaces for IBM EEH (Enhanced Error Handling)
- */
-static bool vfio_eeh_container_ok(VFIOContainer *container)
-{
-    /*
-     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
-     * implementation is broken if there are multiple groups in a
-     * container.  The hardware works in units of Partitionable
-     * Endpoints (== IOMMU groups) and the EEH operations naively
-     * iterate across all groups in the container, without any logic
-     * to make sure the groups have their state synchronized.  For
-     * certain operations (ENABLE) that might be ok, until an error
-     * occurs, but for others (GET_STATE) it's clearly broken.
-     */
-
-    /*
-     * XXX Once fixed kernels exist, test for them here
-     */
-
-    if (QLIST_EMPTY(&container->group_list)) {
-        return false;
-    }
-
-    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
-        return false;
-    }
-
-    return true;
-}
-
-static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
-{
-    struct vfio_eeh_pe_op pe_op = {
-        .argsz = sizeof(pe_op),
-        .op = op,
-    };
-    int ret;
-
-    if (!vfio_eeh_container_ok(container)) {
-        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
-                     "kernel requires a container with exactly one group", op);
-        return -EPERM;
-    }
-
-    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
-    if (ret < 0) {
-        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
-        return -errno;
-    }
-
-    return ret;
-}
-
-static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
-{
-    VFIOAddressSpace *space = vfio_get_address_space(as);
-    VFIOContainer *container = NULL;
-
-    if (QLIST_EMPTY(&space->containers)) {
-        /* No containers to act on */
-        goto out;
-    }
-
-    container = QLIST_FIRST(&space->containers);
-
-    if (QLIST_NEXT(container, next)) {
-        /*
-         * We don't yet have logic to synchronize EEH state across
-         * multiple containers
-         */
-        container = NULL;
-        goto out;
-    }
-
-out:
-    vfio_put_address_space(space);
-    return container;
-}
-
-bool vfio_eeh_as_ok(AddressSpace *as)
-{
-    VFIOContainer *container = vfio_eeh_as_container(as);
-
-    return (container != NULL) && vfio_eeh_container_ok(container);
-}
-
-int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
-{
-    VFIOContainer *container = vfio_eeh_as_container(as);
-
-    if (!container) {
-        return -ENODEV;
-    }
-    return vfio_eeh_container_op(container, op);
-}
-
 static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
 {
     char *tmp, group_path[PATH_MAX], *group_name;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 7e5da21b31..168847e7c5 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -23,7 +23,6 @@
 #include <sys/ioctl.h>
 
 #include "hw/vfio/vfio-common.h"
-#include "hw/vfio/vfio.h"
 #include "hw/hw.h"
 #include "trace.h"
 #include "qapi/error.h"
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b27011cee7..c62c02f7b6 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3081,7 +3081,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
     struct stat st;
     int i, ret;
     bool is_mdev;
-    char uuid[UUID_FMT_LEN];
+    char uuid[UUID_STR_LEN];
     char *name;
 
     if (!vbasedev->sysfsdev) {
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 9ec1e95f6d..83da2f7ec2 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -11,6 +11,11 @@
 #include "qemu/osdep.h"
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
+#ifdef CONFIG_KVM
+#include <linux/kvm.h>
+#endif
+#include "sysemu/kvm.h"
+#include "exec/address-spaces.h"
 
 #include "hw/vfio/vfio-common.h"
 #include "hw/hw.h"
@@ -135,15 +140,90 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener,
     trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0);
 }
 
-const MemoryListener vfio_prereg_listener = {
+static const MemoryListener vfio_prereg_listener = {
     .name = "vfio-pre-reg",
     .region_add = vfio_prereg_listener_region_add,
     .region_del = vfio_prereg_listener_region_del,
 };
 
-int vfio_spapr_create_window(VFIOContainer *container,
-                             MemoryRegionSection *section,
-                             hwaddr *pgsize)
+static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
+                              hwaddr max_iova, uint64_t iova_pgsizes)
+{
+    VFIOHostDMAWindow *hostwin;
+    /* Record [min_iova, max_iova] (inclusive) as a new host DMA window. */
+    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+        if (ranges_overlap(hostwin->min_iova,
+                           hostwin->max_iova - hostwin->min_iova + 1,
+                           min_iova,
+                           max_iova - min_iova + 1)) {
+            hw_error("%s: Overlapped IOMMU are not enabled", __func__);
+        }
+    }
+
+    hostwin = g_malloc0(sizeof(*hostwin)); /* freed on del / deinit */
+
+    hostwin->min_iova = min_iova;
+    hostwin->max_iova = max_iova;
+    hostwin->iova_pgsizes = iova_pgsizes;
+    QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
+}
+
+static int vfio_host_win_del(VFIOContainer *container,
+                             hwaddr min_iova, hwaddr max_iova)
+{
+    VFIOHostDMAWindow *hostwin;
+    /* Drop the window whose bounds match exactly: 0 if found, else -1. */
+    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+        if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
+            QLIST_REMOVE(hostwin, hostwin_next);
+            g_free(hostwin);
+            return 0; /* leave at once: hostwin must not be walked further */
+        }
+    }
+
+    return -1; /* no window with exactly these bounds */
+}
+
+static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
+                                            hwaddr iova, hwaddr end)
+{
+    VFIOHostDMAWindow *hostwin;
+    bool hostwin_found = false;
+    /* Look for a window that contains the whole [iova, end] span. */
+    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+        if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
+            hostwin_found = true;
+            break;
+        }
+    }
+
+    return hostwin_found ? hostwin : NULL; /* NULL when nothing matched */
+}
+
+static int vfio_spapr_remove_window(VFIOContainer *container,
+                                    hwaddr offset_within_address_space)
+{
+    struct vfio_iommu_spapr_tce_remove remove = {
+        .argsz = sizeof(remove),
+        .start_addr = offset_within_address_space,
+    };
+    int ret;
+    /* Ask the kernel to remove the window at this address: 0 or -errno. */
+    if (ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove)) {
+        ret = -errno; /* save first: error_report() may clobber errno */
+        error_report("Failed to remove window at %"PRIx64,
+                     (uint64_t)remove.start_addr);
+        return ret;
+    }
+
+    trace_vfio_spapr_remove_window(offset_within_address_space);
+
+    return 0;
+}
+
+static int vfio_spapr_create_window(VFIOContainer *container,
+                                    MemoryRegionSection *section,
+                                    hwaddr *pgsize)
 {
     int ret = 0;
     IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
@@ -233,23 +313,195 @@ int vfio_spapr_create_window(VFIOContainer *container,
     return 0;
 }
 
-int vfio_spapr_remove_window(VFIOContainer *container,
-                             hwaddr offset_within_address_space)
+int vfio_container_add_section_window(VFIOContainer *container,
+                                      MemoryRegionSection *section,
+                                      Error **errp)
 {
-    struct vfio_iommu_spapr_tce_remove remove = {
-        .argsz = sizeof(remove),
-        .start_addr = offset_within_address_space,
-    };
+    VFIOHostDMAWindow *hostwin;
+    hwaddr pgsize = 0;
     int ret;
 
-    ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+    /*
+     * VFIO_SPAPR_TCE_IOMMU supports a single host window covering
+     * [dma32_window_start, dma32_window_start + dma32_window_size);
+     * make sure the section falls entirely within it.
+     */
+    if (container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
+        hwaddr iova, end;
+
+        iova = section->offset_within_address_space;
+        end = iova + int128_get64(section->size) - 1;
+
+        if (!vfio_find_hostwin(container, iova, end)) {
+            error_setg(errp, "Container %p can't map guest IOVA region"
+                       " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container,
+                       iova, end);
+            return -EINVAL;
+        }
+        return 0; /* v1 only validates; its window is never created here */
+    }
+
+    if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
+        return 0; /* not an sPAPR container: nothing to set up */
+    }
+
+    /* For now intersections are not allowed, we may relax this later */
+    QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+        if (ranges_overlap(hostwin->min_iova,
+                           hostwin->max_iova - hostwin->min_iova + 1,
+                           section->offset_within_address_space,
+                           int128_get64(section->size))) {
+            error_setg(errp,
+                "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing "
+                "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
+                section->offset_within_address_space,
+                section->offset_within_address_space +
+                    int128_get64(section->size) - 1,
+                hostwin->min_iova, hostwin->max_iova);
+            return -EINVAL;
+        }
+    }
+
+    ret = vfio_spapr_create_window(container, section, &pgsize);
     if (ret) {
-        error_report("Failed to remove window at %"PRIx64,
-                     (uint64_t)remove.start_addr);
-        return -errno;
+        error_setg_errno(errp, -ret, "Failed to create SPAPR window");
+        return ret;
     }
 
-    trace_vfio_spapr_remove_window(offset_within_address_space);
+    vfio_host_win_add(container, section->offset_within_address_space,
+                      section->offset_within_address_space +
+                      int128_get64(section->size) - 1, pgsize);
+#ifdef CONFIG_KVM
+    if (kvm_enabled()) {
+        VFIOGroup *group;
+        IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+        struct kvm_vfio_spapr_tce param;
+        struct kvm_device_attr attr = {
+            .group = KVM_DEV_VFIO_GROUP,
+            .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
+            .addr = (uint64_t)(unsigned long)&param,
+        };
+
+        if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
+                                          &param.tablefd)) {
+            QLIST_FOREACH(group, &container->group_list, container_next) {
+                param.groupfd = group->fd;
+                if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+                    error_setg_errno(errp, errno,
+                                     "vfio: failed GROUP_SET_SPAPR_TCE for "
+                                     "KVM VFIO device %d and group fd %d",
+                                     param.tablefd, param.groupfd);
+                    return -errno;
+                }
+                trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
+            }
+        }
+    }
+#endif
+    return 0;
+}
+
+void vfio_container_del_section_window(VFIOContainer *container,
+                                       MemoryRegionSection *section)
+{
+    if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
+        return; /* only v2 creates a window per section */
+    }
+    /* NOTE(review): the remove_window() result is ignored here. */
+    vfio_spapr_remove_window(container,
+                             section->offset_within_address_space);
+    if (vfio_host_win_del(container,
+                          section->offset_within_address_space,
+                          section->offset_within_address_space +
+                          int128_get64(section->size) - 1) < 0) {
+        hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
+                 __func__, section->offset_within_address_space);
+    }
+}
+
+int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
+{
+    struct vfio_iommu_spapr_tce_info info;
+    bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
+    int ret, fd = container->fd;
+    /* Set up an sPAPR TCE (v1 or v2) container; returns 0 or negative. */
+    QLIST_INIT(&container->hostwin_list);
+
+    /*
+     * The host kernel code implementing VFIO_IOMMU_DISABLE is called
+     * when container fd is closed so we do not call it explicitly
+     * in this file.
+     */
+    if (!v2) {
+        ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+        if (ret) {
+            error_setg_errno(errp, errno, "failed to enable container");
+            return -errno; /* v1: no listener registered, nothing to undo */
+        }
+    } else {
+        container->prereg_listener = vfio_prereg_listener;
+
+        memory_listener_register(&container->prereg_listener,
+                                 &address_space_memory);
+        if (container->error) {
+            ret = -1; /* NOTE(review): generic failure, not an errno value */
+            error_propagate_prepend(errp, container->error,
+                    "RAM memory listener initialization failed: ");
+            goto listener_unregister_exit;
+        }
+    }
+
+    info.argsz = sizeof(info);
+    ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+    if (ret) {
+        error_setg_errno(errp, errno,
+                         "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed");
+        ret = -errno;
+        goto listener_unregister_exit;
+    }
+
+    if (v2) {
+        container->pgsizes = info.ddw.pgsizes;
+        /*
+         * There is a default window in just created container.
+         * To make region_add/del simpler, we better remove this
+         * window now and let those iommu_listener callbacks
+         * create/remove them when needed.
+         */
+        ret = vfio_spapr_remove_window(container, info.dma32_window_start);
+        if (ret) {
+            error_setg_errno(errp, -ret,
+                             "failed to remove existing window");
+            goto listener_unregister_exit;
+        }
+    } else {
+        /* The default table uses 4K pages */
+        container->pgsizes = 0x1000;
+        vfio_host_win_add(container, info.dma32_window_start,
+                          info.dma32_window_start +
+                          info.dma32_window_size - 1,
+                          0x1000);
+    }
 
     return 0;
+
+listener_unregister_exit:
+    if (v2) { /* v1 never registered the prereg listener */
+        memory_listener_unregister(&container->prereg_listener);
+    }
+    return ret;
+}
+
+void vfio_spapr_container_deinit(VFIOContainer *container)
+{
+    VFIOHostDMAWindow *hostwin, *next;
+    /* Undo container_init: drop the v2 listener, free every window. */
+    if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+        memory_listener_unregister(&container->prereg_listener);
+    }
+    QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
+                       next) {
+        QLIST_REMOVE(hostwin, hostwin_next);
+        g_free(hostwin);
+    }
 }
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 0af7a2886c..637cac4edf 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -135,6 +135,7 @@ virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s"
 virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"
 virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
 virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64
+virtio_iommu_host_resv_regions(const char *name, uint32_t index, uint64_t lob, uint64_t upb) "mr=%s host-resv-reg[%d] = [0x%"PRIx64",0x%"PRIx64"]"
 
 # virtio-mem.c
 virtio_mem_send_response(uint16_t type) "type=%" PRIu16
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
index 7ef2f9dcdb..9459fbf6ed 100644
--- a/hw/virtio/virtio-iommu-pci.c
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -37,7 +37,7 @@ struct VirtIOIOMMUPCI {
 static Property virtio_iommu_pci_properties[] = {
     DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
     DEFINE_PROP_ARRAY("reserved-regions", VirtIOIOMMUPCI,
-                      vdev.nb_reserved_regions, vdev.reserved_regions,
+                      vdev.nr_prop_resv_regions, vdev.prop_resv_regions,
                       qdev_prop_reserved_region, ReservedRegion),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -54,9 +54,9 @@ static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
                          "for the virtio-iommu-pci device");
         return;
     }
-    for (int i = 0; i < s->nb_reserved_regions; i++) {
-        if (s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
-            s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
+    for (int i = 0; i < s->nr_prop_resv_regions; i++) {
+        if (s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
+            s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
             error_setg(errp, "reserved region %d has an invalid type", i);
             error_append_hint(errp, "Valid values are 0 and 1\n");
             return;
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index be51635895..89fb5767d1 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -20,12 +20,15 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "qemu/iov.h"
+#include "qemu/range.h"
+#include "qemu/reserved-region.h"
 #include "exec/target_page.h"
 #include "hw/qdev-properties.h"
 #include "hw/virtio/virtio.h"
 #include "sysemu/kvm.h"
 #include "sysemu/reset.h"
 #include "sysemu/sysemu.h"
+#include "qemu/reserved-region.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "trace.h"
@@ -378,6 +381,19 @@ static void virtio_iommu_put_domain(gpointer data)
     g_free(domain);
 }
 
+static void add_prop_resv_regions(IOMMUDevice *sdev)
+{
+    VirtIOIOMMU *s = sdev->viommu;
+    int i;
+    /* Seed the device's list with a copy of each property-set region. */
+    for (i = 0; i < s->nr_prop_resv_regions; i++) {
+        ReservedRegion *reg = g_new0(ReservedRegion, 1);
+
+        *reg = s->prop_resv_regions[i]; /* copy; the list gets its own */
+        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+    }
+}
+
 static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                               int devfn)
 {
@@ -408,6 +424,7 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
 
         memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
         address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
+        add_prop_resv_regions(sdev);
 
         /*
          * Build the IOMMU disabled container with aliases to the
@@ -444,6 +461,10 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
     return &sdev->as;
 }
 
+static const PCIIOMMUOps virtio_iommu_ops = {
+    .get_address_space = virtio_iommu_find_add_as, /* AS lookup hook */
+};
+
 static int virtio_iommu_attach(VirtIOIOMMU *s,
                                struct virtio_iommu_req_attach *req)
 {
@@ -624,29 +645,30 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
     return ret;
 }
 
-static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
+static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
                                                uint8_t *buf, size_t free)
 {
     struct virtio_iommu_probe_resv_mem prop = {};
     size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
-    int i;
-
-    total = size * s->nb_reserved_regions;
+    GList *l;
 
+    total = size * g_list_length(sdev->resv_regions);
     if (total > free) {
         return -ENOSPC;
     }
 
-    for (i = 0; i < s->nb_reserved_regions; i++) {
-        unsigned subtype = s->reserved_regions[i].type;
+    for (l = sdev->resv_regions; l; l = l->next) {
+        ReservedRegion *reg = l->data;
+        unsigned subtype = reg->type;
+        Range *range = &reg->range;
 
         assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
                subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
         prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
         prop.head.length = cpu_to_le16(length);
         prop.subtype = subtype;
-        prop.start = cpu_to_le64(s->reserved_regions[i].low);
-        prop.end = cpu_to_le64(s->reserved_regions[i].high);
+        prop.start = cpu_to_le64(range_lob(range));
+        prop.end = cpu_to_le64(range_upb(range));
 
         memcpy(buf, &prop, size);
 
@@ -666,19 +688,27 @@ static int virtio_iommu_probe(VirtIOIOMMU *s,
                               uint8_t *buf)
 {
     uint32_t ep_id = le32_to_cpu(req->endpoint);
+    IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id);
     size_t free = VIOMMU_PROBE_SIZE;
+    IOMMUDevice *sdev;
     ssize_t count;
 
-    if (!virtio_iommu_mr(s, ep_id)) {
+    if (!iommu_mr) {
         return VIRTIO_IOMMU_S_NOENT;
     }
 
-    count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
+    sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr);
+    if (!sdev) {
+        return VIRTIO_IOMMU_S_INVAL; /* virtio status code, not an errno */
+    }
+
+    count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free);
     if (count < 0) {
         return VIRTIO_IOMMU_S_INVAL;
     }
     buf += count;
     free -= count;
+    sdev->probe_done = true;
 
     return VIRTIO_IOMMU_S_OK;
 }
@@ -856,7 +886,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
     bool bypass_allowed;
     int granule;
     bool found;
-    int i;
+    GList *l;
 
     interval.low = addr;
     interval.high = addr + 1;
@@ -894,10 +924,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
         goto unlock;
     }
 
-    for (i = 0; i < s->nb_reserved_regions; i++) {
-        ReservedRegion *reg = &s->reserved_regions[i];
+    for (l = sdev->resv_regions; l; l = l->next) {
+        ReservedRegion *reg = l->data;
 
-        if (addr >= reg->low && addr <= reg->high) {
+        if (range_contains(&reg->range, addr)) {
             switch (reg->type) {
             case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                 entry.perm = flag;
@@ -1131,6 +1161,106 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
     return 0;
 }
 
+/**
+ * rebuild_resv_regions: rebuild resv regions with both the
+ * info of host resv ranges and property set resv ranges
+ */
+static int rebuild_resv_regions(IOMMUDevice *sdev)
+{
+    GList *l;
+    int i = 0;
+
+    /* free the existing list and rebuild it from scratch */
+    g_list_free_full(sdev->resv_regions, g_free);
+    sdev->resv_regions = NULL;
+
+    /* First add host reserved regions if any, all tagged as RESERVED */
+    for (l = sdev->host_resv_ranges; l; l = l->next) {
+        ReservedRegion *reg = g_new0(ReservedRegion, 1);
+        Range *r = (Range *)l->data;
+
+        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
+        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
+        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
+                                             range_lob(&reg->range),
+                                             range_upb(&reg->range));
+        i++;
+    }
+    /*
+     * then add higher priority reserved regions set by the machine
+     * through properties
+     */
+    add_prop_resv_regions(sdev);
+    return 0;
+}
+
+/**
+ * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
+ *
+ * The function turns those into reserved ranges. Once some
+ * reserved ranges have been set, new reserved regions cannot be
+ * added outside of the original ones.
+ *
+ * @mr: IOMMU MR
+ * @iova_ranges: list of usable IOVA ranges
+ * @errp: error handle
+ */
+static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
+                                        GList *iova_ranges,
+                                        Error **errp)
+{
+    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    GList *current_ranges = sdev->host_resv_ranges;
+    GList *l, *tmp, *new_ranges = NULL;
+    int ret = -EINVAL;
+
+    /* check that each new resv region is included in an existing one */
+    if (sdev->host_resv_ranges) {
+        range_inverse_array(iova_ranges,
+                            &new_ranges,
+                            0, UINT64_MAX);
+
+        for (tmp = new_ranges; tmp; tmp = tmp->next) {
+            Range *newr = (Range *)tmp->data;
+            bool included = false;
+
+            for (l = current_ranges; l; l = l->next) {
+                Range * r = (Range *)l->data;
+
+                if (range_contains_range(r, newr)) {
+                    included = true;
+                    break;
+                }
+            }
+            if (!included) {
+                goto error;
+            }
+        }
+        /* all new reserved ranges are included in existing ones */
+        ret = 0;
+        goto out;
+    }
+
+    if (sdev->probe_done) {
+        warn_report("%s: Notified about new host reserved regions after probe",
+                    mr->parent_obj.name);
+    }
+
+    range_inverse_array(iova_ranges,
+                        &sdev->host_resv_ranges,
+                        0, UINT64_MAX);
+    rebuild_resv_regions(sdev);
+
+    return 0;
+error:
+    error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
+               mr->parent_obj.name);
+out:
+    g_list_free_full(new_ranges, g_free);
+    return ret;
+}
+
 static void virtio_iommu_system_reset(void *opaque)
 {
     VirtIOIOMMU *s = opaque;
@@ -1206,7 +1336,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
     s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
 
     if (s->primary_bus) {
-        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
+        pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
     } else {
         error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
     }
@@ -1426,6 +1556,7 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
     imrc->replay = virtio_iommu_replay;
     imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
     imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
+    imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges;
 }
 
 static const TypeInfo virtio_iommu_info = {
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
index cc24812d2e..c3512c2dae 100644
--- a/hw/virtio/virtio-pmem.c
+++ b/hw/virtio/virtio-pmem.c
@@ -147,7 +147,10 @@ static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem,
 static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem,
                                                    Error **errp)
 {
-    assert(pmem->memdev);
+    if (!pmem->memdev) {
+        error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP);
+        return NULL;
+    }
 
     return &pmem->memdev->mr;
 }
diff --git a/hw/xen/xen-backend.c b/hw/xen/xen-backend.c
index 5b0fb76eae..b9bf70a9f5 100644
--- a/hw/xen/xen-backend.c
+++ b/hw/xen/xen-backend.c
@@ -101,6 +101,24 @@ static XenBackendInstance *xen_backend_list_find(XenDevice *xendev)
     return NULL;
 }
 
+bool xen_backend_exists(const char *type, const char *name)
+{
+    const XenBackendImpl *impl = xen_backend_table_lookup(type);
+    XenBackendInstance *backend;
+
+    if (!impl) {
+        return false;
+    }
+
+    QLIST_FOREACH(backend, &backend_list, entry) {
+        if (backend->impl == impl && !strcmp(backend->name, name)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void xen_backend_list_remove(XenBackendInstance *backend)
 {
     QLIST_REMOVE(backend, entry);
@@ -122,11 +140,6 @@ void xen_backend_device_create(XenBus *xenbus, const char *type,
     backend->name = g_strdup(name);
 
     impl->create(backend, opts, errp);
-    if (*errp) {
-        g_free(backend->name);
-        g_free(backend);
-        return;
-    }
 
     backend->impl = impl;
     xen_backend_list_add(backend);
@@ -165,7 +178,9 @@ bool xen_backend_try_device_destroy(XenDevice *xendev, Error **errp)
     }
 
     impl = backend->impl;
-    impl->destroy(backend, errp);
+    if (backend->xendev) {
+        impl->destroy(backend, errp);
+    }
 
     xen_backend_list_remove(backend);
     g_free(backend->name);
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index ece8ec40cd..4973e7d9c9 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -209,7 +209,8 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type)
                           NULL, "%u", &online) != 1)
             online = 0;
 
-        if (online && state == XenbusStateInitialising) {
+        if (online && state == XenbusStateInitialising &&
+            !xen_backend_exists(type, backend[i])) {
             Error *local_err = NULL;
 
             xen_bus_backend_create(xenbus, type, backend[i], backend_path,
@@ -711,8 +712,17 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
 {
     ERRP_GUARD();
     XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
+    XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
 
-    xendev->frontend_path = xen_device_get_frontend_path(xendev);
+    if (xendev_class->get_frontend_path) {
+        xendev->frontend_path = xendev_class->get_frontend_path(xendev, errp);
+        if (!xendev->frontend_path) {
+            error_prepend(errp, "failed to create frontend: ");
+            return;
+        }
+    } else {
+        xendev->frontend_path = xen_device_get_frontend_path(xendev);
+    }
 
     /*
      * The frontend area may have already been created by a legacy
@@ -912,6 +922,11 @@ void xen_device_notify_event_channel(XenDevice *xendev,
     }
 }
 
+unsigned int xen_event_channel_get_local_port(XenEventChannel *channel)
+{
+    return channel->local_port;
+}
+
 void xen_device_unbind_event_channel(XenDevice *xendev,
                                      XenEventChannel *channel,
                                      Error **errp)
@@ -1118,11 +1133,13 @@ static void xen_register_types(void)
 
 type_init(xen_register_types)
 
-void xen_bus_init(void)
+BusState *xen_bus_init(void)
 {
     DeviceState *dev = qdev_new(TYPE_XEN_BRIDGE);
     BusState *bus = qbus_new(TYPE_XEN_BUS, dev, NULL);
 
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     qbus_set_bus_hotplug_handler(bus);
+
+    return bus;
 }
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 4ded3cec23..124dd5f3d6 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -623,7 +623,6 @@ void xen_be_init(void)
 
     xen_set_dynamic_sysbus();
 
-    xen_be_register("console", &xen_console_ops);
     xen_be_register("vkbd", &xen_kbdmouse_ops);
 #ifdef CONFIG_VIRTFS
     xen_be_register("9pfs", &xen_9pfs_ops);
diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c
index 9b7304e544..3f77c675c6 100644
--- a/hw/xen/xen_devconfig.c
+++ b/hw/xen/xen_devconfig.c
@@ -46,34 +46,6 @@ static int xen_config_dev_all(char *fe, char *be)
 
 /* ------------------------------------------------------------- */
 
-int xen_config_dev_blk(DriveInfo *disk)
-{
-    char fe[256], be[256], device_name[32];
-    int vdev = 202 * 256 + 16 * disk->unit;
-    int cdrom = disk->media_cd;
-    const char *devtype = cdrom ? "cdrom" : "disk";
-    const char *mode    = cdrom ? "r"     : "w";
-    const char *filename = qemu_opt_get(disk->opts, "file");
-
-    snprintf(device_name, sizeof(device_name), "xvd%c", 'a' + disk->unit);
-    xen_pv_printf(NULL, 1, "config disk %d [%s]: %s\n",
-                  disk->unit, device_name, filename);
-    xen_config_dev_dirs("vbd", "qdisk", vdev, fe, be, sizeof(fe));
-
-    /* frontend */
-    xenstore_write_int(fe, "virtual-device",  vdev);
-    xenstore_write_str(fe, "device-type",     devtype);
-
-    /* backend */
-    xenstore_write_str(be, "dev",             device_name);
-    xenstore_write_str(be, "type",            "file");
-    xenstore_write_str(be, "params",          filename);
-    xenstore_write_str(be, "mode",            mode);
-
-    /* common stuff */
-    return xen_config_dev_all(fe, be);
-}
-
 int xen_config_dev_nic(NICInfo *nic)
 {
     char fe[256], be[256];
diff --git a/hw/xenpv/xen_machine_pv.c b/hw/xenpv/xen_machine_pv.c
index 17cda5ec13..9f9f137f99 100644
--- a/hw/xenpv/xen_machine_pv.c
+++ b/hw/xenpv/xen_machine_pv.c
@@ -32,7 +32,6 @@
 
 static void xen_init_pv(MachineState *machine)
 {
-    DriveInfo *dinfo;
     int i;
 
     setup_xen_backend_ops();
@@ -55,7 +54,6 @@ static void xen_init_pv(MachineState *machine)
     }
 
     xen_be_register("vfb", &xen_framebuffer_ops);
-    xen_be_register("qnic", &xen_netdev_ops);
 
     /* configure framebuffer */
     if (vga_interface_type == VGA_XENFB) {
@@ -64,14 +62,6 @@ static void xen_init_pv(MachineState *machine)
         vga_interface_created = true;
     }
 
-    /* configure disks */
-    for (i = 0; i < 16; i++) {
-        dinfo = drive_get(IF_XEN, 0, i);
-        if (!dinfo)
-            continue;
-        xen_config_dev_blk(dinfo);
-    }
-
     /* configure nics */
     for (i = 0; i < nb_nics; i++) {
         if (!nd_table[i].model || 0 != strcmp(nd_table[i].model, "xen"))
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index 19b16e58f8..6da1462c4f 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -26,6 +26,5 @@
 /* cputlb.c */
 void tlb_protect_code(ram_addr_t ram_addr);
 void tlb_unprotect_code(ram_addr_t ram_addr);
-void tlb_flush_counts(size_t *full, size_t *part, size_t *elide);
 #endif
 #endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9087d02769..831f7c996d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -24,6 +24,7 @@
 #include "qemu/bswap.h"
 #include "qemu/queue.h"
 #include "qemu/int128.h"
+#include "qemu/range.h"
 #include "qemu/notify.h"
 #include "qom/object.h"
 #include "qemu/rcu.h"
@@ -79,8 +80,7 @@ extern unsigned int global_dirty_tracking;
 typedef struct MemoryRegionOps MemoryRegionOps;
 
 struct ReservedRegion {
-    hwaddr low;
-    hwaddr high;
+    Range range;
     unsigned type;
 };
 
@@ -527,6 +527,26 @@ struct IOMMUMemoryRegionClass {
      int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
                                      uint64_t page_size_mask,
                                      Error **errp);
+    /**
+     * @iommu_set_iova_ranges:
+     *
+     * Propagate information about the usable IOVA ranges for a given IOMMU
+     * memory region. Used for example to propagate host physical device
+     * reserved memory region constraints to the virtual IOMMU.
+     *
+     * Optional method: if this method is not provided, then the default IOVA
+     * aperture is used.
+     *
+     * @iommu: the IOMMUMemoryRegion
+     *
+     * @iova_ranges: list of ordered IOVA ranges (at least one range)
+     *
+     * Returns 0 on success, or a negative error. In case of failure, the error
+     * object must be created.
+     */
+     int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
+                                  GList *iova_ranges,
+                                  Error **errp);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -1857,6 +1877,18 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
                                            Error **errp);
 
 /**
+ * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges
+ * for a given IOMMU MR region
+ *
+ * @iommu: IOMMU memory region
+ * @iova_ranges: list of ordered IOVA ranges (at least one range)
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu,
+                                        GList *iova_ranges,
+                                        Error **errp);
+
+/**
  * memory_region_name: get a memory region's name
  *
  * Returns the string that was used to initialize the memory region.
diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index dffb0e73d2..0a5c258fe6 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -385,10 +385,11 @@ static ssize_t glue(load_elf, SZ)(const char *name, int fd,
     }
 
     if (pflags) {
-        *pflags = (elf_word)ehdr.e_flags;
+        *pflags = ehdr.e_flags;
+    }
+    if (pentry) {
+        *pentry = ehdr.e_entry;
     }
-    if (pentry)
-        *pentry = (uint64_t)(elf_sword)ehdr.e_entry;
 
     glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb, sym_cb);
 
@@ -610,10 +611,12 @@ static ssize_t glue(load_elf, SZ)(const char *name, int fd,
         }
     }
 
-    if (lowaddr)
-        *lowaddr = (uint64_t)(elf_sword)low;
-    if (highaddr)
-        *highaddr = (uint64_t)(elf_sword)high;
+    if (lowaddr) {
+        *lowaddr = low;
+    }
+    if (highaddr) {
+        *highaddr = high;
+    }
     ret = total_size;
  fail:
     if (mapped_file) {
diff --git a/include/hw/hyperv/dynmem-proto.h b/include/hw/hyperv/dynmem-proto.h
new file mode 100644
index 0000000000..d0f9090ac4
--- /dev/null
+++ b/include/hw/hyperv/dynmem-proto.h
@@ -0,0 +1,423 @@
+#ifndef HW_HYPERV_DYNMEM_PROTO_H
+#define HW_HYPERV_DYNMEM_PROTO_H
+
+/*
+ * Hyper-V Dynamic Memory Protocol definitions
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * Based on drivers/hv/hv_balloon.c from Linux kernel:
+ * Copyright (c) 2012, Microsoft Corporation.
+ *
+ * Author: K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Protocol versions. The low word is the minor version, the high word the major
+ * version.
+ *
+ * History:
+ * Initial version 1.0
+ * Changed to 0.1 on 2009/03/25
+ * Changed to 0.2 on 2009/05/14
+ * Changed to 0.3 on 2009/12/03
+ * Changed to 1.0 on 2011/04/05
+ * Changed to 2.0 on 2019/12/10
+ */
+
+#define DYNMEM_MAKE_VERSION(Major, Minor) ((uint32_t)(((Major) << 16) | (Minor)))
+#define DYNMEM_MAJOR_VERSION(Version) ((uint32_t)(Version) >> 16)
+#define DYNMEM_MINOR_VERSION(Version) ((uint32_t)(Version) & 0xffff)
+
+enum {
+    DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
+    DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
+    DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0),
+
+    DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
+    DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
+    DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3,
+
+    DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10
+};
+
+
+
+/*
+ * Message Types
+ */
+
+enum dm_message_type {
+    /*
+     * Version 0.3
+     */
+    DM_ERROR = 0,
+    DM_VERSION_REQUEST = 1,
+    DM_VERSION_RESPONSE = 2,
+    DM_CAPABILITIES_REPORT = 3,
+    DM_CAPABILITIES_RESPONSE = 4,
+    DM_STATUS_REPORT = 5,
+    DM_BALLOON_REQUEST = 6,
+    DM_BALLOON_RESPONSE = 7,
+    DM_UNBALLOON_REQUEST = 8,
+    DM_UNBALLOON_RESPONSE = 9,
+    DM_MEM_HOT_ADD_REQUEST = 10,
+    DM_MEM_HOT_ADD_RESPONSE = 11,
+    DM_VERSION_03_MAX = 11,
+    /*
+     * Version 1.0.
+     */
+    DM_INFO_MESSAGE = 12,
+    DM_VERSION_1_MAX = 12,
+
+    /*
+     * Version 2.0
+     */
+    DM_MEM_HOT_REMOVE_REQUEST = 13,
+    DM_MEM_HOT_REMOVE_RESPONSE = 14
+};
+
+
+/*
+ * Structures defining the dynamic memory management
+ * protocol.
+ */
+
+union dm_version {
+    struct {
+        uint16_t minor_version;
+        uint16_t major_version;
+    };
+    uint32_t version;
+} QEMU_PACKED;
+
+
+union dm_caps {
+    struct {
+        uint64_t balloon:1;
+        uint64_t hot_add:1;
+        /*
+         * To support guests that may have alignment
+         * limitations on hot-add, the guest can specify
+         * its alignment requirements; a value of n
+         * represents an alignment of 2^n in megabytes.
+         */
+        uint64_t hot_add_alignment:4;
+        uint64_t hot_remove:1;
+        uint64_t reservedz:57;
+    } cap_bits;
+    uint64_t caps;
+} QEMU_PACKED;
+
+union dm_mem_page_range {
+    struct  {
+        /*
+         * The PFN number of the first page in the range.
+         * 40 bits is the architectural limit of a PFN
+         * number for AMD64.
+         */
+        uint64_t start_page:40;
+        /*
+         * The number of pages in the range.
+         */
+        uint64_t page_cnt:24;
+    } finfo;
+    uint64_t  page_range;
+} QEMU_PACKED;
+
+
+
+/*
+ * The header for all dynamic memory messages:
+ *
+ * type: Type of the message.
+ * size: Size of the message in bytes; including the header.
+ * trans_id: The guest is responsible for manufacturing this ID.
+ */
+
+struct dm_header {
+    uint16_t type;
+    uint16_t size;
+    uint32_t trans_id;
+} QEMU_PACKED;
+
+/*
+ * A generic message format for dynamic memory.
+ * Specific message formats are defined later in the file.
+ */
+
+struct dm_message {
+    struct dm_header hdr;
+    uint8_t data[]; /* enclosed message */
+} QEMU_PACKED;
+
+
+/*
+ * Specific message types supporting the dynamic memory protocol.
+ */
+
+/*
+ * Version negotiation message. Sent from the guest to the host.
+ * The guest is free to try different versions until the host
+ * accepts the version.
+ *
+ * dm_version: The protocol version requested.
+ * is_last_attempt: If TRUE, this is the last version guest will request.
+ * reservedz: Reserved field, set to zero.
+ */
+
+struct dm_version_request {
+    struct dm_header hdr;
+    union dm_version version;
+    uint32_t is_last_attempt:1;
+    uint32_t reservedz:31;
+} QEMU_PACKED;
+
+/*
+ * Version response message; Host to Guest and indicates
+ * if the host has accepted the version sent by the guest.
+ *
+ * is_accepted: If TRUE, host has accepted the version and the guest
+ * should proceed to the next stage of the protocol. FALSE indicates that
+ * guest should re-try with a different version.
+ *
+ * reservedz: Reserved field, set to zero.
+ */
+
+struct dm_version_response {
+    struct dm_header hdr;
+    uint64_t is_accepted:1;
+    uint64_t reservedz:63;
+} QEMU_PACKED;
+
+/*
+ * Message reporting capabilities. This is sent from the guest to the
+ * host.
+ */
+
+struct dm_capabilities {
+    struct dm_header hdr;
+    union dm_caps caps;
+    uint64_t min_page_cnt;
+    uint64_t max_page_number;
+} QEMU_PACKED;
+
+/*
+ * Response to the capabilities message. This is sent from the host to the
+ * guest. This message notifies if the host has accepted the guest's
+ * capabilities. If the host has not accepted, the guest must shutdown
+ * the service.
+ *
+ * is_accepted: Indicates if the host has accepted guest's capabilities.
+ * reservedz: Must be 0.
+ */
+
+struct dm_capabilities_resp_msg {
+    struct dm_header hdr;
+    uint64_t is_accepted:1;
+    uint64_t hot_remove:1;
+    uint64_t suppress_pressure_reports:1;
+    uint64_t reservedz:61;
+} QEMU_PACKED;
+
+/*
+ * This message is used to report memory pressure from the guest.
+ * This message is not part of any transaction and there is no
+ * response to this message.
+ *
+ * num_avail: Available memory in pages.
+ * num_committed: Committed memory in pages.
+ * page_file_size: The accumulated size of all page files
+ *                 in the system in pages.
+ * zero_free: The number of zero and free pages.
+ * page_file_writes: The writes to the page file in pages.
+ * io_diff: An indicator of file cache efficiency or page file activity,
+ *          calculated as File Cache Page Fault Count - Page Read Count.
+ *          This value is in pages.
+ *
+ * Some of these metrics are Windows specific and fortunately
+ * the algorithm on the host side that computes the guest memory
+ * pressure only uses num_committed value.
+ */
+
+struct dm_status {
+    struct dm_header hdr;
+    uint64_t num_avail;
+    uint64_t num_committed;
+    uint64_t page_file_size;
+    uint64_t zero_free;
+    uint32_t page_file_writes;
+    uint32_t io_diff;
+} QEMU_PACKED;
+
+
+/*
+ * Message to ask the guest to allocate memory - balloon up message.
+ * This message is sent from the host to the guest. The guest may not be
+ * able to allocate as much memory as requested.
+ *
+ * num_pages: number of pages to allocate.
+ */
+
+struct dm_balloon {
+    struct dm_header hdr;
+    uint32_t num_pages;
+    uint32_t reservedz;
+} QEMU_PACKED;
+
+
+/*
+ * Balloon response message; this message is sent from the guest
+ * to the host in response to the balloon message.
+ *
+ * reservedz: Reserved; must be set to zero.
+ * more_pages: If FALSE, this is the last message of the transaction.
+ * if TRUE there will be at least one more message from the guest.
+ *
+ * range_count: The number of ranges in the range array.
+ *
+ * range_array: An array of page ranges returned to the host.
+ *
+ */
+
+struct dm_balloon_response {
+    struct dm_header hdr;
+    uint32_t reservedz;
+    uint32_t more_pages:1;
+    uint32_t range_count:31;
+    union dm_mem_page_range range_array[];
+} QEMU_PACKED;
+
+/*
+ * Un-balloon message; this message is sent from the host
+ * to the guest to give guest more memory.
+ *
+ * more_pages: If FALSE, this is the last message of the transaction.
+ * if TRUE there will be at least one more message from the host.
+ *
+ * reservedz: Reserved; must be set to zero.
+ *
+ * range_count: The number of ranges in the range array.
+ *
+ * range_array: An array of page ranges returned to the guest.
+ *
+ */
+
+struct dm_unballoon_request {
+    struct dm_header hdr;
+    uint32_t more_pages:1;
+    uint32_t reservedz:31;
+    uint32_t range_count;
+    union dm_mem_page_range range_array[];
+} QEMU_PACKED;
+
+/*
+ * Un-balloon response message; this message is sent from the guest
+ * to the host in response to an unballoon request.
+ *
+ */
+
+struct dm_unballoon_response {
+    struct dm_header hdr;
+} QEMU_PACKED;
+
+
+/*
+ * Hot add request message. Message sent from the host to the guest.
+ *
+ * mem_range: Memory range to hot add.
+ *
+ */
+
+struct dm_hot_add {
+    struct dm_header hdr;
+    union dm_mem_page_range range;
+} QEMU_PACKED;
+
+/*
+ * Hot add response message.
+ * This message is sent by the guest to report the status of a hot add request.
+ * If page_count is less than the requested page count, then the host should
+ * assume all further hot add requests will fail, since this indicates that
+ * the guest has hit an upper physical memory barrier.
+ *
+ * Hot adds may also fail due to low resources; in this case, the guest must
+ * not complete this message until the hot add can succeed, and the host must
+ * not send a new hot add request until the response is sent.
+ * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
+ * times it fails the request.
+ *
+ *
+ * page_count: number of pages that were successfully hot added.
+ *
+ * result: result of the operation 1: success, 0: failure.
+ *
+ */
+
+struct dm_hot_add_response {
+    struct dm_header hdr;
+    uint32_t page_count;
+    uint32_t result;
+} QEMU_PACKED;
+
+struct dm_hot_remove {
+    struct dm_header hdr;
+    uint32_t virtual_node;
+    uint32_t page_count;
+    uint32_t qos_flags;
+    uint32_t reservedZ;
+} QEMU_PACKED;
+
+struct dm_hot_remove_response {
+    struct dm_header hdr;
+    uint32_t result;
+    uint32_t range_count;
+    uint64_t more_pages:1;
+    uint64_t reservedz:63;
+    union dm_mem_page_range range_array[];
+} QEMU_PACKED;
+
+#define DM_REMOVE_QOS_LARGE (1 << 0)
+#define DM_REMOVE_QOS_LOCAL (1 << 1)
+#define DM_REMOVE_QOS_MASK (0x3)
+
+/*
+ * Types of information sent from host to the guest.
+ */
+
+enum dm_info_type {
+    INFO_TYPE_MAX_PAGE_CNT = 0,
+    MAX_INFO_TYPE
+};
+
+
+/*
+ * Header for the information message.
+ */
+
+struct dm_info_header {
+    enum dm_info_type type;
+    uint32_t data_size;
+    uint8_t  data[];
+} QEMU_PACKED;
+
+/*
+ * This message is sent from the host to the guest to pass
+ * some relevant information (win8 addition).
+ *
+ * reserved: not used.
+ * info_size: size of the information blob.
+ * info: information blob.
+ */
+
+struct dm_info_msg {
+    struct dm_header hdr;
+    uint32_t reserved;
+    uint32_t info_size;
+    uint8_t  info[];
+} QEMU_PACKED;
+
+#endif
diff --git a/include/hw/hyperv/hv-balloon.h b/include/hw/hyperv/hv-balloon.h
new file mode 100644
index 0000000000..c1efe70fc2
--- /dev/null
+++ b/include/hw/hyperv/hv-balloon.h
@@ -0,0 +1,18 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HV_BALLOON_H
+#define HW_HV_BALLOON_H
+
+#include "qom/object.h"
+
+#define TYPE_HV_BALLOON "hv-balloon"
+OBJECT_DECLARE_SIMPLE_TYPE(HvBalloon, HV_BALLOON)
+
+#endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 29a9724524..a10ceeabbf 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -33,6 +33,7 @@ typedef struct PCMachineState {
 
     /* Pointers to devices and objects: */
     PCIBus *bus;
+    BusState *xenbus;
     I2CBus *smbus;
     PFlashCFI01 *flash[2];
     ISADevice *pcspk;
@@ -184,7 +185,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 void pc_cmos_init(PCMachineState *pcms,
                   BusState *ide0, BusState *ide1,
                   ISADevice *s);
-void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus);
+void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus,
+                 BusState *xen_bus);
 
 void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs);
 
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index 3354d6c166..a1d62cc551 100644
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@@ -38,6 +38,10 @@ typedef struct MemoryDeviceState MemoryDeviceState;
  * address in guest physical memory can either be specified explicitly
  * or get assigned automatically.
  *
+ * Some memory devices might not own a memory region in certain device
+ * configurations. Such devices can logically get (un)plugged, however,
+ * empty memory devices are mostly ignored by the memory device code.
+ *
  * Conceptually, memory devices only span one memory region. If multiple
  * successive memory regions are used, a covering memory region has to
  * be provided. Scattered memory regions are not supported for single
@@ -91,7 +95,8 @@ struct MemoryDeviceClass {
     uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp);
 
     /*
-     * Return the memory region of the memory device.
+     * Return the memory region of the memory device. If the device is
+     * completely empty, returns NULL without an error.
      *
      * Called when (un)plugging the memory device, to (un)map the
      * memory region in guest physical memory, but also to detect the
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index ea5aff118b..fa6313aabc 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -363,10 +363,42 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range);
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
-typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
+
+/**
+ * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers
+ * of a PCIBus
+ *
+ * Allows to modify the behavior of some IOMMU operations of the PCI
+ * framework for a set of devices on a PCI bus.
+ */
+typedef struct PCIIOMMUOps {
+    /**
+     * @get_address_space: get the address space for a set of devices
+     * on a PCI bus.
+     *
+     * Mandatory callback which returns a pointer to an #AddressSpace
+     *
+     * @bus: the #PCIBus being accessed.
+     *
+     * @opaque: the data passed to pci_setup_iommu().
+     *
+     * @devfn: device and function number
+     */
+   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+} PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+
+/**
+ * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
+ *
+ * Let PCI host bridges define specific operations.
+ *
+ * @bus: the #PCIBus being updated.
+ * @ops: the #PCIIOMMUOps
+ * @opaque: passed to callbacks of the @ops structure.
+ */
+void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque);
 
 pcibus_t pci_bar_address(PCIDevice *d,
                          int reg, uint8_t type, pcibus_t size);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 5653175957..2261312546 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -33,7 +33,7 @@ enum PCIBusFlags {
 struct PCIBus {
     BusState qbus;
     enum PCIBusFlags flags;
-    PCIIOMMUFunc iommu_fn;
+    const PCIIOMMUOps *iommu_ops;
     void *iommu_opaque;
     uint8_t devfn_min;
     uint32_t slot_reserved_mask;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 7780b9073a..a4a22accb9 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -99,6 +99,7 @@ typedef struct VFIOContainer {
     QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
     QLIST_ENTRY(VFIOContainer) next;
     QLIST_HEAD(, VFIODevice) device_list;
+    GList *iova_ranges;
 } VFIOContainer;
 
 typedef struct VFIOGuestIOMMU {
@@ -206,11 +207,6 @@ typedef struct {
     hwaddr pages;
 } VFIOBitmap;
 
-void vfio_host_win_add(VFIOContainer *container,
-                       hwaddr min_iova, hwaddr max_iova,
-                       uint64_t iova_pgsizes);
-int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova,
-                      hwaddr max_iova);
 VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
 void vfio_put_address_space(VFIOAddressSpace *space);
 bool vfio_devices_all_running_and_saving(VFIOContainer *container);
@@ -224,11 +220,14 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start);
 int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
                             hwaddr iova, hwaddr size);
 
+/* SPAPR specific */
 int vfio_container_add_section_window(VFIOContainer *container,
                                       MemoryRegionSection *section,
                                       Error **errp);
 void vfio_container_del_section_window(VFIOContainer *container,
                                        MemoryRegionSection *section);
+int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
+void vfio_spapr_container_deinit(VFIOContainer *container);
 
 void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
@@ -288,13 +287,6 @@ vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
 struct vfio_info_cap_header *
 vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
 #endif
-extern const MemoryListener vfio_prereg_listener;
-
-int vfio_spapr_create_window(VFIOContainer *container,
-                             MemoryRegionSection *section,
-                             hwaddr *pgsize);
-int vfio_spapr_remove_window(VFIOContainer *container,
-                             hwaddr offset_within_address_space);
 
 bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
 void vfio_migration_exit(VFIODevice *vbasedev);
diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
deleted file mode 100644
index 86248f5436..0000000000
--- a/include/hw/vfio/vfio.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef HW_VFIO_H
-#define HW_VFIO_H
-
-bool vfio_eeh_as_ok(AddressSpace *as);
-int vfio_eeh_as_op(AddressSpace *as, uint32_t op);
-
-#endif
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index a93fc5383e..781ebaea8f 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -39,6 +39,9 @@ typedef struct IOMMUDevice {
     AddressSpace  as;
     MemoryRegion root;          /* The root container of the device */
     MemoryRegion bypass_mr;     /* The alias of shared memory MR */
+    GList *resv_regions;
+    GList *host_resv_ranges;
+    bool probe_done;
 } IOMMUDevice;
 
 typedef struct IOMMUPciBus {
@@ -55,8 +58,8 @@ struct VirtIOIOMMU {
     GHashTable *as_by_busptr;
     IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
     PCIBus *primary_bus;
-    ReservedRegion *reserved_regions;
-    uint32_t nb_reserved_regions;
+    ReservedRegion *prop_resv_regions;
+    uint32_t nr_prop_resv_regions;
     GTree *domains;
     QemuRecMutex mutex;
     GTree *endpoints;
diff --git a/include/hw/xen/interface/arch-arm.h b/include/hw/xen/interface/arch-arm.h
index 94b31511dd..1528ced509 100644
--- a/include/hw/xen/interface/arch-arm.h
+++ b/include/hw/xen/interface/arch-arm.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * arch-arm.h
  *
  * Guest OS interface to ARM Xen.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright 2011 (C) Citrix Systems
  */
 
@@ -361,6 +344,7 @@ typedef uint64_t xen_callback_t;
 #define PSR_DBG_MASK    (1<<9)        /* arm64: Debug Exception mask */
 #define PSR_IT_MASK     (0x0600fc00)  /* Thumb If-Then Mask */
 #define PSR_JAZELLE     (1<<24)       /* Jazelle Mode */
+#define PSR_Z           (1<<30)       /* Zero condition flag */
 
 /* 32 bit modes */
 #define PSR_MODE_USR 0x10
@@ -383,7 +367,15 @@ typedef uint64_t xen_callback_t;
 #define PSR_MODE_EL1t 0x04
 #define PSR_MODE_EL0t 0x00
 
-#define PSR_GUEST32_INIT  (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
+/*
+ * We set PSR_Z to be able to boot Linux kernel versions with an invalid
+ * encoding of the first 8 NOP instructions. See commit a92882a4d270 in
+ * Linux.
+ *
+ * Note that PSR_Z is also set by U-Boot and QEMU -kernel when loading
+ * zImage kernels on aarch32.
+ */
+#define PSR_GUEST32_INIT (PSR_Z|PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
 #define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h)
 
 #define SCTLR_GUEST_INIT    xen_mk_ullong(0x00c50078)
@@ -398,6 +390,10 @@ typedef uint64_t xen_callback_t;
 
 /* Physical Address Space */
 
+/* Virtio MMIO mappings */
+#define GUEST_VIRTIO_MMIO_BASE   xen_mk_ullong(0x02000000)
+#define GUEST_VIRTIO_MMIO_SIZE   xen_mk_ullong(0x00100000)
+
 /*
  * vGIC mappings: Only one set of mapping is used by the guest.
  * Therefore they can overlap.
@@ -484,6 +480,9 @@ typedef uint64_t xen_callback_t;
 
 #define GUEST_VPL011_SPI        32
 
+#define GUEST_VIRTIO_MMIO_SPI_FIRST   33
+#define GUEST_VIRTIO_MMIO_SPI_LAST    43
+
 /* PSCI functions */
 #define PSCI_cpu_suspend 0
 #define PSCI_cpu_off     1
diff --git a/include/hw/xen/interface/arch-x86/cpuid.h b/include/hw/xen/interface/arch-x86/cpuid.h
index ce46305bee..7ecd16ae05 100644
--- a/include/hw/xen/interface/arch-x86/cpuid.h
+++ b/include/hw/xen/interface/arch-x86/cpuid.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * arch-x86/cpuid.h
  *
  * CPUID interface to Xen.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2007 Citrix Systems, Inc.
  *
  * Authors:
@@ -102,6 +85,18 @@
 #define XEN_HVM_CPUID_IOMMU_MAPPINGS   (1u << 2)
 #define XEN_HVM_CPUID_VCPU_ID_PRESENT  (1u << 3) /* vcpu id is present in EBX */
 #define XEN_HVM_CPUID_DOMID_PRESENT    (1u << 4) /* domid is present in ECX */
+/*
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, allowing up to 32768 APIC IDs to be targeted.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID      (1u << 5)
+/*
+ * Per-vCPU event channel upcalls work correctly with physical IRQs
+ * bound to event channels.
+ */
+#define XEN_HVM_CPUID_UPCALL_VECTOR    (1u << 6)
 
 /*
  * Leaf 6 (0x40000x05)
diff --git a/include/hw/xen/interface/arch-x86/xen-x86_32.h b/include/hw/xen/interface/arch-x86/xen-x86_32.h
index 19d7388633..139438e835 100644
--- a/include/hw/xen/interface/arch-x86/xen-x86_32.h
+++ b/include/hw/xen/interface/arch-x86/xen-x86_32.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * xen-x86_32.h
  *
  * Guest OS interface to x86 32-bit Xen.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2004-2007, K A Fraser
  */
 
diff --git a/include/hw/xen/interface/arch-x86/xen-x86_64.h b/include/hw/xen/interface/arch-x86/xen-x86_64.h
index 40aed14366..5d9035ed22 100644
--- a/include/hw/xen/interface/arch-x86/xen-x86_64.h
+++ b/include/hw/xen/interface/arch-x86/xen-x86_64.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * xen-x86_64.h
  *
  * Guest OS interface to x86 64-bit Xen.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2004-2006, K A Fraser
  */
 
diff --git a/include/hw/xen/interface/arch-x86/xen.h b/include/hw/xen/interface/arch-x86/xen.h
index 7acd94c8eb..c0f4551247 100644
--- a/include/hw/xen/interface/arch-x86/xen.h
+++ b/include/hw/xen/interface/arch-x86/xen.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * arch-x86/xen.h
  *
  * Guest OS interface to x86 Xen.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2004-2006, K A Fraser
  */
 
@@ -320,12 +303,9 @@ struct xen_arch_domainconfig {
     uint32_t misc_flags;
 };
 
-/* Location of online VCPU bitmap. */
-#define XEN_ACPI_CPU_MAP             0xaf00
-#define XEN_ACPI_CPU_MAP_LEN         ((HVM_MAX_VCPUS + 7) / 8)
+/* Max XEN_X86_* constant. Used for ABI checking. */
+#define XEN_X86_MISC_FLAGS_MAX XEN_X86_MSR_RELAXED
 
-/* GPE0 bit set during CPU hotplug */
-#define XEN_ACPI_GPE0_CPUHP_BIT      2
 #endif
 
 /*
diff --git a/include/hw/xen/interface/event_channel.h b/include/hw/xen/interface/event_channel.h
index 73c9f38ce1..0d91a1c4af 100644
--- a/include/hw/xen/interface/event_channel.h
+++ b/include/hw/xen/interface/event_channel.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * event_channel.h
  *
  * Event channels between domains.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2003-2004, K A Fraser.
  */
 
diff --git a/include/hw/xen/interface/features.h b/include/hw/xen/interface/features.h
index 9ee2f760ef..d2a9175aae 100644
--- a/include/hw/xen/interface/features.h
+++ b/include/hw/xen/interface/features.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * features.h
  *
  * Feature flags, reported by XENVER_get_features.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
  */
 
diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h
index 7934d7b718..1dfa17a6d0 100644
--- a/include/hw/xen/interface/grant_table.h
+++ b/include/hw/xen/interface/grant_table.h
@@ -1,27 +1,10 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * grant_table.h
  *
  * Interface for granting foreign access to page frames, and receiving
  * page-ownership transfers.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2004, K A Fraser
  */
 
diff --git a/include/hw/xen/interface/hvm/hvm_op.h b/include/hw/xen/interface/hvm/hvm_op.h
index 870ec52060..e22adf0319 100644
--- a/include/hw/xen/interface/hvm/hvm_op.h
+++ b/include/hw/xen/interface/hvm/hvm_op.h
@@ -1,22 +1,5 @@
+/* SPDX-License-Identifier: MIT */
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2007, Keir Fraser
  */
 
diff --git a/include/hw/xen/interface/hvm/params.h b/include/hw/xen/interface/hvm/params.h
index c9d6e70d7b..a22b4ed45d 100644
--- a/include/hw/xen/interface/hvm/params.h
+++ b/include/hw/xen/interface/hvm/params.h
@@ -1,22 +1,5 @@
+/* SPDX-License-Identifier: MIT */
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2007, Keir Fraser
  */
 
diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h
index 4cdba79aba..22f1eef0c0 100644
--- a/include/hw/xen/interface/io/blkif.h
+++ b/include/hw/xen/interface/io/blkif.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * blkif.h
  *
  * Unified block-device I/O interface for Xen guest OSes.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2003-2004, Keir Fraser
  * Copyright (c) 2012, Spectra Logic Corporation
  */
@@ -363,6 +346,14 @@
  *      that the frontend requires that the logical block size is 512 as it
  *      is hardcoded (which is the case in some frontend implementations).
  *
+ * trusted
+ *      Values:         0/1 (boolean)
+ *      Default value:  1
+ *
+ *      A value of "0" indicates that the frontend should not trust the
+ *      backend, and should deploy whatever measures are available to
+ *      protect against a malicious backend on the other end.
+ *
  *------------------------- Virtual Device Properties -------------------------
  *
  * device-type
diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h
index 4811f47220..4509b4b689 100644
--- a/include/hw/xen/interface/io/console.h
+++ b/include/hw/xen/interface/io/console.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * console.h
  *
  * Console I/O interface for Xen guest OSes.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2005, Keir Fraser
  */
 
diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h
index cc25aab32e..93c73195d8 100644
--- a/include/hw/xen/interface/io/fbif.h
+++ b/include/hw/xen/interface/io/fbif.h
@@ -1,24 +1,7 @@
+/* SPDX-License-Identifier: MIT */
 /*
  * fbif.h -- Xen virtual frame buffer device
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
  * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
  */
diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h
index a6b01c52c7..4bde6b3821 100644
--- a/include/hw/xen/interface/io/kbdif.h
+++ b/include/hw/xen/interface/io/kbdif.h
@@ -1,24 +1,7 @@
+/* SPDX-License-Identifier: MIT */
 /*
  * kbdif.h -- Xen virtual keyboard/mouse
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
  * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
  */
diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h
index 00dd258712..c13b85061d 100644
--- a/include/hw/xen/interface/io/netif.h
+++ b/include/hw/xen/interface/io/netif.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * netif.h
  *
  * Unified network-device I/O interface for Xen guest OSes.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2003-2004, Keir Fraser
  */
 
@@ -161,6 +144,12 @@
  */
 
 /*
+ * Setting the "trusted" node to "0" in the frontend path signals that the
+ * frontend should not trust the backend, and should deploy whatever measures
+ * are available to protect against a malicious backend on the other end.
+ */
+
+/*
  * Control ring
  * ============
  *
diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h
index 52b4de0f81..7815e1ff0f 100644
--- a/include/hw/xen/interface/io/protocols.h
+++ b/include/hw/xen/interface/io/protocols.h
@@ -1,24 +1,7 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * protocols.h
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2008, Keir Fraser
  */
 
diff --git a/include/hw/xen/interface/io/ring.h b/include/hw/xen/interface/io/ring.h
index c486c457e0..025939278b 100644
--- a/include/hw/xen/interface/io/ring.h
+++ b/include/hw/xen/interface/io/ring.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * ring.h
  *
  * Shared producer-consumer ring macros.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Tim Deegan and Andrew Warfield November 2004.
  */
 
@@ -95,9 +78,8 @@ typedef unsigned int RING_IDX;
  * of the shared memory area (PAGE_SIZE, for instance). To initialise
  * the front half:
  *
- *     mytag_front_ring_t front_ring;
- *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
- *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *     mytag_front_ring_t ring;
+ *     XEN_FRONT_RING_INIT(&ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
  *
  * Initializing the back follows similarly (note that only the front
  * initializes the shared ring):
@@ -184,6 +166,11 @@ typedef struct __name##_back_ring __name##_back_ring_t
 
 #define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size)
 
+#define XEN_FRONT_RING_INIT(r, s, size) do {                            \
+    SHARED_RING_INIT(s);                                                \
+    FRONT_RING_INIT(r, s, size);                                        \
+} while (0)
+
 #define BACK_RING_ATTACH(_r, _s, _i, __size) do {                       \
     (_r)->rsp_prod_pvt = (_i);                                          \
     (_r)->req_cons = (_i);                                              \
@@ -208,11 +195,11 @@ typedef struct __name##_back_ring __name##_back_ring_t
     (RING_FREE_REQUESTS(_r) == 0)
 
 /* Test if there are outstanding messages to be processed on a ring. */
-#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
+#define XEN_RING_NR_UNCONSUMED_RESPONSES(_r)                            \
     ((_r)->sring->rsp_prod - (_r)->rsp_cons)
 
 #ifdef __GNUC__
-#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
+#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r) ({                          \
     unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
     unsigned int rsp = RING_SIZE(_r) -                                  \
         ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
@@ -220,13 +207,27 @@ typedef struct __name##_back_ring __name##_back_ring_t
 })
 #else
 /* Same as above, but without the nice GCC ({ ... }) syntax. */
-#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
+#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r)                             \
     ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
       (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
      ((_r)->sring->req_prod - (_r)->req_cons) :                         \
      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
 #endif
 
+#ifdef XEN_RING_HAS_UNCONSUMED_IS_BOOL
+/*
+ * These variants should only be used in case no caller is abusing them for
+ * obtaining the number of unconsumed responses/requests.
+ */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+    (!!XEN_RING_NR_UNCONSUMED_RESPONSES(_r))
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)  \
+    (!!XEN_RING_NR_UNCONSUMED_REQUESTS(_r))
+#else
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) XEN_RING_NR_UNCONSUMED_RESPONSES(_r)
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)  XEN_RING_NR_UNCONSUMED_REQUESTS(_r)
+#endif
+
 /* Direct access to individual ring elements, by index. */
 #define RING_GET_REQUEST(_r, _idx)                                      \
     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h
index c0a552e195..875af0dc7c 100644
--- a/include/hw/xen/interface/io/usbif.h
+++ b/include/hw/xen/interface/io/usbif.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
 /*
  * usbif.h
  *
@@ -5,24 +6,6 @@
  *
  * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
  * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef __XEN_PUBLIC_IO_USBIF_H__
diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h
index 927f9db552..9cd0cd7c67 100644
--- a/include/hw/xen/interface/io/xenbus.h
+++ b/include/hw/xen/interface/io/xenbus.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /*****************************************************************************
  * xenbus.h
  *
  * Xenbus protocol details.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (C) 2005 XenSource Ltd.
  */
 
diff --git a/include/hw/xen/interface/io/xs_wire.h b/include/hw/xen/interface/io/xs_wire.h
index 4dd6632669..04e6849feb 100644
--- a/include/hw/xen/interface/io/xs_wire.h
+++ b/include/hw/xen/interface/io/xs_wire.h
@@ -1,25 +1,8 @@
+/* SPDX-License-Identifier: MIT */
 /*
  * Details of the "wire" protocol between Xen Store Daemon and client
  * library or guest kernel.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (C) 2005 Rusty Russell IBM Corporation
  */
 
@@ -71,11 +54,12 @@ struct xsd_errors
 #ifdef EINVAL
 #define XSD_ERROR(x) { x, #x }
 /* LINTED: static unused */
-static struct xsd_errors xsd_errors[]
+static const struct xsd_errors xsd_errors[]
 #if defined(__GNUC__)
 __attribute__((unused))
 #endif
     = {
+    /* /!\ New errors should be added at the end of the array. */
     XSD_ERROR(EINVAL),
     XSD_ERROR(EACCES),
     XSD_ERROR(EEXIST),
@@ -90,7 +74,8 @@ __attribute__((unused))
     XSD_ERROR(EBUSY),
     XSD_ERROR(EAGAIN),
     XSD_ERROR(EISCONN),
-    XSD_ERROR(E2BIG)
+    XSD_ERROR(E2BIG),
+    XSD_ERROR(EPERM),
 };
 #endif
 
@@ -124,6 +109,7 @@ struct xenstore_domain_interface {
     XENSTORE_RING_IDX rsp_cons, rsp_prod;
     uint32_t server_features; /* Bitmap of features supported by the server */
     uint32_t connection;
+    uint32_t error;
 };
 
 /* Violating this is very bad.  See docs/misc/xenstore.txt. */
@@ -135,10 +121,18 @@ struct xenstore_domain_interface {
 
 /* The ability to reconnect a ring */
 #define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+/* The presence of the "error" field in the ring page */
+#define XENSTORE_SERVER_FEATURE_ERROR        2
 
 /* Valid values for the connection field */
 #define XENSTORE_CONNECTED 0 /* the steady-state */
-#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
+#define XENSTORE_RECONNECT 1 /* reconnect in progress */
+
+/* Valid values for the error field */
+#define XENSTORE_ERROR_NONE    0 /* No error */
+#define XENSTORE_ERROR_COMM    1 /* Communication problem */
+#define XENSTORE_ERROR_RINGIDX 2 /* Invalid ring index */
+#define XENSTORE_ERROR_PROTO   3 /* Protocol violation (payload too long) */
 
 #endif /* _XS_WIRE_H */
 
diff --git a/include/hw/xen/interface/memory.h b/include/hw/xen/interface/memory.h
index 383a9468c3..29cf5c8239 100644
--- a/include/hw/xen/interface/memory.h
+++ b/include/hw/xen/interface/memory.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * memory.h
  *
  * Memory reservation and information.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */
 
@@ -541,12 +524,14 @@ struct xen_mem_sharing_op {
                 uint32_t gref;     /* IN: gref to debug         */
             } u;
         } debug;
-        struct mem_sharing_op_fork {      /* OP_FORK */
+        struct mem_sharing_op_fork {      /* OP_FORK{,_RESET} */
             domid_t parent_domain;        /* IN: parent's domain id */
 /* Only makes sense for short-lived forks */
 #define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
 /* Only makes sense for short-lived forks */
 #define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
+#define XENMEM_FORK_RESET_STATE        (1u << 2)
+#define XENMEM_FORK_RESET_MEMORY       (1u << 3)
             uint16_t flags;               /* IN: optional settings */
             uint32_t pad;                 /* Must be set to 0 */
         } fork;
@@ -662,6 +647,13 @@ struct xen_mem_acquire_resource {
      * two calls.
      */
     uint32_t nr_frames;
+    /*
+     * Padding field, must be zero on input.
+     * In a previous version this was an output field with the lowest bit
+     * named XENMEM_rsrc_acq_caller_owned. Future versions of this interface
+     * will not reuse this bit as an output with the field being zero on
+     * input.
+     */
     uint32_t pad;
     /*
      * IN - the index of the initial frame to be mapped. This parameter
diff --git a/include/hw/xen/interface/physdev.h b/include/hw/xen/interface/physdev.h
index d271766ad0..f0c0d4727c 100644
--- a/include/hw/xen/interface/physdev.h
+++ b/include/hw/xen/interface/physdev.h
@@ -1,22 +1,5 @@
+/* SPDX-License-Identifier: MIT */
 /*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2006, Keir Fraser
  */
 
@@ -211,8 +194,8 @@ struct physdev_manage_pci_ext {
     /* IN */
     uint8_t bus;
     uint8_t devfn;
-    unsigned is_extfn;
-    unsigned is_virtfn;
+    uint32_t is_extfn;
+    uint32_t is_virtfn;
     struct {
         uint8_t bus;
         uint8_t devfn;
diff --git a/include/hw/xen/interface/sched.h b/include/hw/xen/interface/sched.h
index 811bd87c82..b4362c6a1d 100644
--- a/include/hw/xen/interface/sched.h
+++ b/include/hw/xen/interface/sched.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * sched.h
  *
  * Scheduler state interactions
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */
 
diff --git a/include/hw/xen/interface/trace.h b/include/hw/xen/interface/trace.h
index d5fa4aea8d..62a179971d 100644
--- a/include/hw/xen/interface/trace.h
+++ b/include/hw/xen/interface/trace.h
@@ -1,24 +1,7 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * include/public/trace.h
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Mark Williamson, (C) 2004 Intel Research Cambridge
  * Copyright (C) 2005 Bin Ren
  */
diff --git a/include/hw/xen/interface/vcpu.h b/include/hw/xen/interface/vcpu.h
index 3623af932f..81a3b3a743 100644
--- a/include/hw/xen/interface/vcpu.h
+++ b/include/hw/xen/interface/vcpu.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * vcpu.h
  *
  * VCPU initialisation, query, and hotplug.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */
 
diff --git a/include/hw/xen/interface/version.h b/include/hw/xen/interface/version.h
index 17a81e23cd..9c78b4f3b6 100644
--- a/include/hw/xen/interface/version.h
+++ b/include/hw/xen/interface/version.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * version.h
  *
  * Xen version, type, and compile information.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */
diff --git a/include/hw/xen/interface/xen-compat.h b/include/hw/xen/interface/xen-compat.h
index e1c027a95c..97fe698498 100644
--- a/include/hw/xen/interface/xen-compat.h
+++ b/include/hw/xen/interface/xen-compat.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * xen-compat.h
  *
  * Guest OS interface to Xen.  Compatibility layer.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2006, Christian Limpach
  */
 
diff --git a/include/hw/xen/interface/xen.h b/include/hw/xen/interface/xen.h
index e373592c33..920567e006 100644
--- a/include/hw/xen/interface/xen.h
+++ b/include/hw/xen/interface/xen.h
@@ -1,26 +1,9 @@
+/* SPDX-License-Identifier: MIT */
 /******************************************************************************
  * xen.h
  *
  * Guest OS interface to Xen.
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
  * Copyright (c) 2004, K A Fraser
  */
 
diff --git a/include/hw/xen/xen-backend.h b/include/hw/xen/xen-backend.h
index aac2fd454d..0f01631ae7 100644
--- a/include/hw/xen/xen-backend.h
+++ b/include/hw/xen/xen-backend.h
@@ -33,6 +33,7 @@ XenDevice *xen_backend_get_device(XenBackendInstance *backend);
 void xen_backend_register(const XenBackendInfo *info);
 const char **xen_backend_get_types(unsigned int *nr);
 
+bool xen_backend_exists(const char *type, const char *name);
 void xen_backend_device_create(XenBus *xenbus, const char *type,
                                const char *name, QDict *opts, Error **errp);
 bool xen_backend_try_device_destroy(XenDevice *xendev, Error **errp);
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index f435898164..334ddd1ff6 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -33,6 +33,7 @@ struct XenDevice {
 };
 typedef struct XenDevice XenDevice;
 
+typedef char *(*XenDeviceGetFrontendPath)(XenDevice *xendev, Error **errp);
 typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp);
 typedef void (*XenDeviceRealize)(XenDevice *xendev, Error **errp);
 typedef void (*XenDeviceFrontendChanged)(XenDevice *xendev,
@@ -46,6 +47,7 @@ struct XenDeviceClass {
     /*< public >*/
     const char *backend;
     const char *device;
+    XenDeviceGetFrontendPath get_frontend_path;
     XenDeviceGetName get_name;
     XenDeviceRealize realize;
     XenDeviceFrontendChanged frontend_changed;
@@ -73,7 +75,7 @@ struct XenBusClass {
 OBJECT_DECLARE_TYPE(XenBus, XenBusClass,
                     XEN_BUS)
 
-void xen_bus_init(void);
+BusState *xen_bus_init(void);
 
 void xen_device_backend_set_state(XenDevice *xendev,
                                   enum xenbus_state state);
@@ -129,5 +131,6 @@ void xen_device_notify_event_channel(XenDevice *xendev,
 void xen_device_unbind_event_channel(XenDevice *xendev,
                                      XenEventChannel *channel,
                                      Error **errp);
+unsigned int xen_event_channel_get_local_port(XenEventChannel *channel);
 
 #endif /* HW_XEN_BUS_H */
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index 6c307c5f2c..fc42146bc2 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -81,7 +81,6 @@ extern struct XenDevOps xen_usb_ops;          /* xen-usb.c         */
 
 /* configuration (aka xenbus setup) */
 void xen_config_cleanup(void);
-int xen_config_dev_blk(DriveInfo *disk);
 int xen_config_dev_nic(NICInfo *nic);
 int xen_config_dev_vfb(int vdev, const char *type);
 int xen_config_dev_vkbd(int vdev);
diff --git a/include/qemu/range.h b/include/qemu/range.h
index 7e2b1cc447..205e1da76d 100644
--- a/include/qemu/range.h
+++ b/include/qemu/range.h
@@ -217,6 +217,20 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
     return !(last2 < first1 || last1 < first2);
 }
 
+/*
+ * Return -1 if @a < @b, 1 if @a > @b, and 0 if they touch or overlap.
+ * Both @a and @b must not be empty.
+ */
+int range_compare(Range *a, Range *b);
+
 GList *range_list_insert(GList *list, Range *data);
 
+/*
+ * Invert a list of sorted ranges over the [low, high] span, i.e. the
+ * original ranges become holes in the newly allocated @out_ranges.
+ */
+void range_inverse_array(GList *in_ranges,
+                         GList **out_ranges,
+                         uint64_t low, uint64_t high);
+
 #endif
diff --git a/include/qemu/reserved-region.h b/include/qemu/reserved-region.h
new file mode 100644
index 0000000000..8e6f0a97e2
--- /dev/null
+++ b/include/qemu/reserved-region.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU ReservedRegion helpers
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_RESERVED_REGION_H
+#define QEMU_RESERVED_REGION_H
+
+#include "exec/memory.h"
+
+/*
+ * Insert a new region into a sorted list of reserved regions. In case
+ * there is overlap with existing regions, the new added region has
+ * higher priority and replaces the overlapped segment.
+ */
+GList *resv_region_list_insert(GList *list, ReservedRegion *reg);
+
+#endif
diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
index e24a1099e4..869f84af09 100644
--- a/include/qemu/uuid.h
+++ b/include/qemu/uuid.h
@@ -78,9 +78,10 @@ typedef struct {
                  "%02hhx%02hhx-" \
                  "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
 
-#define UUID_FMT_LEN 36
-
 #define UUID_NONE "00000000-0000-0000-0000-000000000000"
+QEMU_BUILD_BUG_ON(sizeof(UUID_NONE) - 1 != 36);
+
+#define UUID_STR_LEN sizeof(UUID_NONE)
 
 void qemu_uuid_generate(QemuUUID *out);
 
diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h
index 595abfbe40..961c702c4e 100644
--- a/include/sysemu/kvm_xen.h
+++ b/include/sysemu/kvm_xen.h
@@ -22,6 +22,7 @@
 int kvm_xen_soft_reset(void);
 uint32_t kvm_xen_get_caps(void);
 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
+bool kvm_xen_has_vcpu_callback_vector(void);
 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
 void kvm_xen_set_callback_asserted(void);
 int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port);
diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index 677aea6dd1..2d932a515e 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -12,236 +12,25 @@
 #include "exec/helper-proto-common.h"
 #include "exec/helper-gen-common.h"
 
-/* Basic output routines.  Not for general consumption.  */
-
-void tcg_gen_op1(TCGOpcode, TCGArg);
-void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
-void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
-void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
-
-void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
-void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
-void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
-
-static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
-{
-    tcg_gen_op1(opc, tcgv_i32_arg(a1));
-}
-
-static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
-{
-    tcg_gen_op1(opc, tcgv_i64_arg(a1));
-}
-
-static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
-{
-    tcg_gen_op1(opc, a1);
-}
-
-static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
-{
-    tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2));
-}
-
-static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2)
-{
-    tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2));
-}
-
-static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2)
-{
-    tcg_gen_op2(opc, tcgv_i32_arg(a1), a2);
-}
-
-static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2)
-{
-    tcg_gen_op2(opc, tcgv_i64_arg(a1), a2);
-}
-
-static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2)
-{
-    tcg_gen_op2(opc, a1, a2);
-}
-
-static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1,
-                                   TCGv_i32 a2, TCGv_i32 a3)
-{
-    tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3));
-}
-
-static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1,
-                                   TCGv_i64 a2, TCGv_i64 a3)
-{
-    tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3));
-}
-
-static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1,
-                                    TCGv_i32 a2, TCGArg a3)
-{
-    tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3);
-}
-
-static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1,
-                                    TCGv_i64 a2, TCGArg a3)
-{
-    tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3);
-}
-
-static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
-                                       TCGv_ptr base, TCGArg offset)
-{
-    tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset);
-}
-
-static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
-                                       TCGv_ptr base, TCGArg offset)
-{
-    tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset);
-}
-
-static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                   TCGv_i32 a3, TCGv_i32 a4)
-{
-    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4));
-}
-
-static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                   TCGv_i64 a3, TCGv_i64 a4)
-{
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), tcgv_i64_arg(a4));
-}
-
-static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                    TCGv_i32 a3, TCGArg a4)
-{
-    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), a4);
-}
-
-static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                    TCGv_i64 a3, TCGArg a4)
-{
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), a4);
-}
-
-static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                     TCGArg a3, TCGArg a4)
-{
-    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
-}
-
-static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                     TCGArg a3, TCGArg a4)
-{
-    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
-}
-
-static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                   TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5)
-{
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5));
-}
-
-static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                   TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5)
-{
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5));
-}
-
-static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                    TCGv_i32 a3, TCGv_i32 a4, TCGArg a5)
-{
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5);
-}
-
-static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                    TCGv_i64 a3, TCGv_i64 a4, TCGArg a5)
-{
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5);
-}
-
-static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                     TCGv_i32 a3, TCGArg a4, TCGArg a5)
-{
-    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), a4, a5);
-}
-
-static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                     TCGv_i64 a3, TCGArg a4, TCGArg a5)
-{
-    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), a4, a5);
-}
-
-static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                   TCGv_i32 a3, TCGv_i32 a4,
-                                   TCGv_i32 a5, TCGv_i32 a6)
-{
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5),
-                tcgv_i32_arg(a6));
-}
-
-static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                   TCGv_i64 a3, TCGv_i64 a4,
-                                   TCGv_i64 a5, TCGv_i64 a6)
-{
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5),
-                tcgv_i64_arg(a6));
-}
-
-static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                    TCGv_i32 a3, TCGv_i32 a4,
-                                    TCGv_i32 a5, TCGArg a6)
-{
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6);
-}
-
-static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                    TCGv_i64 a3, TCGv_i64 a4,
-                                    TCGv_i64 a5, TCGArg a6)
-{
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6);
-}
-
-static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
-                                     TCGv_i32 a3, TCGv_i32 a4,
-                                     TCGArg a5, TCGArg a6)
-{
-    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
-                tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
-}
-
-static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
-                                     TCGv_i64 a3, TCGv_i64 a4,
-                                     TCGArg a5, TCGArg a6)
-{
-    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
-                tcgv_i64_arg(a3), tcgv_i64_arg(a4), a5, a6);
-}
-
+TCGv_i32 tcg_constant_i32(int32_t val);
+TCGv_i64 tcg_constant_i64(int64_t val);
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val);
+TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val);
+
+TCGv_i32 tcg_temp_new_i32(void);
+TCGv_i64 tcg_temp_new_i64(void);
+TCGv_ptr tcg_temp_new_ptr(void);
+TCGv_i128 tcg_temp_new_i128(void);
+TCGv_vec tcg_temp_new_vec(TCGType type);
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match);
+
+TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name);
+TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name);
+TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name);
 
 /* Generic ops.  */
 
-static inline void gen_set_label(TCGLabel *l)
-{
-    l->present = 1;
-    tcg_gen_op1(INDEX_op_set_label, label_arg(l));
-}
-
+void gen_set_label(TCGLabel *l);
 void tcg_gen_br(TCGLabel *l);
 void tcg_gen_mb(TCGBar);
 
@@ -285,16 +74,8 @@ void tcg_gen_goto_tb(unsigned idx);
  */
 void tcg_gen_lookup_and_goto_ptr(void);
 
-static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type,
-                                           unsigned wr)
-{
-    tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr);
-}
-
-static inline void tcg_gen_plugin_cb_end(void)
-{
-    tcg_emit_op(INDEX_op_plugin_cb_end, 0);
-}
+void tcg_gen_plugin_cb_start(unsigned from, unsigned type, unsigned wr);
+void tcg_gen_plugin_cb_end(void);
 
 /* 32 bit ops */
 
@@ -374,128 +155,30 @@ void tcg_gen_abs_i32(TCGv_i32, TCGv_i32);
 /* Replicate a value of size @vece from @in to all the lanes in @out */
 void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in);
 
-static inline void tcg_gen_discard_i32(TCGv_i32 arg)
-{
-    tcg_gen_op1_i32(INDEX_op_discard, arg);
-}
-
-static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
-{
-    if (ret != arg) {
-        tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg);
-    }
-}
-
-static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2,
-                                     tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2,
-                                     tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2,
-                                  tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset);
-}
-
-static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2,
-                                   tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2,
-                                  tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
-{
-    tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg)
-{
-    if (TCG_TARGET_HAS_neg_i32) {
-        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg);
-    } else {
-        tcg_gen_subfi_i32(ret, 0, arg);
-    }
-}
-
-static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
-{
-    if (TCG_TARGET_HAS_not_i32) {
-        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg);
-    } else {
-        tcg_gen_xori_i32(ret, arg, -1);
-    }
-}
+void tcg_gen_discard_i32(TCGv_i32 arg);
+void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg);
+
+void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+
+void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset);
+
+void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg);
 
 /* 64 bit ops */
 
@@ -580,130 +263,6 @@ void tcg_gen_abs_i64(TCGv_i64, TCGv_i64);
 /* Replicate a value of size @vece from @in to all the lanes in @out */
 void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in);
 
-#if TCG_TARGET_REG_BITS == 64
-static inline void tcg_gen_discard_i64(TCGv_i64 arg)
-{
-    tcg_gen_op1_i64(INDEX_op_discard, arg);
-}
-
-static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
-{
-    if (ret != arg) {
-        tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg);
-    }
-}
-
-static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                     tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                     tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                     tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                     tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2,
-                                  tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset);
-}
-
-static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                   tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                    tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2,
-                                  tcg_target_long offset)
-{
-    tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset);
-}
-
-static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2);
-}
-
-static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
-{
-    tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2);
-}
-#else /* TCG_TARGET_REG_BITS == 32 */
 void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
 void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
 void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
@@ -728,16 +287,8 @@ void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
-#endif /* TCG_TARGET_REG_BITS */
+void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg);
 
-static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
-{
-    if (TCG_TARGET_HAS_neg_i64) {
-        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg);
-    } else {
-        tcg_gen_subfi_i64(ret, 0, arg);
-    }
-}
 
 /* Size changing operations.  */
 
@@ -748,19 +299,17 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg);
 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);
+void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi);
 
-void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
 void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
 void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);
 
+/* 128 bit ops */
+
+void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
 void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset);
 void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset);
 
-static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
-{
-    tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
-}
-
 /* Local load/store bit ops */
 
 void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
@@ -926,6 +475,9 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
 # define NAT  TCGv_i64
 #endif
 
+TCGv_ptr tcg_constant_ptr_int(intptr_t x);
+#define tcg_constant_ptr(X)  tcg_constant_ptr_int((intptr_t)(X))
+
 static inline void tcg_gen_ld_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o)
 {
     glue(tcg_gen_ld_,PTR)((NAT)r, a, o);
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 6eff3d9106..b80227fa1c 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -47,7 +47,7 @@ DEF(mb, 0, 0, 1, 0)
 DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
 DEF(setcond_i32, 1, 2, 1, 0)
 DEF(negsetcond_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_negsetcond_i32))
-DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
+DEF(movcond_i32, 1, 4, 1, 0)
 /* load/store */
 DEF(ld8u_i32, 1, 1, 1, 0)
 DEF(ld8s_i32, 1, 1, 1, 0)
@@ -100,7 +100,7 @@ DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
 DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))
 DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))
 DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
-DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32))
+DEF(neg_i32, 1, 1, 0, 0)
 DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
 DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
 DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
@@ -113,7 +113,7 @@ DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
 DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
 DEF(setcond_i64, 1, 2, 1, IMPL64)
 DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64))
-DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
+DEF(movcond_i64, 1, 4, 1, IMPL64)
 /* load/store */
 DEF(ld8u_i64, 1, 1, 1, IMPL64)
 DEF(ld8s_i64, 1, 1, 1, IMPL64)
@@ -171,7 +171,7 @@ DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
 DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
 DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
 DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
-DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64))
+DEF(neg_i64, 1, 1, 0, IMPL64)
 DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
 DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
 DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
index dded2917e5..44192c55a9 100644
--- a/include/tcg/tcg-temp-internal.h
+++ b/include/tcg/tcg-temp-internal.h
@@ -31,53 +31,15 @@
 
 void tcg_temp_free_internal(TCGTemp *);
 
-static inline void tcg_temp_free_i32(TCGv_i32 arg)
-{
-    tcg_temp_free_internal(tcgv_i32_temp(arg));
-}
-
-static inline void tcg_temp_free_i64(TCGv_i64 arg)
-{
-    tcg_temp_free_internal(tcgv_i64_temp(arg));
-}
-
-static inline void tcg_temp_free_i128(TCGv_i128 arg)
-{
-    tcg_temp_free_internal(tcgv_i128_temp(arg));
-}
-
-static inline void tcg_temp_free_ptr(TCGv_ptr arg)
-{
-    tcg_temp_free_internal(tcgv_ptr_temp(arg));
-}
-
-static inline void tcg_temp_free_vec(TCGv_vec arg)
-{
-    tcg_temp_free_internal(tcgv_vec_temp(arg));
-}
-
-static inline TCGv_i32 tcg_temp_ebb_new_i32(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB);
-    return temp_tcgv_i32(t);
-}
-
-static inline TCGv_i64 tcg_temp_ebb_new_i64(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB);
-    return temp_tcgv_i64(t);
-}
-
-static inline TCGv_i128 tcg_temp_ebb_new_i128(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB);
-    return temp_tcgv_i128(t);
-}
-
-static inline TCGv_ptr tcg_temp_ebb_new_ptr(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB);
-    return temp_tcgv_ptr(t);
-}
+void tcg_temp_free_i32(TCGv_i32 arg);
+void tcg_temp_free_i64(TCGv_i64 arg);
+void tcg_temp_free_i128(TCGv_i128 arg);
+void tcg_temp_free_ptr(TCGv_ptr arg);
+void tcg_temp_free_vec(TCGv_vec arg);
+
+TCGv_i32 tcg_temp_ebb_new_i32(void);
+TCGv_i64 tcg_temp_ebb_new_i64(void);
+TCGv_ptr tcg_temp_ebb_new_ptr(void);
+TCGv_i128 tcg_temp_ebb_new_i128(void);
 
 #endif /* TCG_TEMP_FREE_H */
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index a9282cdcc6..daf2a5bf9e 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -82,7 +82,6 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_bswap16_i64      0
 #define TCG_TARGET_HAS_bswap32_i64      0
 #define TCG_TARGET_HAS_bswap64_i64      0
-#define TCG_TARGET_HAS_neg_i64          0
 #define TCG_TARGET_HAS_not_i64          0
 #define TCG_TARGET_HAS_andc_i64         0
 #define TCG_TARGET_HAS_orc_i64          0
@@ -96,7 +95,6 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_extract_i64      0
 #define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_extract2_i64     0
-#define TCG_TARGET_HAS_movcond_i64      0
 #define TCG_TARGET_HAS_negsetcond_i64   0
 #define TCG_TARGET_HAS_add2_i64         0
 #define TCG_TARGET_HAS_sub2_i64         0
@@ -795,60 +793,6 @@ void tb_target_set_jmp_target(const TranslationBlock *, int,
 
 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
 
-TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr,
-                                     intptr_t, const char *);
-TCGTemp *tcg_temp_new_internal(TCGType, TCGTempKind);
-TCGv_vec tcg_temp_new_vec(TCGType type);
-TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match);
-
-static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
-                                              const char *name)
-{
-    TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
-    return temp_tcgv_i32(t);
-}
-
-static inline TCGv_i32 tcg_temp_new_i32(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB);
-    return temp_tcgv_i32(t);
-}
-
-static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset,
-                                              const char *name)
-{
-    TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
-    return temp_tcgv_i64(t);
-}
-
-static inline TCGv_i64 tcg_temp_new_i64(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB);
-    return temp_tcgv_i64(t);
-}
-
-static inline TCGv_i128 tcg_temp_new_i128(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB);
-    return temp_tcgv_i128(t);
-}
-
-static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset,
-                                              const char *name)
-{
-    TCGTemp *t = tcg_global_mem_new_internal(TCG_TYPE_PTR, reg, offset, name);
-    return temp_tcgv_ptr(t);
-}
-
-static inline TCGv_ptr tcg_temp_new_ptr(void)
-{
-    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB);
-    return temp_tcgv_ptr(t);
-}
-
-void tcg_dump_info(GString *buf);
-void tcg_dump_op_count(GString *buf);
-
 #define TCG_CT_CONST  1 /* any constant of register size */
 
 typedef struct TCGArgConstraint {
@@ -937,32 +881,6 @@ void tcg_remove_ops_after(TCGOp *op);
 
 void tcg_optimize(TCGContext *s);
 
-/*
- * Locate or create a read-only temporary that is a constant.
- * This kind of temporary need not be freed, but for convenience
- * will be silently ignored by tcg_temp_free_*.
- */
-TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
-
-static inline TCGv_i32 tcg_constant_i32(int32_t val)
-{
-    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
-}
-
-static inline TCGv_i64 tcg_constant_i64(int64_t val)
-{
-    return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
-}
-
-TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val);
-TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val);
-
-#if UINTPTR_MAX == UINT32_MAX
-# define tcg_constant_ptr(x)     ((TCGv_ptr)tcg_constant_i32((intptr_t)(x)))
-#else
-# define tcg_constant_ptr(x)     ((TCGv_ptr)tcg_constant_i64((intptr_t)(x)))
-#endif
-
 TCGLabel *gen_new_label(void);
 
 /**
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 02ffb51e99..3a899b0608 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -782,6 +782,11 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
                              "Error not from zero copy");
             return -1;
         }
+        if (serr->ee_data < serr->ee_info) {
+            error_setg_errno(errp, serr->ee_origin,
+                             "Wrong notification bounds");
+            return -1;
+        }
 
         /* No errors, count successfully finished sendmsg()*/
         sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c
index 8ab1335106..d5232f37fe 100644
--- a/linux-user/hppa/cpu_loop.c
+++ b/linux-user/hppa/cpu_loop.c
@@ -147,12 +147,10 @@ void cpu_loop(CPUHPPAState *env)
             force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, env->iaoq_f);
             break;
         case EXCP_ILL:
-            EXCP_DUMP(env, "qemu: EXCP_ILL exception %#x\n", trapnr);
             force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPC, env->iaoq_f);
             break;
         case EXCP_PRIV_OPR:
             /* check for glibc ABORT_INSTRUCTION "iitlbp %r0,(%sr0, %r0)" */
-            EXCP_DUMP(env, "qemu: EXCP_PRIV_OPR exception %#x\n", trapnr);
             if (env->cr[CR_IIR] == 0x04000000) {
 		    force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPC, env->iaoq_f);
             } else {
@@ -160,7 +158,6 @@ void cpu_loop(CPUHPPAState *env)
             }
             break;
         case EXCP_PRIV_REG:
-            EXCP_DUMP(env, "qemu: EXCP_PRIV_REG exception %#x\n", trapnr);
             force_sig_fault(TARGET_SIGILL, TARGET_ILL_PRVREG, env->iaoq_f);
             break;
         case EXCP_OVERFLOW:
@@ -173,7 +170,6 @@ void cpu_loop(CPUHPPAState *env)
             force_sig_fault(TARGET_SIGFPE, 0, env->iaoq_f);
             break;
         case EXCP_BREAK:
-            EXCP_DUMP(env, "qemu: EXCP_BREAK exception %#x\n", trapnr);
             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->iaoq_f & ~3);
             break;
         case EXCP_DEBUG:
diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
index 17920e9ceb..d08a97dae6 100644
--- a/linux-user/hppa/signal.c
+++ b/linux-user/hppa/signal.c
@@ -86,7 +86,7 @@ static void setup_sigcontext(struct target_sigcontext *sc, CPUArchState *env)
 
 static void restore_sigcontext(CPUArchState *env, struct target_sigcontext *sc)
 {
-    target_ulong psw;
+    abi_ulong psw;
     int i;
 
     __get_user(psw, &sc->sc_gr[0]);
@@ -150,10 +150,10 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
     haddr = ka->_sa_handler;
     if (haddr & 2) {
         /* Function descriptor.  */
-        target_ulong *fdesc, dest;
+        abi_ptr *fdesc, dest;
 
         haddr &= -4;
-        fdesc = lock_user(VERIFY_READ, haddr, 2 * sizeof(target_ulong), 1);
+        fdesc = lock_user(VERIFY_READ, haddr, 2 * sizeof(abi_ptr), 1);
         if (!fdesc) {
             goto give_sigsegv;
         }
diff --git a/linux-user/hppa/target_elf.h b/linux-user/hppa/target_elf.h
index 82b4e9535e..19cae8bd65 100644
--- a/linux-user/hppa/target_elf.h
+++ b/linux-user/hppa/target_elf.h
@@ -9,6 +9,6 @@
 #define HPPA_TARGET_ELF_H
 static inline const char *cpu_get_model(uint32_t eflags)
 {
-    return "any";
+    return "hppa";
 }
 #endif
diff --git a/linux-user/riscv/target_elf.h b/linux-user/riscv/target_elf.h
index 9dd65652ee..dedd5956f3 100644
--- a/linux-user/riscv/target_elf.h
+++ b/linux-user/riscv/target_elf.h
@@ -9,7 +9,6 @@
 #define RISCV_TARGET_ELF_H
 static inline const char *cpu_get_model(uint32_t eflags)
 {
-    /* TYPE_RISCV_CPU_ANY */
-    return "any";
+    return "max";
 }
 #endif
diff --git a/meson.build b/meson.build
index dcef8b1e79..51a51075db 100644
--- a/meson.build
+++ b/meson.build
@@ -1323,6 +1323,30 @@ if not get_option('glusterfs').auto() or have_block
   endif
 endif
 
+hv_balloon = false
+if get_option('hv_balloon').allowed() and have_system
+  if cc.links('''
+    #include <string.h>
+    #include <gmodule.h>
+    int main(void) {
+        GTree *tree;
+
+        tree = g_tree_new((GCompareFunc)strcmp);
+        (void)g_tree_node_first(tree);
+        g_tree_destroy(tree);
+        return 0;
+    }
+  ''', dependencies: glib)
+    hv_balloon = true
+  else
+    if get_option('hv_balloon').enabled()
+      error('could not enable hv-balloon, update your glib')
+    else
+      warning('could not find glib support for hv-balloon, disabling')
+    endif
+  endif
+endif
+
 libssh = not_found
 if not get_option('libssh').auto() or have_block
   libssh = dependency('libssh', version: '>=0.8.7',
@@ -2855,7 +2879,8 @@ host_kconfig = \
   (targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \
   (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \
   (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \
-  (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : [])
+  (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + \
+  (hv_balloon ? ['CONFIG_HV_BALLOON_POSSIBLE=y'] : [])
 
 ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ]
 
@@ -4321,6 +4346,7 @@ if targetos == 'windows'
 endif
 summary_info += {'seccomp support':   seccomp}
 summary_info += {'GlusterFS support': glusterfs}
+summary_info += {'hv-balloon support': hv_balloon}
 summary_info += {'TPM support':       have_tpm}
 summary_info += {'libssh support':    libssh}
 summary_info += {'lzo support':       lzo}
diff --git a/meson_options.txt b/meson_options.txt
index 3c7398f3c6..5c212fcd45 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -150,6 +150,8 @@ option('gio', type : 'feature', value : 'auto',
        description: 'use libgio for D-Bus support')
 option('glusterfs', type : 'feature', value : 'auto',
        description: 'Glusterfs block device driver')
+option('hv_balloon', type : 'feature', value : 'auto',
+       description: 'hv-balloon driver (requires Glib 2.68+ GTree API)')
 option('libdw', type : 'feature', value : 'auto',
        description: 'debuginfo support')
 option('libiscsi', type : 'feature', value : 'auto',
diff --git a/migration/savevm.c b/migration/savevm.c
index bc98c2ea6f..eec5503a42 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -471,8 +471,8 @@ static bool vmstate_uuid_needed(void *opaque)
 static int vmstate_uuid_post_load(void *opaque, int version_id)
 {
     SaveState *state = opaque;
-    char uuid_src[UUID_FMT_LEN + 1];
-    char uuid_dst[UUID_FMT_LEN + 1];
+    char uuid_src[UUID_STR_LEN];
+    char uuid_dst[UUID_STR_LEN];
 
     if (!qemu_uuid_set) {
         /*
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 941f87815a..01ede1babd 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -315,6 +315,7 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
     [QAPI_EVENT_QUORUM_FAILURE]    = { 1000 * SCALE_MS },
     [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
     [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
+    [QAPI_EVENT_HV_BALLOON_STATUS_REPORT] = { 1000 * SCALE_MS },
 };
 
 /*
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 99961256f2..ca390c5700 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3528,16 +3528,20 @@
 # @pass-discard-other: whether discard requests for the data source
 #     should be issued on other occasions where a cluster gets freed
 #
-# @discard-no-unref: when enabled, discards from the guest will not
-#     cause cluster allocations to be relinquished.  This prevents
-#     qcow2 fragmentation that would be caused by such discards.
-#     Besides potential performance degradation, such fragmentation
-#     can lead to increased allocation of clusters past the end of the
-#     image file, resulting in image files whose file length can grow
-#     much larger than their guest disk size would suggest.  If image
-#     file length is of concern (e.g. when storing qcow2 images
-#     directly on block devices), you should consider enabling this
-#     option.  (since 8.1)
+# @discard-no-unref: when enabled, data clusters will remain
+#     preallocated when they are no longer used, e.g. because they are
+#     discarded or converted to zero clusters.  As usual, whether the
+#     old data is discarded or kept on the protocol level (i.e. in the
+#     image file) depends on the setting of the pass-discard-request
+#     option.  Keeping the clusters preallocated prevents qcow2
+#     fragmentation that would otherwise be caused by freeing and
+#     re-allocating them later.  Besides potential performance
+#     degradation, such fragmentation can lead to increased allocation
+#     of clusters past the end of the image file, resulting in image
+#     files whose file length can grow much larger than their guest disk
+#     size would suggest.  If image file length is of concern (e.g. when
+#     storing qcow2 images directly on block devices), you should
+#     consider enabling this option.  (since 8.1)
 #
 # @overlap-check: which overlap checks to perform for writes to the
 #     image, defaults to 'cached' (since 2.2)
diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index c8d7d9868d..7b7149f81c 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -231,7 +231,8 @@
   'if': { 'any': [ 'TARGET_S390X',
                    'TARGET_I386',
                    'TARGET_ARM',
-                   'TARGET_LOONGARCH64' ] } }
+                   'TARGET_LOONGARCH64',
+                   'TARGET_RISCV' ] } }
 
 ##
 # @query-cpu-model-expansion:
@@ -277,7 +278,8 @@
   'if': { 'any': [ 'TARGET_S390X',
                    'TARGET_I386',
                    'TARGET_ARM',
-                   'TARGET_LOONGARCH64' ] } }
+                   'TARGET_LOONGARCH64',
+                   'TARGET_RISCV' ] } }
 
 ##
 # @CpuDefinitionInfo:
diff --git a/qapi/machine.json b/qapi/machine.json
index 6c9d2f6dcf..b6d634b30d 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1138,6 +1138,68 @@
   'data': { 'actual': 'int' } }
 
 ##
+# @HvBalloonInfo:
+#
+# hv-balloon guest-provided memory status information.
+#
+# @committed: the amount of memory in use inside the guest plus the
+#     amount of the memory unusable inside the guest (ballooned out,
+#     offline, etc.)
+#
+# @available: the amount of the memory inside the guest available for
+#     new allocations ("free")
+#
+# Since: 8.2
+##
+{ 'struct': 'HvBalloonInfo',
+  'data': { 'committed': 'size', 'available': 'size' } }
+
+##
+# @query-hv-balloon-status-report:
+#
+# Returns the hv-balloon driver data contained in the last received "STATUS"
+# message from the guest.
+#
+# Returns:
+# - @HvBalloonInfo on success
+# - If no hv-balloon device is present, guest memory status reporting
+#   is not enabled, or no guest memory status report has been
+#   received yet, GenericError
+#
+# Since: 8.2
+#
+# Example:
+#
+# -> { "execute": "query-hv-balloon-status-report" }
+# <- { "return": {
+#          "committed": 816640000,
+#          "available": 3333054464
+#       }
+#    }
+##
+{ 'command': 'query-hv-balloon-status-report', 'returns': 'HvBalloonInfo' }
+
+##
+# @HV_BALLOON_STATUS_REPORT:
+#
+# Emitted when the hv-balloon driver receives a "STATUS" message from
+# the guest.
+#
+# Note: this event is rate-limited.
+#
+# Since: 8.2
+#
+# Example:
+#
+# <- { "event": "HV_BALLOON_STATUS_REPORT",
+#      "data": { "committed": 816640000, "available": 3333054464 },
+#      "timestamp": { "seconds": 1600295492, "microseconds": 661044 } }
+#
+##
+{ 'event': 'HV_BALLOON_STATUS_REPORT',
+  'data': 'HvBalloonInfo' }
+
+##
 # @MemoryInfo:
 #
 # Actual memory information in bytes.
@@ -1290,6 +1352,29 @@
 }
 
 ##
+# @HvBalloonDeviceInfo:
+#
+# hv-balloon provided memory state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @max-size: the maximum size of memory that the device can provide
+#
+# @memdev: memory backend linked with device
+#
+# Since: 8.2
+##
+{ 'struct': 'HvBalloonDeviceInfo',
+  'data': { '*id': 'str',
+            '*memaddr': 'size',
+            'max-size': 'size',
+            '*memdev': 'str'
+          }
+}
+
+##
 # @MemoryDeviceInfoKind:
 #
 # @nvdimm: since 2.12
@@ -1300,10 +1385,13 @@
 #
 # @sgx-epc: since 6.2.
 #
+# @hv-balloon: since 8.2.
+#
 # Since: 2.1
 ##
 { 'enum': 'MemoryDeviceInfoKind',
-  'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc' ] }
+  'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc',
+            'hv-balloon' ] }
 
 ##
 # @PCDIMMDeviceInfoWrapper:
@@ -1338,6 +1426,14 @@
   'data': { 'data': 'SgxEPCDeviceInfo' } }
 
 ##
+# @HvBalloonDeviceInfoWrapper:
+#
+# Since: 8.2
+##
+{ 'struct': 'HvBalloonDeviceInfoWrapper',
+  'data': { 'data': 'HvBalloonDeviceInfo' } }
+
+##
 # @MemoryDeviceInfo:
 #
 # Union containing information about a memory device
@@ -1351,7 +1447,8 @@
             'nvdimm': 'PCDIMMDeviceInfoWrapper',
             'virtio-pmem': 'VirtioPMEMDeviceInfoWrapper',
             'virtio-mem': 'VirtioMEMDeviceInfoWrapper',
-            'sgx-epc': 'SgxEPCDeviceInfoWrapper'
+            'sgx-epc': 'SgxEPCDeviceInfoWrapper',
+            'hv-balloon': 'HvBalloonDeviceInfoWrapper'
           }
 }
 
diff --git a/qapi/ui.json b/qapi/ui.json
index 006616aa77..3718d40fcf 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1409,13 +1409,18 @@
 #     codes match their position on non-Mac keyboards and you can use
 #     Meta/Super and Alt where you expect them.  (default: off)
 #
+# @zoom-to-fit: Zoom guest display to fit into the host window. When
+#     turned off the host window will be resized instead. Defaults to
+#     "off". (Since 8.2)
+#
 # Since: 7.0
 ##
 { 'struct': 'DisplayCocoa',
   'data': {
       '*left-command-key': 'bool',
       '*full-grab': 'bool',
-      '*swap-opt-cmd': 'bool'
+      '*swap-opt-cmd': 'bool',
+      '*zoom-to-fit': 'bool'
   } }
 
 ##
diff --git a/qemu-options.hx b/qemu-options.hx
index e26230bac5..3eee3c33eb 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1457,9 +1457,13 @@ SRST
             (on/off; default: off)
 
         ``discard-no-unref``
-            When enabled, discards from the guest will not cause cluster
-            allocations to be relinquished. This prevents qcow2 fragmentation
-            that would be caused by such discards. Besides potential
+            When enabled, data clusters will remain preallocated when they are
+            no longer used, e.g. because they are discarded or converted to
+            zero clusters. As usual, whether the old data is discarded or kept
+            on the protocol level (i.e. in the image file) depends on the
+            setting of the pass-discard-request option. Keeping the clusters
+            preallocated prevents qcow2 fragmentation that would otherwise be
+            caused by freeing and re-allocating them later. Besides potential
             performance degradation, such fragmentation can lead to increased
             allocation of clusters past the end of the image file,
             resulting in image files whose file length can grow much larger
@@ -3982,14 +3986,22 @@ ERST
 DEF("initrd", HAS_ARG, QEMU_OPTION_initrd, \
            "-initrd file    use 'file' as initial ram disk\n", QEMU_ARCH_ALL)
 SRST
+
 ``-initrd file``
     Use file as initial ram disk.
 
 ``-initrd "file1 arg=foo,file2"``
     This syntax is only available with multiboot.
 
-    Use file1 and file2 as modules and pass arg=foo as parameter to the
-    first module.
+    Use file1 and file2 as modules and pass ``arg=foo`` as parameter to the
+    first module. Commas can be provided in module parameters by doubling
+    them on the command line to escape them:
+
+``-initrd "bzImage earlyprintk=xen,,keep root=/dev/xvda1,initrd.img"``
+    Multiboot only. Use bzImage as the first module with
+    "``earlyprintk=xen,keep root=/dev/xvda1``" as its command line,
+    and initrd.img as the second module.
+
 ERST
 
 DEF("dtb", HAS_ARG, QEMU_OPTION_dtb, \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 7ca4b77eae..e9d6d39279 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -123,6 +123,7 @@ meson_options_help() {
   printf "%s\n" '  gtk-clipboard   clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)'
   printf "%s\n" '  guest-agent     Build QEMU Guest Agent'
   printf "%s\n" '  guest-agent-msi Build MSI package for the QEMU Guest Agent'
+  printf "%s\n" '  hv-balloon      hv-balloon driver (requires Glib 2.68+ GTree API)'
   printf "%s\n" '  hvf             HVF acceleration support'
   printf "%s\n" '  iconv           Font glyph conversion support'
   printf "%s\n" '  jack            JACK sound support'
@@ -333,6 +334,8 @@ _meson_option_parse() {
     --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;;
     --enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;;
     --disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;;
+    --enable-hv-balloon) printf "%s" -Dhv_balloon=enabled ;;
+    --disable-hv-balloon) printf "%s" -Dhv_balloon=disabled ;;
     --enable-hvf) printf "%s" -Dhvf=enabled ;;
     --disable-hvf) printf "%s" -Dhvf=disabled ;;
     --iasl=*) quote_sh "-Diasl=$2" ;;
diff --git a/system/memory.c b/system/memory.c
index 4928f2525d..304fa843ea 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1921,6 +1921,19 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
     return ret;
 }
 
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr,
+                                        GList *iova_ranges,
+                                        Error **errp)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+    int ret = 0;
+
+    if (imrc->iommu_set_iova_ranges) {
+        ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp);
+    }
+    return ret;
+}
+
 int memory_region_register_iommu_notifier(MemoryRegion *mr,
                                           IOMMUNotifier *n, Error **errp)
 {
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 0cf1147074..8a20dce3c8 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -462,7 +462,7 @@ LDAPR           sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5
 # Load/store register (pointer authentication)
 
 # LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous
-%ldra_imm       22:s1 12:9 !function=times_2
+%ldra_imm       22:s1 12:9 !function=times_8
 
 LDRA            11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm
 
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 9efe00cf6c..3c3bb3431a 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -205,6 +205,11 @@ static inline int times_4(DisasContext *s, int x)
     return x * 4;
 }
 
+static inline int times_8(DisasContext *s, int x)
+{
+    return x * 8;
+}
+
 static inline int times_2_plus_1(DisasContext *s, int x)
 {
     return x * 2 + 1;
diff --git a/target/hppa/cpu-param.h b/target/hppa/cpu-param.h
index c2791ae5f2..6746869a3b 100644
--- a/target/hppa/cpu-param.h
+++ b/target/hppa/cpu-param.h
@@ -8,26 +8,16 @@
 #ifndef HPPA_CPU_PARAM_H
 #define HPPA_CPU_PARAM_H
 
-#ifdef TARGET_HPPA64
-# define TARGET_LONG_BITS             64
-# define TARGET_REGISTER_BITS         64
-# define TARGET_VIRT_ADDR_SPACE_BITS  64
-# define TARGET_PHYS_ADDR_SPACE_BITS  64
-#elif defined(CONFIG_USER_ONLY)
-# define TARGET_LONG_BITS             32
-# define TARGET_REGISTER_BITS         32
-# define TARGET_VIRT_ADDR_SPACE_BITS  32
+#define TARGET_LONG_BITS              64
+
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_ABI32)
 # define TARGET_PHYS_ADDR_SPACE_BITS  32
+# define TARGET_VIRT_ADDR_SPACE_BITS  32
 #else
-/*
- * In order to form the GVA from space:offset,
- * we need a 64-bit virtual address space.
- */
-# define TARGET_LONG_BITS             64
-# define TARGET_REGISTER_BITS         32
+# define TARGET_PHYS_ADDR_SPACE_BITS  64
 # define TARGET_VIRT_ADDR_SPACE_BITS  64
-# define TARGET_PHYS_ADDR_SPACE_BITS  32
 #endif
+
 #define TARGET_PAGE_BITS 12
 
 #endif
diff --git a/target/hppa/cpu-qom.h b/target/hppa/cpu-qom.h
index b96e0318c7..4a85ebf5e0 100644
--- a/target/hppa/cpu-qom.h
+++ b/target/hppa/cpu-qom.h
@@ -24,6 +24,7 @@
 #include "qom/object.h"
 
 #define TYPE_HPPA_CPU "hppa-cpu"
+#define TYPE_HPPA64_CPU "hppa64-cpu"
 
 OBJECT_DECLARE_CPU_TYPE(HPPACPU, HPPACPUClass, HPPA_CPU)
 
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 1644297bf8..04de1689d7 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -77,9 +77,10 @@ static void hppa_restore_state_to_opc(CPUState *cs,
     HPPACPU *cpu = HPPA_CPU(cs);
 
     cpu->env.iaoq_f = data[0];
-    if (data[1] != (target_ureg)-1) {
+    if (data[1] != (target_ulong)-1) {
         cpu->env.iaoq_b = data[1];
     }
+    cpu->env.unwind_breg = data[2];
     /*
      * Since we were executing the instruction at IAOQ_F, and took some
      * sort of action that provoked the cpu_restore_state, we can infer
@@ -137,8 +138,10 @@ static void hppa_cpu_realizefn(DeviceState *dev, Error **errp)
 #ifndef CONFIG_USER_ONLY
     {
         HPPACPU *cpu = HPPA_CPU(cs);
+
         cpu->alarm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                         hppa_cpu_alarm_timer, cpu);
+        hppa_ptlbe(&cpu->env);
     }
 #endif
 }
@@ -156,7 +159,39 @@ static void hppa_cpu_initfn(Object *obj)
 
 static ObjectClass *hppa_cpu_class_by_name(const char *cpu_model)
 {
-    return object_class_by_name(TYPE_HPPA_CPU);
+    g_autofree char *typename = g_strconcat(cpu_model, "-cpu", NULL);
+    ObjectClass *oc = object_class_by_name(typename);
+
+    if (oc &&
+        !object_class_is_abstract(oc) &&
+        object_class_dynamic_cast(oc, TYPE_HPPA_CPU)) {
+        return oc;
+    }
+    return NULL;
+}
+
+static void hppa_cpu_list_entry(gpointer data, gpointer user_data)
+{
+    ObjectClass *oc = data;
+    CPUClass *cc = CPU_CLASS(oc);
+    const char *tname = object_class_get_name(oc);
+    g_autofree char *name = g_strndup(tname, strchr(tname, '-') - tname);
+
+    if (cc->deprecation_note) {
+        qemu_printf("  %s (deprecated)\n", name);
+    } else {
+        qemu_printf("  %s\n", name);
+    }
+}
+
+void hppa_cpu_list(void)
+{
+    GSList *list;
+
+    list = object_class_get_list_sorted(TYPE_HPPA_CPU, false);
+    qemu_printf("Available CPUs:\n");
+    g_slist_foreach(list, hppa_cpu_list_entry, NULL);
+    g_slist_free(list);
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -207,20 +242,21 @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
     cc->tcg_ops = &hppa_tcg_ops;
 }
 
-static const TypeInfo hppa_cpu_type_info = {
-    .name = TYPE_HPPA_CPU,
-    .parent = TYPE_CPU,
-    .instance_size = sizeof(HPPACPU),
-    .instance_align = __alignof(HPPACPU),
-    .instance_init = hppa_cpu_initfn,
-    .abstract = false,
-    .class_size = sizeof(HPPACPUClass),
-    .class_init = hppa_cpu_class_init,
+static const TypeInfo hppa_cpu_type_infos[] = {
+    {
+        .name = TYPE_HPPA_CPU,
+        .parent = TYPE_CPU,
+        .instance_size = sizeof(HPPACPU),
+        .instance_align = __alignof(HPPACPU),
+        .instance_init = hppa_cpu_initfn,
+        .abstract = false,
+        .class_size = sizeof(HPPACPUClass),
+        .class_init = hppa_cpu_class_init,
+    },
+    {
+        .name = TYPE_HPPA64_CPU,
+        .parent = TYPE_HPPA_CPU,
+    },
 };
 
-static void hppa_cpu_register_types(void)
-{
-    type_register_static(&hppa_cpu_type_info);
-}
-
-type_init(hppa_cpu_register_types)
+DEFINE_TYPES(hppa_cpu_type_infos)
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 798d0c26d7..144794d089 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -23,6 +23,7 @@
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
 #include "qemu/cpu-float.h"
+#include "qemu/interval-tree.h"
 
 /* PA-RISC 1.x processors have a strong memory model.  */
 /* ??? While we do not yet implement PA-RISC 2.0, those processors have
@@ -30,21 +31,33 @@
    basis.  It's probably easier to fall back to a strong memory model.  */
 #define TCG_GUEST_DEFAULT_MO        TCG_MO_ALL
 
-#define MMU_KERNEL_IDX   11
-#define MMU_PL1_IDX      12
-#define MMU_PL2_IDX      13
-#define MMU_USER_IDX     14
-#define MMU_PHYS_IDX     15
+#define MMU_KERNEL_IDX    7
+#define MMU_KERNEL_P_IDX  8
+#define MMU_PL1_IDX       9
+#define MMU_PL1_P_IDX     10
+#define MMU_PL2_IDX       11
+#define MMU_PL2_P_IDX     12
+#define MMU_USER_IDX      13
+#define MMU_USER_P_IDX    14
+#define MMU_PHYS_IDX      15
 
-#define PRIV_TO_MMU_IDX(priv)    (MMU_KERNEL_IDX + (priv))
-#define MMU_IDX_TO_PRIV(mmu_idx) ((mmu_idx) - MMU_KERNEL_IDX)
+#define MMU_IDX_TO_PRIV(MIDX)       (((MIDX) - MMU_KERNEL_IDX) / 2)
+#define MMU_IDX_TO_P(MIDX)          (((MIDX) - MMU_KERNEL_IDX) & 1)
+#define PRIV_P_TO_MMU_IDX(PRIV, P)  ((PRIV) * 2 + !!(P) + MMU_KERNEL_IDX)
 
-#define TARGET_INSN_START_EXTRA_WORDS 1
+#define TARGET_INSN_START_EXTRA_WORDS 2
 
 /* No need to flush MMU_PHYS_IDX  */
 #define HPPA_MMU_FLUSH_MASK                             \
-        (1 << MMU_KERNEL_IDX | 1 << MMU_PL1_IDX |       \
-         1 << MMU_PL2_IDX    | 1 << MMU_USER_IDX)
+        (1 << MMU_KERNEL_IDX | 1 << MMU_KERNEL_P_IDX |  \
+         1 << MMU_PL1_IDX    | 1 << MMU_PL1_P_IDX    |  \
+         1 << MMU_PL2_IDX    | 1 << MMU_PL2_P_IDX    |  \
+         1 << MMU_USER_IDX   | 1 << MMU_USER_P_IDX)
+
+/* Indices to flush for access_id changes. */
+#define HPPA_MMU_FLUSH_P_MASK \
+        (1 << MMU_KERNEL_P_IDX | 1 << MMU_PL1_P_IDX  |  \
+         1 << MMU_PL2_P_IDX    | 1 << MMU_USER_P_IDX)
 
 /* Hardware exceptions, interrupts, faults, and traps.  */
 #define EXCP_HPMC                1  /* high priority machine check */
@@ -107,11 +120,7 @@
 #define PSW_T            0x01000000
 #define PSW_S            0x02000000
 #define PSW_E            0x04000000
-#ifdef TARGET_HPPA64
 #define PSW_W            0x08000000 /* PA2.0 only */
-#else
-#define PSW_W            0
-#endif
 #define PSW_Z            0x40000000 /* PA1.x only */
 #define PSW_Y            0x80000000 /* PA1.x only */
 
@@ -124,15 +133,12 @@
 #define PSW_SM_P         PSW_P
 #define PSW_SM_Q         PSW_Q      /* Enable Interrupt State Collection */
 #define PSW_SM_R         PSW_R      /* Enable Recover Counter Trap */
-#ifdef TARGET_HPPA64
 #define PSW_SM_E         0x100
 #define PSW_SM_W         0x200      /* PA2.0 only : Enable Wide Mode */
-#else
-#define PSW_SM_E         0
-#define PSW_SM_W         0
-#endif
 
 #define CR_RC            0
+#define CR_PSW_DEFAULT   6          /* see SeaBIOS PDC_PSW firmware call */
+#define  PDC_PSW_WIDE_BIT 2
 #define CR_PID1          8
 #define CR_PID2          9
 #define CR_PID3          12
@@ -150,45 +156,37 @@
 #define CR_IPSW          22
 #define CR_EIRR          23
 
-#if TARGET_REGISTER_BITS == 32
-typedef uint32_t target_ureg;
-typedef int32_t  target_sreg;
-#define TREG_FMT_lx   "%08"PRIx32
-#define TREG_FMT_ld   "%"PRId32
-#else
-typedef uint64_t target_ureg;
-typedef int64_t  target_sreg;
-#define TREG_FMT_lx   "%016"PRIx64
-#define TREG_FMT_ld   "%"PRId64
-#endif
+typedef struct HPPATLBEntry {
+    union {
+        IntervalTreeNode itree;
+        struct HPPATLBEntry *unused_next;
+    };
+
+    target_ulong pa;
+
+    unsigned entry_valid : 1;
 
-typedef struct {
-    uint64_t va_b;
-    uint64_t va_e;
-    target_ureg pa;
     unsigned u : 1;
     unsigned t : 1;
     unsigned d : 1;
     unsigned b : 1;
-    unsigned page_size : 4;
     unsigned ar_type : 3;
     unsigned ar_pl1 : 2;
     unsigned ar_pl2 : 2;
-    unsigned entry_valid : 1;
     unsigned access_id : 16;
-} hppa_tlb_entry;
+} HPPATLBEntry;
 
 typedef struct CPUArchState {
-    target_ureg iaoq_f;      /* front */
-    target_ureg iaoq_b;      /* back, aka next instruction */
+    target_ulong iaoq_f;     /* front */
+    target_ulong iaoq_b;     /* back, aka next instruction */
 
-    target_ureg gr[32];
+    target_ulong gr[32];
     uint64_t fr[32];
     uint64_t sr[8];          /* stored shifted into place for gva */
 
-    target_ureg psw;         /* All psw bits except the following:  */
-    target_ureg psw_n;       /* boolean */
-    target_sreg psw_v;       /* in most significant bit */
+    target_ulong psw;        /* All psw bits except the following:  */
+    target_ulong psw_n;      /* boolean */
+    target_long psw_v;       /* in most significant bit */
 
     /* Splitting the carry-borrow field into the MSB and "the rest", allows
      * for "the rest" to be deleted when it is unused, but the MSB is in use.
@@ -197,8 +195,8 @@ typedef struct CPUArchState {
      * host has the appropriate add-with-carry insn to compute the msb).
      * Therefore the carry bits are stored as: cb_msb : cb & 0x11111110.
      */
-    target_ureg psw_cb;      /* in least significant bit of next nibble */
-    target_ureg psw_cb_msb;  /* boolean */
+    target_ulong psw_cb;     /* in least significant bit of next nibble */
+    target_ulong psw_cb_msb; /* boolean */
 
     uint64_t iasq_f;
     uint64_t iasq_b;
@@ -206,24 +204,40 @@ typedef struct CPUArchState {
     uint32_t fr0_shadow;     /* flags, c, ca/cq, rm, d, enables */
     float_status fp_status;
 
-    target_ureg cr[32];      /* control registers */
-    target_ureg cr_back[2];  /* back of cr17/cr18 */
-    target_ureg shadow[7];   /* shadow registers */
+    target_ulong cr[32];     /* control registers */
+    target_ulong cr_back[2]; /* back of cr17/cr18 */
+    target_ulong shadow[7];  /* shadow registers */
 
-    /* ??? The number of entries isn't specified by the architecture.  */
-#ifdef TARGET_HPPA64
-#define HPPA_BTLB_FIXED         0       /* BTLBs are not supported in 64-bit machines */
-#else
-#define HPPA_BTLB_FIXED         16
-#endif
-#define HPPA_BTLB_VARIABLE      0
+    /*
+     * During unwind of a memory insn, the base register of the address.
+     * This is used to construct CR_IOR for pa2.0.
+     */
+    uint32_t unwind_breg;
+
+    /*
+     * ??? The number of entries isn't specified by the architecture.
+     * BTLBs are not supported in 64-bit machines.
+     */
+#define PA10_BTLB_FIXED         16
+#define PA10_BTLB_VARIABLE      0
 #define HPPA_TLB_ENTRIES        256
-#define HPPA_BTLB_ENTRIES       (HPPA_BTLB_FIXED + HPPA_BTLB_VARIABLE)
 
-    /* ??? Implement a unified itlb/dtlb for the moment.  */
-    /* ??? We should use a more intelligent data structure.  */
-    hppa_tlb_entry tlb[HPPA_TLB_ENTRIES];
+    /* Index for round-robin tlb eviction. */
     uint32_t tlb_last;
+
+    /*
+     * For pa1.x, the partially initialized, still invalid tlb entry
+     * which has had ITLBA performed, but not yet ITLBP.
+     */
+    HPPATLBEntry *tlb_partial;
+
+    /* Linked list of all invalid (unused) tlb entries. */
+    HPPATLBEntry *tlb_unused;
+
+    /* Root of the search tree for all valid tlb entries. */
+    IntervalTreeRoot tlb_root;
+
+    HPPATLBEntry tlb[HPPA_TLB_ENTRIES];
 } CPUHPPAState;
 
 /**
@@ -243,13 +257,23 @@ struct ArchCPU {
 
 #include "exec/cpu-all.h"
 
+static inline bool hppa_is_pa20(CPUHPPAState *env)
+{
+    return object_dynamic_cast(OBJECT(env_cpu(env)), TYPE_HPPA64_CPU) != NULL;
+}
+
+static inline int HPPA_BTLB_ENTRIES(CPUHPPAState *env)
+{
+    return hppa_is_pa20(env) ? 0 : PA10_BTLB_FIXED + PA10_BTLB_VARIABLE;
+}
+
 static inline int cpu_mmu_index(CPUHPPAState *env, bool ifetch)
 {
 #ifdef CONFIG_USER_ONLY
     return MMU_USER_IDX;
 #else
     if (env->psw & (ifetch ? PSW_C : PSW_D)) {
-        return PRIV_TO_MMU_IDX(env->iaoq_f & 3);
+        return PRIV_P_TO_MMU_IDX(env->iaoq_f & 3, env->psw & PSW_P);
     }
     return MMU_PHYS_IDX;  /* mmu disabled */
 #endif
@@ -259,23 +283,26 @@ void hppa_translate_init(void);
 
 #define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
 
-static inline target_ulong hppa_form_gva_psw(target_ureg psw, uint64_t spc,
-                                             target_ureg off)
+static inline target_ulong hppa_form_gva_psw(target_ulong psw, uint64_t spc,
+                                             target_ulong off)
 {
 #ifdef CONFIG_USER_ONLY
     return off;
 #else
-    off &= (psw & PSW_W ? 0x3fffffffffffffffull : 0xffffffffull);
+    off &= psw & PSW_W ? MAKE_64BIT_MASK(0, 62) : MAKE_64BIT_MASK(0, 32);
     return spc | off;
 #endif
 }
 
 static inline target_ulong hppa_form_gva(CPUHPPAState *env, uint64_t spc,
-                                         target_ureg off)
+                                         target_ulong off)
 {
     return hppa_form_gva_psw(env->psw, spc, off);
 }
 
+hwaddr hppa_abs_to_phys_pa2_w0(vaddr addr);
+hwaddr hppa_abs_to_phys_pa2_w1(vaddr addr);
+
 /*
  * Since PSW_{I,CB} will never need to be in tb->flags, reuse them.
  * TB_FLAG_SR_SAME indicates that SR4 through SR7 all contain the
@@ -299,13 +326,12 @@ static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc,
     *cs_base = env->iaoq_b & -4;
     flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
 #else
-    /* ??? E, T, H, L, B, P bits need to be here, when implemented.  */
-    flags |= env->psw & (PSW_W | PSW_C | PSW_D);
+    /* ??? E, T, H, L, B bits need to be here, when implemented.  */
+    flags |= env->psw & (PSW_W | PSW_C | PSW_D | PSW_P);
     flags |= (env->iaoq_f & 3) << TB_FLAG_PRIV_SHIFT;
 
-    *pc = (env->psw & PSW_C
-           ? hppa_form_gva_psw(env->psw, env->iasq_f, env->iaoq_f & -4)
-           : env->iaoq_f & -4);
+    *pc = hppa_form_gva_psw(env->psw, (env->psw & PSW_C ? env->iasq_f : 0),
+                            env->iaoq_f & -4);
     *cs_base = env->iasq_f;
 
     /* Insert a difference between IAOQ_B and IAOQ_F within the otherwise zero
@@ -313,8 +339,8 @@ static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc,
        which is the primary case we care about -- using goto_tb within a page.
        Failure is indicated by a zero difference.  */
     if (env->iasq_f == env->iasq_b) {
-        target_sreg diff = env->iaoq_b - env->iaoq_f;
-        if (TARGET_REGISTER_BITS == 32 || diff == (int32_t)diff) {
+        target_long diff = env->iaoq_b - env->iaoq_f;
+        if (diff == (int32_t)diff) {
             *cs_base |= (uint32_t)diff;
         }
     }
@@ -328,8 +354,8 @@ static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc,
     *pflags = flags;
 }
 
-target_ureg cpu_hppa_get_psw(CPUHPPAState *env);
-void cpu_hppa_put_psw(CPUHPPAState *env, target_ureg);
+target_ulong cpu_hppa_get_psw(CPUHPPAState *env);
+void cpu_hppa_put_psw(CPUHPPAState *env, target_ulong);
 void cpu_hppa_loaded_fr0(CPUHPPAState *env);
 
 #ifdef CONFIG_USER_ONLY
@@ -342,6 +368,7 @@ int hppa_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int hppa_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void hppa_cpu_dump_state(CPUState *cs, FILE *f, int);
 #ifndef CONFIG_USER_ONLY
+void hppa_ptlbe(CPUHPPAState *env);
 hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr);
 bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                        MMUAccessType access_type, int mmu_idx,
@@ -350,7 +377,7 @@ void hppa_cpu_do_interrupt(CPUState *cpu);
 bool hppa_cpu_exec_interrupt(CPUState *cpu, int int_req);
 int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
                               int type, hwaddr *pphys, int *pprot,
-                              hppa_tlb_entry **tlb_entry);
+                              HPPATLBEntry **tlb_entry);
 extern const MemoryRegionOps hppa_io_eir_ops;
 extern const VMStateDescription vmstate_hppa_cpu;
 void hppa_cpu_alarm_timer(void *);
@@ -358,4 +385,9 @@ int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr);
 #endif
 G_NORETURN void hppa_dynamic_excp(CPUHPPAState *env, int excp, uintptr_t ra);
 
+#define CPU_RESOLVING_TYPE TYPE_HPPA_CPU
+
+#define cpu_list hppa_cpu_list
+void hppa_cpu_list(void);
+
 #endif /* HPPA_CPU_H */
diff --git a/target/hppa/gdbstub.c b/target/hppa/gdbstub.c
index 48a514384f..4a965b38d7 100644
--- a/target/hppa/gdbstub.c
+++ b/target/hppa/gdbstub.c
@@ -21,11 +21,16 @@
 #include "cpu.h"
 #include "gdbstub/helpers.h"
 
+/*
+ * GDB 15 only supports PA1.0 via the remote protocol, and ignores
+ * any provided xml, which means that any attempt to provide more
+ * data results in "Remote 'g' packet reply is too long".
+ */
+
 int hppa_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
 {
-    HPPACPU *cpu = HPPA_CPU(cs);
-    CPUHPPAState *env = &cpu->env;
-    target_ureg val;
+    CPUHPPAState *env = cpu_env(cs);
+    uint32_t val;
 
     switch (n) {
     case 0:
@@ -139,24 +144,13 @@ int hppa_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
         break;
     }
 
-    if (TARGET_REGISTER_BITS == 64) {
-        return gdb_get_reg64(mem_buf, val);
-    } else {
-        return gdb_get_reg32(mem_buf, val);
-    }
+    return gdb_get_reg32(mem_buf, val);
 }
 
 int hppa_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
 {
-    HPPACPU *cpu = HPPA_CPU(cs);
-    CPUHPPAState *env = &cpu->env;
-    target_ureg val;
-
-    if (TARGET_REGISTER_BITS == 64) {
-        val = ldq_p(mem_buf);
-    } else {
-        val = ldl_p(mem_buf);
-    }
+    CPUHPPAState *env = cpu_env(cs);
+    uint32_t val = ldl_p(mem_buf);
 
     switch (n) {
     case 0:
@@ -166,7 +160,7 @@ int hppa_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
         env->gr[n] = val;
         break;
     case 32:
-        env->cr[CR_SAR] = val;
+        env->cr[CR_SAR] = val & (hppa_is_pa20(env) ? 63 : 31);
         break;
     case 33:
         env->iaoq_f = val;
@@ -278,5 +272,5 @@ int hppa_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
         }
         break;
     }
-    return sizeof(target_ureg);
+    return 4;
 }
diff --git a/target/hppa/helper.c b/target/hppa/helper.c
index a8d3f456ee..859644c47a 100644
--- a/target/hppa/helper.c
+++ b/target/hppa/helper.c
@@ -25,22 +25,32 @@
 #include "exec/helper-proto.h"
 #include "qemu/qemu-print.h"
 
-target_ureg cpu_hppa_get_psw(CPUHPPAState *env)
+target_ulong cpu_hppa_get_psw(CPUHPPAState *env)
 {
-    target_ureg psw;
+    target_ulong psw;
+    target_ulong mask1 = (target_ulong)-1 / 0xf;
+    target_ulong maskf = (target_ulong)-1 / 0xffff * 0xf;
 
     /* Fold carry bits down to 8 consecutive bits.  */
-    /* ??? Needs tweaking for hppa64.  */
-    /* .......b...c...d...e...f...g...h */
-    psw = (env->psw_cb >> 4) & 0x01111111;
-    /* .......b..bc..cd..de..ef..fg..gh */
+    /* ^^^b^^^c^^^d^^^e^^^f^^^g^^^h^^^i^^^j^^^k^^^l^^^m^^^n^^^o^^^p^^^^ */
+    psw = (env->psw_cb >> 4) & mask1;
+    /* .......b...c...d...e...f...g...h...i...j...k...l...m...n...o...p */
     psw |= psw >> 3;
-    /* .............bcd............efgh */
-    psw |= (psw >> 6) & 0x000f000f;
-    /* .........................bcdefgh */
-    psw |= (psw >> 12) & 0xf;
-    psw |= env->psw_cb_msb << 7;
-    psw = (psw & 0xff) << 8;
+    /* .......b..bc..cd..de..ef..fg..gh..hi..ij..jk..kl..lm..mn..no..op */
+    psw |= psw >> 6;
+    psw &= maskf;
+    /* .............bcd............efgh............ijkl............mnop */
+    psw |= psw >> 12;
+    /* .............bcd.........bcdefgh........efghijkl........ijklmnop */
+    psw |= env->psw_cb_msb << 39;
+    /* .............bcd........abcdefgh........efghijkl........ijklmnop */
+
+    /* For hppa64, the two 8-bit fields are discontiguous. */
+    if (hppa_is_pa20(env)) {
+        psw = (psw & 0xff00000000ull) | ((psw & 0xff) << 8);
+    } else {
+        psw = (psw & 0xff) << 8;
+    }
 
     psw |= env->psw_n * PSW_N;
     psw |= (env->psw_v < 0) * PSW_V;
@@ -49,16 +59,36 @@ target_ureg cpu_hppa_get_psw(CPUHPPAState *env)
     return psw;
 }
 
-void cpu_hppa_put_psw(CPUHPPAState *env, target_ureg psw)
+void cpu_hppa_put_psw(CPUHPPAState *env, target_ulong psw)
 {
-    target_ureg old_psw = env->psw;
-    target_ureg cb = 0;
+    uint64_t reserved;
+    target_ulong cb = 0;
+
+    /* Do not allow reserved bits to be set. */
+    if (hppa_is_pa20(env)) {
+        reserved = MAKE_64BIT_MASK(40, 24) | MAKE_64BIT_MASK(28, 4);
+        reserved |= PSW_G;                  /* PA1.x only */
+        reserved |= PSW_E;                  /* not implemented */
+    } else {
+        reserved = MAKE_64BIT_MASK(32, 32) | MAKE_64BIT_MASK(28, 2);
+        reserved |= PSW_O | PSW_W;          /* PA2.0 only */
+        reserved |= PSW_E | PSW_Y | PSW_Z;  /* not implemented */
+    }
+    psw &= ~reserved;
 
     env->psw = psw & ~(PSW_N | PSW_V | PSW_CB);
     env->psw_n = (psw / PSW_N) & 1;
     env->psw_v = -((psw / PSW_V) & 1);
-    env->psw_cb_msb = (psw >> 15) & 1;
 
+    env->psw_cb_msb = (psw >> 39) & 1;
+    cb |= ((psw >> 38) & 1) << 60;
+    cb |= ((psw >> 37) & 1) << 56;
+    cb |= ((psw >> 36) & 1) << 52;
+    cb |= ((psw >> 35) & 1) << 48;
+    cb |= ((psw >> 34) & 1) << 44;
+    cb |= ((psw >> 33) & 1) << 40;
+    cb |= ((psw >> 32) & 1) << 36;
+    cb |= ((psw >> 15) & 1) << 32;
     cb |= ((psw >> 14) & 1) << 28;
     cb |= ((psw >> 13) & 1) << 24;
     cb |= ((psw >> 12) & 1) << 20;
@@ -67,29 +97,30 @@ void cpu_hppa_put_psw(CPUHPPAState *env, target_ureg psw)
     cb |= ((psw >>  9) & 1) <<  8;
     cb |= ((psw >>  8) & 1) <<  4;
     env->psw_cb = cb;
-
-    /* If PSW_P changes, it affects how we translate addresses.  */
-    if ((psw ^ old_psw) & PSW_P) {
-#ifndef CONFIG_USER_ONLY
-        tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
-#endif
-    }
 }
 
 void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 {
-    HPPACPU *cpu = HPPA_CPU(cs);
-    CPUHPPAState *env = &cpu->env;
-    target_ureg psw = cpu_hppa_get_psw(env);
-    target_ureg psw_cb;
+    CPUHPPAState *env = cpu_env(cs);
+    target_ulong psw = cpu_hppa_get_psw(env);
+    target_ulong psw_cb;
     char psw_c[20];
-    int i;
+    int i, w;
+    uint64_t m;
+
+    if (hppa_is_pa20(env)) {
+        w = 16;
+        m = UINT64_MAX;
+    } else {
+        w = 8;
+        m = UINT32_MAX;
+    }
 
     qemu_fprintf(f, "IA_F " TARGET_FMT_lx " IA_B " TARGET_FMT_lx
-                 " IIR " TREG_FMT_lx  "\n",
+                 " IIR %0*" PRIx64 "\n",
                  hppa_form_gva_psw(psw, env->iasq_f, env->iaoq_f),
                  hppa_form_gva_psw(psw, env->iasq_b, env->iaoq_b),
-                 env->cr[CR_IIR]);
+                 w, m & env->cr[CR_IIR]);
 
     psw_c[0]  = (psw & PSW_W ? 'W' : '-');
     psw_c[1]  = (psw & PSW_E ? 'E' : '-');
@@ -110,13 +141,15 @@ void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
     psw_c[16] = (psw & PSW_D ? 'D' : '-');
     psw_c[17] = (psw & PSW_I ? 'I' : '-');
     psw_c[18] = '\0';
-    psw_cb = ((env->psw_cb >> 4) & 0x01111111) | (env->psw_cb_msb << 28);
+    psw_cb = ((env->psw_cb >> 4) & 0x1111111111111111ull)
+           | (env->psw_cb_msb << 60);
 
-    qemu_fprintf(f, "PSW  " TREG_FMT_lx " CB   " TREG_FMT_lx " %s\n",
-                 psw, psw_cb, psw_c);
+    qemu_fprintf(f, "PSW  %0*" PRIx64 " CB   %0*" PRIx64 " %s\n",
+                 w, m & psw, w, m & psw_cb, psw_c);
 
     for (i = 0; i < 32; i++) {
-        qemu_fprintf(f, "GR%02d " TREG_FMT_lx "%c", i, env->gr[i],
+        qemu_fprintf(f, "GR%02d %0*" PRIx64 "%c",
+                     i, w, m & env->gr[i],
                      (i & 3) == 3 ? '\n' : ' ');
     }
 #ifndef CONFIG_USER_ONLY
diff --git a/target/hppa/helper.h b/target/hppa/helper.h
index 647f043c85..20698f68ed 100644
--- a/target/hppa/helper.h
+++ b/target/hppa/helper.h
@@ -1,24 +1,28 @@
-#if TARGET_REGISTER_BITS == 64
-# define dh_alias_tr     i64
-# define dh_typecode_tr  dh_typecode_i64
-#else
-# define dh_alias_tr     i32
-# define dh_typecode_tr  dh_typecode_i32
-#endif
-#define dh_ctype_tr      target_ureg
-
 DEF_HELPER_2(excp, noreturn, env, int)
-DEF_HELPER_FLAGS_2(tsv, TCG_CALL_NO_WG, void, env, tr)
-DEF_HELPER_FLAGS_2(tcond, TCG_CALL_NO_WG, void, env, tr)
+DEF_HELPER_FLAGS_2(tsv, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_2(tcond, TCG_CALL_NO_WG, void, env, tl)
 
-DEF_HELPER_FLAGS_3(stby_b, TCG_CALL_NO_WG, void, env, tl, tr)
-DEF_HELPER_FLAGS_3(stby_b_parallel, TCG_CALL_NO_WG, void, env, tl, tr)
-DEF_HELPER_FLAGS_3(stby_e, TCG_CALL_NO_WG, void, env, tl, tr)
-DEF_HELPER_FLAGS_3(stby_e_parallel, TCG_CALL_NO_WG, void, env, tl, tr)
+DEF_HELPER_FLAGS_3(stby_b, TCG_CALL_NO_WG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(stby_b_parallel, TCG_CALL_NO_WG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(stby_e, TCG_CALL_NO_WG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(stby_e_parallel, TCG_CALL_NO_WG, void, env, tl, tl)
+
+DEF_HELPER_FLAGS_3(stdby_b, TCG_CALL_NO_WG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(stdby_b_parallel, TCG_CALL_NO_WG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(stdby_e, TCG_CALL_NO_WG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(stdby_e_parallel, TCG_CALL_NO_WG, void, env, tl, tl)
 
 DEF_HELPER_FLAGS_1(ldc_check, TCG_CALL_NO_RWG, void, tl)
 
-DEF_HELPER_FLAGS_4(probe, TCG_CALL_NO_WG, tr, env, tl, i32, i32)
+DEF_HELPER_FLAGS_2(hadd_ss, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(hadd_us, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(havg, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(hshladd, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
+DEF_HELPER_FLAGS_3(hshradd, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
+DEF_HELPER_FLAGS_2(hsub_ss, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(hsub_us, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_4(probe, TCG_CALL_NO_WG, tl, env, tl, i32, i32)
 
 DEF_HELPER_FLAGS_1(loaded_fr0, TCG_CALL_NO_RWG, void, env)
 
@@ -77,7 +81,7 @@ DEF_HELPER_FLAGS_4(fmpynfadd_s, TCG_CALL_NO_RWG, i32, env, i32, i32, i32)
 DEF_HELPER_FLAGS_4(fmpyfadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(fmpynfadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64, i64)
 
-DEF_HELPER_FLAGS_0(read_interval_timer, TCG_CALL_NO_RWG, tr)
+DEF_HELPER_FLAGS_0(read_interval_timer, TCG_CALL_NO_RWG, tl)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_1(halt, noreturn, env)
@@ -85,15 +89,18 @@ DEF_HELPER_1(reset, noreturn, env)
 DEF_HELPER_1(getshadowregs, void, env)
 DEF_HELPER_1(rfi, void, env)
 DEF_HELPER_1(rfi_r, void, env)
-DEF_HELPER_FLAGS_2(write_interval_timer, TCG_CALL_NO_RWG, void, env, tr)
-DEF_HELPER_FLAGS_2(write_eirr, TCG_CALL_NO_RWG, void, env, tr)
-DEF_HELPER_FLAGS_2(write_eiem, TCG_CALL_NO_RWG, void, env, tr)
-DEF_HELPER_FLAGS_2(swap_system_mask, TCG_CALL_NO_RWG, tr, env, tr)
-DEF_HELPER_FLAGS_3(itlba, TCG_CALL_NO_RWG, void, env, tl, tr)
-DEF_HELPER_FLAGS_3(itlbp, TCG_CALL_NO_RWG, void, env, tl, tr)
+DEF_HELPER_FLAGS_2(write_interval_timer, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(write_eirr, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(write_eiem, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(swap_system_mask, TCG_CALL_NO_RWG, tl, env, tl)
+DEF_HELPER_FLAGS_3(itlba_pa11, TCG_CALL_NO_RWG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(itlbp_pa11, TCG_CALL_NO_RWG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(idtlbt_pa20, TCG_CALL_NO_RWG, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(iitlbt_pa20, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_FLAGS_2(ptlb, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(ptlb_l, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_1(ptlbe, TCG_CALL_NO_RWG, void, env)
-DEF_HELPER_FLAGS_2(lpa, TCG_CALL_NO_WG, tr, env, tl)
+DEF_HELPER_FLAGS_2(lpa, TCG_CALL_NO_WG, tl, env, tl)
 DEF_HELPER_FLAGS_1(change_prot_id, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_1(diag_btlb, void, env)
 #endif
diff --git a/target/hppa/insns.decode b/target/hppa/insns.decode
index aebe03ccfd..f5a3f02fd1 100644
--- a/target/hppa/insns.decode
+++ b/target/hppa/insns.decode
@@ -46,11 +46,16 @@
 
 %im5_0          0:s1 1:4
 %im5_16         16:s1 17:4
+%len5           0:5      !function=assemble_6
+%len6_8         8:1 0:5  !function=assemble_6
+%len6_12        12:1 0:5 !function=assemble_6
+%cpos6_11       11:1 5:5
 %ma_to_m        5:1 13:1 !function=ma_to_m
 %ma2_to_m       2:2      !function=ma_to_m
 %pos_to_m       0:1      !function=pos_to_m
 %neg_to_m       0:1      !function=neg_to_m
 %a_to_m         2:1      !function=neg_to_m
+%cmpbid_c       13:2     !function=cmpbid_c
 
 ####
 # Argument set definitions
@@ -59,28 +64,43 @@
 # All insns that need to form a virtual address should use this set.
 &ldst           t b x disp sp m scale size
 
-&rr_cf          t r cf
+&rr_cf_d        t r cf d
+&rrr            t r1 r2
 &rrr_cf         t r1 r2 cf
-&rrr_cf_sh      t r1 r2 cf sh
+&rrr_cf_d       t r1 r2 cf d
+&rrr_sh         t r1 r2 sh
+&rrr_cf_d_sh    t r1 r2 cf d sh
+&rri            t r i
 &rri_cf         t r i cf
+&rri_cf_d       t r i cf d
 
 &rrb_c_f        disp n c f r1 r2
+&rrb_c_d_f      disp n c d f r1 r2
 &rib_c_f        disp n c f r i
+&rib_c_d_f      disp n c d f r i
 
 ####
 # Format definitions
 ####
 
-@rr_cf          ...... r:5 ..... cf:4 ....... t:5       &rr_cf
+@rr_cf_d        ...... r:5 ..... cf:4 ...... d:1 t:5    &rr_cf_d
+@rrr            ...... r2:5 r1:5 .... ....... t:5       &rrr
 @rrr_cf         ...... r2:5 r1:5 cf:4 ....... t:5       &rrr_cf
-@rrr_cf_sh      ...... r2:5 r1:5 cf:4 .... sh:2 . t:5   &rrr_cf_sh
-@rrr_cf_sh0     ...... r2:5 r1:5 cf:4 ....... t:5       &rrr_cf_sh sh=0
+@rrr_cf_d       ...... r2:5 r1:5 cf:4 ...... d:1 t:5    &rrr_cf_d
+@rrr_sh         ...... r2:5 r1:5 ........ sh:2 . t:5    &rrr_sh
+@rrr_cf_d_sh    ...... r2:5 r1:5 cf:4 .... sh:2 d:1 t:5 &rrr_cf_d_sh
+@rrr_cf_d_sh0   ...... r2:5 r1:5 cf:4 ...... d:1 t:5    &rrr_cf_d_sh sh=0
 @rri_cf         ...... r:5  t:5  cf:4 . ...........     &rri_cf i=%lowsign_11
+@rri_cf_d       ...... r:5  t:5  cf:4 d:1 ...........   &rri_cf_d i=%lowsign_11
 
 @rrb_cf         ...... r2:5 r1:5 c:3 ........... n:1 .  \
                 &rrb_c_f disp=%assemble_12
+@rrb_cdf        ...... r2:5 r1:5 c:3 ........... n:1 .  \
+                &rrb_c_d_f disp=%assemble_12
 @rib_cf         ...... r:5 ..... c:3 ........... n:1 .  \
                 &rib_c_f disp=%assemble_12 i=%im5_16
+@rib_cdf        ...... r:5 ..... c:3 ........... n:1 .  \
+                &rib_c_d_f disp=%assemble_12 i=%im5_16
 
 ####
 # System
@@ -130,6 +150,7 @@ nop_addrx       000001 ..... ..... -- 01001110 . 00000  @addrx # pdc
 
 probe           000001 b:5 ri:5 sp:2 imm:1 100011 write:1 0 t:5
 
+# pa1.x tlb insert instructions
 ixtlbx          000001 b:5 r:5 sp:2 0100000 addr:1 0 00000      data=1
 ixtlbx          000001 b:5 r:5 ... 000000 addr:1 0 00000        \
                 sp=%assemble_sr3x data=0
@@ -137,9 +158,26 @@ ixtlbx          000001 b:5 r:5 ... 000000 addr:1 0 00000        \
 # pcxl and pcxl2 Fast TLB Insert instructions
 ixtlbxf         000001 00000 r:5 00 0 data:1 01000 addr:1 0 00000
 
-pxtlbx          000001 b:5 x:5 sp:2 0100100 local:1 m:1 -----   data=1
-pxtlbx          000001 b:5 x:5 ... 000100 local:1 m:1 -----     \
-                sp=%assemble_sr3x data=0
+# pa2.0 tlb insert idtlbt and iitlbt instructions
+ixtlbt          000001 r2:5 r1:5 000 data:1 100000 0 00000    # idtlbt
+
+# pdtlb, pitlb
+pxtlb           000001 b:5 x:5 sp:2 01001000 m:1 ----- \
+                &ldst disp=0 scale=0 size=0 t=0
+pxtlb           000001 b:5 x:5 ...   0001000 m:1 ----- \
+                &ldst disp=0 scale=0 size=0 t=0 sp=%assemble_sr3x
+
+# ... pa20 local
+pxtlb_l         000001 b:5 x:5 sp:2 01011000 m:1 ----- \
+                &ldst disp=0 scale=0 size=0 t=0
+pxtlb_l         000001 b:5 x:5 ...   0011000 m:1 ----- \
+                &ldst disp=0 scale=0 size=0 t=0 sp=%assemble_sr3x
+
+# pdtlbe, pitlbe
+pxtlbe          000001 b:5 x:5 sp:2 01001001 m:1 ----- \
+                &ldst disp=0 scale=0 size=0 t=0
+pxtlbe          000001 b:5 x:5 ...   0001001 m:1 ----- \
+                &ldst disp=0 scale=0 size=0 t=0 sp=%assemble_sr3x
 
 lpa             000001 b:5 x:5 sp:2 01001101 m:1 t:5    \
                 &ldst disp=0 scale=0 size=0
@@ -150,30 +188,36 @@ lci             000001 ----- ----- -- 01001100 0 t:5
 # Arith/Log
 ####
 
-andcm           000010 ..... ..... .... 000000 - .....  @rrr_cf
-and             000010 ..... ..... .... 001000 - .....  @rrr_cf
-or              000010 ..... ..... .... 001001 - .....  @rrr_cf
-xor             000010 ..... ..... .... 001010 0 .....  @rrr_cf
-uxor            000010 ..... ..... .... 001110 0 .....  @rrr_cf
+andcm           000010 ..... ..... .... 000000 . .....  @rrr_cf_d
+and             000010 ..... ..... .... 001000 . .....  @rrr_cf_d
+or              000010 ..... ..... .... 001001 . .....  @rrr_cf_d
+xor             000010 ..... ..... .... 001010 . .....  @rrr_cf_d
+uxor            000010 ..... ..... .... 001110 . .....  @rrr_cf_d
 ds              000010 ..... ..... .... 010001 0 .....  @rrr_cf
-cmpclr          000010 ..... ..... .... 100010 0 .....  @rrr_cf
-uaddcm          000010 ..... ..... .... 100110 0 .....  @rrr_cf
-uaddcm_tc       000010 ..... ..... .... 100111 0 .....  @rrr_cf
-dcor            000010 ..... 00000 .... 101110 0 .....  @rr_cf
-dcor_i          000010 ..... 00000 .... 101111 0 .....  @rr_cf
-
-add             000010 ..... ..... .... 0110.. - .....  @rrr_cf_sh
-add_l           000010 ..... ..... .... 1010.. 0 .....  @rrr_cf_sh
-add_tsv         000010 ..... ..... .... 1110.. 0 .....  @rrr_cf_sh
-add_c           000010 ..... ..... .... 011100 0 .....  @rrr_cf_sh0
-add_c_tsv       000010 ..... ..... .... 111100 0 .....  @rrr_cf_sh0
-
-sub             000010 ..... ..... .... 010000 - .....  @rrr_cf
-sub_tsv         000010 ..... ..... .... 110000 0 .....  @rrr_cf
-sub_tc          000010 ..... ..... .... 010011 0 .....  @rrr_cf
-sub_tsv_tc      000010 ..... ..... .... 110011 0 .....  @rrr_cf
-sub_b           000010 ..... ..... .... 010100 0 .....  @rrr_cf
-sub_b_tsv       000010 ..... ..... .... 110100 0 .....  @rrr_cf
+cmpclr          000010 ..... ..... .... 100010 . .....  @rrr_cf_d
+uaddcm          000010 ..... ..... .... 100110 . .....  @rrr_cf_d
+uaddcm_tc       000010 ..... ..... .... 100111 . .....  @rrr_cf_d
+dcor            000010 ..... 00000 .... 101110 . .....  @rr_cf_d
+dcor_i          000010 ..... 00000 .... 101111 . .....  @rr_cf_d
+
+add             000010 ..... ..... .... 0110.. . .....  @rrr_cf_d_sh
+add_l           000010 ..... ..... .... 1010.. . .....  @rrr_cf_d_sh
+add_tsv         000010 ..... ..... .... 1110.. . .....  @rrr_cf_d_sh
+{
+  add_c         000010 ..... ..... .... 011100 . .....  @rrr_cf_d_sh0
+  hshladd       000010 ..... ..... 0000 0111.. 0 .....  @rrr_sh
+}
+add_c_tsv       000010 ..... ..... .... 111100 . .....  @rrr_cf_d_sh0
+
+sub             000010 ..... ..... .... 010000 . .....  @rrr_cf_d
+sub_tsv         000010 ..... ..... .... 110000 . .....  @rrr_cf_d
+sub_tc          000010 ..... ..... .... 010011 . .....  @rrr_cf_d
+sub_tsv_tc      000010 ..... ..... .... 110011 . .....  @rrr_cf_d
+{
+  sub_b         000010 ..... ..... .... 010100 . .....  @rrr_cf_d
+  hshradd       000010 ..... ..... 0000 0101.. 0 .....  @rrr_sh
+}
+sub_b_tsv       000010 ..... ..... .... 110100 . .....  @rrr_cf_d
 
 ldil            001000 t:5 .....................        i=%assemble_21
 addil           001010 r:5 .....................        i=%assemble_21
@@ -187,7 +231,28 @@ addi_tc_tsv     101100 ..... ..... .... 1 ...........   @rri_cf
 subi            100101 ..... ..... .... 0 ...........   @rri_cf
 subi_tsv        100101 ..... ..... .... 1 ...........   @rri_cf
 
-cmpiclr         100100 ..... ..... .... 0 ...........   @rri_cf
+cmpiclr         100100 ..... ..... .... . ...........   @rri_cf_d
+
+hadd            000010 ..... ..... 00000011 11 0 .....  @rrr
+hadd_ss         000010 ..... ..... 00000011 01 0 .....  @rrr
+hadd_us         000010 ..... ..... 00000011 00 0 .....  @rrr
+
+havg            000010 ..... ..... 00000010 11 0 .....  @rrr
+
+hshl            111110 00000 r:5   100010 i:4  0 t:5    &rri
+hshr_s          111110 r:5   00000 110011 i:4  0 t:5    &rri
+hshr_u          111110 r:5   00000 110010 i:4  0 t:5    &rri
+
+hsub            000010 ..... ..... 00000001 11 0 .....  @rrr
+hsub_ss         000010 ..... ..... 00000001 01 0 .....  @rrr
+hsub_us         000010 ..... ..... 00000001 00 0 .....  @rrr
+
+mixh_l          111110 ..... ..... 1 00 00100000 .....  @rrr
+mixh_r          111110 ..... ..... 1 10 00100000 .....  @rrr
+mixw_l          111110 ..... ..... 1 00 00000000 .....  @rrr
+mixw_r          111110 ..... ..... 1 10 00000000 .....  @rrr
+
+permh           111110 r1:5  r2:5  0 c0:2 0 c1:2 c2:2 c3:2 0 t:5
 
 ####
 # Index Mem
@@ -204,10 +269,16 @@ ld              000011 ..... ..... .. . 0 -- 00 size:2 ......   @ldstx
 st              000011 ..... ..... .. . 1 -- 10 size:2 ......   @stim5
 ldc             000011 ..... ..... .. . 1 -- 0111      ......   @ldim5 size=2
 ldc             000011 ..... ..... .. . 0 -- 0111      ......   @ldstx size=2
+ldc             000011 ..... ..... .. . 1 -- 0101      ......   @ldim5 size=3
+ldc             000011 ..... ..... .. . 0 -- 0101      ......   @ldstx size=3
 lda             000011 ..... ..... .. . 1 -- 0110      ......   @ldim5 size=2
 lda             000011 ..... ..... .. . 0 -- 0110      ......   @ldstx size=2
+lda             000011 ..... ..... .. . 1 -- 0100      ......   @ldim5 size=3
+lda             000011 ..... ..... .. . 0 -- 0100      ......   @ldstx size=3
 sta             000011 ..... ..... .. . 1 -- 1110      ......   @stim5 size=2
+sta             000011 ..... ..... .. . 1 -- 1111      ......   @stim5 size=3
 stby            000011 b:5 r:5 sp:2 a:1 1 -- 1100 m:1   .....   disp=%im5_0
+stdby           000011 b:5 r:5 sp:2 a:1 1 -- 1101 m:1   .....   disp=%im5_0
 
 @fldstwx        ...... b:5 x:5   sp:2 scale:1 ....... m:1 ..... \
                 &ldst t=%rt64 disp=0 size=2
@@ -233,6 +304,8 @@ fstd            001011 ..... ..... .. . 1 -- 100 0 . .....      @fldstdi
 # Offset Mem
 ####
 
+@ldstim11       ...... b:5 t:5 sp:2 ..............      \
+                &ldst disp=%assemble_11a m=%ma2_to_m x=0 scale=0 size=3
 @ldstim14       ...... b:5 t:5 sp:2 ..............      \
                 &ldst disp=%lowsign_14 x=0 scale=0 m=0
 @ldstim14m      ...... b:5 t:5 sp:2 ..............      \
@@ -264,11 +337,11 @@ fstw            011110 b:5 ..... sp:2 ..............    \
 fstw            011111 b:5 ..... sp:2 ...........0..    \
                 &ldst disp=%assemble_12a t=%rm64 m=0 x=0 scale=0 size=2
 
-fldd            010100 b:5 t:5   sp:2 .......... .. 1 . \
-                &ldst disp=%assemble_11a m=%ma2_to_m x=0 scale=0 size=3
+ld              010100 ..... ..... .. ............0.    @ldstim11
+fldd            010100 ..... ..... .. ............1.    @ldstim11
 
-fstd            011100 b:5 t:5   sp:2 .......... .. 1 . \
-                &ldst disp=%assemble_11a m=%ma2_to_m x=0 scale=0 size=3
+st              011100 ..... ..... .. ............0.    @ldstim11
+fstd            011100 ..... ..... .. ............1.    @ldstim11
 
 ####
 # Floating-point Multiply Add
@@ -286,16 +359,20 @@ fmpysub_d       100110 ..... ..... ..... ..... 1 .....  @mpyadd
 # Conditional Branches
 ####
 
-bb_sar          110000 00000 r:5 c:1 10 ........... n:1 .  disp=%assemble_12
-bb_imm          110001 p:5   r:5 c:1 10 ........... n:1 .  disp=%assemble_12
+bb_sar          110000 00000 r:5 c:1 1 d:1 ........... n:1 . disp=%assemble_12
+bb_imm          110001 p:5   r:5 c:1 1 d:1 ........... n:1 . disp=%assemble_12
 
 movb            110010 ..... ..... ... ........... . .  @rrb_cf f=0
 movbi           110011 ..... ..... ... ........... . .  @rib_cf f=0
 
-cmpb            100000 ..... ..... ... ........... . .  @rrb_cf f=0
-cmpb            100010 ..... ..... ... ........... . .  @rrb_cf f=1
-cmpbi           100001 ..... ..... ... ........... . .  @rib_cf f=0
-cmpbi           100011 ..... ..... ... ........... . .  @rib_cf f=1
+cmpb            100000 ..... ..... ... ........... . .  @rrb_cdf d=0 f=0
+cmpb            100010 ..... ..... ... ........... . .  @rrb_cdf d=0 f=1
+cmpb            100111 ..... ..... ... ........... . .  @rrb_cdf d=1 f=0
+cmpb            101111 ..... ..... ... ........... . .  @rrb_cdf d=1 f=1
+cmpbi           100001 ..... ..... ... ........... . .  @rib_cdf d=0 f=0
+cmpbi           100011 ..... ..... ... ........... . .  @rib_cdf d=0 f=1
+cmpbi           111011 r:5 ..... f:1 .. ........... n:1 . \
+                &rib_c_d_f d=1 disp=%assemble_12 c=%cmpbid_c i=%im5_16
 
 addb            101000 ..... ..... ... ........... . .  @rrb_cf f=0
 addb            101010 ..... ..... ... ........... . .  @rrb_cf f=1
@@ -306,16 +383,28 @@ addbi           101011 ..... ..... ... ........... . .  @rib_cf f=1
 # Shift, Extract, Deposit
 ####
 
-shrpw_sar       110100 r2:5 r1:5 c:3 00 0    00000  t:5
-shrpw_imm       110100 r2:5 r1:5 c:3 01 0    cpos:5 t:5
-
-extrw_sar       110100 r:5  t:5  c:3 10 se:1 00000  clen:5
-extrw_imm       110100 r:5  t:5  c:3 11 se:1 pos:5  clen:5
-
-depw_sar        110101 t:5 r:5   c:3 00 nz:1 00000  clen:5
-depw_imm        110101 t:5 r:5   c:3 01 nz:1 cpos:5 clen:5
-depwi_sar       110101 t:5 ..... c:3 10 nz:1 00000  clen:5      i=%im5_16
-depwi_imm       110101 t:5 ..... c:3 11 nz:1 cpos:5 clen:5      i=%im5_16
+shrp_sar        110100 r2:5 r1:5 c:3 00 0 d:1 0000  t:5
+shrp_imm        110100 r2:5 r1:5 c:3 01 0 cpos:5    t:5       d=0
+shrp_imm        110100 r2:5 r1:5 c:3 0. 1 .....  t:5          \
+                d=1 cpos=%cpos6_11
+
+extr_sar        110100 r:5  t:5  c:3 10 se:1 00 000 .....     d=0 len=%len5
+extr_sar        110100 r:5  t:5  c:3 10 se:1 1. 000 .....     d=1 len=%len6_8
+extr_imm        110100 r:5  t:5  c:3 11 se:1 pos:5  .....     d=0 len=%len5
+extr_imm        110110 r:5  t:5  c:3 .. se:1 ..... .....      \
+                d=1 len=%len6_12 pos=%cpos6_11
+
+dep_sar         110101 t:5 r:5   c:3 00 nz:1 00 000 .....     d=0 len=%len5
+dep_sar         110101 t:5 r:5   c:3 00 nz:1 1. 000 .....     d=1 len=%len6_8
+dep_imm         110101 t:5 r:5   c:3 01 nz:1 cpos:5 .....     d=0 len=%len5
+dep_imm         111100 t:5 r:5   c:3 .. nz:1 ..... .....      \
+                d=1 len=%len6_12 cpos=%cpos6_11
+depi_sar        110101 t:5 ..... c:3 10 nz:1 d:1 . 000 .....  \
+                i=%im5_16 len=%len6_8
+depi_imm        110101 t:5 ..... c:3 11 nz:1 cpos:5 .....     \
+                d=0 i=%im5_16 len=%len5
+depi_imm        111101 t:5 ..... c:3 .. nz:1 ..... .....      \
+                d=1 i=%im5_16 len=%len6_12 cpos=%cpos6_11
 
 ####
 # Branch External
@@ -343,6 +432,8 @@ bl              111010 ..... ..... 101 ........... n:1 .        &BL l=2 \
                 disp=%assemble_22
 b_gate          111010 ..... ..... 001 ........... .   .        @bl
 blr             111010 l:5   x:5   010 00000000000 n:1 0
+nopbts          111010 00000 00000 010 0---------1   0 1    # clrbts/popbts
+nopbts          111010 00000 ----- 010 00000000000   0 1    # pushbts/pushnom
 bv              111010 b:5   x:5   110 00000000000 n:1 0
 bve             111010 b:5   00000 110 10000000000 n:1 -        l=0
 bve             111010 b:5   00000 111 10000000000 n:1 -        l=2
@@ -384,7 +475,7 @@ fmpyfadd_d      101110 rm1:5 rm2:5 ... 0 1 ..0 0 0 neg:1 t:5    ra3=%rc32
 
 @f0e_f_3        ...... ..... ..... ... .0 110 ..0 .....    \
                 &fclass3 r1=%ra64 r2=%rb64 t=%rt64
-@f0e_d_3        ...... r1:5  r2:5  ... 01 110 000 t:5
+@f0e_d_3        ...... r1:5  r2:5  ... 01 110 000 t:5      &fclass3
 
 # Floating point class 0
 
diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c
index 3ab9934a1d..467ee7daf5 100644
--- a/target/hppa/int_helper.c
+++ b/target/hppa/int_helper.c
@@ -52,9 +52,17 @@ static void io_eir_write(void *opaque, hwaddr addr,
                          uint64_t data, unsigned size)
 {
     HPPACPU *cpu = opaque;
-    int le_bit = ~data & (TARGET_REGISTER_BITS - 1);
+    CPUHPPAState *env = &cpu->env;
+    int widthm1 = 31;
+    int le_bit;
+
+    /* The default PSW.W controls the width of EIRR. */
+    if (hppa_is_pa20(env) && env->cr[CR_PSW_DEFAULT] & PDC_PSW_WIDE_BIT) {
+        widthm1 = 63;
+    }
+    le_bit = ~data & widthm1;
 
-    cpu->env.cr[CR_EIRR] |= (target_ureg)1 << le_bit;
+    env->cr[CR_EIRR] |= 1ull << le_bit;
     eval_interrupt(cpu);
 }
 
@@ -73,7 +81,7 @@ void hppa_cpu_alarm_timer(void *opaque)
     io_eir_write(opaque, 0, 0, 4);
 }
 
-void HELPER(write_eirr)(CPUHPPAState *env, target_ureg val)
+void HELPER(write_eirr)(CPUHPPAState *env, target_ulong val)
 {
     env->cr[CR_EIRR] &= ~val;
     qemu_mutex_lock_iothread();
@@ -81,7 +89,7 @@ void HELPER(write_eirr)(CPUHPPAState *env, target_ureg val)
     qemu_mutex_unlock_iothread();
 }
 
-void HELPER(write_eiem)(CPUHPPAState *env, target_ureg val)
+void HELPER(write_eiem)(CPUHPPAState *env, target_ulong val)
 {
     env->cr[CR_EIEM] = val;
     qemu_mutex_lock_iothread();
@@ -94,25 +102,37 @@ void hppa_cpu_do_interrupt(CPUState *cs)
     HPPACPU *cpu = HPPA_CPU(cs);
     CPUHPPAState *env = &cpu->env;
     int i = cs->exception_index;
-    target_ureg iaoq_f = env->iaoq_f;
-    target_ureg iaoq_b = env->iaoq_b;
-    uint64_t iasq_f = env->iasq_f;
-    uint64_t iasq_b = env->iasq_b;
-
-    target_ureg old_psw;
+    uint64_t old_psw;
 
     /* As documented in pa2.0 -- interruption handling.  */
     /* step 1 */
     env->cr[CR_IPSW] = old_psw = cpu_hppa_get_psw(env);
 
-    /* step 2 -- note PSW_W == 0 for !HPPA64.  */
-    cpu_hppa_put_psw(env, PSW_W | (i == EXCP_HPMC ? PSW_M : 0));
+    /* step 2 -- Note PSW_W is masked out again for pa1.x */
+    cpu_hppa_put_psw(env,
+                     (env->cr[CR_PSW_DEFAULT] & PDC_PSW_WIDE_BIT ? PSW_W : 0) |
+                     (i == EXCP_HPMC ? PSW_M : 0));
 
     /* step 3 */
-    env->cr[CR_IIASQ] = iasq_f >> 32;
-    env->cr_back[0] = iasq_b >> 32;
-    env->cr[CR_IIAOQ] = iaoq_f;
-    env->cr_back[1] = iaoq_b;
+    /*
+     * For pa1.x, IIASQ is simply a copy of IASQ (front and back).
+     * For pa2.0, IIASQ holds the top bits of each queue entry's own
+     * virtual address, or zero if translation is disabled.
+     */
+    if (!hppa_is_pa20(env)) {
+        env->cr[CR_IIASQ] = env->iasq_f >> 32;
+        env->cr_back[0] = env->iasq_b >> 32;
+    } else if (old_psw & PSW_C) {
+        env->cr[CR_IIASQ] =
+            hppa_form_gva_psw(old_psw, env->iasq_f, env->iaoq_f) >> 32;
+        env->cr_back[0] =
+            hppa_form_gva_psw(old_psw, env->iasq_b, env->iaoq_b) >> 32;
+    } else {
+        env->cr[CR_IIASQ] = 0;
+        env->cr_back[0] = 0;
+    }
+    env->cr[CR_IIAOQ] = env->iaoq_f;
+    env->cr_back[1] = env->iaoq_b;
 
     if (old_psw & PSW_Q) {
         /* step 5 */
@@ -145,14 +165,13 @@ void hppa_cpu_do_interrupt(CPUState *cs)
                 /* ??? An alternate fool-proof method would be to store the
                    instruction data into the unwind info.  That's probably
                    a bit too much in the way of extra storage required.  */
-                vaddr vaddr;
-                hwaddr paddr;
+                vaddr vaddr = env->iaoq_f & -4;
+                hwaddr paddr = vaddr;
 
-                paddr = vaddr = iaoq_f & -4;
                 if (old_psw & PSW_C) {
                     int prot, t;
 
-                    vaddr = hppa_form_gva_psw(old_psw, iasq_f, vaddr);
+                    vaddr = hppa_form_gva_psw(old_psw, env->iasq_f, vaddr);
                     t = hppa_get_physical_address(env, vaddr, MMU_KERNEL_IDX,
                                                   0, &paddr, &prot, NULL);
                     if (t >= 0) {
@@ -182,14 +201,14 @@ void hppa_cpu_do_interrupt(CPUState *cs)
 
     /* step 7 */
     if (i == EXCP_TOC) {
-        env->iaoq_f = FIRMWARE_START;
+        env->iaoq_f = hppa_form_gva(env, 0, FIRMWARE_START);
         /* help SeaBIOS and provide iaoq_b and iasq_back in shadow regs */
         env->gr[24] = env->cr_back[0];
         env->gr[25] = env->cr_back[1];
     } else {
-        env->iaoq_f = env->cr[CR_IVA] + 32 * i;
+        env->iaoq_f = hppa_form_gva(env, 0, env->cr[CR_IVA] + 32 * i);
     }
-    env->iaoq_b = env->iaoq_f + 4;
+    env->iaoq_b = hppa_form_gva(env, 0, env->iaoq_f + 4);
     env->iasq_f = 0;
     env->iasq_b = 0;
 
@@ -239,14 +258,10 @@ void hppa_cpu_do_interrupt(CPUState *cs)
             snprintf(unknown, sizeof(unknown), "unknown %d", i);
             name = unknown;
         }
-        qemu_log("INT %6d: %s @ " TARGET_FMT_lx "," TARGET_FMT_lx
-                 " -> " TREG_FMT_lx " " TARGET_FMT_lx "\n",
-                 ++count, name,
-                 hppa_form_gva(env, iasq_f, iaoq_f),
-                 hppa_form_gva(env, iasq_b, iaoq_b),
-                 env->iaoq_f,
-                 hppa_form_gva(env, (uint64_t)env->cr[CR_ISR] << 32,
-                               env->cr[CR_IOR]));
+        qemu_log("INT %6d: %s @ " TARGET_FMT_lx ":" TARGET_FMT_lx
+                 " for " TARGET_FMT_lx ":" TARGET_FMT_lx "\n",
+                 ++count, name, env->cr[CR_IIASQ], env->cr[CR_IIAOQ],
+                 env->cr[CR_ISR], env->cr[CR_IOR]);
     }
     cs->exception_index = -1;
 }
diff --git a/target/hppa/machine.c b/target/hppa/machine.c
index 905991d7f9..2f8e8cc5a1 100644
--- a/target/hppa/machine.c
+++ b/target/hppa/machine.c
@@ -21,33 +21,12 @@
 #include "cpu.h"
 #include "migration/cpu.h"
 
-#if TARGET_REGISTER_BITS == 64
-#define qemu_put_betr   qemu_put_be64
-#define qemu_get_betr   qemu_get_be64
-#define VMSTATE_UINTTL_V(_f, _s, _v) \
-    VMSTATE_UINT64_V(_f, _s, _v)
-#define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \
-    VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)
-#else
-#define qemu_put_betr   qemu_put_be32
-#define qemu_get_betr   qemu_get_be32
-#define VMSTATE_UINTTR_V(_f, _s, _v) \
-    VMSTATE_UINT32_V(_f, _s, _v)
-#define VMSTATE_UINTTR_ARRAY_V(_f, _s, _n, _v) \
-    VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)
-#endif
-
-#define VMSTATE_UINTTR(_f, _s) \
-    VMSTATE_UINTTR_V(_f, _s, 0)
-#define VMSTATE_UINTTR_ARRAY(_f, _s, _n) \
-    VMSTATE_UINTTR_ARRAY_V(_f, _s, _n, 0)
-
 
 static int get_psw(QEMUFile *f, void *opaque, size_t size,
                    const VMStateField *field)
 {
     CPUHPPAState *env = opaque;
-    cpu_hppa_put_psw(env, qemu_get_betr(f));
+    cpu_hppa_put_psw(env, qemu_get_be64(f));
     return 0;
 }
 
@@ -55,7 +34,7 @@ static int put_psw(QEMUFile *f, void *opaque, size_t size,
                    const VMStateField *field, JSONWriter *vmdesc)
 {
     CPUHPPAState *env = opaque;
-    qemu_put_betr(f, cpu_hppa_get_psw(env));
+    qemu_put_be64(f, cpu_hppa_get_psw(env));
     return 0;
 }
 
@@ -65,70 +44,138 @@ static const VMStateInfo vmstate_psw = {
     .put = put_psw,
 };
 
-/* FIXME: Use the PA2.0 format, which is a superset of the PA1.1 format.  */
 static int get_tlb(QEMUFile *f, void *opaque, size_t size,
                    const VMStateField *field)
 {
-    hppa_tlb_entry *ent = opaque;
-    uint32_t val;
-
-    memset(ent, 0, sizeof(*ent));
-
-    ent->va_b = qemu_get_be64(f);
-    ent->pa = qemu_get_betr(f);
-    val = qemu_get_be32(f);
-
-    ent->entry_valid = extract32(val, 0, 1);
-    ent->access_id = extract32(val, 1, 18);
-    ent->u = extract32(val, 19, 1);
-    ent->ar_pl2 = extract32(val, 20, 2);
-    ent->ar_pl1 = extract32(val, 22, 2);
-    ent->ar_type = extract32(val, 24, 3);
-    ent->b = extract32(val, 27, 1);
-    ent->d = extract32(val, 28, 1);
-    ent->t = extract32(val, 29, 1);
-
-    ent->va_e = ent->va_b + TARGET_PAGE_SIZE - 1;
+    HPPATLBEntry *ent = opaque;
+    uint64_t val;
+
+    ent->itree.start = qemu_get_be64(f);
+    ent->itree.last = qemu_get_be64(f);
+    ent->pa = qemu_get_be64(f);
+    val = qemu_get_be64(f);
+
+    if (val) {
+        ent->t = extract64(val, 61, 1);
+        ent->d = extract64(val, 60, 1);
+        ent->b = extract64(val, 59, 1);
+        ent->ar_type = extract64(val, 56, 3);
+        ent->ar_pl1 = extract64(val, 54, 2);
+        ent->ar_pl2 = extract64(val, 52, 2);
+        ent->u = extract64(val, 51, 1);
+        /* o = bit 50 */
+        /* p = bit 49 */
+        ent->access_id = extract64(val, 1, 31);
+        ent->entry_valid = 1;
+    }
     return 0;
 }
 
 static int put_tlb(QEMUFile *f, void *opaque, size_t size,
                    const VMStateField *field, JSONWriter *vmdesc)
 {
-    hppa_tlb_entry *ent = opaque;
-    uint32_t val = 0;
+    HPPATLBEntry *ent = opaque;
+    uint64_t val = 0;
 
     if (ent->entry_valid) {
         val = 1;
-        val = deposit32(val, 1, 18, ent->access_id);
-        val = deposit32(val, 19, 1, ent->u);
-        val = deposit32(val, 20, 2, ent->ar_pl2);
-        val = deposit32(val, 22, 2, ent->ar_pl1);
-        val = deposit32(val, 24, 3, ent->ar_type);
-        val = deposit32(val, 27, 1, ent->b);
-        val = deposit32(val, 28, 1, ent->d);
-        val = deposit32(val, 29, 1, ent->t);
+        val = deposit64(val, 61, 1, ent->t);
+        val = deposit64(val, 60, 1, ent->d);
+        val = deposit64(val, 59, 1, ent->b);
+        val = deposit64(val, 56, 3, ent->ar_type);
+        val = deposit64(val, 54, 2, ent->ar_pl1);
+        val = deposit64(val, 52, 2, ent->ar_pl2);
+        val = deposit64(val, 51, 1, ent->u);
+        /* o = bit 50 */
+        /* p = bit 49 */
+        val = deposit64(val, 1, 31, ent->access_id);
     }
 
-    qemu_put_be64(f, ent->va_b);
-    qemu_put_betr(f, ent->pa);
-    qemu_put_be32(f, val);
+    qemu_put_be64(f, ent->itree.start);
+    qemu_put_be64(f, ent->itree.last);
+    qemu_put_be64(f, ent->pa);
+    qemu_put_be64(f, val);
     return 0;
 }
 
-static const VMStateInfo vmstate_tlb = {
+static const VMStateInfo vmstate_tlb_entry = {
     .name = "tlb entry",
     .get = get_tlb,
     .put = put_tlb,
 };
 
-static VMStateField vmstate_env_fields[] = {
-    VMSTATE_UINTTR_ARRAY(gr, CPUHPPAState, 32),
+static int tlb_pre_load(void *opaque)
+{
+    CPUHPPAState *env = opaque;
+
+    /*
+     * Zap the entire tlb, on-the-side data structures and all.
+     * Each tlb entry will have data re-filled by get_tlb.
+     */
+    memset(env->tlb, 0, sizeof(env->tlb));
+    memset(&env->tlb_root, 0, sizeof(env->tlb_root));
+    env->tlb_unused = NULL;
+    env->tlb_partial = NULL;
+
+    return 0;
+}
+
+static int tlb_post_load(void *opaque, int version_id)
+{
+    CPUHPPAState *env = opaque;
+    uint32_t btlb_entries = HPPA_BTLB_ENTRIES(env);
+    HPPATLBEntry **unused = &env->tlb_unused;
+    HPPATLBEntry *partial = NULL;
+
+    /*
+     * Re-create the interval tree from the valid entries.
+     * Truly invalid entries should have start == last == 0.
+     * Otherwise it should be the in-flight tlb_partial entry.
+     */
+    for (uint32_t i = 0; i < ARRAY_SIZE(env->tlb); ++i) {
+        HPPATLBEntry *e = &env->tlb[i];
+
+        if (e->entry_valid) {
+            interval_tree_insert(&e->itree, &env->tlb_root);
+        } else if (i < btlb_entries) {
+            /* btlb not in unused list */
+        } else if (partial == NULL && e->itree.start < e->itree.last) {
+            partial = e;
+        } else {
+            *unused = e;
+            unused = &e->unused_next;
+        }
+    }
+    env->tlb_partial = partial;
+    *unused = NULL;
+
+    return 0;
+}
+
+static const VMStateField vmstate_tlb_fields[] = {
+    VMSTATE_ARRAY(tlb, CPUHPPAState,
+                  ARRAY_SIZE(((CPUHPPAState *)0)->tlb),
+                  0, vmstate_tlb_entry, HPPATLBEntry),
+    VMSTATE_UINT32(tlb_last, CPUHPPAState),
+    VMSTATE_END_OF_LIST()
+};
+
+static const VMStateDescription vmstate_tlb = {
+    .name = "env/tlb",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = vmstate_tlb_fields,
+    .pre_load = tlb_pre_load,
+    .post_load = tlb_post_load,
+};
+
+static const VMStateField vmstate_env_fields[] = {
+    VMSTATE_UINT64_ARRAY(gr, CPUHPPAState, 32),
     VMSTATE_UINT64_ARRAY(fr, CPUHPPAState, 32),
     VMSTATE_UINT64_ARRAY(sr, CPUHPPAState, 8),
-    VMSTATE_UINTTR_ARRAY(cr, CPUHPPAState, 32),
-    VMSTATE_UINTTR_ARRAY(cr_back, CPUHPPAState, 2),
-    VMSTATE_UINTTR_ARRAY(shadow, CPUHPPAState, 7),
+    VMSTATE_UINT64_ARRAY(cr, CPUHPPAState, 32),
+    VMSTATE_UINT64_ARRAY(cr_back, CPUHPPAState, 2),
+    VMSTATE_UINT64_ARRAY(shadow, CPUHPPAState, 7),
 
     /* Save the architecture value of the psw, not the internally
        expanded version.  Since this architecture value does not
@@ -145,28 +192,29 @@ static VMStateField vmstate_env_fields[] = {
         .offset = 0
     },
 
-    VMSTATE_UINTTR(iaoq_f, CPUHPPAState),
-    VMSTATE_UINTTR(iaoq_b, CPUHPPAState),
+    VMSTATE_UINT64(iaoq_f, CPUHPPAState),
+    VMSTATE_UINT64(iaoq_b, CPUHPPAState),
     VMSTATE_UINT64(iasq_f, CPUHPPAState),
     VMSTATE_UINT64(iasq_b, CPUHPPAState),
 
     VMSTATE_UINT32(fr0_shadow, CPUHPPAState),
-
-    VMSTATE_ARRAY(tlb, CPUHPPAState, ARRAY_SIZE(((CPUHPPAState *)0)->tlb),
-                  0, vmstate_tlb, hppa_tlb_entry),
-    VMSTATE_UINT32(tlb_last, CPUHPPAState),
-
     VMSTATE_END_OF_LIST()
 };
 
+static const VMStateDescription *vmstate_env_subsections[] = {
+    &vmstate_tlb,
+    NULL
+};
+
 static const VMStateDescription vmstate_env = {
     .name = "env",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 3,
+    .minimum_version_id = 3,
     .fields = vmstate_env_fields,
+    .subsections = vmstate_env_subsections,
 };
 
-static VMStateField vmstate_cpu_fields[] = {
+static const VMStateField vmstate_cpu_fields[] = {
     VMSTATE_CPU(),
     VMSTATE_STRUCT(env, HPPACPU, 1, vmstate_env, CPUHPPAState),
     VMSTATE_END_OF_LIST()
diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c
index 350485f619..858ce6ec7f 100644
--- a/target/hppa/mem_helper.c
+++ b/target/hppa/mem_helper.c
@@ -25,72 +25,136 @@
 #include "hw/core/cpu.h"
 #include "trace.h"
 
-static hppa_tlb_entry *hppa_find_tlb(CPUHPPAState *env, vaddr addr)
+hwaddr hppa_abs_to_phys_pa2_w1(vaddr addr)
 {
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(env->tlb); ++i) {
-        hppa_tlb_entry *ent = &env->tlb[i];
-        if (ent->va_b <= addr && addr <= ent->va_e) {
-            trace_hppa_tlb_find_entry(env, ent + i, ent->entry_valid,
-                                      ent->va_b, ent->va_e, ent->pa);
-            return ent;
-        }
+    if (likely(extract64(addr, 58, 4) != 0xf)) {
+        /* Memory address space */
+        return addr & MAKE_64BIT_MASK(0, 62);
+    }
+    if (extract64(addr, 54, 4) != 0) {
+        /* I/O address space */
+        return addr | MAKE_64BIT_MASK(62, 2);
+    }
+    /* PDC address space */
+    return (addr & MAKE_64BIT_MASK(0, 54)) | MAKE_64BIT_MASK(60, 4);
+}
+
+hwaddr hppa_abs_to_phys_pa2_w0(vaddr addr)
+{
+    if (likely(extract32(addr, 28, 4) != 0xf)) {
+        /* Memory address space */
+        return addr & MAKE_64BIT_MASK(0, 32);
+    }
+    if (extract32(addr, 24, 4) != 0) {
+        /* I/O address space */
+        return addr | MAKE_64BIT_MASK(32, 32);
+    }
+    /* PDC address space */
+    return (addr & MAKE_64BIT_MASK(0, 24)) | MAKE_64BIT_MASK(60, 4);
+}
+
+static hwaddr hppa_abs_to_phys(CPUHPPAState *env, vaddr addr)
+{
+    if (!hppa_is_pa20(env)) {
+        return addr;
+    } else if (env->psw & PSW_W) {
+        return hppa_abs_to_phys_pa2_w1(addr);
+    } else {
+        return hppa_abs_to_phys_pa2_w0(addr);
+    }
+}
+
+static HPPATLBEntry *hppa_find_tlb(CPUHPPAState *env, vaddr addr)
+{
+    IntervalTreeNode *i = interval_tree_iter_first(&env->tlb_root, addr, addr);
+
+    if (i) {
+        HPPATLBEntry *ent = container_of(i, HPPATLBEntry, itree);
+        trace_hppa_tlb_find_entry(env, ent, ent->entry_valid,
+                                  ent->itree.start, ent->itree.last, ent->pa);
+        return ent;
     }
     trace_hppa_tlb_find_entry_not_found(env, addr);
     return NULL;
 }
 
-static void hppa_flush_tlb_ent(CPUHPPAState *env, hppa_tlb_entry *ent,
+static void hppa_flush_tlb_ent(CPUHPPAState *env, HPPATLBEntry *ent,
                                bool force_flush_btlb)
 {
     CPUState *cs = env_cpu(env);
+    bool is_btlb;
 
     if (!ent->entry_valid) {
         return;
     }
 
-    trace_hppa_tlb_flush_ent(env, ent, ent->va_b, ent->va_e, ent->pa);
+    trace_hppa_tlb_flush_ent(env, ent, ent->itree.start,
+                             ent->itree.last, ent->pa);
 
-    tlb_flush_range_by_mmuidx(cs, ent->va_b,
-                                ent->va_e - ent->va_b + 1,
-                                HPPA_MMU_FLUSH_MASK, TARGET_LONG_BITS);
+    tlb_flush_range_by_mmuidx(cs, ent->itree.start,
+                              ent->itree.last - ent->itree.start + 1,
+                              HPPA_MMU_FLUSH_MASK, TARGET_LONG_BITS);
 
-    /* never clear BTLBs, unless forced to do so. */
-    if (ent < &env->tlb[HPPA_BTLB_ENTRIES] && !force_flush_btlb) {
+    /* Never clear BTLBs, unless forced to do so. */
+    is_btlb = ent < &env->tlb[HPPA_BTLB_ENTRIES(env)];
+    if (is_btlb && !force_flush_btlb) {
         return;
     }
 
+    interval_tree_remove(&ent->itree, &env->tlb_root);
     memset(ent, 0, sizeof(*ent));
-    ent->va_b = -1;
+
+    if (!is_btlb) {
+        ent->unused_next = env->tlb_unused;
+        env->tlb_unused = ent;
+    }
 }
 
-static hppa_tlb_entry *hppa_alloc_tlb_ent(CPUHPPAState *env)
+static void hppa_flush_tlb_range(CPUHPPAState *env, vaddr va_b, vaddr va_e)
 {
-    hppa_tlb_entry *ent;
-    uint32_t i;
+    IntervalTreeNode *i, *n;
 
-    if (env->tlb_last < HPPA_BTLB_ENTRIES || env->tlb_last >= ARRAY_SIZE(env->tlb)) {
-        i = HPPA_BTLB_ENTRIES;
-        env->tlb_last = HPPA_BTLB_ENTRIES + 1;
-    } else {
-        i = env->tlb_last;
-        env->tlb_last++;
+    i = interval_tree_iter_first(&env->tlb_root, va_b, va_e);
+    for (; i ; i = n) {
+        HPPATLBEntry *ent = container_of(i, HPPATLBEntry, itree);
+
+        /*
+         * Find the next entry now: In the normal case the current entry
+         * will be removed, but in the BTLB case it will remain.
+         */
+        n = interval_tree_iter_next(i, va_b, va_e);
+        hppa_flush_tlb_ent(env, ent, false);
     }
+}
+
+static HPPATLBEntry *hppa_alloc_tlb_ent(CPUHPPAState *env)
+{
+    HPPATLBEntry *ent = env->tlb_unused;
+
+    if (ent == NULL) {
+        uint32_t btlb_entries = HPPA_BTLB_ENTRIES(env);
+        uint32_t i = env->tlb_last;
 
-    ent = &env->tlb[i];
+        if (i < btlb_entries || i >= ARRAY_SIZE(env->tlb)) {
+            i = btlb_entries;
+        }
+        env->tlb_last = i + 1;
 
-    hppa_flush_tlb_ent(env, ent, false);
+        ent = &env->tlb[i];
+        hppa_flush_tlb_ent(env, ent, false);
+    }
+
+    env->tlb_unused = ent->unused_next;
     return ent;
 }
 
 int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
                               int type, hwaddr *pphys, int *pprot,
-                              hppa_tlb_entry **tlb_entry)
+                              HPPATLBEntry **tlb_entry)
 {
     hwaddr phys;
     int prot, r_prot, w_prot, x_prot, priv;
-    hppa_tlb_entry *ent;
+    HPPATLBEntry *ent;
     int ret = -1;
 
     if (tlb_entry) {
@@ -106,7 +170,7 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
 
     /* Find a valid tlb entry that matches the virtual address.  */
     ent = hppa_find_tlb(env, addr);
-    if (ent == NULL || !ent->entry_valid) {
+    if (ent == NULL) {
         phys = 0;
         prot = 0;
         ret = (type == PAGE_EXEC) ? EXCP_ITLB_MISS : EXCP_DTLB_MISS;
@@ -118,7 +182,7 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
     }
 
     /* We now know the physical address.  */
-    phys = ent->pa + (addr - ent->va_b);
+    phys = ent->pa + (addr - ent->itree.start);
 
     /* Map TLB access_rights field to QEMU protection.  */
     priv = MMU_IDX_TO_PRIV(mmu_idx);
@@ -144,7 +208,7 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
     }
 
     /* access_id == 0 means public page and no check is performed */
-    if ((env->psw & PSW_P) && ent->access_id) {
+    if (ent->access_id && MMU_IDX_TO_P(mmu_idx)) {
         /* If bits [31:1] match, and bit 0 is set, suppress write.  */
         int match = ent->access_id * 2 + 1;
 
@@ -197,7 +261,7 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
     }
 
  egress:
-    *pphys = phys;
+    *pphys = phys = hppa_abs_to_phys(env, phys);
     *pprot = prot;
     trace_hppa_tlb_get_physical_address(env, ret, prot, addr, phys);
     return ret;
@@ -213,7 +277,7 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     /* ??? We really ought to know if the code mmu is disabled too,
        in order to get the correct debugging dumps.  */
     if (!(cpu->env.psw & PSW_D)) {
-        return addr;
+        return hppa_abs_to_phys(&cpu->env, addr);
     }
 
     excp = hppa_get_physical_address(&cpu->env, addr, MMU_KERNEL_IDX, 0,
@@ -225,13 +289,60 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     return excp == EXCP_DTLB_MISS ? -1 : phys;
 }
 
+G_NORETURN static void
+raise_exception_with_ior(CPUHPPAState *env, int excp, uintptr_t retaddr,
+                         vaddr addr, bool mmu_disabled)
+{
+    CPUState *cs = env_cpu(env);
+
+    cs->exception_index = excp;
+
+    if (env->psw & PSW_Q) {
+        /*
+         * For pa1.x, the offset and space never overlap, and so we
+         * simply extract the high and low part of the virtual address.
+         *
+         * For pa2.0, the formation of these are described in section
+         * "Interruption Parameter Registers", page 2-15.
+         */
+        env->cr[CR_IOR] = (uint32_t)addr;
+        env->cr[CR_ISR] = addr >> 32;
+
+        if (hppa_is_pa20(env)) {
+            if (mmu_disabled) {
+                /*
+                 * If data translation was disabled, the ISR contains
+                 * the upper portion of the abs address, zero-extended.
+                 */
+                env->cr[CR_ISR] &= 0x3fffffff;
+            } else {
+                /*
+                 * If data translation was enabled, the upper two bits
+                 * of the IOR (the b field) are equal to the two space
+                 * bits from the base register used to form the gva.
+                 */
+                uint64_t b;
+
+                cpu_restore_state(cs, retaddr);
+
+                b = env->gr[env->unwind_breg];
+                b >>= (env->psw & PSW_W ? 62 : 30);
+                env->cr[CR_IOR] |= b << 62;
+
+                cpu_loop_exit(cs);
+            }
+        }
+    }
+    cpu_loop_exit_restore(cs, retaddr);
+}
+
 bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
                        MMUAccessType type, int mmu_idx,
                        bool probe, uintptr_t retaddr)
 {
     HPPACPU *cpu = HPPA_CPU(cs);
     CPUHPPAState *env = &cpu->env;
-    hppa_tlb_entry *ent;
+    HPPATLBEntry *ent;
     int prot, excp, a_prot;
     hwaddr phys;
 
@@ -254,56 +365,51 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
             return false;
         }
         trace_hppa_tlb_fill_excp(env, addr, size, type, mmu_idx);
+
         /* Failure.  Raise the indicated exception.  */
-        cs->exception_index = excp;
-        if (cpu->env.psw & PSW_Q) {
-            /* ??? Needs tweaking for hppa64.  */
-            cpu->env.cr[CR_IOR] = addr;
-            cpu->env.cr[CR_ISR] = addr >> 32;
-        }
-        cpu_loop_exit_restore(cs, retaddr);
+        raise_exception_with_ior(env, excp, retaddr,
+                                 addr, mmu_idx == MMU_PHYS_IDX);
     }
 
     trace_hppa_tlb_fill_success(env, addr & TARGET_PAGE_MASK,
                                 phys & TARGET_PAGE_MASK, size, type, mmu_idx);
-    /* Success!  Store the translation into the QEMU TLB.  */
+
+    /*
+     * Success!  Store the translation into the QEMU TLB.
+     * Note that we always install a single-page entry, because that
+     * is what works best with softmmu -- anything else will trigger
+     * the large page protection mask.  We do not require this,
+     * because we record the large page here in the hppa tlb.
+     */
     tlb_set_page(cs, addr & TARGET_PAGE_MASK, phys & TARGET_PAGE_MASK,
-                 prot, mmu_idx, TARGET_PAGE_SIZE << (ent ? 2 * ent->page_size : 0));
+                 prot, mmu_idx, TARGET_PAGE_SIZE);
     return true;
 }
 
 /* Insert (Insn/Data) TLB Address.  Note this is PA 1.1 only.  */
-void HELPER(itlba)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
+void HELPER(itlba_pa11)(CPUHPPAState *env, target_ulong addr, target_ulong reg)
 {
-    hppa_tlb_entry *empty = NULL;
-    int i;
-
-    /* Zap any old entries covering ADDR; notice empty entries on the way.  */
-    for (i = HPPA_BTLB_ENTRIES; i < ARRAY_SIZE(env->tlb); ++i) {
-        hppa_tlb_entry *ent = &env->tlb[i];
-        if (ent->va_b <= addr && addr <= ent->va_e) {
-            if (ent->entry_valid) {
-                hppa_flush_tlb_ent(env, ent, false);
-            }
-            if (!empty) {
-                empty = ent;
-            }
-        }
-    }
+    HPPATLBEntry *ent;
 
-    /* If we didn't see an empty entry, evict one.  */
-    if (empty == NULL) {
-        empty = hppa_alloc_tlb_ent(env);
+    /* Zap any old entries covering ADDR. */
+    addr &= TARGET_PAGE_MASK;
+    hppa_flush_tlb_range(env, addr, addr + TARGET_PAGE_SIZE - 1);
+
+    ent = env->tlb_partial;
+    if (ent == NULL) {
+        ent = hppa_alloc_tlb_ent(env);
+        env->tlb_partial = ent;
     }
 
-    /* Note that empty->entry_valid == 0 already.  */
-    empty->va_b = addr & TARGET_PAGE_MASK;
-    empty->va_e = empty->va_b + TARGET_PAGE_SIZE - 1;
-    empty->pa = extract32(reg, 5, 20) << TARGET_PAGE_BITS;
-    trace_hppa_tlb_itlba(env, empty, empty->va_b, empty->va_e, empty->pa);
+    /* Note that ent->entry_valid == 0 already.  */
+    ent->itree.start = addr;
+    ent->itree.last = addr + TARGET_PAGE_SIZE - 1;
+    ent->pa = extract32(reg, 5, 20) << TARGET_PAGE_BITS;
+    trace_hppa_tlb_itlba(env, ent, ent->itree.start, ent->itree.last, ent->pa);
 }
 
-static void set_access_bits(CPUHPPAState *env, hppa_tlb_entry *ent, target_ureg reg)
+static void set_access_bits_pa11(CPUHPPAState *env, HPPATLBEntry *ent,
+                                 target_ulong reg)
 {
     ent->access_id = extract32(reg, 1, 18);
     ent->u = extract32(reg, 19, 1);
@@ -314,49 +420,153 @@ static void set_access_bits(CPUHPPAState *env, hppa_tlb_entry *ent, target_ureg
     ent->d = extract32(reg, 28, 1);
     ent->t = extract32(reg, 29, 1);
     ent->entry_valid = 1;
+
+    interval_tree_insert(&ent->itree, &env->tlb_root);
     trace_hppa_tlb_itlbp(env, ent, ent->access_id, ent->u, ent->ar_pl2,
                          ent->ar_pl1, ent->ar_type, ent->b, ent->d, ent->t);
 }
 
 /* Insert (Insn/Data) TLB Protection.  Note this is PA 1.1 only.  */
-void HELPER(itlbp)(CPUHPPAState *env, target_ulong addr, target_ureg reg)
+void HELPER(itlbp_pa11)(CPUHPPAState *env, target_ulong addr, target_ulong reg)
 {
-    hppa_tlb_entry *ent = hppa_find_tlb(env, addr);
+    HPPATLBEntry *ent = env->tlb_partial;
 
-    if (unlikely(ent == NULL)) {
-        qemu_log_mask(LOG_GUEST_ERROR, "ITLBP not following ITLBA\n");
-        return;
+    if (ent) {
+        env->tlb_partial = NULL;
+        if (ent->itree.start <= addr && addr <= ent->itree.last) {
+            set_access_bits_pa11(env, ent, reg);
+            return;
+        }
     }
+    qemu_log_mask(LOG_GUEST_ERROR, "ITLBP not following ITLBA\n");
+}
+
+static void itlbt_pa20(CPUHPPAState *env, target_ulong r1,
+                       target_ulong r2, vaddr va_b)
+{
+    HPPATLBEntry *ent;
+    vaddr va_e;
+    uint64_t va_size;
+    int mask_shift;
+
+    mask_shift = 2 * (r1 & 0xf);
+    va_size = (uint64_t)TARGET_PAGE_SIZE << mask_shift; /* shift is up to 30 */
+    va_b &= -va_size;
+    va_e = va_b + va_size - 1;
+
+    hppa_flush_tlb_range(env, va_b, va_e);
+    ent = hppa_alloc_tlb_ent(env);
+
+    ent->itree.start = va_b;
+    ent->itree.last = va_e;
+    ent->pa = (r1 << 7) & (TARGET_PAGE_MASK << mask_shift);
+    ent->t = extract64(r2, 61, 1);
+    ent->d = extract64(r2, 60, 1);
+    ent->b = extract64(r2, 59, 1);
+    ent->ar_type = extract64(r2, 56, 3);
+    ent->ar_pl1 = extract64(r2, 54, 2);
+    ent->ar_pl2 = extract64(r2, 52, 2);
+    ent->u = extract64(r2, 51, 1);
+    /* o = bit 50 */
+    /* p = bit 49 */
+    ent->access_id = extract64(r2, 1, 31);
+    ent->entry_valid = 1;
 
-    set_access_bits(env, ent, reg);
+    interval_tree_insert(&ent->itree, &env->tlb_root);
+    trace_hppa_tlb_itlba(env, ent, ent->itree.start, ent->itree.last, ent->pa);
+    trace_hppa_tlb_itlbp(env, ent, ent->access_id, ent->u,
+                         ent->ar_pl2, ent->ar_pl1, ent->ar_type,
+                         ent->b, ent->d, ent->t);
 }
 
-/* Purge (Insn/Data) TLB.  This is explicitly page-based, and is
-   synchronous across all processors.  */
+void HELPER(idtlbt_pa20)(CPUHPPAState *env, target_ulong r1, target_ulong r2)
+{
+    vaddr va_b = deposit64(env->cr[CR_IOR], 32, 32, env->cr[CR_ISR]);
+    itlbt_pa20(env, r1, r2, va_b);
+}
+
+void HELPER(iitlbt_pa20)(CPUHPPAState *env, target_ulong r1, target_ulong r2)
+{
+    vaddr va_b = deposit64(env->cr[CR_IIAOQ], 32, 32, env->cr[CR_IIASQ]);
+    itlbt_pa20(env, r1, r2, va_b);
+}
+
+/* Purge (Insn/Data) TLB. */
 static void ptlb_work(CPUState *cpu, run_on_cpu_data data)
 {
     CPUHPPAState *env = cpu_env(cpu);
-    target_ulong addr = (target_ulong) data.target_ptr;
-    hppa_tlb_entry *ent = hppa_find_tlb(env, addr);
+    vaddr start = data.target_ptr;
+    vaddr end;
+
+    /*
+     * PA2.0 encodes a page range in GR[b], copied into the low 4 bits of
+     * the otherwise page-aligned address; PA1.x always provides zero.
+     * Widen to vaddr before shifting: the shift count may reach 30.
+     */
+    end = start & 0xf;
+    start &= TARGET_PAGE_MASK;
+    end = (vaddr)TARGET_PAGE_SIZE << (2 * end);
+    end = start + end - 1;
+
+    hppa_flush_tlb_range(env, start, end);
+}
 
-    if (ent && ent->entry_valid) {
-        hppa_flush_tlb_ent(env, ent, false);
-    }
+/* This is local to the current cpu. */
+void HELPER(ptlb_l)(CPUHPPAState *env, target_ulong addr)
+{
+    trace_hppa_tlb_ptlb_local(env);
+    ptlb_work(env_cpu(env), RUN_ON_CPU_TARGET_PTR(addr));
 }
 
+/* This is synchronous across all processors.  */
 void HELPER(ptlb)(CPUHPPAState *env, target_ulong addr)
 {
     CPUState *src = env_cpu(env);
     CPUState *cpu;
+    bool wait = false;
+
     trace_hppa_tlb_ptlb(env);
     run_on_cpu_data data = RUN_ON_CPU_TARGET_PTR(addr);
 
     CPU_FOREACH(cpu) {
         if (cpu != src) {
             async_run_on_cpu(cpu, ptlb_work, data);
+            wait = true;
         }
     }
-    async_safe_run_on_cpu(src, ptlb_work, data);
+    if (wait) {
+        async_safe_run_on_cpu(src, ptlb_work, data);
+    } else {
+        ptlb_work(src, data);
+    }
+}
+
+void hppa_ptlbe(CPUHPPAState *env)
+{
+    uint32_t btlb_entries = HPPA_BTLB_ENTRIES(env);
+    uint32_t i;
+
+    /* Zap the (non-btlb) tlb entries themselves. */
+    memset(&env->tlb[btlb_entries], 0,
+           sizeof(env->tlb) - btlb_entries * sizeof(env->tlb[0]));
+    env->tlb_last = btlb_entries;
+    env->tlb_partial = NULL;
+
+    /* Put them all onto the unused list. */
+    env->tlb_unused = &env->tlb[btlb_entries];
+    for (i = btlb_entries; i < ARRAY_SIZE(env->tlb) - 1; ++i) {
+        env->tlb[i].unused_next = &env->tlb[i + 1];
+    }
+
+    /* Re-initialize the interval tree with only the btlb entries. */
+    memset(&env->tlb_root, 0, sizeof(env->tlb_root));
+    for (i = 0; i < btlb_entries; ++i) {
+        if (env->tlb[i].entry_valid) {
+            interval_tree_insert(&env->tlb[i].itree, &env->tlb_root);
+        }
+    }
+
+    tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
 }
 
 /* Purge (Insn/Data) TLB entry.  This affects an implementation-defined
@@ -365,17 +575,12 @@ void HELPER(ptlbe)(CPUHPPAState *env)
 {
     trace_hppa_tlb_ptlbe(env);
     qemu_log_mask(CPU_LOG_MMU, "FLUSH ALL TLB ENTRIES\n");
-    memset(&env->tlb[HPPA_BTLB_ENTRIES], 0,
-        sizeof(env->tlb) - HPPA_BTLB_ENTRIES * sizeof(env->tlb[0]));
-    env->tlb_last = HPPA_BTLB_ENTRIES;
-    tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
+    hppa_ptlbe(env);
 }
 
 void cpu_hppa_change_prot_id(CPUHPPAState *env)
 {
-    if (env->psw & PSW_P) {
-        tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_MASK);
-    }
+    tlb_flush_by_mmuidx(env_cpu(env), HPPA_MMU_FLUSH_P_MASK);
 }
 
 void HELPER(change_prot_id)(CPUHPPAState *env)
@@ -383,7 +588,7 @@ void HELPER(change_prot_id)(CPUHPPAState *env)
     cpu_hppa_change_prot_id(env);
 }
 
-target_ureg HELPER(lpa)(CPUHPPAState *env, target_ulong addr)
+target_ulong HELPER(lpa)(CPUHPPAState *env, target_ulong addr)
 {
     hwaddr phys;
     int prot, excp;
@@ -391,16 +596,11 @@ target_ureg HELPER(lpa)(CPUHPPAState *env, target_ulong addr)
     excp = hppa_get_physical_address(env, addr, MMU_KERNEL_IDX, 0,
                                      &phys, &prot, NULL);
     if (excp >= 0) {
-        if (env->psw & PSW_Q) {
-            /* ??? Needs tweaking for hppa64.  */
-            env->cr[CR_IOR] = addr;
-            env->cr[CR_ISR] = addr >> 32;
-        }
         if (excp == EXCP_DTLB_MISS) {
             excp = EXCP_NA_DTLB_MISS;
         }
         trace_hppa_tlb_lpa_failed(env, addr);
-        hppa_dynamic_excp(env, excp, GETPC());
+        raise_exception_with_ior(env, excp, GETPC(), addr, false);
     }
     trace_hppa_tlb_lpa_success(env, addr, phys);
     return phys;
@@ -409,7 +609,7 @@ target_ureg HELPER(lpa)(CPUHPPAState *env, target_ulong addr)
 /* Return the ar_type of the TLB at VADDR, or -1.  */
 int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr)
 {
-    hppa_tlb_entry *ent = hppa_find_tlb(env, vaddr);
+    HPPATLBEntry *ent = hppa_find_tlb(env, vaddr);
     return ent ? ent->ar_type : -1;
 }
 
@@ -424,15 +624,17 @@ void HELPER(diag_btlb)(CPUHPPAState *env)
     unsigned int phys_page, len, slot;
     int mmu_idx = cpu_mmu_index(env, 0);
     uintptr_t ra = GETPC();
-    hppa_tlb_entry *btlb;
+    HPPATLBEntry *btlb;
     uint64_t virt_page;
     uint32_t *vaddr;
+    uint32_t btlb_entries = HPPA_BTLB_ENTRIES(env);
 
-#ifdef TARGET_HPPA64
     /* BTLBs are not supported on 64-bit CPUs */
-    env->gr[28] = -1; /* nonexistent procedure */
-    return;
-#endif
+    if (btlb_entries == 0) {
+        env->gr[28] = -1; /* nonexistent procedure */
+        return;
+    }
+
     env->gr[28] = 0; /* PDC_OK */
 
     switch (env->gr[25]) {
@@ -446,8 +648,8 @@ void HELPER(diag_btlb)(CPUHPPAState *env)
         } else {
             vaddr[0] = cpu_to_be32(1);
             vaddr[1] = cpu_to_be32(16 * 1024);
-            vaddr[2] = cpu_to_be32(HPPA_BTLB_FIXED);
-            vaddr[3] = cpu_to_be32(HPPA_BTLB_VARIABLE);
+            vaddr[2] = cpu_to_be32(PA10_BTLB_FIXED);
+            vaddr[3] = cpu_to_be32(PA10_BTLB_VARIABLE);
         }
         break;
     case 1:
@@ -464,15 +666,17 @@ void HELPER(diag_btlb)(CPUHPPAState *env)
                     (long long) virt_page << TARGET_PAGE_BITS,
                     (long long) (virt_page + len) << TARGET_PAGE_BITS,
                     (long long) virt_page, phys_page, len, slot);
-        if (slot < HPPA_BTLB_ENTRIES) {
+        if (slot < btlb_entries) {
             btlb = &env->tlb[slot];
-            /* force flush of possibly existing BTLB entry */
+
+            /* Force flush of possibly existing BTLB entry. */
             hppa_flush_tlb_ent(env, btlb, true);
-            /* create new BTLB entry */
-            btlb->va_b = virt_page << TARGET_PAGE_BITS;
-            btlb->va_e = btlb->va_b + len * TARGET_PAGE_SIZE - 1;
+
+            /* Create new BTLB entry */
+            btlb->itree.start = virt_page << TARGET_PAGE_BITS;
+            btlb->itree.last = btlb->itree.start + len * TARGET_PAGE_SIZE - 1;
             btlb->pa = phys_page << TARGET_PAGE_BITS;
-            set_access_bits(env, btlb, env->gr[20]);
+            set_access_bits_pa11(env, btlb, env->gr[20]);
             btlb->t = 0;
             btlb->d = 1;
         } else {
@@ -484,7 +688,7 @@ void HELPER(diag_btlb)(CPUHPPAState *env)
         slot = env->gr[22];
         qemu_log_mask(CPU_LOG_MMU, "PDC_BLOCK_TLB: PDC_BTLB_PURGE slot %d\n",
                                     slot);
-        if (slot < HPPA_BTLB_ENTRIES) {
+        if (slot < btlb_entries) {
             btlb = &env->tlb[slot];
             hppa_flush_tlb_ent(env, btlb, true);
         } else {
@@ -494,7 +698,7 @@ void HELPER(diag_btlb)(CPUHPPAState *env)
     case 3:
         /* Purge all BTLB entries */
         qemu_log_mask(CPU_LOG_MMU, "PDC_BLOCK_TLB: PDC_BTLB_PURGE_ALL\n");
-        for (slot = 0; slot < HPPA_BTLB_ENTRIES; slot++) {
+        for (slot = 0; slot < btlb_entries; slot++) {
             btlb = &env->tlb[slot];
             hppa_flush_tlb_ent(env, btlb, true);
         }
diff --git a/target/hppa/op_helper.c b/target/hppa/op_helper.c
index 837e2b3117..a0e31c0c25 100644
--- a/target/hppa/op_helper.c
+++ b/target/hppa/op_helper.c
@@ -42,25 +42,25 @@ G_NORETURN void hppa_dynamic_excp(CPUHPPAState *env, int excp, uintptr_t ra)
     cpu_loop_exit_restore(cs, ra);
 }
 
-void HELPER(tsv)(CPUHPPAState *env, target_ureg cond)
+void HELPER(tsv)(CPUHPPAState *env, target_ulong cond)
 {
-    if (unlikely((target_sreg)cond < 0)) {
+    if (unlikely((target_long)cond < 0)) {
         hppa_dynamic_excp(env, EXCP_OVERFLOW, GETPC());
     }
 }
 
-void HELPER(tcond)(CPUHPPAState *env, target_ureg cond)
+void HELPER(tcond)(CPUHPPAState *env, target_ulong cond)
 {
     if (unlikely(cond)) {
         hppa_dynamic_excp(env, EXCP_COND, GETPC());
     }
 }
 
-static void atomic_store_3(CPUHPPAState *env, target_ulong addr,
-                           uint32_t val, uintptr_t ra)
+static void atomic_store_mask32(CPUHPPAState *env, target_ulong addr,
+                                uint32_t val, uint32_t mask, uintptr_t ra)
 {
     int mmu_idx = cpu_mmu_index(env, 0);
-    uint32_t old, new, cmp, mask, *haddr;
+    uint32_t old, new, cmp, *haddr;
     void *vaddr;
 
     vaddr = probe_access(env, addr, 3, MMU_DATA_STORE, mmu_idx, ra);
@@ -81,7 +81,47 @@ static void atomic_store_3(CPUHPPAState *env, target_ulong addr,
     }
 }
 
-static void do_stby_b(CPUHPPAState *env, target_ulong addr, target_ureg val,
+/*
+ * Atomically replace the bytes selected by MASK in the aligned doubleword
+ * containing ADDR with the corresponding bytes of VAL.
+ *
+ * VAL and MASK are in big-endian (guest) order, so the host word loaded
+ * from memory is byte-swapped before the merge and swapped back after.
+ * SIZE is the byte count passed to probe_access() for the store check.
+ * Without CONFIG_ATOMIC64, or when no host address is available, exit
+ * through cpu_loop_exit_atomic().
+ */
+static void atomic_store_mask64(CPUHPPAState *env, target_ulong addr,
+                                uint64_t val, uint64_t mask,
+                                int size, uintptr_t ra)
+{
+#ifdef CONFIG_ATOMIC64
+    int mmu_idx = cpu_mmu_index(env, 0);
+    uint64_t old, new, cmp, *haddr;
+    void *vaddr;
+
+    vaddr = probe_access(env, addr, size, MMU_DATA_STORE, mmu_idx, ra);
+    if (vaddr == NULL) {
+        cpu_loop_exit_atomic(env_cpu(env), ra);
+    }
+    haddr = (uint64_t *)((uintptr_t)vaddr & -8);
+
+    old = *haddr;
+    while (1) {
+        /* Full 64-bit swaps: the 32-bit variants drop the upper word. */
+        new = be64_to_cpu((cpu_to_be64(old) & ~mask) | (val & mask));
+        cmp = qatomic_cmpxchg__nocheck(haddr, old, new);
+        if (cmp == old) {
+            return;
+        }
+        old = cmp;
+    }
+#else
+    cpu_loop_exit_atomic(env_cpu(env), ra);
+#endif
+}
+
+static void do_stby_b(CPUHPPAState *env, target_ulong addr, target_ulong val,
                       bool parallel, uintptr_t ra)
 {
     switch (addr & 3) {
@@ -94,7 +123,7 @@ static void do_stby_b(CPUHPPAState *env, target_ulong addr, target_ureg val,
     case 1:
         /* The 3 byte store must appear atomic.  */
         if (parallel) {
-            atomic_store_3(env, addr, val, ra);
+            atomic_store_mask32(env, addr, val, 0x00ffffffu, ra);
         } else {
             cpu_stb_data_ra(env, addr, val >> 16, ra);
             cpu_stw_data_ra(env, addr + 1, val, ra);
@@ -106,25 +135,153 @@ static void do_stby_b(CPUHPPAState *env, target_ulong addr, target_ureg val,
     }
 }
 
-void HELPER(stby_b)(CPUHPPAState *env, target_ulong addr, target_ureg val)
+static void do_stdby_b(CPUHPPAState *env, target_ulong addr, uint64_t val,
+                       bool parallel, uintptr_t ra)
+{
+    switch (addr & 7) {
+    case 7:
+        cpu_stb_data_ra(env, addr, val, ra);
+        break;
+    case 6:
+        cpu_stw_data_ra(env, addr, val, ra);
+        break;
+    case 5:
+        /* The 3 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask32(env, addr, val, 0x00ffffffu, ra);
+        } else {
+            cpu_stb_data_ra(env, addr, val >> 16, ra);
+            cpu_stw_data_ra(env, addr + 1, val, ra);
+        }
+        break;
+    case 4:
+        cpu_stl_data_ra(env, addr, val, ra);
+        break;
+    case 3:
+        /* The 5 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask64(env, addr, val, 0x000000ffffffffffull, 5, ra);
+        } else {
+            cpu_stb_data_ra(env, addr, val >> 32, ra);
+            cpu_stl_data_ra(env, addr + 1, val, ra);
+        }
+        break;
+    case 2:
+        /* The 6 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask64(env, addr, val, 0x0000ffffffffffffull, 6, ra);
+        } else {
+            cpu_stw_data_ra(env, addr, val >> 32, ra);
+            cpu_stl_data_ra(env, addr + 2, val, ra);
+        }
+        break;
+    case 1:
+        /* The 7 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask64(env, addr, val, 0x00ffffffffffffffull, 7, ra);
+        } else {
+            cpu_stb_data_ra(env, addr, val >> 48, ra);
+            cpu_stw_data_ra(env, addr + 1, val >> 32, ra);
+            cpu_stl_data_ra(env, addr + 3, val, ra);
+        }
+        break;
+    default:
+        cpu_stq_data_ra(env, addr, val, ra);
+        break;
+    }
+}
+
+void HELPER(stby_b)(CPUHPPAState *env, target_ulong addr, target_ulong val)
 {
     do_stby_b(env, addr, val, false, GETPC());
 }
 
 void HELPER(stby_b_parallel)(CPUHPPAState *env, target_ulong addr,
-                             target_ureg val)
+                             target_ulong val)
 {
     do_stby_b(env, addr, val, true, GETPC());
 }
 
-static void do_stby_e(CPUHPPAState *env, target_ulong addr, target_ureg val,
+void HELPER(stdby_b)(CPUHPPAState *env, target_ulong addr, target_ulong val)
+{
+    do_stdby_b(env, addr, val, false, GETPC());
+}
+
+void HELPER(stdby_b_parallel)(CPUHPPAState *env, target_ulong addr,
+                              target_ulong val)
+{
+    do_stdby_b(env, addr, val, true, GETPC());
+}
+
+static void do_stby_e(CPUHPPAState *env, target_ulong addr, target_ulong val,
                       bool parallel, uintptr_t ra)
 {
     switch (addr & 3) {
     case 3:
         /* The 3 byte store must appear atomic.  */
         if (parallel) {
-            atomic_store_3(env, addr - 3, val, ra);
+            atomic_store_mask32(env, addr - 3, val, 0xffffff00u, ra);
+        } else {
+            cpu_stw_data_ra(env, addr - 3, val >> 16, ra);
+            cpu_stb_data_ra(env, addr - 1, val >> 8, ra);
+        }
+        break;
+    case 2:
+        cpu_stw_data_ra(env, addr - 2, val >> 16, ra);
+        break;
+    case 1:
+        cpu_stb_data_ra(env, addr - 1, val >> 24, ra);
+        break;
+    default:
+        /* Nothing is stored, but protection is checked and the
+           cacheline is marked dirty.  */
+        probe_write(env, addr, 0, cpu_mmu_index(env, 0), ra);
+        break;
+    }
+}
+
+static void do_stdby_e(CPUHPPAState *env, target_ulong addr, uint64_t val,
+                       bool parallel, uintptr_t ra)
+{
+    switch (addr & 7) {
+    case 7:
+        /* The 7 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask64(env, addr - 7, val,
+                                0xffffffffffffff00ull, 7, ra);
+        } else {
+            cpu_stl_data_ra(env, addr - 7, val >> 32, ra);
+            cpu_stw_data_ra(env, addr - 3, val >> 16, ra);
+            cpu_stb_data_ra(env, addr - 1, val >> 8, ra);
+        }
+        break;
+    case 6:
+        /* The 6 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask64(env, addr - 6, val,
+                                0xffffffffffff0000ull, 6, ra);
+        } else {
+            cpu_stl_data_ra(env, addr - 6, val >> 32, ra);
+            cpu_stw_data_ra(env, addr - 2, val >> 16, ra);
+        }
+        break;
+    case 5:
+        /* The 5 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask64(env, addr - 5, val,
+                                0xffffffffff000000ull, 5, ra);
+        } else {
+            cpu_stl_data_ra(env, addr - 5, val >> 32, ra);
+            cpu_stb_data_ra(env, addr - 1, val >> 24, ra);
+        }
+        break;
+    case 4:
+        cpu_stl_data_ra(env, addr - 4, val >> 32, ra);
+        break;
+    case 3:
+        /* The 3 byte store must appear atomic.  */
+        if (parallel) {
+            atomic_store_mask32(env, addr - 3, val, 0xffffff00u, ra);
         } else {
             cpu_stw_data_ra(env, addr - 3, val >> 16, ra);
             cpu_stb_data_ra(env, addr - 1, val >> 8, ra);
@@ -144,17 +301,28 @@ static void do_stby_e(CPUHPPAState *env, target_ulong addr, target_ureg val,
     }
 }
 
-void HELPER(stby_e)(CPUHPPAState *env, target_ulong addr, target_ureg val)
+void HELPER(stby_e)(CPUHPPAState *env, target_ulong addr, target_ulong val)
 {
     do_stby_e(env, addr, val, false, GETPC());
 }
 
 void HELPER(stby_e_parallel)(CPUHPPAState *env, target_ulong addr,
-                             target_ureg val)
+                             target_ulong val)
 {
     do_stby_e(env, addr, val, true, GETPC());
 }
 
+void HELPER(stdby_e)(CPUHPPAState *env, target_ulong addr, target_ulong val)
+{
+    do_stdby_e(env, addr, val, false, GETPC());
+}
+
+void HELPER(stdby_e_parallel)(CPUHPPAState *env, target_ulong addr,
+                              target_ulong val)
+{
+    do_stdby_e(env, addr, val, true, GETPC());
+}
+
 void HELPER(ldc_check)(target_ulong addr)
 {
     if (unlikely(addr & 0xf)) {
@@ -164,7 +332,7 @@ void HELPER(ldc_check)(target_ulong addr)
     }
 }
 
-target_ureg HELPER(probe)(CPUHPPAState *env, target_ulong addr,
+target_ulong HELPER(probe)(CPUHPPAState *env, target_ulong addr,
                           uint32_t level, uint32_t want)
 {
 #ifdef CONFIG_USER_ONLY
@@ -196,7 +364,7 @@ target_ureg HELPER(probe)(CPUHPPAState *env, target_ulong addr,
 #endif
 }
 
-target_ureg HELPER(read_interval_timer)(void)
+target_ulong HELPER(read_interval_timer)(void)
 {
 #ifdef CONFIG_USER_ONLY
     /* In user-mode, QEMU_CLOCK_VIRTUAL doesn't exist.
@@ -209,3 +377,113 @@ target_ureg HELPER(read_interval_timer)(void)
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) >> 2;
 #endif
 }
+
+uint64_t HELPER(hadd_ss)(uint64_t r1, uint64_t r2)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = sextract64(r1, i, 16);
+        int f2 = sextract64(r2, i, 16);
+        int fr = f1 + f2;
+
+        fr = MIN(fr, INT16_MAX);
+        fr = MAX(fr, INT16_MIN);
+        ret = deposit64(ret, i, 16, fr);
+    }
+    return ret;
+}
+
+uint64_t HELPER(hadd_us)(uint64_t r1, uint64_t r2)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = extract64(r1, i, 16);
+        int f2 = sextract64(r2, i, 16);
+        int fr = f1 + f2;
+
+        fr = MIN(fr, UINT16_MAX);
+        fr = MAX(fr, 0);
+        ret = deposit64(ret, i, 16, fr);
+    }
+    return ret;
+}
+
+uint64_t HELPER(havg)(uint64_t r1, uint64_t r2)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = extract64(r1, i, 16);
+        int f2 = extract64(r2, i, 16);
+        int fr = f1 + f2;
+
+        ret = deposit64(ret, i, 16, (fr >> 1) | (fr & 1));
+    }
+    return ret;
+}
+
+uint64_t HELPER(hsub_ss)(uint64_t r1, uint64_t r2)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = sextract64(r1, i, 16);
+        int f2 = sextract64(r2, i, 16);
+        int fr = f1 - f2;
+
+        fr = MIN(fr, INT16_MAX);
+        fr = MAX(fr, INT16_MIN);
+        ret = deposit64(ret, i, 16, fr);
+    }
+    return ret;
+}
+
+uint64_t HELPER(hsub_us)(uint64_t r1, uint64_t r2)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = extract64(r1, i, 16);
+        int f2 = sextract64(r2, i, 16);
+        int fr = f1 - f2;
+
+        fr = MIN(fr, UINT16_MAX);
+        fr = MAX(fr, 0);
+        ret = deposit64(ret, i, 16, fr);
+    }
+    return ret;
+}
+
+uint64_t HELPER(hshladd)(uint64_t r1, uint64_t r2, uint32_t sh)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = sextract64(r1, i, 16);
+        int f2 = sextract64(r2, i, 16);
+        int fr = (f1 << sh) + f2;
+
+        fr = MIN(fr, INT16_MAX);
+        fr = MAX(fr, INT16_MIN);
+        ret = deposit64(ret, i, 16, fr);
+    }
+    return ret;
+}
+
+uint64_t HELPER(hshradd)(uint64_t r1, uint64_t r2, uint32_t sh)
+{
+    uint64_t ret = 0;
+
+    for (int i = 0; i < 64; i += 16) {
+        int f1 = sextract64(r1, i, 16);
+        int f2 = sextract64(r2, i, 16);
+        int fr = (f1 >> sh) + f2;
+
+        fr = MIN(fr, INT16_MAX);
+        fr = MAX(fr, INT16_MIN);
+        ret = deposit64(ret, i, 16, fr);
+    }
+    return ret;
+}
diff --git a/target/hppa/sys_helper.c b/target/hppa/sys_helper.c
index 4bb4cf611c..a59245eed3 100644
--- a/target/hppa/sys_helper.c
+++ b/target/hppa/sys_helper.c
@@ -24,7 +24,7 @@
 #include "qemu/timer.h"
 #include "sysemu/runstate.h"
 
-void HELPER(write_interval_timer)(CPUHPPAState *env, target_ureg val)
+void HELPER(write_interval_timer)(CPUHPPAState *env, target_ulong val)
 {
     HPPACPU *cpu = env_archcpu(env);
     uint64_t current = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
@@ -58,7 +58,7 @@ void HELPER(reset)(CPUHPPAState *env)
     helper_excp(env, EXCP_HLT);
 }
 
-target_ureg HELPER(swap_system_mask)(CPUHPPAState *env, target_ureg nsm)
+target_ulong HELPER(swap_system_mask)(CPUHPPAState *env, target_ulong nsm)
 {
     target_ulong psw = env->psw;
     /*
@@ -80,6 +80,16 @@ void HELPER(rfi)(CPUHPPAState *env)
     env->iasq_b = (uint64_t)env->cr_back[0] << 32;
     env->iaoq_f = env->cr[CR_IIAOQ];
     env->iaoq_b = env->cr_back[1];
+
+    /*
+     * For pa2.0, IIASQ is the top bits of the virtual address.
+     * To recreate the space identifier, remove the offset bits.
+     */
+    if (hppa_is_pa20(env)) {
+        env->iasq_f &= ~env->iaoq_f;
+        env->iasq_b &= ~env->iaoq_b;
+    }
+
     cpu_hppa_put_psw(env, env->cr[CR_IPSW]);
 }
 
diff --git a/target/hppa/trace-events b/target/hppa/trace-events
index 8931517890..a10ba73d5d 100644
--- a/target/hppa/trace-events
+++ b/target/hppa/trace-events
@@ -10,6 +10,7 @@ disable hppa_tlb_fill_success(void *env, uint64_t addr, uint64_t phys, int size,
 disable hppa_tlb_itlba(void *env, void *ent, uint64_t va_b, uint64_t va_e, uint64_t pa) "env=%p ent=%p va_b=0x%lx va_e=0x%lx pa=0x%lx"
 disable hppa_tlb_itlbp(void *env, void *ent, int access_id, int u, int pl2, int pl1, int type, int b, int d, int t) "env=%p ent=%p access_id=%x u=%d pl2=%d pl1=%d type=%d b=%d d=%d t=%d"
 disable hppa_tlb_ptlb(void *env) "env=%p"
+disable hppa_tlb_ptlb_local(void *env) "env=%p"
 disable hppa_tlb_ptlbe(void *env) "env=%p"
 disable hppa_tlb_lpa_success(void *env, uint64_t addr, uint64_t phys) "env=%p addr=0x%lx phys=0x%lx"
 disable hppa_tlb_lpa_failed(void *env, uint64_t addr) "env=%p addr=0x%lx"
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index 9f3ba9f42f..bcce65d587 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -23,6 +23,7 @@
 #include "qemu/host-utils.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
 #include "exec/translator.h"
@@ -32,240 +33,35 @@
 #include "exec/helper-info.c.inc"
 #undef  HELPER_H
 
-
-/* Since we have a distinction between register size and address size,
-   we need to redefine all of these.  */
-
-#undef TCGv
+/* Choose to use explicit sizes within this file. */
 #undef tcg_temp_new
-#undef tcg_global_mem_new
-
-#if TARGET_LONG_BITS == 64
-#define TCGv_tl              TCGv_i64
-#define tcg_temp_new_tl      tcg_temp_new_i64
-#if TARGET_REGISTER_BITS == 64
-#define tcg_gen_extu_reg_tl  tcg_gen_mov_i64
-#else
-#define tcg_gen_extu_reg_tl  tcg_gen_extu_i32_i64
-#endif
-#else
-#define TCGv_tl              TCGv_i32
-#define tcg_temp_new_tl      tcg_temp_new_i32
-#define tcg_gen_extu_reg_tl  tcg_gen_mov_i32
-#endif
-
-#if TARGET_REGISTER_BITS == 64
-#define TCGv_reg             TCGv_i64
-
-#define tcg_temp_new         tcg_temp_new_i64
-#define tcg_global_mem_new   tcg_global_mem_new_i64
-
-#define tcg_gen_movi_reg     tcg_gen_movi_i64
-#define tcg_gen_mov_reg      tcg_gen_mov_i64
-#define tcg_gen_ld8u_reg     tcg_gen_ld8u_i64
-#define tcg_gen_ld8s_reg     tcg_gen_ld8s_i64
-#define tcg_gen_ld16u_reg    tcg_gen_ld16u_i64
-#define tcg_gen_ld16s_reg    tcg_gen_ld16s_i64
-#define tcg_gen_ld32u_reg    tcg_gen_ld32u_i64
-#define tcg_gen_ld32s_reg    tcg_gen_ld32s_i64
-#define tcg_gen_ld_reg       tcg_gen_ld_i64
-#define tcg_gen_st8_reg      tcg_gen_st8_i64
-#define tcg_gen_st16_reg     tcg_gen_st16_i64
-#define tcg_gen_st32_reg     tcg_gen_st32_i64
-#define tcg_gen_st_reg       tcg_gen_st_i64
-#define tcg_gen_add_reg      tcg_gen_add_i64
-#define tcg_gen_addi_reg     tcg_gen_addi_i64
-#define tcg_gen_sub_reg      tcg_gen_sub_i64
-#define tcg_gen_neg_reg      tcg_gen_neg_i64
-#define tcg_gen_subfi_reg    tcg_gen_subfi_i64
-#define tcg_gen_subi_reg     tcg_gen_subi_i64
-#define tcg_gen_and_reg      tcg_gen_and_i64
-#define tcg_gen_andi_reg     tcg_gen_andi_i64
-#define tcg_gen_or_reg       tcg_gen_or_i64
-#define tcg_gen_ori_reg      tcg_gen_ori_i64
-#define tcg_gen_xor_reg      tcg_gen_xor_i64
-#define tcg_gen_xori_reg     tcg_gen_xori_i64
-#define tcg_gen_not_reg      tcg_gen_not_i64
-#define tcg_gen_shl_reg      tcg_gen_shl_i64
-#define tcg_gen_shli_reg     tcg_gen_shli_i64
-#define tcg_gen_shr_reg      tcg_gen_shr_i64
-#define tcg_gen_shri_reg     tcg_gen_shri_i64
-#define tcg_gen_sar_reg      tcg_gen_sar_i64
-#define tcg_gen_sari_reg     tcg_gen_sari_i64
-#define tcg_gen_brcond_reg   tcg_gen_brcond_i64
-#define tcg_gen_brcondi_reg  tcg_gen_brcondi_i64
-#define tcg_gen_setcond_reg  tcg_gen_setcond_i64
-#define tcg_gen_setcondi_reg tcg_gen_setcondi_i64
-#define tcg_gen_mul_reg      tcg_gen_mul_i64
-#define tcg_gen_muli_reg     tcg_gen_muli_i64
-#define tcg_gen_div_reg      tcg_gen_div_i64
-#define tcg_gen_rem_reg      tcg_gen_rem_i64
-#define tcg_gen_divu_reg     tcg_gen_divu_i64
-#define tcg_gen_remu_reg     tcg_gen_remu_i64
-#define tcg_gen_discard_reg  tcg_gen_discard_i64
-#define tcg_gen_trunc_reg_i32 tcg_gen_extrl_i64_i32
-#define tcg_gen_trunc_i64_reg tcg_gen_mov_i64
-#define tcg_gen_extu_i32_reg tcg_gen_extu_i32_i64
-#define tcg_gen_ext_i32_reg  tcg_gen_ext_i32_i64
-#define tcg_gen_extu_reg_i64 tcg_gen_mov_i64
-#define tcg_gen_ext_reg_i64  tcg_gen_mov_i64
-#define tcg_gen_ext8u_reg    tcg_gen_ext8u_i64
-#define tcg_gen_ext8s_reg    tcg_gen_ext8s_i64
-#define tcg_gen_ext16u_reg   tcg_gen_ext16u_i64
-#define tcg_gen_ext16s_reg   tcg_gen_ext16s_i64
-#define tcg_gen_ext32u_reg   tcg_gen_ext32u_i64
-#define tcg_gen_ext32s_reg   tcg_gen_ext32s_i64
-#define tcg_gen_bswap16_reg  tcg_gen_bswap16_i64
-#define tcg_gen_bswap32_reg  tcg_gen_bswap32_i64
-#define tcg_gen_bswap64_reg  tcg_gen_bswap64_i64
-#define tcg_gen_concat_reg_i64 tcg_gen_concat32_i64
-#define tcg_gen_andc_reg     tcg_gen_andc_i64
-#define tcg_gen_eqv_reg      tcg_gen_eqv_i64
-#define tcg_gen_nand_reg     tcg_gen_nand_i64
-#define tcg_gen_nor_reg      tcg_gen_nor_i64
-#define tcg_gen_orc_reg      tcg_gen_orc_i64
-#define tcg_gen_clz_reg      tcg_gen_clz_i64
-#define tcg_gen_ctz_reg      tcg_gen_ctz_i64
-#define tcg_gen_clzi_reg     tcg_gen_clzi_i64
-#define tcg_gen_ctzi_reg     tcg_gen_ctzi_i64
-#define tcg_gen_clrsb_reg    tcg_gen_clrsb_i64
-#define tcg_gen_ctpop_reg    tcg_gen_ctpop_i64
-#define tcg_gen_rotl_reg     tcg_gen_rotl_i64
-#define tcg_gen_rotli_reg    tcg_gen_rotli_i64
-#define tcg_gen_rotr_reg     tcg_gen_rotr_i64
-#define tcg_gen_rotri_reg    tcg_gen_rotri_i64
-#define tcg_gen_deposit_reg  tcg_gen_deposit_i64
-#define tcg_gen_deposit_z_reg tcg_gen_deposit_z_i64
-#define tcg_gen_extract_reg  tcg_gen_extract_i64
-#define tcg_gen_sextract_reg tcg_gen_sextract_i64
-#define tcg_gen_extract2_reg tcg_gen_extract2_i64
-#define tcg_constant_reg     tcg_constant_i64
-#define tcg_gen_movcond_reg  tcg_gen_movcond_i64
-#define tcg_gen_add2_reg     tcg_gen_add2_i64
-#define tcg_gen_sub2_reg     tcg_gen_sub2_i64
-#define tcg_gen_qemu_ld_reg  tcg_gen_qemu_ld_i64
-#define tcg_gen_qemu_st_reg  tcg_gen_qemu_st_i64
-#define tcg_gen_atomic_xchg_reg tcg_gen_atomic_xchg_i64
-#define tcg_gen_trunc_reg_ptr   tcg_gen_trunc_i64_ptr
-#else
-#define TCGv_reg             TCGv_i32
-#define tcg_temp_new         tcg_temp_new_i32
-#define tcg_global_mem_new   tcg_global_mem_new_i32
-
-#define tcg_gen_movi_reg     tcg_gen_movi_i32
-#define tcg_gen_mov_reg      tcg_gen_mov_i32
-#define tcg_gen_ld8u_reg     tcg_gen_ld8u_i32
-#define tcg_gen_ld8s_reg     tcg_gen_ld8s_i32
-#define tcg_gen_ld16u_reg    tcg_gen_ld16u_i32
-#define tcg_gen_ld16s_reg    tcg_gen_ld16s_i32
-#define tcg_gen_ld32u_reg    tcg_gen_ld_i32
-#define tcg_gen_ld32s_reg    tcg_gen_ld_i32
-#define tcg_gen_ld_reg       tcg_gen_ld_i32
-#define tcg_gen_st8_reg      tcg_gen_st8_i32
-#define tcg_gen_st16_reg     tcg_gen_st16_i32
-#define tcg_gen_st32_reg     tcg_gen_st32_i32
-#define tcg_gen_st_reg       tcg_gen_st_i32
-#define tcg_gen_add_reg      tcg_gen_add_i32
-#define tcg_gen_addi_reg     tcg_gen_addi_i32
-#define tcg_gen_sub_reg      tcg_gen_sub_i32
-#define tcg_gen_neg_reg      tcg_gen_neg_i32
-#define tcg_gen_subfi_reg    tcg_gen_subfi_i32
-#define tcg_gen_subi_reg     tcg_gen_subi_i32
-#define tcg_gen_and_reg      tcg_gen_and_i32
-#define tcg_gen_andi_reg     tcg_gen_andi_i32
-#define tcg_gen_or_reg       tcg_gen_or_i32
-#define tcg_gen_ori_reg      tcg_gen_ori_i32
-#define tcg_gen_xor_reg      tcg_gen_xor_i32
-#define tcg_gen_xori_reg     tcg_gen_xori_i32
-#define tcg_gen_not_reg      tcg_gen_not_i32
-#define tcg_gen_shl_reg      tcg_gen_shl_i32
-#define tcg_gen_shli_reg     tcg_gen_shli_i32
-#define tcg_gen_shr_reg      tcg_gen_shr_i32
-#define tcg_gen_shri_reg     tcg_gen_shri_i32
-#define tcg_gen_sar_reg      tcg_gen_sar_i32
-#define tcg_gen_sari_reg     tcg_gen_sari_i32
-#define tcg_gen_brcond_reg   tcg_gen_brcond_i32
-#define tcg_gen_brcondi_reg  tcg_gen_brcondi_i32
-#define tcg_gen_setcond_reg  tcg_gen_setcond_i32
-#define tcg_gen_setcondi_reg tcg_gen_setcondi_i32
-#define tcg_gen_mul_reg      tcg_gen_mul_i32
-#define tcg_gen_muli_reg     tcg_gen_muli_i32
-#define tcg_gen_div_reg      tcg_gen_div_i32
-#define tcg_gen_rem_reg      tcg_gen_rem_i32
-#define tcg_gen_divu_reg     tcg_gen_divu_i32
-#define tcg_gen_remu_reg     tcg_gen_remu_i32
-#define tcg_gen_discard_reg  tcg_gen_discard_i32
-#define tcg_gen_trunc_reg_i32 tcg_gen_mov_i32
-#define tcg_gen_trunc_i64_reg tcg_gen_extrl_i64_i32
-#define tcg_gen_extu_i32_reg tcg_gen_mov_i32
-#define tcg_gen_ext_i32_reg  tcg_gen_mov_i32
-#define tcg_gen_extu_reg_i64 tcg_gen_extu_i32_i64
-#define tcg_gen_ext_reg_i64  tcg_gen_ext_i32_i64
-#define tcg_gen_ext8u_reg    tcg_gen_ext8u_i32
-#define tcg_gen_ext8s_reg    tcg_gen_ext8s_i32
-#define tcg_gen_ext16u_reg   tcg_gen_ext16u_i32
-#define tcg_gen_ext16s_reg   tcg_gen_ext16s_i32
-#define tcg_gen_ext32u_reg   tcg_gen_mov_i32
-#define tcg_gen_ext32s_reg   tcg_gen_mov_i32
-#define tcg_gen_bswap16_reg  tcg_gen_bswap16_i32
-#define tcg_gen_bswap32_reg  tcg_gen_bswap32_i32
-#define tcg_gen_concat_reg_i64 tcg_gen_concat_i32_i64
-#define tcg_gen_andc_reg     tcg_gen_andc_i32
-#define tcg_gen_eqv_reg      tcg_gen_eqv_i32
-#define tcg_gen_nand_reg     tcg_gen_nand_i32
-#define tcg_gen_nor_reg      tcg_gen_nor_i32
-#define tcg_gen_orc_reg      tcg_gen_orc_i32
-#define tcg_gen_clz_reg      tcg_gen_clz_i32
-#define tcg_gen_ctz_reg      tcg_gen_ctz_i32
-#define tcg_gen_clzi_reg     tcg_gen_clzi_i32
-#define tcg_gen_ctzi_reg     tcg_gen_ctzi_i32
-#define tcg_gen_clrsb_reg    tcg_gen_clrsb_i32
-#define tcg_gen_ctpop_reg    tcg_gen_ctpop_i32
-#define tcg_gen_rotl_reg     tcg_gen_rotl_i32
-#define tcg_gen_rotli_reg    tcg_gen_rotli_i32
-#define tcg_gen_rotr_reg     tcg_gen_rotr_i32
-#define tcg_gen_rotri_reg    tcg_gen_rotri_i32
-#define tcg_gen_deposit_reg  tcg_gen_deposit_i32
-#define tcg_gen_deposit_z_reg tcg_gen_deposit_z_i32
-#define tcg_gen_extract_reg  tcg_gen_extract_i32
-#define tcg_gen_sextract_reg tcg_gen_sextract_i32
-#define tcg_gen_extract2_reg tcg_gen_extract2_i32
-#define tcg_constant_reg     tcg_constant_i32
-#define tcg_gen_movcond_reg  tcg_gen_movcond_i32
-#define tcg_gen_add2_reg     tcg_gen_add2_i32
-#define tcg_gen_sub2_reg     tcg_gen_sub2_i32
-#define tcg_gen_qemu_ld_reg  tcg_gen_qemu_ld_i32
-#define tcg_gen_qemu_st_reg  tcg_gen_qemu_st_i32
-#define tcg_gen_atomic_xchg_reg tcg_gen_atomic_xchg_i32
-#define tcg_gen_trunc_reg_ptr   tcg_gen_ext_i32_ptr
-#endif /* TARGET_REGISTER_BITS */
 
 typedef struct DisasCond {
     TCGCond c;
-    TCGv_reg a0, a1;
+    TCGv_i64 a0, a1;
 } DisasCond;
 
 typedef struct DisasContext {
     DisasContextBase base;
     CPUState *cs;
+    TCGOp *insn_start;
 
-    target_ureg iaoq_f;
-    target_ureg iaoq_b;
-    target_ureg iaoq_n;
-    TCGv_reg iaoq_n_var;
-
-    int ntempr, ntempl;
-    TCGv_reg tempr[8];
-    TCGv_tl  templ[4];
+    uint64_t iaoq_f;
+    uint64_t iaoq_b;
+    uint64_t iaoq_n;
+    TCGv_i64 iaoq_n_var;
 
     DisasCond null_cond;
     TCGLabel *null_lab;
 
+    TCGv_i64 zero;
+
     uint32_t insn;
     uint32_t tb_flags;
     int mmu_idx;
     int privilege;
     bool psw_n_nonzero;
+    bool is_pa20;
 
 #ifdef CONFIG_USER_ONLY
     MemOp unalign;
@@ -332,6 +128,23 @@ static int expand_shl11(DisasContext *ctx, int val)
     return val << 11;
 }
 
+static int assemble_6(DisasContext *ctx, int val)
+{
+    /*
+     * Officially, 32 * x + 32 - y.
+     * Here, x is already in bit 5, and y is [4:0].
+     * Since -y = ~y + 1, in 5 bits 32 - y => y ^ 31 + 1,
+     * with the overflow from bit 4 summing with x.
+     */
+    return (val ^ 31) + 1;
+}
+
+/* Translate CMPI doubleword conditions to standard. */
+static int cmpbid_c(DisasContext *ctx, int val)
+{
+    return val ? val : 4; /* 0 == "*<<" */
+}
+
 
 /* Include the auto-generated decoder.  */
 #include "decode-insns.c.inc"
@@ -350,24 +163,24 @@ static int expand_shl11(DisasContext *ctx, int val)
 #define DISAS_EXIT                  DISAS_TARGET_3
 
 /* global register indexes */
-static TCGv_reg cpu_gr[32];
+static TCGv_i64 cpu_gr[32];
 static TCGv_i64 cpu_sr[4];
 static TCGv_i64 cpu_srH;
-static TCGv_reg cpu_iaoq_f;
-static TCGv_reg cpu_iaoq_b;
+static TCGv_i64 cpu_iaoq_f;
+static TCGv_i64 cpu_iaoq_b;
 static TCGv_i64 cpu_iasq_f;
 static TCGv_i64 cpu_iasq_b;
-static TCGv_reg cpu_sar;
-static TCGv_reg cpu_psw_n;
-static TCGv_reg cpu_psw_v;
-static TCGv_reg cpu_psw_cb;
-static TCGv_reg cpu_psw_cb_msb;
+static TCGv_i64 cpu_sar;
+static TCGv_i64 cpu_psw_n;
+static TCGv_i64 cpu_psw_v;
+static TCGv_i64 cpu_psw_cb;
+static TCGv_i64 cpu_psw_cb_msb;
 
 void hppa_translate_init(void)
 {
 #define DEF_VAR(V)  { &cpu_##V, #V, offsetof(CPUHPPAState, V) }
 
-    typedef struct { TCGv_reg *var; const char *name; int ofs; } GlobalVar;
+    typedef struct { TCGv_i64 *var; const char *name; int ofs; } GlobalVar;
     static const GlobalVar vars[] = {
         { &cpu_sar, "sar", offsetof(CPUHPPAState, cr[CR_SAR]) },
         DEF_VAR(psw_n),
@@ -422,6 +235,13 @@ void hppa_translate_init(void)
                                         "iasq_b");
 }
 
+static void set_insn_breg(DisasContext *ctx, int breg)
+{
+    assert(ctx->insn_start != NULL);
+    tcg_set_insn_start_param(ctx->insn_start, 2, breg);
+    ctx->insn_start = NULL;
+}
+
 static DisasCond cond_make_f(void)
 {
     return (DisasCond){
@@ -445,36 +265,36 @@ static DisasCond cond_make_n(void)
     return (DisasCond){
         .c = TCG_COND_NE,
         .a0 = cpu_psw_n,
-        .a1 = tcg_constant_reg(0)
+        .a1 = tcg_constant_i64(0)
     };
 }
 
-static DisasCond cond_make_0_tmp(TCGCond c, TCGv_reg a0)
+static DisasCond cond_make_tmp(TCGCond c, TCGv_i64 a0, TCGv_i64 a1)
 {
     assert (c != TCG_COND_NEVER && c != TCG_COND_ALWAYS);
-    return (DisasCond){
-        .c = c, .a0 = a0, .a1 = tcg_constant_reg(0)
-    };
+    return (DisasCond){ .c = c, .a0 = a0, .a1 = a1 };
 }
 
-static DisasCond cond_make_0(TCGCond c, TCGv_reg a0)
+static DisasCond cond_make_0_tmp(TCGCond c, TCGv_i64 a0)
 {
-    TCGv_reg tmp = tcg_temp_new();
-    tcg_gen_mov_reg(tmp, a0);
-    return cond_make_0_tmp(c, tmp);
+    return cond_make_tmp(c, a0, tcg_constant_i64(0));
 }
 
-static DisasCond cond_make(TCGCond c, TCGv_reg a0, TCGv_reg a1)
+static DisasCond cond_make_0(TCGCond c, TCGv_i64 a0)
 {
-    DisasCond r = { .c = c };
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    tcg_gen_mov_i64(tmp, a0);
+    return cond_make_0_tmp(c, tmp);
+}
 
-    assert (c != TCG_COND_NEVER && c != TCG_COND_ALWAYS);
-    r.a0 = tcg_temp_new();
-    tcg_gen_mov_reg(r.a0, a0);
-    r.a1 = tcg_temp_new();
-    tcg_gen_mov_reg(r.a1, a1);
+static DisasCond cond_make(TCGCond c, TCGv_i64 a0, TCGv_i64 a1)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
 
-    return r;
+    tcg_gen_mov_i64(t0, a0);
+    tcg_gen_mov_i64(t1, a1);
+    return cond_make_tmp(c, t0, t1);
 }
 
 static void cond_free(DisasCond *cond)
@@ -492,60 +312,35 @@ static void cond_free(DisasCond *cond)
     }
 }
 
-static TCGv_reg get_temp(DisasContext *ctx)
-{
-    unsigned i = ctx->ntempr++;
-    g_assert(i < ARRAY_SIZE(ctx->tempr));
-    return ctx->tempr[i] = tcg_temp_new();
-}
-
-#ifndef CONFIG_USER_ONLY
-static TCGv_tl get_temp_tl(DisasContext *ctx)
-{
-    unsigned i = ctx->ntempl++;
-    g_assert(i < ARRAY_SIZE(ctx->templ));
-    return ctx->templ[i] = tcg_temp_new_tl();
-}
-#endif
-
-static TCGv_reg load_const(DisasContext *ctx, target_sreg v)
-{
-    TCGv_reg t = get_temp(ctx);
-    tcg_gen_movi_reg(t, v);
-    return t;
-}
-
-static TCGv_reg load_gpr(DisasContext *ctx, unsigned reg)
+static TCGv_i64 load_gpr(DisasContext *ctx, unsigned reg)
 {
     if (reg == 0) {
-        TCGv_reg t = get_temp(ctx);
-        tcg_gen_movi_reg(t, 0);
-        return t;
+        return ctx->zero;
     } else {
         return cpu_gr[reg];
     }
 }
 
-static TCGv_reg dest_gpr(DisasContext *ctx, unsigned reg)
+static TCGv_i64 dest_gpr(DisasContext *ctx, unsigned reg)
 {
     if (reg == 0 || ctx->null_cond.c != TCG_COND_NEVER) {
-        return get_temp(ctx);
+        return tcg_temp_new_i64();
     } else {
         return cpu_gr[reg];
     }
 }
 
-static void save_or_nullify(DisasContext *ctx, TCGv_reg dest, TCGv_reg t)
+static void save_or_nullify(DisasContext *ctx, TCGv_i64 dest, TCGv_i64 t)
 {
     if (ctx->null_cond.c != TCG_COND_NEVER) {
-        tcg_gen_movcond_reg(ctx->null_cond.c, dest, ctx->null_cond.a0,
+        tcg_gen_movcond_i64(ctx->null_cond.c, dest, ctx->null_cond.a0,
                             ctx->null_cond.a1, dest, t);
     } else {
-        tcg_gen_mov_reg(dest, t);
+        tcg_gen_mov_i64(dest, t);
     }
 }
 
-static void save_gpr(DisasContext *ctx, unsigned reg, TCGv_reg t)
+static void save_gpr(DisasContext *ctx, unsigned reg, TCGv_i64 t)
 {
     if (reg != 0) {
         save_or_nullify(ctx, cpu_gr[reg], t);
@@ -653,18 +448,18 @@ static void nullify_over(DisasContext *ctx)
 
         /* If we're using PSW[N], copy it to a temp because... */
         if (ctx->null_cond.a0 == cpu_psw_n) {
-            ctx->null_cond.a0 = tcg_temp_new();
-            tcg_gen_mov_reg(ctx->null_cond.a0, cpu_psw_n);
+            ctx->null_cond.a0 = tcg_temp_new_i64();
+            tcg_gen_mov_i64(ctx->null_cond.a0, cpu_psw_n);
         }
         /* ... we clear it before branching over the implementation,
            so that (1) it's clear after nullifying this insn and
            (2) if this insn nullifies the next, PSW[N] is valid.  */
         if (ctx->psw_n_nonzero) {
             ctx->psw_n_nonzero = false;
-            tcg_gen_movi_reg(cpu_psw_n, 0);
+            tcg_gen_movi_i64(cpu_psw_n, 0);
         }
 
-        tcg_gen_brcond_reg(ctx->null_cond.c, ctx->null_cond.a0,
+        tcg_gen_brcond_i64(ctx->null_cond.c, ctx->null_cond.a0,
                            ctx->null_cond.a1, ctx->null_lab);
         cond_free(&ctx->null_cond);
     }
@@ -675,12 +470,12 @@ static void nullify_save(DisasContext *ctx)
 {
     if (ctx->null_cond.c == TCG_COND_NEVER) {
         if (ctx->psw_n_nonzero) {
-            tcg_gen_movi_reg(cpu_psw_n, 0);
+            tcg_gen_movi_i64(cpu_psw_n, 0);
         }
         return;
     }
     if (ctx->null_cond.a0 != cpu_psw_n) {
-        tcg_gen_setcond_reg(ctx->null_cond.c, cpu_psw_n,
+        tcg_gen_setcond_i64(ctx->null_cond.c, cpu_psw_n,
                             ctx->null_cond.a0, ctx->null_cond.a1);
         ctx->psw_n_nonzero = true;
     }
@@ -693,7 +488,7 @@ static void nullify_save(DisasContext *ctx)
 static void nullify_set(DisasContext *ctx, bool x)
 {
     if (ctx->psw_n_nonzero || x) {
-        tcg_gen_movi_reg(cpu_psw_n, x);
+        tcg_gen_movi_i64(cpu_psw_n, x);
     }
 }
 
@@ -736,16 +531,36 @@ static bool nullify_end(DisasContext *ctx)
     return true;
 }
 
-static void copy_iaoq_entry(TCGv_reg dest, target_ureg ival, TCGv_reg vval)
+static uint64_t gva_offset_mask(DisasContext *ctx)
+{
+    return (ctx->tb_flags & PSW_W
+            ? MAKE_64BIT_MASK(0, 62)
+            : MAKE_64BIT_MASK(0, 32));
+}
+
+static void copy_iaoq_entry(DisasContext *ctx, TCGv_i64 dest,
+                            uint64_t ival, TCGv_i64 vval)
 {
-    if (unlikely(ival == -1)) {
-        tcg_gen_mov_reg(dest, vval);
+    uint64_t mask = gva_offset_mask(ctx);
+
+    if (ival != -1) {
+        tcg_gen_movi_i64(dest, ival & mask);
+        return;
+    }
+    tcg_debug_assert(vval != NULL);
+
+    /*
+     * We know that the IAOQ is already properly masked.
+     * This optimization is primarily for "iaoq_f = iaoq_b".
+     */
+    if (vval == cpu_iaoq_f || vval == cpu_iaoq_b) {
+        tcg_gen_mov_i64(dest, vval);
     } else {
-        tcg_gen_movi_reg(dest, ival);
+        tcg_gen_andi_i64(dest, vval, mask);
     }
 }
 
-static inline target_ureg iaoq_dest(DisasContext *ctx, target_sreg disp)
+static inline uint64_t iaoq_dest(DisasContext *ctx, int64_t disp)
 {
     return ctx->iaoq_f + disp + 8;
 }
@@ -757,8 +572,8 @@ static void gen_excp_1(int exception)
 
 static void gen_excp(DisasContext *ctx, int exception)
 {
-    copy_iaoq_entry(cpu_iaoq_f, ctx->iaoq_f, cpu_iaoq_f);
-    copy_iaoq_entry(cpu_iaoq_b, ctx->iaoq_b, cpu_iaoq_b);
+    copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_f, cpu_iaoq_f);
+    copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_b, cpu_iaoq_b);
     nullify_save(ctx);
     gen_excp_1(exception);
     ctx->base.is_jmp = DISAS_NORETURN;
@@ -767,7 +582,7 @@ static void gen_excp(DisasContext *ctx, int exception)
 static bool gen_excp_iir(DisasContext *ctx, int exc)
 {
     nullify_over(ctx);
-    tcg_gen_st_reg(tcg_constant_reg(ctx->insn),
+    tcg_gen_st_i64(tcg_constant_i64(ctx->insn),
                    tcg_env, offsetof(CPUHPPAState, cr[CR_IIR]));
     gen_excp(ctx, exc);
     return nullify_end(ctx);
@@ -790,7 +605,7 @@ static bool gen_illegal(DisasContext *ctx)
     } while (0)
 #endif
 
-static bool use_goto_tb(DisasContext *ctx, target_ureg dest)
+static bool use_goto_tb(DisasContext *ctx, uint64_t dest)
 {
     return translator_use_goto_tb(&ctx->base, dest);
 }
@@ -806,16 +621,16 @@ static bool use_nullify_skip(DisasContext *ctx)
 }
 
 static void gen_goto_tb(DisasContext *ctx, int which,
-                        target_ureg f, target_ureg b)
+                        uint64_t f, uint64_t b)
 {
     if (f != -1 && b != -1 && use_goto_tb(ctx, f)) {
         tcg_gen_goto_tb(which);
-        tcg_gen_movi_reg(cpu_iaoq_f, f);
-        tcg_gen_movi_reg(cpu_iaoq_b, b);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, f, NULL);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, b, NULL);
         tcg_gen_exit_tb(ctx->base.tb, which);
     } else {
-        copy_iaoq_entry(cpu_iaoq_f, f, cpu_iaoq_b);
-        copy_iaoq_entry(cpu_iaoq_b, b, ctx->iaoq_n_var);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, f, cpu_iaoq_b);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, b, ctx->iaoq_n_var);
         tcg_gen_lookup_and_goto_ptr();
     }
 }
@@ -830,27 +645,41 @@ static bool cond_need_cb(int c)
     return c == 4 || c == 5;
 }
 
+/* Need extensions from TCGv_i32 to TCGv_i64. */
+static bool cond_need_ext(DisasContext *ctx, bool d)
+{
+    return !(ctx->is_pa20 && d);
+}
+
 /*
  * Compute conditional for arithmetic.  See Page 5-3, Table 5-1, of
  * the Parisc 1.1 Architecture Reference Manual for details.
  */
 
-static DisasCond do_cond(unsigned cf, TCGv_reg res,
-                         TCGv_reg cb_msb, TCGv_reg sv)
+static DisasCond do_cond(DisasContext *ctx, unsigned cf, bool d,
+                         TCGv_i64 res, TCGv_i64 cb_msb, TCGv_i64 sv)
 {
     DisasCond cond;
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
 
     switch (cf >> 1) {
     case 0: /* Never / TR    (0 / 1) */
         cond = cond_make_f();
         break;
     case 1: /* = / <>        (Z / !Z) */
+        if (cond_need_ext(ctx, d)) {
+            tmp = tcg_temp_new_i64();
+            tcg_gen_ext32u_i64(tmp, res);
+            res = tmp;
+        }
         cond = cond_make_0(TCG_COND_EQ, res);
         break;
     case 2: /* < / >=        (N ^ V / !(N ^ V) */
-        tmp = tcg_temp_new();
-        tcg_gen_xor_reg(tmp, res, sv);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_xor_i64(tmp, res, sv);
+        if (cond_need_ext(ctx, d)) {
+            tcg_gen_ext32s_i64(tmp, tmp);
+        }
         cond = cond_make_0_tmp(TCG_COND_LT, tmp);
         break;
     case 3: /* <= / >        (N ^ V) | Z / !((N ^ V) | Z) */
@@ -863,27 +692,42 @@ static DisasCond do_cond(unsigned cf, TCGv_reg res,
          *   !(~(res ^ sv) >> 31) | !res
          *   !(~(res ^ sv) >> 31 & res)
          */
-        tmp = tcg_temp_new();
-        tcg_gen_eqv_reg(tmp, res, sv);
-        tcg_gen_sari_reg(tmp, tmp, TARGET_REGISTER_BITS - 1);
-        tcg_gen_and_reg(tmp, tmp, res);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_eqv_i64(tmp, res, sv);
+        if (cond_need_ext(ctx, d)) {
+            tcg_gen_sextract_i64(tmp, tmp, 31, 1);
+            tcg_gen_and_i64(tmp, tmp, res);
+            tcg_gen_ext32u_i64(tmp, tmp);
+        } else {
+            tcg_gen_sari_i64(tmp, tmp, 63);
+            tcg_gen_and_i64(tmp, tmp, res);
+        }
         cond = cond_make_0_tmp(TCG_COND_EQ, tmp);
         break;
     case 4: /* NUV / UV      (!C / C) */
+        /* Only bit 0 of cb_msb is ever set. */
         cond = cond_make_0(TCG_COND_EQ, cb_msb);
         break;
     case 5: /* ZNV / VNZ     (!C | Z / C & !Z) */
-        tmp = tcg_temp_new();
-        tcg_gen_neg_reg(tmp, cb_msb);
-        tcg_gen_and_reg(tmp, tmp, res);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_neg_i64(tmp, cb_msb);
+        tcg_gen_and_i64(tmp, tmp, res);
+        if (cond_need_ext(ctx, d)) {
+            tcg_gen_ext32u_i64(tmp, tmp);
+        }
         cond = cond_make_0_tmp(TCG_COND_EQ, tmp);
         break;
     case 6: /* SV / NSV      (V / !V) */
+        if (cond_need_ext(ctx, d)) {
+            tmp = tcg_temp_new_i64();
+            tcg_gen_ext32s_i64(tmp, sv);
+            sv = tmp;
+        }
         cond = cond_make_0(TCG_COND_LT, sv);
         break;
     case 7: /* OD / EV */
-        tmp = tcg_temp_new();
-        tcg_gen_andi_reg(tmp, res, 1);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_andi_i64(tmp, res, 1);
         cond = cond_make_0_tmp(TCG_COND_NE, tmp);
         break;
     default:
@@ -900,35 +744,55 @@ static DisasCond do_cond(unsigned cf, TCGv_reg res,
    can use the inputs directly.  This can allow other computation to be
    deleted as unused.  */
 
-static DisasCond do_sub_cond(unsigned cf, TCGv_reg res,
-                             TCGv_reg in1, TCGv_reg in2, TCGv_reg sv)
+static DisasCond do_sub_cond(DisasContext *ctx, unsigned cf, bool d,
+                             TCGv_i64 res, TCGv_i64 in1,
+                             TCGv_i64 in2, TCGv_i64 sv)
 {
-    DisasCond cond;
+    TCGCond tc;
+    bool ext_uns;
 
     switch (cf >> 1) {
     case 1: /* = / <> */
-        cond = cond_make(TCG_COND_EQ, in1, in2);
+        tc = TCG_COND_EQ;
+        ext_uns = true;
         break;
     case 2: /* < / >= */
-        cond = cond_make(TCG_COND_LT, in1, in2);
+        tc = TCG_COND_LT;
+        ext_uns = false;
         break;
     case 3: /* <= / > */
-        cond = cond_make(TCG_COND_LE, in1, in2);
+        tc = TCG_COND_LE;
+        ext_uns = false;
         break;
     case 4: /* << / >>= */
-        cond = cond_make(TCG_COND_LTU, in1, in2);
+        tc = TCG_COND_LTU;
+        ext_uns = true;
         break;
     case 5: /* <<= / >> */
-        cond = cond_make(TCG_COND_LEU, in1, in2);
+        tc = TCG_COND_LEU;
+        ext_uns = true;
         break;
     default:
-        return do_cond(cf, res, NULL, sv);
+        return do_cond(ctx, cf, d, res, NULL, sv);
     }
+
     if (cf & 1) {
-        cond.c = tcg_invert_cond(cond.c);
+        tc = tcg_invert_cond(tc);
     }
+    if (cond_need_ext(ctx, d)) {
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        TCGv_i64 t2 = tcg_temp_new_i64();
 
-    return cond;
+        if (ext_uns) {
+            tcg_gen_ext32u_i64(t1, in1);
+            tcg_gen_ext32u_i64(t2, in2);
+        } else {
+            tcg_gen_ext32s_i64(t1, in1);
+            tcg_gen_ext32s_i64(t2, in2);
+        }
+        return cond_make_tmp(tc, t1, t2);
+    }
+    return cond_make(tc, in1, in2);
 }
 
 /*
@@ -940,8 +804,12 @@ static DisasCond do_sub_cond(unsigned cf, TCGv_reg res,
  * how cases c={2,3} are treated.
  */
 
-static DisasCond do_log_cond(unsigned cf, TCGv_reg res)
+static DisasCond do_log_cond(DisasContext *ctx, unsigned cf, bool d,
+                             TCGv_i64 res)
 {
+    TCGCond tc;
+    bool ext_uns;
+
     switch (cf) {
     case 0:  /* never */
     case 9:  /* undef, C */
@@ -956,30 +824,55 @@ static DisasCond do_log_cond(unsigned cf, TCGv_reg res)
         return cond_make_t();
 
     case 2:  /* == */
-        return cond_make_0(TCG_COND_EQ, res);
+        tc = TCG_COND_EQ;
+        ext_uns = true;
+        break;
     case 3:  /* <> */
-        return cond_make_0(TCG_COND_NE, res);
+        tc = TCG_COND_NE;
+        ext_uns = true;
+        break;
     case 4:  /* < */
-        return cond_make_0(TCG_COND_LT, res);
+        tc = TCG_COND_LT;
+        ext_uns = false;
+        break;
     case 5:  /* >= */
-        return cond_make_0(TCG_COND_GE, res);
+        tc = TCG_COND_GE;
+        ext_uns = false;
+        break;
     case 6:  /* <= */
-        return cond_make_0(TCG_COND_LE, res);
+        tc = TCG_COND_LE;
+        ext_uns = false;
+        break;
     case 7:  /* > */
-        return cond_make_0(TCG_COND_GT, res);
+        tc = TCG_COND_GT;
+        ext_uns = false;
+        break;
 
     case 14: /* OD */
     case 15: /* EV */
-        return do_cond(cf, res, NULL, NULL);
+        return do_cond(ctx, cf, d, res, NULL, NULL);
 
     default:
         g_assert_not_reached();
     }
+
+    if (cond_need_ext(ctx, d)) {
+        TCGv_i64 tmp = tcg_temp_new_i64();
+
+        if (ext_uns) {
+            tcg_gen_ext32u_i64(tmp, res);
+        } else {
+            tcg_gen_ext32s_i64(tmp, res);
+        }
+        return cond_make_0_tmp(tc, tmp);
+    }
+    return cond_make_0(tc, res);
 }
 
 /* Similar, but for shift/extract/deposit conditions.  */
 
-static DisasCond do_sed_cond(unsigned orig, TCGv_reg res)
+static DisasCond do_sed_cond(DisasContext *ctx, unsigned orig, bool d,
+                             TCGv_i64 res)
 {
     unsigned c, f;
 
@@ -992,28 +885,29 @@ static DisasCond do_sed_cond(unsigned orig, TCGv_reg res)
     }
     f = (orig & 4) / 4;
 
-    return do_log_cond(c * 2 + f, res);
+    return do_log_cond(ctx, c * 2 + f, d, res);
 }
 
 /* Similar, but for unit conditions.  */
 
-static DisasCond do_unit_cond(unsigned cf, TCGv_reg res,
-                              TCGv_reg in1, TCGv_reg in2)
+static DisasCond do_unit_cond(unsigned cf, bool d, TCGv_i64 res,
+                              TCGv_i64 in1, TCGv_i64 in2)
 {
     DisasCond cond;
-    TCGv_reg tmp, cb = NULL;
+    TCGv_i64 tmp, cb = NULL;
+    uint64_t d_repl = d ? 0x0000000100000001ull : 1;
 
     if (cf & 8) {
         /* Since we want to test lots of carry-out bits all at once, do not
          * do our normal thing and compute carry-in of bit B+1 since that
          * leaves us with carry bits spread across two words.
          */
-        cb = tcg_temp_new();
-        tmp = tcg_temp_new();
-        tcg_gen_or_reg(cb, in1, in2);
-        tcg_gen_and_reg(tmp, in1, in2);
-        tcg_gen_andc_reg(cb, cb, res);
-        tcg_gen_or_reg(cb, cb, tmp);
+        cb = tcg_temp_new_i64();
+        tmp = tcg_temp_new_i64();
+        tcg_gen_or_i64(cb, in1, in2);
+        tcg_gen_and_i64(tmp, in1, in2);
+        tcg_gen_andc_i64(cb, cb, res);
+        tcg_gen_or_i64(cb, cb, tmp);
     }
 
     switch (cf >> 1) {
@@ -1027,33 +921,33 @@ static DisasCond do_unit_cond(unsigned cf, TCGv_reg res,
         /* See hasless(v,1) from
          * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
          */
-        tmp = tcg_temp_new();
-        tcg_gen_subi_reg(tmp, res, 0x01010101u);
-        tcg_gen_andc_reg(tmp, tmp, res);
-        tcg_gen_andi_reg(tmp, tmp, 0x80808080u);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_subi_i64(tmp, res, d_repl * 0x01010101u);
+        tcg_gen_andc_i64(tmp, tmp, res);
+        tcg_gen_andi_i64(tmp, tmp, d_repl * 0x80808080u);
         cond = cond_make_0(TCG_COND_NE, tmp);
         break;
 
     case 3: /* SHZ / NHZ */
-        tmp = tcg_temp_new();
-        tcg_gen_subi_reg(tmp, res, 0x00010001u);
-        tcg_gen_andc_reg(tmp, tmp, res);
-        tcg_gen_andi_reg(tmp, tmp, 0x80008000u);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_subi_i64(tmp, res, d_repl * 0x00010001u);
+        tcg_gen_andc_i64(tmp, tmp, res);
+        tcg_gen_andi_i64(tmp, tmp, d_repl * 0x80008000u);
         cond = cond_make_0(TCG_COND_NE, tmp);
         break;
 
     case 4: /* SDC / NDC */
-        tcg_gen_andi_reg(cb, cb, 0x88888888u);
+        tcg_gen_andi_i64(cb, cb, d_repl * 0x88888888u);
         cond = cond_make_0(TCG_COND_NE, cb);
         break;
 
     case 6: /* SBC / NBC */
-        tcg_gen_andi_reg(cb, cb, 0x80808080u);
+        tcg_gen_andi_i64(cb, cb, d_repl * 0x80808080u);
         cond = cond_make_0(TCG_COND_NE, cb);
         break;
 
     case 7: /* SHC / NHC */
-        tcg_gen_andi_reg(cb, cb, 0x80008000u);
+        tcg_gen_andi_i64(cb, cb, d_repl * 0x80008000u);
         cond = cond_make_0(TCG_COND_NE, cb);
         break;
 
@@ -1067,68 +961,87 @@ static DisasCond do_unit_cond(unsigned cf, TCGv_reg res,
     return cond;
 }
 
+static TCGv_i64 get_carry(DisasContext *ctx, bool d,
+                          TCGv_i64 cb, TCGv_i64 cb_msb)
+{
+    if (cond_need_ext(ctx, d)) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_extract_i64(t, cb, 32, 1);
+        return t;
+    }
+    return cb_msb;
+}
+
+static TCGv_i64 get_psw_carry(DisasContext *ctx, bool d)
+{
+    return get_carry(ctx, d, cpu_psw_cb, cpu_psw_cb_msb);
+}
+
 /* Compute signed overflow for addition.  */
-static TCGv_reg do_add_sv(DisasContext *ctx, TCGv_reg res,
-                          TCGv_reg in1, TCGv_reg in2)
+static TCGv_i64 do_add_sv(DisasContext *ctx, TCGv_i64 res,
+                          TCGv_i64 in1, TCGv_i64 in2)
 {
-    TCGv_reg sv = get_temp(ctx);
-    TCGv_reg tmp = tcg_temp_new();
+    TCGv_i64 sv = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
 
-    tcg_gen_xor_reg(sv, res, in1);
-    tcg_gen_xor_reg(tmp, in1, in2);
-    tcg_gen_andc_reg(sv, sv, tmp);
+    tcg_gen_xor_i64(sv, res, in1);
+    tcg_gen_xor_i64(tmp, in1, in2);
+    tcg_gen_andc_i64(sv, sv, tmp);
 
     return sv;
 }
 
 /* Compute signed overflow for subtraction.  */
-static TCGv_reg do_sub_sv(DisasContext *ctx, TCGv_reg res,
-                          TCGv_reg in1, TCGv_reg in2)
+static TCGv_i64 do_sub_sv(DisasContext *ctx, TCGv_i64 res,
+                          TCGv_i64 in1, TCGv_i64 in2)
 {
-    TCGv_reg sv = get_temp(ctx);
-    TCGv_reg tmp = tcg_temp_new();
+    TCGv_i64 sv = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
 
-    tcg_gen_xor_reg(sv, res, in1);
-    tcg_gen_xor_reg(tmp, in1, in2);
-    tcg_gen_and_reg(sv, sv, tmp);
+    tcg_gen_xor_i64(sv, res, in1);
+    tcg_gen_xor_i64(tmp, in1, in2);
+    tcg_gen_and_i64(sv, sv, tmp);
 
     return sv;
 }
 
-static void do_add(DisasContext *ctx, unsigned rt, TCGv_reg in1,
-                   TCGv_reg in2, unsigned shift, bool is_l,
-                   bool is_tsv, bool is_tc, bool is_c, unsigned cf)
+static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 in1,
+                   TCGv_i64 in2, unsigned shift, bool is_l,
+                   bool is_tsv, bool is_tc, bool is_c, unsigned cf, bool d)
 {
-    TCGv_reg dest, cb, cb_msb, sv, tmp;
+    TCGv_i64 dest, cb, cb_msb, cb_cond, sv, tmp;
     unsigned c = cf >> 1;
     DisasCond cond;
 
-    dest = tcg_temp_new();
+    dest = tcg_temp_new_i64();
     cb = NULL;
     cb_msb = NULL;
+    cb_cond = NULL;
 
     if (shift) {
-        tmp = get_temp(ctx);
-        tcg_gen_shli_reg(tmp, in1, shift);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_shli_i64(tmp, in1, shift);
         in1 = tmp;
     }
 
     if (!is_l || cond_need_cb(c)) {
-        TCGv_reg zero = tcg_constant_reg(0);
-        cb_msb = get_temp(ctx);
-        tcg_gen_add2_reg(dest, cb_msb, in1, zero, in2, zero);
+        cb_msb = tcg_temp_new_i64();
+        cb = tcg_temp_new_i64();
+
+        tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero);
         if (is_c) {
-            tcg_gen_add2_reg(dest, cb_msb, dest, cb_msb, cpu_psw_cb_msb, zero);
+            tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb,
+                             get_psw_carry(ctx, d), ctx->zero);
         }
-        if (!is_l) {
-            cb = get_temp(ctx);
-            tcg_gen_xor_reg(cb, in1, in2);
-            tcg_gen_xor_reg(cb, cb, dest);
+        tcg_gen_xor_i64(cb, in1, in2);
+        tcg_gen_xor_i64(cb, cb, dest);
+        if (cond_need_cb(c)) {
+            cb_cond = get_carry(ctx, d, cb, cb_msb);
         }
     } else {
-        tcg_gen_add_reg(dest, in1, in2);
+        tcg_gen_add_i64(dest, in1, in2);
         if (is_c) {
-            tcg_gen_add_reg(dest, dest, cpu_psw_cb_msb);
+            tcg_gen_add_i64(dest, dest, get_psw_carry(ctx, d));
         }
     }
 
@@ -1143,10 +1056,10 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_reg in1,
     }
 
     /* Emit any conditional trap before any writeback.  */
-    cond = do_cond(cf, dest, cb_msb, sv);
+    cond = do_cond(ctx, cf, d, dest, cb_cond, sv);
     if (is_tc) {
-        tmp = tcg_temp_new();
-        tcg_gen_setcond_reg(cond.c, tmp, cond.a0, cond.a1);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond.c, tmp, cond.a0, cond.a1);
         gen_helper_tcond(tcg_env, tmp);
     }
 
@@ -1162,61 +1075,65 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_reg in1,
     ctx->null_cond = cond;
 }
 
-static bool do_add_reg(DisasContext *ctx, arg_rrr_cf_sh *a,
+static bool do_add_reg(DisasContext *ctx, arg_rrr_cf_d_sh *a,
                        bool is_l, bool is_tsv, bool is_tc, bool is_c)
 {
-    TCGv_reg tcg_r1, tcg_r2;
+    TCGv_i64 tcg_r1, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
-    do_add(ctx, a->t, tcg_r1, tcg_r2, a->sh, is_l, is_tsv, is_tc, is_c, a->cf);
+    do_add(ctx, a->t, tcg_r1, tcg_r2, a->sh, is_l,
+           is_tsv, is_tc, is_c, a->cf, a->d);
     return nullify_end(ctx);
 }
 
 static bool do_add_imm(DisasContext *ctx, arg_rri_cf *a,
                        bool is_tsv, bool is_tc)
 {
-    TCGv_reg tcg_im, tcg_r2;
+    TCGv_i64 tcg_im, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
-    tcg_im = load_const(ctx, a->i);
+    tcg_im = tcg_constant_i64(a->i);
     tcg_r2 = load_gpr(ctx, a->r);
-    do_add(ctx, a->t, tcg_im, tcg_r2, 0, 0, is_tsv, is_tc, 0, a->cf);
+    /* All ADDI conditions are 32-bit. */
+    do_add(ctx, a->t, tcg_im, tcg_r2, 0, 0, is_tsv, is_tc, 0, a->cf, false);
     return nullify_end(ctx);
 }
 
-static void do_sub(DisasContext *ctx, unsigned rt, TCGv_reg in1,
-                   TCGv_reg in2, bool is_tsv, bool is_b,
-                   bool is_tc, unsigned cf)
+static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1,
+                   TCGv_i64 in2, bool is_tsv, bool is_b,
+                   bool is_tc, unsigned cf, bool d)
 {
-    TCGv_reg dest, sv, cb, cb_msb, zero, tmp;
+    TCGv_i64 dest, sv, cb, cb_msb, tmp;
     unsigned c = cf >> 1;
     DisasCond cond;
 
-    dest = tcg_temp_new();
-    cb = tcg_temp_new();
-    cb_msb = tcg_temp_new();
+    dest = tcg_temp_new_i64();
+    cb = tcg_temp_new_i64();
+    cb_msb = tcg_temp_new_i64();
 
-    zero = tcg_constant_reg(0);
     if (is_b) {
         /* DEST,C = IN1 + ~IN2 + C.  */
-        tcg_gen_not_reg(cb, in2);
-        tcg_gen_add2_reg(dest, cb_msb, in1, zero, cpu_psw_cb_msb, zero);
-        tcg_gen_add2_reg(dest, cb_msb, dest, cb_msb, cb, zero);
-        tcg_gen_xor_reg(cb, cb, in1);
-        tcg_gen_xor_reg(cb, cb, dest);
+        tcg_gen_not_i64(cb, in2);
+        tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero,
+                         get_psw_carry(ctx, d), ctx->zero);
+        tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, cb, ctx->zero);
+        tcg_gen_xor_i64(cb, cb, in1);
+        tcg_gen_xor_i64(cb, cb, dest);
     } else {
-        /* DEST,C = IN1 + ~IN2 + 1.  We can produce the same result in fewer
-           operations by seeding the high word with 1 and subtracting.  */
-        tcg_gen_movi_reg(cb_msb, 1);
-        tcg_gen_sub2_reg(dest, cb_msb, in1, cb_msb, in2, zero);
-        tcg_gen_eqv_reg(cb, in1, in2);
-        tcg_gen_xor_reg(cb, cb, dest);
+        /*
+         * DEST,C = IN1 + ~IN2 + 1.  We can produce the same result in fewer
+         * operations by seeding the high word with 1 and subtracting.
+         */
+        TCGv_i64 one = tcg_constant_i64(1);
+        tcg_gen_sub2_i64(dest, cb_msb, in1, one, in2, ctx->zero);
+        tcg_gen_eqv_i64(cb, in1, in2);
+        tcg_gen_xor_i64(cb, cb, dest);
     }
 
     /* Compute signed overflow if required.  */
@@ -1230,15 +1147,15 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_reg in1,
 
     /* Compute the condition.  We cannot use the special case for borrow.  */
     if (!is_b) {
-        cond = do_sub_cond(cf, dest, in1, in2, sv);
+        cond = do_sub_cond(ctx, cf, d, dest, in1, in2, sv);
     } else {
-        cond = do_cond(cf, dest, cb_msb, sv);
+        cond = do_cond(ctx, cf, d, dest, get_carry(ctx, d, cb, cb_msb), sv);
     }
 
     /* Emit any conditional trap before any writeback.  */
     if (is_tc) {
-        tmp = tcg_temp_new();
-        tcg_gen_setcond_reg(cond.c, tmp, cond.a0, cond.a1);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond.c, tmp, cond.a0, cond.a1);
         gen_helper_tcond(tcg_env, tmp);
     }
 
@@ -1252,41 +1169,42 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_reg in1,
     ctx->null_cond = cond;
 }
 
-static bool do_sub_reg(DisasContext *ctx, arg_rrr_cf *a,
+static bool do_sub_reg(DisasContext *ctx, arg_rrr_cf_d *a,
                        bool is_tsv, bool is_b, bool is_tc)
 {
-    TCGv_reg tcg_r1, tcg_r2;
+    TCGv_i64 tcg_r1, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
-    do_sub(ctx, a->t, tcg_r1, tcg_r2, is_tsv, is_b, is_tc, a->cf);
+    do_sub(ctx, a->t, tcg_r1, tcg_r2, is_tsv, is_b, is_tc, a->cf, a->d);
     return nullify_end(ctx);
 }
 
 static bool do_sub_imm(DisasContext *ctx, arg_rri_cf *a, bool is_tsv)
 {
-    TCGv_reg tcg_im, tcg_r2;
+    TCGv_i64 tcg_im, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
-    tcg_im = load_const(ctx, a->i);
+    tcg_im = tcg_constant_i64(a->i);
     tcg_r2 = load_gpr(ctx, a->r);
-    do_sub(ctx, a->t, tcg_im, tcg_r2, is_tsv, 0, 0, a->cf);
+    /* All SUBI conditions are 32-bit. */
+    do_sub(ctx, a->t, tcg_im, tcg_r2, is_tsv, 0, 0, a->cf, false);
     return nullify_end(ctx);
 }
 
-static void do_cmpclr(DisasContext *ctx, unsigned rt, TCGv_reg in1,
-                      TCGv_reg in2, unsigned cf)
+static void do_cmpclr(DisasContext *ctx, unsigned rt, TCGv_i64 in1,
+                      TCGv_i64 in2, unsigned cf, bool d)
 {
-    TCGv_reg dest, sv;
+    TCGv_i64 dest, sv;
     DisasCond cond;
 
-    dest = tcg_temp_new();
-    tcg_gen_sub_reg(dest, in1, in2);
+    dest = tcg_temp_new_i64();
+    tcg_gen_sub_i64(dest, in1, in2);
 
     /* Compute signed overflow if required.  */
     sv = NULL;
@@ -1295,10 +1213,10 @@ static void do_cmpclr(DisasContext *ctx, unsigned rt, TCGv_reg in1,
     }
 
     /* Form the condition for the compare.  */
-    cond = do_sub_cond(cf, dest, in1, in2, sv);
+    cond = do_sub_cond(ctx, cf, d, dest, in1, in2, sv);
 
     /* Clear.  */
-    tcg_gen_movi_reg(dest, 0);
+    tcg_gen_movi_i64(dest, 0);
     save_gpr(ctx, rt, dest);
 
     /* Install the new nullification.  */
@@ -1306,11 +1224,11 @@ static void do_cmpclr(DisasContext *ctx, unsigned rt, TCGv_reg in1,
     ctx->null_cond = cond;
 }
 
-static void do_log(DisasContext *ctx, unsigned rt, TCGv_reg in1,
-                   TCGv_reg in2, unsigned cf,
-                   void (*fn)(TCGv_reg, TCGv_reg, TCGv_reg))
+static void do_log(DisasContext *ctx, unsigned rt, TCGv_i64 in1,
+                   TCGv_i64 in2, unsigned cf, bool d,
+                   void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
 {
-    TCGv_reg dest = dest_gpr(ctx, rt);
+    TCGv_i64 dest = dest_gpr(ctx, rt);
 
     /* Perform the operation, and writeback.  */
     fn(dest, in1, in2);
@@ -1319,29 +1237,29 @@ static void do_log(DisasContext *ctx, unsigned rt, TCGv_reg in1,
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (cf) {
-        ctx->null_cond = do_log_cond(cf, dest);
+        ctx->null_cond = do_log_cond(ctx, cf, d, dest);
     }
 }
 
-static bool do_log_reg(DisasContext *ctx, arg_rrr_cf *a,
-                       void (*fn)(TCGv_reg, TCGv_reg, TCGv_reg))
+static bool do_log_reg(DisasContext *ctx, arg_rrr_cf_d *a,
+                       void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
 {
-    TCGv_reg tcg_r1, tcg_r2;
+    TCGv_i64 tcg_r1, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
-    do_log(ctx, a->t, tcg_r1, tcg_r2, a->cf, fn);
+    do_log(ctx, a->t, tcg_r1, tcg_r2, a->cf, a->d, fn);
     return nullify_end(ctx);
 }
 
-static void do_unit(DisasContext *ctx, unsigned rt, TCGv_reg in1,
-                    TCGv_reg in2, unsigned cf, bool is_tc,
-                    void (*fn)(TCGv_reg, TCGv_reg, TCGv_reg))
+static void do_unit(DisasContext *ctx, unsigned rt, TCGv_i64 in1,
+                    TCGv_i64 in2, unsigned cf, bool d, bool is_tc,
+                    void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
     DisasCond cond;
 
     if (cf == 0) {
@@ -1350,14 +1268,14 @@ static void do_unit(DisasContext *ctx, unsigned rt, TCGv_reg in1,
         save_gpr(ctx, rt, dest);
         cond_free(&ctx->null_cond);
     } else {
-        dest = tcg_temp_new();
+        dest = tcg_temp_new_i64();
         fn(dest, in1, in2);
 
-        cond = do_unit_cond(cf, dest, in1, in2);
+        cond = do_unit_cond(cf, d, dest, in1, in2);
 
         if (is_tc) {
-            TCGv_reg tmp = tcg_temp_new();
-            tcg_gen_setcond_reg(cond.c, tmp, cond.a0, cond.a1);
+            TCGv_i64 tmp = tcg_temp_new_i64();
+            tcg_gen_setcond_i64(cond.c, tmp, cond.a0, cond.a1);
             gen_helper_tcond(tcg_env, tmp);
         }
         save_gpr(ctx, rt, dest);
@@ -1372,17 +1290,17 @@ static void do_unit(DisasContext *ctx, unsigned rt, TCGv_reg in1,
    from the top 2 bits of the base register.  There are a few system
    instructions that have a 3-bit space specifier, for which SR0 is
    not special.  To handle this, pass ~SP.  */
-static TCGv_i64 space_select(DisasContext *ctx, int sp, TCGv_reg base)
+static TCGv_i64 space_select(DisasContext *ctx, int sp, TCGv_i64 base)
 {
     TCGv_ptr ptr;
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
     TCGv_i64 spc;
 
     if (sp != 0) {
         if (sp < 0) {
             sp = ~sp;
         }
-        spc = get_temp_tl(ctx);
+        spc = tcg_temp_new_i64();
         load_spr(ctx, spc, sp);
         return spc;
     }
@@ -1391,12 +1309,13 @@ static TCGv_i64 space_select(DisasContext *ctx, int sp, TCGv_reg base)
     }
 
     ptr = tcg_temp_new_ptr();
-    tmp = tcg_temp_new();
-    spc = get_temp_tl(ctx);
+    tmp = tcg_temp_new_i64();
+    spc = tcg_temp_new_i64();
 
-    tcg_gen_shri_reg(tmp, base, TARGET_REGISTER_BITS - 5);
-    tcg_gen_andi_reg(tmp, tmp, 030);
-    tcg_gen_trunc_reg_ptr(ptr, tmp);
+    /* Extract top 2 bits of the address, shift left 3 for uint64_t index. */
+    tcg_gen_shri_i64(tmp, base, (ctx->tb_flags & PSW_W ? 64 : 32) - 5);
+    tcg_gen_andi_i64(tmp, tmp, 030);
+    tcg_gen_trunc_i64_ptr(ptr, tmp);
 
     tcg_gen_add_ptr(ptr, ptr, tcg_env);
     tcg_gen_ld_i64(spc, ptr, offsetof(CPUHPPAState, sr[4]));
@@ -1405,38 +1324,35 @@ static TCGv_i64 space_select(DisasContext *ctx, int sp, TCGv_reg base)
 }
 #endif
 
-static void form_gva(DisasContext *ctx, TCGv_tl *pgva, TCGv_reg *pofs,
-                     unsigned rb, unsigned rx, int scale, target_sreg disp,
+static void form_gva(DisasContext *ctx, TCGv_i64 *pgva, TCGv_i64 *pofs,
+                     unsigned rb, unsigned rx, int scale, int64_t disp,
                      unsigned sp, int modify, bool is_phys)
 {
-    TCGv_reg base = load_gpr(ctx, rb);
-    TCGv_reg ofs;
+    TCGv_i64 base = load_gpr(ctx, rb);
+    TCGv_i64 ofs;
+    TCGv_i64 addr;
+
+    set_insn_breg(ctx, rb);
 
     /* Note that RX is mutually exclusive with DISP.  */
     if (rx) {
-        ofs = get_temp(ctx);
-        tcg_gen_shli_reg(ofs, cpu_gr[rx], scale);
-        tcg_gen_add_reg(ofs, ofs, base);
+        ofs = tcg_temp_new_i64();
+        tcg_gen_shli_i64(ofs, cpu_gr[rx], scale);
+        tcg_gen_add_i64(ofs, ofs, base);
     } else if (disp || modify) {
-        ofs = get_temp(ctx);
-        tcg_gen_addi_reg(ofs, base, disp);
+        ofs = tcg_temp_new_i64();
+        tcg_gen_addi_i64(ofs, base, disp);
     } else {
         ofs = base;
     }
 
     *pofs = ofs;
-#ifdef CONFIG_USER_ONLY
-    *pgva = (modify <= 0 ? ofs : base);
-#else
-    TCGv_tl addr = get_temp_tl(ctx);
-    tcg_gen_extu_reg_tl(addr, modify <= 0 ? ofs : base);
-    if (ctx->tb_flags & PSW_W) {
-        tcg_gen_andi_tl(addr, addr, 0x3fffffffffffffffull);
-    }
+    *pgva = addr = tcg_temp_new_i64();
+    tcg_gen_andi_i64(addr, modify <= 0 ? ofs : base, gva_offset_mask(ctx));
+#ifndef CONFIG_USER_ONLY
     if (!is_phys) {
-        tcg_gen_or_tl(addr, addr, space_select(ctx, sp, base));
+        tcg_gen_or_i64(addr, addr, space_select(ctx, sp, base));
     }
-    *pgva = addr;
 #endif
 }
 
@@ -1446,29 +1362,29 @@ static void form_gva(DisasContext *ctx, TCGv_tl *pgva, TCGv_reg *pofs,
  * = 0 for no base register update.
  */
 static void do_load_32(DisasContext *ctx, TCGv_i32 dest, unsigned rb,
-                       unsigned rx, int scale, target_sreg disp,
+                       unsigned rx, int scale, int64_t disp,
                        unsigned sp, int modify, MemOp mop)
 {
-    TCGv_reg ofs;
-    TCGv_tl addr;
+    TCGv_i64 ofs;
+    TCGv_i64 addr;
 
     /* Caller uses nullify_over/nullify_end.  */
     assert(ctx->null_cond.c == TCG_COND_NEVER);
 
     form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify,
              ctx->mmu_idx == MMU_PHYS_IDX);
-    tcg_gen_qemu_ld_reg(dest, addr, ctx->mmu_idx, mop | UNALIGN(ctx));
+    tcg_gen_qemu_ld_i32(dest, addr, ctx->mmu_idx, mop | UNALIGN(ctx));
     if (modify) {
         save_gpr(ctx, rb, ofs);
     }
 }
 
 static void do_load_64(DisasContext *ctx, TCGv_i64 dest, unsigned rb,
-                       unsigned rx, int scale, target_sreg disp,
+                       unsigned rx, int scale, int64_t disp,
                        unsigned sp, int modify, MemOp mop)
 {
-    TCGv_reg ofs;
-    TCGv_tl addr;
+    TCGv_i64 ofs;
+    TCGv_i64 addr;
 
     /* Caller uses nullify_over/nullify_end.  */
     assert(ctx->null_cond.c == TCG_COND_NEVER);
@@ -1482,11 +1398,11 @@ static void do_load_64(DisasContext *ctx, TCGv_i64 dest, unsigned rb,
 }
 
 static void do_store_32(DisasContext *ctx, TCGv_i32 src, unsigned rb,
-                        unsigned rx, int scale, target_sreg disp,
+                        unsigned rx, int scale, int64_t disp,
                         unsigned sp, int modify, MemOp mop)
 {
-    TCGv_reg ofs;
-    TCGv_tl addr;
+    TCGv_i64 ofs;
+    TCGv_i64 addr;
 
     /* Caller uses nullify_over/nullify_end.  */
     assert(ctx->null_cond.c == TCG_COND_NEVER);
@@ -1500,11 +1416,11 @@ static void do_store_32(DisasContext *ctx, TCGv_i32 src, unsigned rb,
 }
 
 static void do_store_64(DisasContext *ctx, TCGv_i64 src, unsigned rb,
-                        unsigned rx, int scale, target_sreg disp,
+                        unsigned rx, int scale, int64_t disp,
                         unsigned sp, int modify, MemOp mop)
 {
-    TCGv_reg ofs;
-    TCGv_tl addr;
+    TCGv_i64 ofs;
+    TCGv_i64 addr;
 
     /* Caller uses nullify_over/nullify_end.  */
     assert(ctx->null_cond.c == TCG_COND_NEVER);
@@ -1517,19 +1433,11 @@ static void do_store_64(DisasContext *ctx, TCGv_i64 src, unsigned rb,
     }
 }
 
-#if TARGET_REGISTER_BITS == 64
-#define do_load_reg   do_load_64
-#define do_store_reg  do_store_64
-#else
-#define do_load_reg   do_load_32
-#define do_store_reg  do_store_32
-#endif
-
 static bool do_load(DisasContext *ctx, unsigned rt, unsigned rb,
-                    unsigned rx, int scale, target_sreg disp,
+                    unsigned rx, int scale, int64_t disp,
                     unsigned sp, int modify, MemOp mop)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
 
     nullify_over(ctx);
 
@@ -1538,16 +1446,16 @@ static bool do_load(DisasContext *ctx, unsigned rt, unsigned rb,
         dest = dest_gpr(ctx, rt);
     } else {
         /* Make sure if RT == RB, we see the result of the load.  */
-        dest = get_temp(ctx);
+        dest = tcg_temp_new_i64();
     }
-    do_load_reg(ctx, dest, rb, rx, scale, disp, sp, modify, mop);
+    do_load_64(ctx, dest, rb, rx, scale, disp, sp, modify, mop);
     save_gpr(ctx, rt, dest);
 
     return nullify_end(ctx);
 }
 
 static bool do_floadw(DisasContext *ctx, unsigned rt, unsigned rb,
-                      unsigned rx, int scale, target_sreg disp,
+                      unsigned rx, int scale, int64_t disp,
                       unsigned sp, int modify)
 {
     TCGv_i32 tmp;
@@ -1572,7 +1480,7 @@ static bool trans_fldw(DisasContext *ctx, arg_ldst *a)
 }
 
 static bool do_floadd(DisasContext *ctx, unsigned rt, unsigned rb,
-                      unsigned rx, int scale, target_sreg disp,
+                      unsigned rx, int scale, int64_t disp,
                       unsigned sp, int modify)
 {
     TCGv_i64 tmp;
@@ -1597,16 +1505,16 @@ static bool trans_fldd(DisasContext *ctx, arg_ldst *a)
 }
 
 static bool do_store(DisasContext *ctx, unsigned rt, unsigned rb,
-                     target_sreg disp, unsigned sp,
+                     int64_t disp, unsigned sp,
                      int modify, MemOp mop)
 {
     nullify_over(ctx);
-    do_store_reg(ctx, load_gpr(ctx, rt), rb, 0, 0, disp, sp, modify, mop);
+    do_store_64(ctx, load_gpr(ctx, rt), rb, 0, 0, disp, sp, modify, mop);
     return nullify_end(ctx);
 }
 
 static bool do_fstorew(DisasContext *ctx, unsigned rt, unsigned rb,
-                       unsigned rx, int scale, target_sreg disp,
+                       unsigned rx, int scale, int64_t disp,
                        unsigned sp, int modify)
 {
     TCGv_i32 tmp;
@@ -1626,7 +1534,7 @@ static bool trans_fstw(DisasContext *ctx, arg_ldst *a)
 }
 
 static bool do_fstored(DisasContext *ctx, unsigned rt, unsigned rb,
-                       unsigned rx, int scale, target_sreg disp,
+                       unsigned rx, int scale, int64_t disp,
                        unsigned sp, int modify)
 {
     TCGv_i64 tmp;
@@ -1739,12 +1647,12 @@ static bool do_fop_dedd(DisasContext *ctx, unsigned rt,
 
 /* Emit an unconditional branch to a direct target, which may or may not
    have already had nullification handled.  */
-static bool do_dbranch(DisasContext *ctx, target_ureg dest,
+static bool do_dbranch(DisasContext *ctx, uint64_t dest,
                        unsigned link, bool is_n)
 {
     if (ctx->null_cond.c == TCG_COND_NEVER && ctx->null_lab == NULL) {
         if (link != 0) {
-            copy_iaoq_entry(cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
+            copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
         }
         ctx->iaoq_n = dest;
         if (is_n) {
@@ -1754,7 +1662,7 @@ static bool do_dbranch(DisasContext *ctx, target_ureg dest,
         nullify_over(ctx);
 
         if (link != 0) {
-            copy_iaoq_entry(cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
+            copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
         }
 
         if (is_n && use_nullify_skip(ctx)) {
@@ -1776,10 +1684,10 @@ static bool do_dbranch(DisasContext *ctx, target_ureg dest,
 
 /* Emit a conditional branch to a direct target.  If the branch itself
    is nullified, we should have already used nullify_over.  */
-static bool do_cbranch(DisasContext *ctx, target_sreg disp, bool is_n,
+static bool do_cbranch(DisasContext *ctx, int64_t disp, bool is_n,
                        DisasCond *cond)
 {
-    target_ureg dest = iaoq_dest(ctx, disp);
+    uint64_t dest = iaoq_dest(ctx, disp);
     TCGLabel *taken = NULL;
     TCGCond c = cond->c;
     bool n;
@@ -1795,7 +1703,7 @@ static bool do_cbranch(DisasContext *ctx, target_sreg disp, bool is_n,
     }
 
     taken = gen_new_label();
-    tcg_gen_brcond_reg(c, cond->a0, cond->a1, taken);
+    tcg_gen_brcond_i64(c, cond->a0, cond->a1, taken);
     cond_free(cond);
 
     /* Not taken: Condition not satisfied; nullify on backward branches. */
@@ -1812,7 +1720,7 @@ static bool do_cbranch(DisasContext *ctx, target_sreg disp, bool is_n,
         if (ctx->iaoq_n == -1) {
             /* The temporary iaoq_n_var died at the branch above.
                Regenerate it here instead of saving it.  */
-            tcg_gen_addi_reg(ctx->iaoq_n_var, cpu_iaoq_b, 4);
+            tcg_gen_addi_i64(ctx->iaoq_n_var, cpu_iaoq_b, 4);
         }
         gen_goto_tb(ctx, 0, ctx->iaoq_b, ctx->iaoq_n);
     }
@@ -1842,24 +1750,25 @@ static bool do_cbranch(DisasContext *ctx, target_sreg disp, bool is_n,
 
 /* Emit an unconditional branch to an indirect target.  This handles
    nullification of the branch itself.  */
-static bool do_ibranch(DisasContext *ctx, TCGv_reg dest,
+static bool do_ibranch(DisasContext *ctx, TCGv_i64 dest,
                        unsigned link, bool is_n)
 {
-    TCGv_reg a0, a1, next, tmp;
+    TCGv_i64 a0, a1, next, tmp;
     TCGCond c;
 
     assert(ctx->null_lab == NULL);
 
     if (ctx->null_cond.c == TCG_COND_NEVER) {
         if (link != 0) {
-            copy_iaoq_entry(cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
+            copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
         }
-        next = get_temp(ctx);
-        tcg_gen_mov_reg(next, dest);
+        next = tcg_temp_new_i64();
+        tcg_gen_mov_i64(next, dest);
         if (is_n) {
             if (use_nullify_skip(ctx)) {
-                tcg_gen_mov_reg(cpu_iaoq_f, next);
-                tcg_gen_addi_reg(cpu_iaoq_b, next, 4);
+                copy_iaoq_entry(ctx, cpu_iaoq_f, -1, next);
+                tcg_gen_addi_i64(next, next, 4);
+                copy_iaoq_entry(ctx, cpu_iaoq_b, -1, next);
                 nullify_set(ctx, 0);
                 ctx->base.is_jmp = DISAS_IAQ_N_UPDATED;
                 return true;
@@ -1881,12 +1790,14 @@ static bool do_ibranch(DisasContext *ctx, TCGv_reg dest,
         /* We do have to handle the non-local temporary, DEST, before
            branching.  Since IOAQ_F is not really live at this point, we
            can simply store DEST optimistically.  Similarly with IAOQ_B.  */
-        tcg_gen_mov_reg(cpu_iaoq_f, dest);
-        tcg_gen_addi_reg(cpu_iaoq_b, dest, 4);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, -1, dest);
+        next = tcg_temp_new_i64();
+        tcg_gen_addi_i64(next, dest, 4);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, -1, next);
 
         nullify_over(ctx);
         if (link != 0) {
-            tcg_gen_movi_reg(cpu_gr[link], ctx->iaoq_n);
+            copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var);
         }
         tcg_gen_lookup_and_goto_ptr();
         return nullify_end(ctx);
@@ -1895,23 +1806,23 @@ static bool do_ibranch(DisasContext *ctx, TCGv_reg dest,
         a0 = ctx->null_cond.a0;
         a1 = ctx->null_cond.a1;
 
-        tmp = tcg_temp_new();
-        next = get_temp(ctx);
+        tmp = tcg_temp_new_i64();
+        next = tcg_temp_new_i64();
 
-        copy_iaoq_entry(tmp, ctx->iaoq_n, ctx->iaoq_n_var);
-        tcg_gen_movcond_reg(c, next, a0, a1, tmp, dest);
+        copy_iaoq_entry(ctx, tmp, ctx->iaoq_n, ctx->iaoq_n_var);
+        tcg_gen_movcond_i64(c, next, a0, a1, tmp, dest);
         ctx->iaoq_n = -1;
         ctx->iaoq_n_var = next;
 
         if (link != 0) {
-            tcg_gen_movcond_reg(c, cpu_gr[link], a0, a1, cpu_gr[link], tmp);
+            tcg_gen_movcond_i64(c, cpu_gr[link], a0, a1, cpu_gr[link], tmp);
         }
 
         if (is_n) {
             /* The branch nullifies the next insn, which means the state of N
                after the branch is the inverse of the state of N that applied
                to the branch.  */
-            tcg_gen_setcond_reg(tcg_invert_cond(c), cpu_psw_n, a0, a1);
+            tcg_gen_setcond_i64(tcg_invert_cond(c), cpu_psw_n, a0, a1);
             cond_free(&ctx->null_cond);
             ctx->null_cond = cond_make_n();
             ctx->psw_n_nonzero = true;
@@ -1929,23 +1840,23 @@ static bool do_ibranch(DisasContext *ctx, TCGv_reg dest,
  *      IAOQ_Next{30..31} ← IAOQ_Front{30..31};
  * which keeps the privilege level from being increased.
  */
-static TCGv_reg do_ibranch_priv(DisasContext *ctx, TCGv_reg offset)
+static TCGv_i64 do_ibranch_priv(DisasContext *ctx, TCGv_i64 offset)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
     switch (ctx->privilege) {
     case 0:
         /* Privilege 0 is maximum and is allowed to decrease.  */
         return offset;
     case 3:
         /* Privilege 3 is minimum and is never allowed to increase.  */
-        dest = get_temp(ctx);
-        tcg_gen_ori_reg(dest, offset, 3);
+        dest = tcg_temp_new_i64();
+        tcg_gen_ori_i64(dest, offset, 3);
         break;
     default:
-        dest = get_temp(ctx);
-        tcg_gen_andi_reg(dest, offset, -4);
-        tcg_gen_ori_reg(dest, dest, ctx->privilege);
-        tcg_gen_movcond_reg(TCG_COND_GTU, dest, dest, offset, dest, offset);
+        dest = tcg_temp_new_i64();
+        tcg_gen_andi_i64(dest, offset, -4);
+        tcg_gen_ori_i64(dest, dest, ctx->privilege);
+        tcg_gen_movcond_i64(TCG_COND_GTU, dest, dest, offset, dest, offset);
         break;
     }
     return dest;
@@ -1961,6 +1872,8 @@ static TCGv_reg do_ibranch_priv(DisasContext *ctx, TCGv_reg offset)
    aforementioned BE.  */
 static void do_page_zero(DisasContext *ctx)
 {
+    TCGv_i64 tmp;
+
     /* If by some means we get here with PSW[N]=1, that implies that
        the B,GATE instruction would be skipped, and we'd fault on the
        next insn within the privileged page.  */
@@ -1968,7 +1881,7 @@ static void do_page_zero(DisasContext *ctx)
     case TCG_COND_NEVER:
         break;
     case TCG_COND_ALWAYS:
-        tcg_gen_movi_reg(cpu_psw_n, 0);
+        tcg_gen_movi_i64(cpu_psw_n, 0);
         goto do_sigill;
     default:
         /* Since this is always the first (and only) insn within the
@@ -1996,9 +1909,12 @@ static void do_page_zero(DisasContext *ctx)
         break;
 
     case 0xe0: /* SET_THREAD_POINTER */
-        tcg_gen_st_reg(cpu_gr[26], tcg_env, offsetof(CPUHPPAState, cr[27]));
-        tcg_gen_ori_reg(cpu_iaoq_f, cpu_gr[31], 3);
-        tcg_gen_addi_reg(cpu_iaoq_b, cpu_iaoq_f, 4);
+        tcg_gen_st_i64(cpu_gr[26], tcg_env, offsetof(CPUHPPAState, cr[27]));
+        tmp = tcg_temp_new_i64();
+        tcg_gen_ori_i64(tmp, cpu_gr[31], 3);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, -1, tmp);
+        tcg_gen_addi_i64(tmp, tmp, 4);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp);
         ctx->base.is_jmp = DISAS_IAQ_N_UPDATED;
         break;
 
@@ -2039,8 +1955,8 @@ static bool trans_sync(DisasContext *ctx, arg_sync *a)
 static bool trans_mfia(DisasContext *ctx, arg_mfia *a)
 {
     unsigned rt = a->t;
-    TCGv_reg tmp = dest_gpr(ctx, rt);
-    tcg_gen_movi_reg(tmp, ctx->iaoq_f);
+    TCGv_i64 tmp = dest_gpr(ctx, rt);
+    tcg_gen_movi_i64(tmp, ctx->iaoq_f);
     save_gpr(ctx, rt, tmp);
 
     cond_free(&ctx->null_cond);
@@ -2052,13 +1968,11 @@ static bool trans_mfsp(DisasContext *ctx, arg_mfsp *a)
     unsigned rt = a->t;
     unsigned rs = a->sp;
     TCGv_i64 t0 = tcg_temp_new_i64();
-    TCGv_reg t1 = tcg_temp_new();
 
     load_spr(ctx, t0, rs);
     tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_trunc_i64_reg(t1, t0);
 
-    save_gpr(ctx, rt, t1);
+    save_gpr(ctx, rt, t0);
 
     cond_free(&ctx->null_cond);
     return true;
@@ -2068,19 +1982,17 @@ static bool trans_mfctl(DisasContext *ctx, arg_mfctl *a)
 {
     unsigned rt = a->t;
     unsigned ctl = a->r;
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
 
     switch (ctl) {
     case CR_SAR:
-#ifdef TARGET_HPPA64
         if (a->e == 0) {
             /* MFSAR without ,W masks low 5 bits.  */
             tmp = dest_gpr(ctx, rt);
-            tcg_gen_andi_reg(tmp, cpu_sar, 31);
+            tcg_gen_andi_i64(tmp, cpu_sar, 31);
             save_gpr(ctx, rt, tmp);
             goto done;
         }
-#endif
         save_gpr(ctx, rt, cpu_sar);
         goto done;
     case CR_IT: /* Interval Timer */
@@ -2104,8 +2016,8 @@ static bool trans_mfctl(DisasContext *ctx, arg_mfctl *a)
         break;
     }
 
-    tmp = get_temp(ctx);
-    tcg_gen_ld_reg(tmp, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
+    tmp = tcg_temp_new_i64();
+    tcg_gen_ld_i64(tmp, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
     save_gpr(ctx, rt, tmp);
 
  done:
@@ -2117,22 +2029,21 @@ static bool trans_mtsp(DisasContext *ctx, arg_mtsp *a)
 {
     unsigned rr = a->r;
     unsigned rs = a->sp;
-    TCGv_i64 t64;
+    TCGv_i64 tmp;
 
     if (rs >= 5) {
         CHECK_MOST_PRIVILEGED(EXCP_PRIV_REG);
     }
     nullify_over(ctx);
 
-    t64 = tcg_temp_new_i64();
-    tcg_gen_extu_reg_i64(t64, load_gpr(ctx, rr));
-    tcg_gen_shli_i64(t64, t64, 32);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_shli_i64(tmp, load_gpr(ctx, rr), 32);
 
     if (rs >= 4) {
-        tcg_gen_st_i64(t64, tcg_env, offsetof(CPUHPPAState, sr[rs]));
+        tcg_gen_st_i64(tmp, tcg_env, offsetof(CPUHPPAState, sr[rs]));
         ctx->tb_flags &= ~TB_FLAG_SR_SAME;
     } else {
-        tcg_gen_mov_i64(cpu_sr[rs], t64);
+        tcg_gen_mov_i64(cpu_sr[rs], tmp);
     }
 
     return nullify_end(ctx);
@@ -2141,13 +2052,13 @@ static bool trans_mtsp(DisasContext *ctx, arg_mtsp *a)
 static bool trans_mtctl(DisasContext *ctx, arg_mtctl *a)
 {
     unsigned ctl = a->t;
-    TCGv_reg reg;
-    TCGv_reg tmp;
+    TCGv_i64 reg;
+    TCGv_i64 tmp;
 
     if (ctl == CR_SAR) {
         reg = load_gpr(ctx, a->r);
-        tmp = tcg_temp_new();
-        tcg_gen_andi_reg(tmp, reg, TARGET_REGISTER_BITS - 1);
+        tmp = tcg_temp_new_i64();
+        tcg_gen_andi_i64(tmp, reg, ctx->is_pa20 ? 63 : 31);
         save_or_nullify(ctx, cpu_sar, tmp);
 
         cond_free(&ctx->null_cond);
@@ -2159,7 +2070,13 @@ static bool trans_mtctl(DisasContext *ctx, arg_mtctl *a)
 
 #ifndef CONFIG_USER_ONLY
     nullify_over(ctx);
-    reg = load_gpr(ctx, a->r);
+
+    if (ctx->is_pa20) {
+        reg = load_gpr(ctx, a->r);
+    } else {
+        reg = tcg_temp_new_i64();
+        tcg_gen_ext32u_i64(reg, load_gpr(ctx, a->r));
+    }
 
     switch (ctl) {
     case CR_IT:
@@ -2177,11 +2094,11 @@ static bool trans_mtctl(DisasContext *ctx, arg_mtctl *a)
     case CR_IIAOQ:
         /* FIXME: Respect PSW_Q bit */
         /* The write advances the queue and stores to the back element.  */
-        tmp = get_temp(ctx);
-        tcg_gen_ld_reg(tmp, tcg_env,
+        tmp = tcg_temp_new_i64();
+        tcg_gen_ld_i64(tmp, tcg_env,
                        offsetof(CPUHPPAState, cr_back[ctl - CR_IIASQ]));
-        tcg_gen_st_reg(tmp, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
-        tcg_gen_st_reg(reg, tcg_env,
+        tcg_gen_st_i64(tmp, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
+        tcg_gen_st_i64(reg, tcg_env,
                        offsetof(CPUHPPAState, cr_back[ctl - CR_IIASQ]));
         break;
 
@@ -2189,14 +2106,14 @@ static bool trans_mtctl(DisasContext *ctx, arg_mtctl *a)
     case CR_PID2:
     case CR_PID3:
     case CR_PID4:
-        tcg_gen_st_reg(reg, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
+        tcg_gen_st_i64(reg, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
 #ifndef CONFIG_USER_ONLY
         gen_helper_change_prot_id(tcg_env);
 #endif
         break;
 
     default:
-        tcg_gen_st_reg(reg, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
+        tcg_gen_st_i64(reg, tcg_env, offsetof(CPUHPPAState, cr[ctl]));
         break;
     }
     return nullify_end(ctx);
@@ -2205,10 +2122,10 @@ static bool trans_mtctl(DisasContext *ctx, arg_mtctl *a)
 
 static bool trans_mtsarcm(DisasContext *ctx, arg_mtsarcm *a)
 {
-    TCGv_reg tmp = tcg_temp_new();
+    TCGv_i64 tmp = tcg_temp_new_i64();
 
-    tcg_gen_not_reg(tmp, load_gpr(ctx, a->r));
-    tcg_gen_andi_reg(tmp, tmp, TARGET_REGISTER_BITS - 1);
+    tcg_gen_not_i64(tmp, load_gpr(ctx, a->r));
+    tcg_gen_andi_i64(tmp, tmp, ctx->is_pa20 ? 63 : 31);
     save_or_nullify(ctx, cpu_sar, tmp);
 
     cond_free(&ctx->null_cond);
@@ -2217,17 +2134,14 @@ static bool trans_mtsarcm(DisasContext *ctx, arg_mtsarcm *a)
 
 static bool trans_ldsid(DisasContext *ctx, arg_ldsid *a)
 {
-    TCGv_reg dest = dest_gpr(ctx, a->t);
+    TCGv_i64 dest = dest_gpr(ctx, a->t);
 
 #ifdef CONFIG_USER_ONLY
     /* We don't implement space registers in user mode. */
-    tcg_gen_movi_reg(dest, 0);
+    tcg_gen_movi_i64(dest, 0);
 #else
-    TCGv_i64 t0 = tcg_temp_new_i64();
-
-    tcg_gen_mov_i64(t0, space_select(ctx, a->sp, load_gpr(ctx, a->b)));
-    tcg_gen_shri_i64(t0, t0, 32);
-    tcg_gen_trunc_i64_reg(dest, t0);
+    tcg_gen_mov_i64(dest, space_select(ctx, a->sp, load_gpr(ctx, a->b)));
+    tcg_gen_shri_i64(dest, dest, 32);
 #endif
     save_gpr(ctx, a->t, dest);
 
@@ -2239,13 +2153,13 @@ static bool trans_rsm(DisasContext *ctx, arg_rsm *a)
 {
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
 
     nullify_over(ctx);
 
-    tmp = get_temp(ctx);
-    tcg_gen_ld_reg(tmp, tcg_env, offsetof(CPUHPPAState, psw));
-    tcg_gen_andi_reg(tmp, tmp, ~a->i);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_ld_i64(tmp, tcg_env, offsetof(CPUHPPAState, psw));
+    tcg_gen_andi_i64(tmp, tmp, ~a->i);
     gen_helper_swap_system_mask(tmp, tcg_env, tmp);
     save_gpr(ctx, a->t, tmp);
 
@@ -2259,13 +2173,13 @@ static bool trans_ssm(DisasContext *ctx, arg_ssm *a)
 {
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
 
     nullify_over(ctx);
 
-    tmp = get_temp(ctx);
-    tcg_gen_ld_reg(tmp, tcg_env, offsetof(CPUHPPAState, psw));
-    tcg_gen_ori_reg(tmp, tmp, a->i);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_ld_i64(tmp, tcg_env, offsetof(CPUHPPAState, psw));
+    tcg_gen_ori_i64(tmp, tmp, a->i);
     gen_helper_swap_system_mask(tmp, tcg_env, tmp);
     save_gpr(ctx, a->t, tmp);
 
@@ -2279,11 +2193,11 @@ static bool trans_mtsm(DisasContext *ctx, arg_mtsm *a)
 {
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_reg tmp, reg;
+    TCGv_i64 tmp, reg;
     nullify_over(ctx);
 
     reg = load_gpr(ctx, a->r);
-    tmp = get_temp(ctx);
+    tmp = tcg_temp_new_i64();
     gen_helper_swap_system_mask(tmp, tcg_env, reg);
 
     /* Exit the TB to recognize new interrupts.  */
@@ -2356,12 +2270,12 @@ static bool trans_getshadowregs(DisasContext *ctx, arg_getshadowregs *a)
 static bool trans_nop_addrx(DisasContext *ctx, arg_ldst *a)
 {
     if (a->m) {
-        TCGv_reg dest = dest_gpr(ctx, a->b);
-        TCGv_reg src1 = load_gpr(ctx, a->b);
-        TCGv_reg src2 = load_gpr(ctx, a->x);
+        TCGv_i64 dest = dest_gpr(ctx, a->b);
+        TCGv_i64 src1 = load_gpr(ctx, a->b);
+        TCGv_i64 src2 = load_gpr(ctx, a->x);
 
         /* The only thing we need to do is the base register modification.  */
-        tcg_gen_add_reg(dest, src1, src2);
+        tcg_gen_add_i64(dest, src1, src2);
         save_gpr(ctx, a->b, dest);
     }
     cond_free(&ctx->null_cond);
@@ -2370,9 +2284,9 @@ static bool trans_nop_addrx(DisasContext *ctx, arg_ldst *a)
 
 static bool trans_probe(DisasContext *ctx, arg_probe *a)
 {
-    TCGv_reg dest, ofs;
+    TCGv_i64 dest, ofs;
     TCGv_i32 level, want;
-    TCGv_tl addr;
+    TCGv_i64 addr;
 
     nullify_over(ctx);
 
@@ -2383,7 +2297,7 @@ static bool trans_probe(DisasContext *ctx, arg_probe *a)
         level = tcg_constant_i32(a->ri);
     } else {
         level = tcg_temp_new_i32();
-        tcg_gen_trunc_reg_i32(level, load_gpr(ctx, a->ri));
+        tcg_gen_extrl_i64_i32(level, load_gpr(ctx, a->ri));
         tcg_gen_andi_i32(level, level, 3);
     }
     want = tcg_constant_i32(a->write ? PAGE_WRITE : PAGE_READ);
@@ -2396,19 +2310,22 @@ static bool trans_probe(DisasContext *ctx, arg_probe *a)
 
 static bool trans_ixtlbx(DisasContext *ctx, arg_ixtlbx *a)
 {
+    if (ctx->is_pa20) {
+        return false;
+    }
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_tl addr;
-    TCGv_reg ofs, reg;
+    TCGv_i64 addr;
+    TCGv_i64 ofs, reg;
 
     nullify_over(ctx);
 
     form_gva(ctx, &addr, &ofs, a->b, 0, 0, 0, a->sp, 0, false);
     reg = load_gpr(ctx, a->r);
     if (a->addr) {
-        gen_helper_itlba(tcg_env, addr, reg);
+        gen_helper_itlba_pa11(tcg_env, addr, reg);
     } else {
-        gen_helper_itlbp(tcg_env, addr, reg);
+        gen_helper_itlbp_pa11(tcg_env, addr, reg);
     }
 
     /* Exit TB for TLB change if mmu is enabled.  */
@@ -2419,25 +2336,63 @@ static bool trans_ixtlbx(DisasContext *ctx, arg_ixtlbx *a)
 #endif
 }
 
-static bool trans_pxtlbx(DisasContext *ctx, arg_pxtlbx *a)
+static bool do_pxtlb(DisasContext *ctx, arg_ldst *a, bool local)
 {
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_tl addr;
-    TCGv_reg ofs;
+    TCGv_i64 addr;
+    TCGv_i64 ofs;
 
     nullify_over(ctx);
 
     form_gva(ctx, &addr, &ofs, a->b, a->x, 0, 0, a->sp, a->m, false);
-    if (a->m) {
-        save_gpr(ctx, a->b, ofs);
+
+    /*
+     * Page align now, rather than later, so that we can deposit the
+     * pa2.0 page_size field, taken from the low 4 bits of GR[b].
+     */
+    tcg_gen_andi_i64(addr, addr, TARGET_PAGE_MASK);
+    if (ctx->is_pa20) {
+        tcg_gen_deposit_i64(addr, addr, load_gpr(ctx, a->b), 0, 4);
     }
-    if (a->local) {
-        gen_helper_ptlbe(tcg_env);
+
+    if (local) {
+        gen_helper_ptlb_l(tcg_env, addr);
     } else {
         gen_helper_ptlb(tcg_env, addr);
     }
 
+    if (a->m) {
+        save_gpr(ctx, a->b, ofs);
+    }
+
+    /* Exit TB for TLB change if mmu is enabled.  */
+    if (ctx->tb_flags & PSW_C) {
+        ctx->base.is_jmp = DISAS_IAQ_N_STALE;
+    }
+    return nullify_end(ctx);
+#endif
+}
+
+static bool trans_pxtlb(DisasContext *ctx, arg_ldst *a)
+{
+    return do_pxtlb(ctx, a, false);
+}
+
+static bool trans_pxtlb_l(DisasContext *ctx, arg_ldst *a)
+{
+    return ctx->is_pa20 && do_pxtlb(ctx, a, true);
+}
+
+static bool trans_pxtlbe(DisasContext *ctx, arg_ldst *a)
+{
+    CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
+#ifndef CONFIG_USER_ONLY
+    nullify_over(ctx);
+
+    trans_nop_addrx(ctx, a);
+    gen_helper_ptlbe(tcg_env);
+
     /* Exit TB for TLB change if mmu is enabled.  */
     if (ctx->tb_flags & PSW_C) {
         ctx->base.is_jmp = DISAS_IAQ_N_STALE;
@@ -2454,10 +2409,13 @@ static bool trans_pxtlbx(DisasContext *ctx, arg_pxtlbx *a)
  */
 static bool trans_ixtlbxf(DisasContext *ctx, arg_ixtlbxf *a)
 {
+    if (ctx->is_pa20) {
+        return false;
+    }
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_tl addr, atl, stl;
-    TCGv_reg reg;
+    TCGv_i64 addr, atl, stl;
+    TCGv_i64 reg;
 
     nullify_over(ctx);
 
@@ -2465,13 +2423,11 @@ static bool trans_ixtlbxf(DisasContext *ctx, arg_ixtlbxf *a)
      * FIXME:
      *  if (not (pcxl or pcxl2))
      *    return gen_illegal(ctx);
-     *
-     * Note for future: these are 32-bit systems; no hppa64.
      */
 
-    atl = tcg_temp_new_tl();
-    stl = tcg_temp_new_tl();
-    addr = tcg_temp_new_tl();
+    atl = tcg_temp_new_i64();
+    stl = tcg_temp_new_i64();
+    addr = tcg_temp_new_i64();
 
     tcg_gen_ld32u_i64(stl, tcg_env,
                       a->data ? offsetof(CPUHPPAState, cr[CR_ISR])
@@ -2480,13 +2436,13 @@ static bool trans_ixtlbxf(DisasContext *ctx, arg_ixtlbxf *a)
                       a->data ? offsetof(CPUHPPAState, cr[CR_IOR])
                       : offsetof(CPUHPPAState, cr[CR_IIAOQ]));
     tcg_gen_shli_i64(stl, stl, 32);
-    tcg_gen_or_tl(addr, atl, stl);
+    tcg_gen_or_i64(addr, atl, stl);
 
     reg = load_gpr(ctx, a->r);
     if (a->addr) {
-        gen_helper_itlba(tcg_env, addr, reg);
+        gen_helper_itlba_pa11(tcg_env, addr, reg);
     } else {
-        gen_helper_itlbp(tcg_env, addr, reg);
+        gen_helper_itlbp_pa11(tcg_env, addr, reg);
     }
 
     /* Exit TB for TLB change if mmu is enabled.  */
@@ -2497,18 +2453,44 @@ static bool trans_ixtlbxf(DisasContext *ctx, arg_ixtlbxf *a)
 #endif
 }
 
+static bool trans_ixtlbt(DisasContext *ctx, arg_ixtlbt *a)
+{
+    if (!ctx->is_pa20) {
+        return false;
+    }
+    CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
+#ifndef CONFIG_USER_ONLY
+    nullify_over(ctx);
+    {
+        TCGv_i64 src1 = load_gpr(ctx, a->r1);
+        TCGv_i64 src2 = load_gpr(ctx, a->r2);
+
+        if (a->data) {
+            gen_helper_idtlbt_pa20(tcg_env, src1, src2);
+        } else {
+            gen_helper_iitlbt_pa20(tcg_env, src1, src2);
+        }
+    }
+    /* Exit TB for TLB change if mmu is enabled.  */
+    if (ctx->tb_flags & PSW_C) {
+        ctx->base.is_jmp = DISAS_IAQ_N_STALE;
+    }
+    return nullify_end(ctx);
+#endif
+}
+
 static bool trans_lpa(DisasContext *ctx, arg_ldst *a)
 {
     CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
 #ifndef CONFIG_USER_ONLY
-    TCGv_tl vaddr;
-    TCGv_reg ofs, paddr;
+    TCGv_i64 vaddr;
+    TCGv_i64 ofs, paddr;
 
     nullify_over(ctx);
 
     form_gva(ctx, &vaddr, &ofs, a->b, a->x, 0, 0, a->sp, a->m, false);
 
-    paddr = tcg_temp_new();
+    paddr = tcg_temp_new_i64();
     gen_helper_lpa(paddr, tcg_env, vaddr);
 
     /* Note that physical address result overrides base modification.  */
@@ -2529,78 +2511,78 @@ static bool trans_lci(DisasContext *ctx, arg_lci *a)
        physical address.  Two addresses with the same CI have a coherent
        view of the cache.  Our implementation is to return 0 for all,
        since the entire address space is coherent.  */
-    save_gpr(ctx, a->t, tcg_constant_reg(0));
+    save_gpr(ctx, a->t, ctx->zero);
 
     cond_free(&ctx->null_cond);
     return true;
 }
 
-static bool trans_add(DisasContext *ctx, arg_rrr_cf_sh *a)
+static bool trans_add(DisasContext *ctx, arg_rrr_cf_d_sh *a)
 {
     return do_add_reg(ctx, a, false, false, false, false);
 }
 
-static bool trans_add_l(DisasContext *ctx, arg_rrr_cf_sh *a)
+static bool trans_add_l(DisasContext *ctx, arg_rrr_cf_d_sh *a)
 {
     return do_add_reg(ctx, a, true, false, false, false);
 }
 
-static bool trans_add_tsv(DisasContext *ctx, arg_rrr_cf_sh *a)
+static bool trans_add_tsv(DisasContext *ctx, arg_rrr_cf_d_sh *a)
 {
     return do_add_reg(ctx, a, false, true, false, false);
 }
 
-static bool trans_add_c(DisasContext *ctx, arg_rrr_cf_sh *a)
+static bool trans_add_c(DisasContext *ctx, arg_rrr_cf_d_sh *a)
 {
     return do_add_reg(ctx, a, false, false, false, true);
 }
 
-static bool trans_add_c_tsv(DisasContext *ctx, arg_rrr_cf_sh *a)
+static bool trans_add_c_tsv(DisasContext *ctx, arg_rrr_cf_d_sh *a)
 {
     return do_add_reg(ctx, a, false, true, false, true);
 }
 
-static bool trans_sub(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_sub(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_sub_reg(ctx, a, false, false, false);
 }
 
-static bool trans_sub_tsv(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_sub_tsv(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_sub_reg(ctx, a, true, false, false);
 }
 
-static bool trans_sub_tc(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_sub_tc(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_sub_reg(ctx, a, false, false, true);
 }
 
-static bool trans_sub_tsv_tc(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_sub_tsv_tc(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_sub_reg(ctx, a, true, false, true);
 }
 
-static bool trans_sub_b(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_sub_b(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_sub_reg(ctx, a, false, true, false);
 }
 
-static bool trans_sub_b_tsv(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_sub_b_tsv(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_sub_reg(ctx, a, true, true, false);
 }
 
-static bool trans_andcm(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_andcm(DisasContext *ctx, arg_rrr_cf_d *a)
 {
-    return do_log_reg(ctx, a, tcg_gen_andc_reg);
+    return do_log_reg(ctx, a, tcg_gen_andc_i64);
 }
 
-static bool trans_and(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_and(DisasContext *ctx, arg_rrr_cf_d *a)
 {
-    return do_log_reg(ctx, a, tcg_gen_and_reg);
+    return do_log_reg(ctx, a, tcg_gen_and_i64);
 }
 
-static bool trans_or(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_or(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     if (a->cf == 0) {
         unsigned r2 = a->r2;
@@ -2613,8 +2595,8 @@ static bool trans_or(DisasContext *ctx, arg_rrr_cf *a)
         }
         if (r2 == 0) { /* COPY */
             if (r1 == 0) {
-                TCGv_reg dest = dest_gpr(ctx, rt);
-                tcg_gen_movi_reg(dest, 0);
+                TCGv_i64 dest = dest_gpr(ctx, rt);
+                tcg_gen_movi_i64(dest, 0);
                 save_gpr(ctx, rt, dest);
             } else {
                 save_gpr(ctx, rt, cpu_gr[r1]);
@@ -2635,8 +2617,8 @@ static bool trans_or(DisasContext *ctx, arg_rrr_cf *a)
             nullify_over(ctx);
 
             /* Advance the instruction queue.  */
-            copy_iaoq_entry(cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
-            copy_iaoq_entry(cpu_iaoq_b, ctx->iaoq_n, ctx->iaoq_n_var);
+            copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
+            copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_n, ctx->iaoq_n_var);
             nullify_set(ctx, 0);
 
             /* Tell the qemu main loop to halt until this cpu has work.  */
@@ -2649,142 +2631,146 @@ static bool trans_or(DisasContext *ctx, arg_rrr_cf *a)
         }
 #endif
     }
-    return do_log_reg(ctx, a, tcg_gen_or_reg);
+    return do_log_reg(ctx, a, tcg_gen_or_i64);
 }
 
-static bool trans_xor(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_xor(DisasContext *ctx, arg_rrr_cf_d *a)
 {
-    return do_log_reg(ctx, a, tcg_gen_xor_reg);
+    return do_log_reg(ctx, a, tcg_gen_xor_i64);
 }
 
-static bool trans_cmpclr(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_cmpclr(DisasContext *ctx, arg_rrr_cf_d *a)
 {
-    TCGv_reg tcg_r1, tcg_r2;
+    TCGv_i64 tcg_r1, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
-    do_cmpclr(ctx, a->t, tcg_r1, tcg_r2, a->cf);
+    do_cmpclr(ctx, a->t, tcg_r1, tcg_r2, a->cf, a->d);
     return nullify_end(ctx);
 }
 
-static bool trans_uxor(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_uxor(DisasContext *ctx, arg_rrr_cf_d *a)
 {
-    TCGv_reg tcg_r1, tcg_r2;
+    TCGv_i64 tcg_r1, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
-    do_unit(ctx, a->t, tcg_r1, tcg_r2, a->cf, false, tcg_gen_xor_reg);
+    do_unit(ctx, a->t, tcg_r1, tcg_r2, a->cf, a->d, false, tcg_gen_xor_i64);
     return nullify_end(ctx);
 }
 
-static bool do_uaddcm(DisasContext *ctx, arg_rrr_cf *a, bool is_tc)
+static bool do_uaddcm(DisasContext *ctx, arg_rrr_cf_d *a, bool is_tc)
 {
-    TCGv_reg tcg_r1, tcg_r2, tmp;
+    TCGv_i64 tcg_r1, tcg_r2, tmp;
 
     if (a->cf) {
         nullify_over(ctx);
     }
     tcg_r1 = load_gpr(ctx, a->r1);
     tcg_r2 = load_gpr(ctx, a->r2);
-    tmp = get_temp(ctx);
-    tcg_gen_not_reg(tmp, tcg_r2);
-    do_unit(ctx, a->t, tcg_r1, tmp, a->cf, is_tc, tcg_gen_add_reg);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_not_i64(tmp, tcg_r2);
+    do_unit(ctx, a->t, tcg_r1, tmp, a->cf, a->d, is_tc, tcg_gen_add_i64);
     return nullify_end(ctx);
 }
 
-static bool trans_uaddcm(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_uaddcm(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_uaddcm(ctx, a, false);
 }
 
-static bool trans_uaddcm_tc(DisasContext *ctx, arg_rrr_cf *a)
+static bool trans_uaddcm_tc(DisasContext *ctx, arg_rrr_cf_d *a)
 {
     return do_uaddcm(ctx, a, true);
 }
 
-static bool do_dcor(DisasContext *ctx, arg_rr_cf *a, bool is_i)
+static bool do_dcor(DisasContext *ctx, arg_rr_cf_d *a, bool is_i)
 {
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
 
     nullify_over(ctx);
 
-    tmp = get_temp(ctx);
-    tcg_gen_shri_reg(tmp, cpu_psw_cb, 3);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_shri_i64(tmp, cpu_psw_cb, 3);
     if (!is_i) {
-        tcg_gen_not_reg(tmp, tmp);
+        tcg_gen_not_i64(tmp, tmp);
     }
-    tcg_gen_andi_reg(tmp, tmp, 0x11111111);
-    tcg_gen_muli_reg(tmp, tmp, 6);
-    do_unit(ctx, a->t, load_gpr(ctx, a->r), tmp, a->cf, false,
-            is_i ? tcg_gen_add_reg : tcg_gen_sub_reg);
+    tcg_gen_andi_i64(tmp, tmp, (uint64_t)0x1111111111111111ull);
+    tcg_gen_muli_i64(tmp, tmp, 6);
+    do_unit(ctx, a->t, load_gpr(ctx, a->r), tmp, a->cf, a->d, false,
+            is_i ? tcg_gen_add_i64 : tcg_gen_sub_i64);
     return nullify_end(ctx);
 }
 
-static bool trans_dcor(DisasContext *ctx, arg_rr_cf *a)
+static bool trans_dcor(DisasContext *ctx, arg_rr_cf_d *a)
 {
     return do_dcor(ctx, a, false);
 }
 
-static bool trans_dcor_i(DisasContext *ctx, arg_rr_cf *a)
+static bool trans_dcor_i(DisasContext *ctx, arg_rr_cf_d *a)
 {
     return do_dcor(ctx, a, true);
 }
 
 static bool trans_ds(DisasContext *ctx, arg_rrr_cf *a)
 {
-    TCGv_reg dest, add1, add2, addc, zero, in1, in2;
+    TCGv_i64 dest, add1, add2, addc, in1, in2;
+    TCGv_i64 cout;
 
     nullify_over(ctx);
 
     in1 = load_gpr(ctx, a->r1);
     in2 = load_gpr(ctx, a->r2);
 
-    add1 = tcg_temp_new();
-    add2 = tcg_temp_new();
-    addc = tcg_temp_new();
-    dest = tcg_temp_new();
-    zero = tcg_constant_reg(0);
+    add1 = tcg_temp_new_i64();
+    add2 = tcg_temp_new_i64();
+    addc = tcg_temp_new_i64();
+    dest = tcg_temp_new_i64();
 
     /* Form R1 << 1 | PSW[CB]{8}.  */
-    tcg_gen_add_reg(add1, in1, in1);
-    tcg_gen_add_reg(add1, add1, cpu_psw_cb_msb);
-
-    /* Add or subtract R2, depending on PSW[V].  Proper computation of
-       carry{8} requires that we subtract via + ~R2 + 1, as described in
-       the manual.  By extracting and masking V, we can produce the
-       proper inputs to the addition without movcond.  */
-    tcg_gen_sari_reg(addc, cpu_psw_v, TARGET_REGISTER_BITS - 1);
-    tcg_gen_xor_reg(add2, in2, addc);
-    tcg_gen_andi_reg(addc, addc, 1);
-    /* ??? This is only correct for 32-bit.  */
-    tcg_gen_add2_i32(dest, cpu_psw_cb_msb, add1, zero, add2, zero);
-    tcg_gen_add2_i32(dest, cpu_psw_cb_msb, dest, cpu_psw_cb_msb, addc, zero);
+    tcg_gen_add_i64(add1, in1, in1);
+    tcg_gen_add_i64(add1, add1, get_psw_carry(ctx, false));
+
+    /*
+     * Add or subtract R2, depending on PSW[V].  Proper computation of
+     * carry requires that we subtract via + ~R2 + 1, as described in
+     * the manual.  By extracting and masking V, we can produce the
+     * proper inputs to the addition without movcond.
+     */
+    tcg_gen_sextract_i64(addc, cpu_psw_v, 31, 1);
+    tcg_gen_xor_i64(add2, in2, addc);
+    tcg_gen_andi_i64(addc, addc, 1);
+
+    tcg_gen_add2_i64(dest, cpu_psw_cb_msb, add1, ctx->zero, add2, ctx->zero);
+    tcg_gen_add2_i64(dest, cpu_psw_cb_msb, dest, cpu_psw_cb_msb,
+                     addc, ctx->zero);
 
     /* Write back the result register.  */
     save_gpr(ctx, a->t, dest);
 
     /* Write back PSW[CB].  */
-    tcg_gen_xor_reg(cpu_psw_cb, add1, add2);
-    tcg_gen_xor_reg(cpu_psw_cb, cpu_psw_cb, dest);
+    tcg_gen_xor_i64(cpu_psw_cb, add1, add2);
+    tcg_gen_xor_i64(cpu_psw_cb, cpu_psw_cb, dest);
 
     /* Write back PSW[V] for the division step.  */
-    tcg_gen_neg_reg(cpu_psw_v, cpu_psw_cb_msb);
-    tcg_gen_xor_reg(cpu_psw_v, cpu_psw_v, in2);
+    cout = get_psw_carry(ctx, false);
+    tcg_gen_neg_i64(cpu_psw_v, cout);
+    tcg_gen_xor_i64(cpu_psw_v, cpu_psw_v, in2);
 
     /* Install the new nullification.  */
     if (a->cf) {
-        TCGv_reg sv = NULL;
+        TCGv_i64 sv = NULL;
         if (cond_need_sv(a->cf >> 1)) {
             /* ??? The lshift is supposed to contribute to overflow.  */
             sv = do_add_sv(ctx, dest, add1, add2);
         }
-        ctx->null_cond = do_cond(a->cf, dest, cpu_psw_cb_msb, sv);
+        ctx->null_cond = do_cond(ctx, a->cf, false, dest, cout, sv);
     }
 
     return nullify_end(ctx);
@@ -2820,53 +2806,270 @@ static bool trans_subi_tsv(DisasContext *ctx, arg_rri_cf *a)
     return do_sub_imm(ctx, a, true);
 }
 
-static bool trans_cmpiclr(DisasContext *ctx, arg_rri_cf *a)
+static bool trans_cmpiclr(DisasContext *ctx, arg_rri_cf_d *a)
 {
-    TCGv_reg tcg_im, tcg_r2;
+    TCGv_i64 tcg_im, tcg_r2;
 
     if (a->cf) {
         nullify_over(ctx);
     }
 
-    tcg_im = load_const(ctx, a->i);
+    tcg_im = tcg_constant_i64(a->i);
     tcg_r2 = load_gpr(ctx, a->r);
-    do_cmpclr(ctx, a->t, tcg_im, tcg_r2, a->cf);
+    do_cmpclr(ctx, a->t, tcg_im, tcg_r2, a->cf, a->d);
+
+    return nullify_end(ctx);
+}
+
+static bool do_multimedia(DisasContext *ctx, arg_rrr *a,
+                          void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 r1, r2, dest;
+
+    if (!ctx->is_pa20) {
+        return false;
+    }
+
+    nullify_over(ctx);
+
+    r1 = load_gpr(ctx, a->r1);
+    r2 = load_gpr(ctx, a->r2);
+    dest = dest_gpr(ctx, a->t);
+
+    fn(dest, r1, r2);
+    save_gpr(ctx, a->t, dest);
 
     return nullify_end(ctx);
 }
 
+static bool do_multimedia_sh(DisasContext *ctx, arg_rri *a,
+                             void (*fn)(TCGv_i64, TCGv_i64, int64_t))
+{
+    TCGv_i64 r, dest;
+
+    if (!ctx->is_pa20) {
+        return false;
+    }
+
+    nullify_over(ctx);
+
+    r = load_gpr(ctx, a->r);
+    dest = dest_gpr(ctx, a->t);
+
+    fn(dest, r, a->i);
+    save_gpr(ctx, a->t, dest);
+
+    return nullify_end(ctx);
+}
+
+static bool do_multimedia_shadd(DisasContext *ctx, arg_rrr_sh *a,
+                                void (*fn)(TCGv_i64, TCGv_i64,
+                                           TCGv_i64, TCGv_i32))
+{
+    TCGv_i64 r1, r2, dest;
+
+    if (!ctx->is_pa20) {
+        return false;
+    }
+
+    nullify_over(ctx);
+
+    r1 = load_gpr(ctx, a->r1);
+    r2 = load_gpr(ctx, a->r2);
+    dest = dest_gpr(ctx, a->t);
+
+    fn(dest, r1, r2, tcg_constant_i32(a->sh));
+    save_gpr(ctx, a->t, dest);
+
+    return nullify_end(ctx);
+}
+
+static bool trans_hadd(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, tcg_gen_vec_add16_i64);
+}
+
+static bool trans_hadd_ss(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_helper_hadd_ss);
+}
+
+static bool trans_hadd_us(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_helper_hadd_us);
+}
+
+static bool trans_havg(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_helper_havg);
+}
+
+static bool trans_hshl(DisasContext *ctx, arg_rri *a)
+{
+    return do_multimedia_sh(ctx, a, tcg_gen_vec_shl16i_i64);
+}
+
+static bool trans_hshr_s(DisasContext *ctx, arg_rri *a)
+{
+    return do_multimedia_sh(ctx, a, tcg_gen_vec_sar16i_i64);
+}
+
+static bool trans_hshr_u(DisasContext *ctx, arg_rri *a)
+{
+    return do_multimedia_sh(ctx, a, tcg_gen_vec_shr16i_i64);
+}
+
+static bool trans_hshladd(DisasContext *ctx, arg_rrr_sh *a)
+{
+    return do_multimedia_shadd(ctx, a, gen_helper_hshladd);
+}
+
+static bool trans_hshradd(DisasContext *ctx, arg_rrr_sh *a)
+{
+    return do_multimedia_shadd(ctx, a, gen_helper_hshradd);
+}
+
+static bool trans_hsub(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, tcg_gen_vec_sub16_i64);
+}
+
+static bool trans_hsub_ss(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_helper_hsub_ss);
+}
+
+static bool trans_hsub_us(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_helper_hsub_us);
+}
+
+static void gen_mixh_l(TCGv_i64 dst, TCGv_i64 r1, TCGv_i64 r2)
+{
+    uint64_t mask = 0xffff0000ffff0000ull;
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    tcg_gen_andi_i64(tmp, r2, mask);
+    tcg_gen_andi_i64(dst, r1, mask);
+    tcg_gen_shri_i64(tmp, tmp, 16);
+    tcg_gen_or_i64(dst, dst, tmp);
+}
+
+static bool trans_mixh_l(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_mixh_l);
+}
+
+static void gen_mixh_r(TCGv_i64 dst, TCGv_i64 r1, TCGv_i64 r2)
+{
+    uint64_t mask = 0x0000ffff0000ffffull;
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    tcg_gen_andi_i64(tmp, r1, mask);
+    tcg_gen_andi_i64(dst, r2, mask);
+    tcg_gen_shli_i64(tmp, tmp, 16);
+    tcg_gen_or_i64(dst, dst, tmp);
+}
+
+static bool trans_mixh_r(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_mixh_r);
+}
+
+static void gen_mixw_l(TCGv_i64 dst, TCGv_i64 r1, TCGv_i64 r2)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    tcg_gen_shri_i64(tmp, r2, 32);
+    tcg_gen_deposit_i64(dst, r1, tmp, 0, 32);
+}
+
+static bool trans_mixw_l(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_mixw_l);
+}
+
+static void gen_mixw_r(TCGv_i64 dst, TCGv_i64 r1, TCGv_i64 r2)
+{
+    tcg_gen_deposit_i64(dst, r2, r1, 32, 32);
+}
+
+static bool trans_mixw_r(DisasContext *ctx, arg_rrr *a)
+{
+    return do_multimedia(ctx, a, gen_mixw_r);
+}
+
+static bool trans_permh(DisasContext *ctx, arg_permh *a)
+{
+    TCGv_i64 r, t0, t1, t2, t3;
+
+    if (!ctx->is_pa20) {
+        return false;
+    }
+
+    nullify_over(ctx);
+
+    r = load_gpr(ctx, a->r1);
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    t2 = tcg_temp_new_i64();
+    t3 = tcg_temp_new_i64();
+
+    tcg_gen_extract_i64(t0, r, (3 - a->c0) * 16, 16);
+    tcg_gen_extract_i64(t1, r, (3 - a->c1) * 16, 16);
+    tcg_gen_extract_i64(t2, r, (3 - a->c2) * 16, 16);
+    tcg_gen_extract_i64(t3, r, (3 - a->c3) * 16, 16);
+
+    tcg_gen_deposit_i64(t0, t1, t0, 16, 48);
+    tcg_gen_deposit_i64(t2, t3, t2, 16, 48);
+    tcg_gen_deposit_i64(t0, t2, t0, 32, 32);
+
+    save_gpr(ctx, a->t, t0);
+    return nullify_end(ctx);
+}
+
 static bool trans_ld(DisasContext *ctx, arg_ldst *a)
 {
-    if (unlikely(TARGET_REGISTER_BITS == 32 && a->size > MO_32)) {
+    if (ctx->is_pa20) {
+        /*
+         * With pa20, LDB, LDH, LDW, LDD to %r0 are prefetches.
+         * Any base modification still occurs.
+         */
+        if (a->t == 0) {
+            return trans_nop_addrx(ctx, a);
+        }
+    } else if (a->size > MO_32) {
         return gen_illegal(ctx);
-    } else {
-        return do_load(ctx, a->t, a->b, a->x, a->scale ? a->size : 0,
-                   a->disp, a->sp, a->m, a->size | MO_TE);
     }
+    return do_load(ctx, a->t, a->b, a->x, a->scale ? a->size : 0,
+                   a->disp, a->sp, a->m, a->size | MO_TE);
 }
 
 static bool trans_st(DisasContext *ctx, arg_ldst *a)
 {
     assert(a->x == 0 && a->scale == 0);
-    if (unlikely(TARGET_REGISTER_BITS == 32 && a->size > MO_32)) {
+    if (!ctx->is_pa20 && a->size > MO_32) {
         return gen_illegal(ctx);
-    } else {
-        return do_store(ctx, a->t, a->b, a->disp, a->sp, a->m, a->size | MO_TE);
     }
+    return do_store(ctx, a->t, a->b, a->disp, a->sp, a->m, a->size | MO_TE);
 }
 
 static bool trans_ldc(DisasContext *ctx, arg_ldst *a)
 {
     MemOp mop = MO_TE | MO_ALIGN | a->size;
-    TCGv_reg zero, dest, ofs;
-    TCGv_tl addr;
+    TCGv_i64 dest, ofs;
+    TCGv_i64 addr;
+
+    if (!ctx->is_pa20 && a->size > MO_32) {
+        return gen_illegal(ctx);
+    }
 
     nullify_over(ctx);
 
     if (a->m) {
         /* Base register modification.  Make sure if RT == RB,
            we see the result of the load.  */
-        dest = get_temp(ctx);
+        dest = tcg_temp_new_i64();
     } else {
         dest = dest_gpr(ctx, a->t);
     }
@@ -2884,8 +3087,7 @@ static bool trans_ldc(DisasContext *ctx, arg_ldst *a)
      */
     gen_helper_ldc_check(addr);
 
-    zero = tcg_constant_reg(0);
-    tcg_gen_atomic_xchg_reg(dest, addr, zero, ctx->mmu_idx, mop);
+    tcg_gen_atomic_xchg_i64(dest, addr, ctx->zero, ctx->mmu_idx, mop);
 
     if (a->m) {
         save_gpr(ctx, a->b, ofs);
@@ -2897,8 +3099,8 @@ static bool trans_ldc(DisasContext *ctx, arg_ldst *a)
 
 static bool trans_stby(DisasContext *ctx, arg_stby *a)
 {
-    TCGv_reg ofs, val;
-    TCGv_tl addr;
+    TCGv_i64 ofs, val;
+    TCGv_i64 addr;
 
     nullify_over(ctx);
 
@@ -2919,7 +3121,41 @@ static bool trans_stby(DisasContext *ctx, arg_stby *a)
         }
     }
     if (a->m) {
-        tcg_gen_andi_reg(ofs, ofs, ~3);
+        tcg_gen_andi_i64(ofs, ofs, ~3);
+        save_gpr(ctx, a->b, ofs);
+    }
+
+    return nullify_end(ctx);
+}
+
+static bool trans_stdby(DisasContext *ctx, arg_stby *a)
+{
+    TCGv_i64 ofs, val;
+    TCGv_i64 addr;
+
+    if (!ctx->is_pa20) {
+        return false;
+    }
+    nullify_over(ctx);
+
+    form_gva(ctx, &addr, &ofs, a->b, 0, 0, a->disp, a->sp, a->m,
+             ctx->mmu_idx == MMU_PHYS_IDX);
+    val = load_gpr(ctx, a->r);
+    if (a->a) {
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            gen_helper_stdby_e_parallel(tcg_env, addr, val);
+        } else {
+            gen_helper_stdby_e(tcg_env, addr, val);
+        }
+    } else {
+        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
+            gen_helper_stdby_b_parallel(tcg_env, addr, val);
+        } else {
+            gen_helper_stdby_b(tcg_env, addr, val);
+        }
+    }
+    if (a->m) {
+        tcg_gen_andi_i64(ofs, ofs, ~7);
         save_gpr(ctx, a->b, ofs);
     }
 
@@ -2950,9 +3186,9 @@ static bool trans_sta(DisasContext *ctx, arg_ldst *a)
 
 static bool trans_ldil(DisasContext *ctx, arg_ldil *a)
 {
-    TCGv_reg tcg_rt = dest_gpr(ctx, a->t);
+    TCGv_i64 tcg_rt = dest_gpr(ctx, a->t);
 
-    tcg_gen_movi_reg(tcg_rt, a->i);
+    tcg_gen_movi_i64(tcg_rt, a->i);
     save_gpr(ctx, a->t, tcg_rt);
     cond_free(&ctx->null_cond);
     return true;
@@ -2960,10 +3196,10 @@ static bool trans_ldil(DisasContext *ctx, arg_ldil *a)
 
 static bool trans_addil(DisasContext *ctx, arg_addil *a)
 {
-    TCGv_reg tcg_rt = load_gpr(ctx, a->r);
-    TCGv_reg tcg_r1 = dest_gpr(ctx, 1);
+    TCGv_i64 tcg_rt = load_gpr(ctx, a->r);
+    TCGv_i64 tcg_r1 = dest_gpr(ctx, 1);
 
-    tcg_gen_addi_reg(tcg_r1, tcg_rt, a->i);
+    tcg_gen_addi_i64(tcg_r1, tcg_rt, a->i);
     save_gpr(ctx, 1, tcg_r1);
     cond_free(&ctx->null_cond);
     return true;
@@ -2971,75 +3207,100 @@ static bool trans_addil(DisasContext *ctx, arg_addil *a)
 
 static bool trans_ldo(DisasContext *ctx, arg_ldo *a)
 {
-    TCGv_reg tcg_rt = dest_gpr(ctx, a->t);
+    TCGv_i64 tcg_rt = dest_gpr(ctx, a->t);
 
     /* Special case rb == 0, for the LDI pseudo-op.
-       The COPY pseudo-op is handled for free within tcg_gen_addi_tl.  */
+       The COPY pseudo-op is handled for free within tcg_gen_addi_i64.  */
     if (a->b == 0) {
-        tcg_gen_movi_reg(tcg_rt, a->i);
+        tcg_gen_movi_i64(tcg_rt, a->i);
     } else {
-        tcg_gen_addi_reg(tcg_rt, cpu_gr[a->b], a->i);
+        tcg_gen_addi_i64(tcg_rt, cpu_gr[a->b], a->i);
     }
     save_gpr(ctx, a->t, tcg_rt);
     cond_free(&ctx->null_cond);
     return true;
 }
 
-static bool do_cmpb(DisasContext *ctx, unsigned r, TCGv_reg in1,
-                    unsigned c, unsigned f, unsigned n, int disp)
+static bool do_cmpb(DisasContext *ctx, unsigned r, TCGv_i64 in1,
+                    unsigned c, unsigned f, bool d, unsigned n, int disp)
 {
-    TCGv_reg dest, in2, sv;
+    TCGv_i64 dest, in2, sv;
     DisasCond cond;
 
     in2 = load_gpr(ctx, r);
-    dest = get_temp(ctx);
+    dest = tcg_temp_new_i64();
 
-    tcg_gen_sub_reg(dest, in1, in2);
+    tcg_gen_sub_i64(dest, in1, in2);
 
     sv = NULL;
     if (cond_need_sv(c)) {
         sv = do_sub_sv(ctx, dest, in1, in2);
     }
 
-    cond = do_sub_cond(c * 2 + f, dest, in1, in2, sv);
+    cond = do_sub_cond(ctx, c * 2 + f, d, dest, in1, in2, sv);
     return do_cbranch(ctx, disp, n, &cond);
 }
 
 static bool trans_cmpb(DisasContext *ctx, arg_cmpb *a)
 {
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     nullify_over(ctx);
-    return do_cmpb(ctx, a->r2, load_gpr(ctx, a->r1), a->c, a->f, a->n, a->disp);
+    return do_cmpb(ctx, a->r2, load_gpr(ctx, a->r1),
+                   a->c, a->f, a->d, a->n, a->disp);
 }
 
 static bool trans_cmpbi(DisasContext *ctx, arg_cmpbi *a)
 {
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     nullify_over(ctx);
-    return do_cmpb(ctx, a->r, load_const(ctx, a->i), a->c, a->f, a->n, a->disp);
+    return do_cmpb(ctx, a->r, tcg_constant_i64(a->i),
+                   a->c, a->f, a->d, a->n, a->disp);
 }
 
-static bool do_addb(DisasContext *ctx, unsigned r, TCGv_reg in1,
+static bool do_addb(DisasContext *ctx, unsigned r, TCGv_i64 in1,
                     unsigned c, unsigned f, unsigned n, int disp)
 {
-    TCGv_reg dest, in2, sv, cb_msb;
+    TCGv_i64 dest, in2, sv, cb_cond;
     DisasCond cond;
+    bool d = false;
+
+    /*
+     * For hppa64, the ADDB conditions change with PSW.W,
+     * dropping ZNV, SV, OD in favor of double-word EQ, LT, LE.
+     */
+    if (ctx->tb_flags & PSW_W) {
+        d = c >= 5;
+        if (d) {
+            c &= 3;
+        }
+    }
 
     in2 = load_gpr(ctx, r);
-    dest = tcg_temp_new();
+    dest = tcg_temp_new_i64();
     sv = NULL;
-    cb_msb = NULL;
+    cb_cond = NULL;
 
     if (cond_need_cb(c)) {
-        cb_msb = get_temp(ctx);
-        tcg_gen_movi_reg(cb_msb, 0);
-        tcg_gen_add2_reg(dest, cb_msb, in1, cb_msb, in2, cb_msb);
+        TCGv_i64 cb = tcg_temp_new_i64();
+        TCGv_i64 cb_msb = tcg_temp_new_i64();
+
+        tcg_gen_movi_i64(cb_msb, 0);
+        tcg_gen_add2_i64(dest, cb_msb, in1, cb_msb, in2, cb_msb);
+        tcg_gen_xor_i64(cb, in1, in2);
+        tcg_gen_xor_i64(cb, cb, dest);
+        cb_cond = get_carry(ctx, d, cb, cb_msb);
     } else {
-        tcg_gen_add_reg(dest, in1, in2);
+        tcg_gen_add_i64(dest, in1, in2);
     }
     if (cond_need_sv(c)) {
         sv = do_add_sv(ctx, dest, in1, in2);
     }
 
-    cond = do_cond(c * 2 + f, dest, cb_msb, sv);
+    cond = do_cond(ctx, c * 2 + f, d, dest, cb_cond, sv);
     save_gpr(ctx, r, dest);
     return do_cbranch(ctx, disp, n, &cond);
 }
@@ -3053,34 +3314,42 @@ static bool trans_addb(DisasContext *ctx, arg_addb *a)
 static bool trans_addbi(DisasContext *ctx, arg_addbi *a)
 {
     nullify_over(ctx);
-    return do_addb(ctx, a->r, load_const(ctx, a->i), a->c, a->f, a->n, a->disp);
+    return do_addb(ctx, a->r, tcg_constant_i64(a->i), a->c, a->f, a->n, a->disp);
 }
 
 static bool trans_bb_sar(DisasContext *ctx, arg_bb_sar *a)
 {
-    TCGv_reg tmp, tcg_r;
+    TCGv_i64 tmp, tcg_r;
     DisasCond cond;
 
     nullify_over(ctx);
 
-    tmp = tcg_temp_new();
+    tmp = tcg_temp_new_i64();
     tcg_r = load_gpr(ctx, a->r);
-    tcg_gen_shl_reg(tmp, tcg_r, cpu_sar);
+    if (cond_need_ext(ctx, a->d)) {
+        /* Force shift into [32,63] */
+        tcg_gen_ori_i64(tmp, cpu_sar, 32);
+        tcg_gen_shl_i64(tmp, tcg_r, tmp);
+    } else {
+        tcg_gen_shl_i64(tmp, tcg_r, cpu_sar);
+    }
 
-    cond = cond_make_0(a->c ? TCG_COND_GE : TCG_COND_LT, tmp);
+    cond = cond_make_0_tmp(a->c ? TCG_COND_GE : TCG_COND_LT, tmp);
     return do_cbranch(ctx, a->disp, a->n, &cond);
 }
 
 static bool trans_bb_imm(DisasContext *ctx, arg_bb_imm *a)
 {
-    TCGv_reg tmp, tcg_r;
+    TCGv_i64 tmp, tcg_r;
     DisasCond cond;
+    int p;
 
     nullify_over(ctx);
 
-    tmp = tcg_temp_new();
+    tmp = tcg_temp_new_i64();
     tcg_r = load_gpr(ctx, a->r);
-    tcg_gen_shli_reg(tmp, tcg_r, a->p);
+    p = a->p | (cond_need_ext(ctx, a->d) ? 32 : 0);
+    tcg_gen_shli_i64(tmp, tcg_r, p);
 
     cond = cond_make_0(a->c ? TCG_COND_GE : TCG_COND_LT, tmp);
     return do_cbranch(ctx, a->disp, a->n, &cond);
@@ -3088,178 +3357,246 @@ static bool trans_bb_imm(DisasContext *ctx, arg_bb_imm *a)
 
 static bool trans_movb(DisasContext *ctx, arg_movb *a)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
     DisasCond cond;
 
     nullify_over(ctx);
 
     dest = dest_gpr(ctx, a->r2);
     if (a->r1 == 0) {
-        tcg_gen_movi_reg(dest, 0);
+        tcg_gen_movi_i64(dest, 0);
     } else {
-        tcg_gen_mov_reg(dest, cpu_gr[a->r1]);
+        tcg_gen_mov_i64(dest, cpu_gr[a->r1]);
     }
 
-    cond = do_sed_cond(a->c, dest);
+    /* All MOVB conditions are 32-bit. */
+    cond = do_sed_cond(ctx, a->c, false, dest);
     return do_cbranch(ctx, a->disp, a->n, &cond);
 }
 
 static bool trans_movbi(DisasContext *ctx, arg_movbi *a)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
     DisasCond cond;
 
     nullify_over(ctx);
 
     dest = dest_gpr(ctx, a->r);
-    tcg_gen_movi_reg(dest, a->i);
+    tcg_gen_movi_i64(dest, a->i);
 
-    cond = do_sed_cond(a->c, dest);
+    /* All MOVBI conditions are 32-bit. */
+    cond = do_sed_cond(ctx, a->c, false, dest);
     return do_cbranch(ctx, a->disp, a->n, &cond);
 }
 
-static bool trans_shrpw_sar(DisasContext *ctx, arg_shrpw_sar *a)
+static bool trans_shrp_sar(DisasContext *ctx, arg_shrp_sar *a)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest, src2;
 
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
 
     dest = dest_gpr(ctx, a->t);
+    src2 = load_gpr(ctx, a->r2);
     if (a->r1 == 0) {
-        tcg_gen_ext32u_reg(dest, load_gpr(ctx, a->r2));
-        tcg_gen_shr_reg(dest, dest, cpu_sar);
+        if (a->d) {
+            tcg_gen_shr_i64(dest, src2, cpu_sar);
+        } else {
+            TCGv_i64 tmp = tcg_temp_new_i64();
+
+            tcg_gen_ext32u_i64(dest, src2);
+            tcg_gen_andi_i64(tmp, cpu_sar, 31);
+            tcg_gen_shr_i64(dest, dest, tmp);
+        }
     } else if (a->r1 == a->r2) {
-        TCGv_i32 t32 = tcg_temp_new_i32();
-        tcg_gen_trunc_reg_i32(t32, load_gpr(ctx, a->r2));
-        tcg_gen_rotr_i32(t32, t32, cpu_sar);
-        tcg_gen_extu_i32_reg(dest, t32);
+        if (a->d) {
+            tcg_gen_rotr_i64(dest, src2, cpu_sar);
+        } else {
+            TCGv_i32 t32 = tcg_temp_new_i32();
+            TCGv_i32 s32 = tcg_temp_new_i32();
+
+            tcg_gen_extrl_i64_i32(t32, src2);
+            tcg_gen_extrl_i64_i32(s32, cpu_sar);
+            tcg_gen_andi_i32(s32, s32, 31);
+            tcg_gen_rotr_i32(t32, t32, s32);
+            tcg_gen_extu_i32_i64(dest, t32);
+        }
     } else {
-        TCGv_i64 t = tcg_temp_new_i64();
-        TCGv_i64 s = tcg_temp_new_i64();
+        TCGv_i64 src1 = load_gpr(ctx, a->r1);
+
+        if (a->d) {
+            TCGv_i64 t = tcg_temp_new_i64();
+            TCGv_i64 n = tcg_temp_new_i64();
+
+            tcg_gen_xori_i64(n, cpu_sar, 63);
+            tcg_gen_shl_i64(t, src2, n);
+            tcg_gen_shli_i64(t, t, 1);
+            tcg_gen_shr_i64(dest, src1, cpu_sar);
+            tcg_gen_or_i64(dest, dest, t);
+        } else {
+            TCGv_i64 t = tcg_temp_new_i64();
+            TCGv_i64 s = tcg_temp_new_i64();
 
-        tcg_gen_concat_reg_i64(t, load_gpr(ctx, a->r2), load_gpr(ctx, a->r1));
-        tcg_gen_extu_reg_i64(s, cpu_sar);
-        tcg_gen_shr_i64(t, t, s);
-        tcg_gen_trunc_i64_reg(dest, t);
+            tcg_gen_concat32_i64(t, src2, src1);
+            tcg_gen_andi_i64(s, cpu_sar, 31);
+            tcg_gen_shr_i64(dest, t, s);
+        }
     }
     save_gpr(ctx, a->t, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (a->c) {
-        ctx->null_cond = do_sed_cond(a->c, dest);
+        ctx->null_cond = do_sed_cond(ctx, a->c, false, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool trans_shrpw_imm(DisasContext *ctx, arg_shrpw_imm *a)
+static bool trans_shrp_imm(DisasContext *ctx, arg_shrp_imm *a)
 {
-    unsigned sa = 31 - a->cpos;
-    TCGv_reg dest, t2;
+    unsigned width, sa;
+    TCGv_i64 dest, t2;
 
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
 
+    width = a->d ? 64 : 32;
+    sa = width - 1 - a->cpos;
+
     dest = dest_gpr(ctx, a->t);
     t2 = load_gpr(ctx, a->r2);
     if (a->r1 == 0) {
-        tcg_gen_extract_reg(dest, t2, sa, 32 - sa);
-    } else if (TARGET_REGISTER_BITS == 32) {
-        tcg_gen_extract2_reg(dest, t2, cpu_gr[a->r1], sa);
-    } else if (a->r1 == a->r2) {
-        TCGv_i32 t32 = tcg_temp_new_i32();
-        tcg_gen_trunc_reg_i32(t32, t2);
-        tcg_gen_rotri_i32(t32, t32, sa);
-        tcg_gen_extu_i32_reg(dest, t32);
+        tcg_gen_extract_i64(dest, t2, sa, width - sa);
+    } else if (width == TARGET_LONG_BITS) {
+        tcg_gen_extract2_i64(dest, t2, cpu_gr[a->r1], sa);
     } else {
-        TCGv_i64 t64 = tcg_temp_new_i64();
-        tcg_gen_concat_reg_i64(t64, t2, cpu_gr[a->r1]);
-        tcg_gen_shri_i64(t64, t64, sa);
-        tcg_gen_trunc_i64_reg(dest, t64);
+        assert(!a->d);
+        if (a->r1 == a->r2) {
+            TCGv_i32 t32 = tcg_temp_new_i32();
+            tcg_gen_extrl_i64_i32(t32, t2);
+            tcg_gen_rotri_i32(t32, t32, sa);
+            tcg_gen_extu_i32_i64(dest, t32);
+        } else {
+            tcg_gen_concat32_i64(dest, t2, cpu_gr[a->r1]);
+            tcg_gen_extract_i64(dest, dest, sa, 32);
+        }
     }
     save_gpr(ctx, a->t, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (a->c) {
-        ctx->null_cond = do_sed_cond(a->c, dest);
+        ctx->null_cond = do_sed_cond(ctx, a->c, false, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool trans_extrw_sar(DisasContext *ctx, arg_extrw_sar *a)
+static bool trans_extr_sar(DisasContext *ctx, arg_extr_sar *a)
 {
-    unsigned len = 32 - a->clen;
-    TCGv_reg dest, src, tmp;
+    unsigned widthm1 = a->d ? 63 : 31;
+    TCGv_i64 dest, src, tmp;
 
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
 
     dest = dest_gpr(ctx, a->t);
     src = load_gpr(ctx, a->r);
-    tmp = tcg_temp_new();
+    tmp = tcg_temp_new_i64();
 
     /* Recall that SAR is using big-endian bit numbering.  */
-    tcg_gen_xori_reg(tmp, cpu_sar, TARGET_REGISTER_BITS - 1);
+    tcg_gen_andi_i64(tmp, cpu_sar, widthm1);
+    tcg_gen_xori_i64(tmp, tmp, widthm1);
+
     if (a->se) {
-        tcg_gen_sar_reg(dest, src, tmp);
-        tcg_gen_sextract_reg(dest, dest, 0, len);
+        if (!a->d) {
+            tcg_gen_ext32s_i64(dest, src);
+            src = dest;
+        }
+        tcg_gen_sar_i64(dest, src, tmp);
+        tcg_gen_sextract_i64(dest, dest, 0, a->len);
     } else {
-        tcg_gen_shr_reg(dest, src, tmp);
-        tcg_gen_extract_reg(dest, dest, 0, len);
+        if (!a->d) {
+            tcg_gen_ext32u_i64(dest, src);
+            src = dest;
+        }
+        tcg_gen_shr_i64(dest, src, tmp);
+        tcg_gen_extract_i64(dest, dest, 0, a->len);
     }
     save_gpr(ctx, a->t, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (a->c) {
-        ctx->null_cond = do_sed_cond(a->c, dest);
+        ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool trans_extrw_imm(DisasContext *ctx, arg_extrw_imm *a)
+static bool trans_extr_imm(DisasContext *ctx, arg_extr_imm *a)
 {
-    unsigned len = 32 - a->clen;
-    unsigned cpos = 31 - a->pos;
-    TCGv_reg dest, src;
+    unsigned len, cpos, width;
+    TCGv_i64 dest, src;
 
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
 
+    len = a->len;
+    width = a->d ? 64 : 32;
+    cpos = width - 1 - a->pos;
+    if (cpos + len > width) {
+        len = width - cpos;
+    }
+
     dest = dest_gpr(ctx, a->t);
     src = load_gpr(ctx, a->r);
     if (a->se) {
-        tcg_gen_sextract_reg(dest, src, cpos, len);
+        tcg_gen_sextract_i64(dest, src, cpos, len);
     } else {
-        tcg_gen_extract_reg(dest, src, cpos, len);
+        tcg_gen_extract_i64(dest, src, cpos, len);
     }
     save_gpr(ctx, a->t, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (a->c) {
-        ctx->null_cond = do_sed_cond(a->c, dest);
+        ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool trans_depwi_imm(DisasContext *ctx, arg_depwi_imm *a)
+static bool trans_depi_imm(DisasContext *ctx, arg_depi_imm *a)
 {
-    unsigned len = 32 - a->clen;
-    target_sreg mask0, mask1;
-    TCGv_reg dest;
+    unsigned len, width;
+    uint64_t mask0, mask1;
+    TCGv_i64 dest;
 
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
-    if (a->cpos + len > 32) {
-        len = 32 - a->cpos;
+
+    len = a->len;
+    width = a->d ? 64 : 32;
+    if (a->cpos + len > width) {
+        len = width - a->cpos;
     }
 
     dest = dest_gpr(ctx, a->t);
@@ -3267,110 +3604,122 @@ static bool trans_depwi_imm(DisasContext *ctx, arg_depwi_imm *a)
     mask1 = deposit64(-1, a->cpos, len, a->i);
 
     if (a->nz) {
-        TCGv_reg src = load_gpr(ctx, a->t);
-        if (mask1 != -1) {
-            tcg_gen_andi_reg(dest, src, mask1);
-            src = dest;
-        }
-        tcg_gen_ori_reg(dest, src, mask0);
+        TCGv_i64 src = load_gpr(ctx, a->t);
+        tcg_gen_andi_i64(dest, src, mask1);
+        tcg_gen_ori_i64(dest, dest, mask0);
     } else {
-        tcg_gen_movi_reg(dest, mask0);
+        tcg_gen_movi_i64(dest, mask0);
     }
     save_gpr(ctx, a->t, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (a->c) {
-        ctx->null_cond = do_sed_cond(a->c, dest);
+        ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool trans_depw_imm(DisasContext *ctx, arg_depw_imm *a)
+static bool trans_dep_imm(DisasContext *ctx, arg_dep_imm *a)
 {
     unsigned rs = a->nz ? a->t : 0;
-    unsigned len = 32 - a->clen;
-    TCGv_reg dest, val;
+    unsigned len, width;
+    TCGv_i64 dest, val;
 
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
-    if (a->cpos + len > 32) {
-        len = 32 - a->cpos;
+
+    len = a->len;
+    width = a->d ? 64 : 32;
+    if (a->cpos + len > width) {
+        len = width - a->cpos;
     }
 
     dest = dest_gpr(ctx, a->t);
     val = load_gpr(ctx, a->r);
     if (rs == 0) {
-        tcg_gen_deposit_z_reg(dest, val, a->cpos, len);
+        tcg_gen_deposit_z_i64(dest, val, a->cpos, len);
     } else {
-        tcg_gen_deposit_reg(dest, cpu_gr[rs], val, a->cpos, len);
+        tcg_gen_deposit_i64(dest, cpu_gr[rs], val, a->cpos, len);
     }
     save_gpr(ctx, a->t, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (a->c) {
-        ctx->null_cond = do_sed_cond(a->c, dest);
+        ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool do_depw_sar(DisasContext *ctx, unsigned rt, unsigned c,
-                        unsigned nz, unsigned clen, TCGv_reg val)
+static bool do_dep_sar(DisasContext *ctx, unsigned rt, unsigned c,
+                       bool d, bool nz, unsigned len, TCGv_i64 val)
 {
     unsigned rs = nz ? rt : 0;
-    unsigned len = 32 - clen;
-    TCGv_reg mask, tmp, shift, dest;
-    unsigned msb = 1U << (len - 1);
+    unsigned widthm1 = d ? 63 : 31;
+    TCGv_i64 mask, tmp, shift, dest;
+    uint64_t msb = 1ULL << (len - 1);
 
     dest = dest_gpr(ctx, rt);
-    shift = tcg_temp_new();
-    tmp = tcg_temp_new();
+    shift = tcg_temp_new_i64();
+    tmp = tcg_temp_new_i64();
 
     /* Convert big-endian bit numbering in SAR to left-shift.  */
-    tcg_gen_xori_reg(shift, cpu_sar, TARGET_REGISTER_BITS - 1);
+    tcg_gen_andi_i64(shift, cpu_sar, widthm1);
+    tcg_gen_xori_i64(shift, shift, widthm1);
 
-    mask = tcg_temp_new();
-    tcg_gen_movi_reg(mask, msb + (msb - 1));
-    tcg_gen_and_reg(tmp, val, mask);
+    mask = tcg_temp_new_i64();
+    tcg_gen_movi_i64(mask, msb + (msb - 1));
+    tcg_gen_and_i64(tmp, val, mask);
     if (rs) {
-        tcg_gen_shl_reg(mask, mask, shift);
-        tcg_gen_shl_reg(tmp, tmp, shift);
-        tcg_gen_andc_reg(dest, cpu_gr[rs], mask);
-        tcg_gen_or_reg(dest, dest, tmp);
+        tcg_gen_shl_i64(mask, mask, shift);
+        tcg_gen_shl_i64(tmp, tmp, shift);
+        tcg_gen_andc_i64(dest, cpu_gr[rs], mask);
+        tcg_gen_or_i64(dest, dest, tmp);
     } else {
-        tcg_gen_shl_reg(dest, tmp, shift);
+        tcg_gen_shl_i64(dest, tmp, shift);
     }
     save_gpr(ctx, rt, dest);
 
     /* Install the new nullification.  */
     cond_free(&ctx->null_cond);
     if (c) {
-        ctx->null_cond = do_sed_cond(c, dest);
+        ctx->null_cond = do_sed_cond(ctx, c, d, dest);
     }
     return nullify_end(ctx);
 }
 
-static bool trans_depw_sar(DisasContext *ctx, arg_depw_sar *a)
+static bool trans_dep_sar(DisasContext *ctx, arg_dep_sar *a)
 {
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
-    return do_depw_sar(ctx, a->t, a->c, a->nz, a->clen, load_gpr(ctx, a->r));
+    return do_dep_sar(ctx, a->t, a->c, a->d, a->nz, a->len,
+                      load_gpr(ctx, a->r));
 }
 
-static bool trans_depwi_sar(DisasContext *ctx, arg_depwi_sar *a)
+static bool trans_depi_sar(DisasContext *ctx, arg_depi_sar *a)
 {
+    if (!ctx->is_pa20 && a->d) {
+        return false;
+    }
     if (a->c) {
         nullify_over(ctx);
     }
-    return do_depw_sar(ctx, a->t, a->c, a->nz, a->clen, load_const(ctx, a->i));
+    return do_dep_sar(ctx, a->t, a->c, a->d, a->nz, a->len,
+                      tcg_constant_i64(a->i));
 }
 
 static bool trans_be(DisasContext *ctx, arg_be *a)
 {
-    TCGv_reg tmp;
+    TCGv_i64 tmp;
 
 #ifdef CONFIG_USER_ONLY
     /* ??? It seems like there should be a good way of using
@@ -3388,8 +3737,8 @@ static bool trans_be(DisasContext *ctx, arg_be *a)
     nullify_over(ctx);
 #endif
 
-    tmp = get_temp(ctx);
-    tcg_gen_addi_reg(tmp, load_gpr(ctx, a->b), a->disp);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_addi_i64(tmp, load_gpr(ctx, a->b), a->disp);
     tmp = do_ibranch_priv(ctx, tmp);
 
 #ifdef CONFIG_USER_ONLY
@@ -3399,20 +3748,21 @@ static bool trans_be(DisasContext *ctx, arg_be *a)
 
     load_spr(ctx, new_spc, a->sp);
     if (a->l) {
-        copy_iaoq_entry(cpu_gr[31], ctx->iaoq_n, ctx->iaoq_n_var);
+        copy_iaoq_entry(ctx, cpu_gr[31], ctx->iaoq_n, ctx->iaoq_n_var);
         tcg_gen_mov_i64(cpu_sr[0], cpu_iasq_f);
     }
     if (a->n && use_nullify_skip(ctx)) {
-        tcg_gen_mov_reg(cpu_iaoq_f, tmp);
-        tcg_gen_addi_reg(cpu_iaoq_b, cpu_iaoq_f, 4);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, -1, tmp);
+        tcg_gen_addi_i64(tmp, tmp, 4);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp);
         tcg_gen_mov_i64(cpu_iasq_f, new_spc);
         tcg_gen_mov_i64(cpu_iasq_b, cpu_iasq_f);
     } else {
-        copy_iaoq_entry(cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
         if (ctx->iaoq_b == -1) {
             tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b);
         }
-        tcg_gen_mov_reg(cpu_iaoq_b, tmp);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp);
         tcg_gen_mov_i64(cpu_iasq_b, new_spc);
         nullify_set(ctx, a->n);
     }
@@ -3429,7 +3779,7 @@ static bool trans_bl(DisasContext *ctx, arg_bl *a)
 
 static bool trans_b_gate(DisasContext *ctx, arg_b_gate *a)
 {
-    target_ureg dest = iaoq_dest(ctx, a->disp);
+    uint64_t dest = iaoq_dest(ctx, a->disp);
 
     nullify_over(ctx);
 
@@ -3471,11 +3821,11 @@ static bool trans_b_gate(DisasContext *ctx, arg_b_gate *a)
 #endif
 
     if (a->l) {
-        TCGv_reg tmp = dest_gpr(ctx, a->l);
+        TCGv_i64 tmp = dest_gpr(ctx, a->l);
         if (ctx->privilege < 3) {
-            tcg_gen_andi_reg(tmp, tmp, -4);
+            tcg_gen_andi_i64(tmp, tmp, -4);
         }
-        tcg_gen_ori_reg(tmp, tmp, ctx->privilege);
+        tcg_gen_ori_i64(tmp, tmp, ctx->privilege);
         save_gpr(ctx, a->l, tmp);
     }
 
@@ -3485,9 +3835,9 @@ static bool trans_b_gate(DisasContext *ctx, arg_b_gate *a)
 static bool trans_blr(DisasContext *ctx, arg_blr *a)
 {
     if (a->x) {
-        TCGv_reg tmp = get_temp(ctx);
-        tcg_gen_shli_reg(tmp, load_gpr(ctx, a->x), 3);
-        tcg_gen_addi_reg(tmp, tmp, ctx->iaoq_f + 8);
+        TCGv_i64 tmp = tcg_temp_new_i64();
+        tcg_gen_shli_i64(tmp, load_gpr(ctx, a->x), 3);
+        tcg_gen_addi_i64(tmp, tmp, ctx->iaoq_f + 8);
         /* The computation here never changes privilege level.  */
         return do_ibranch(ctx, tmp, a->l, a->n);
     } else {
@@ -3498,14 +3848,14 @@ static bool trans_blr(DisasContext *ctx, arg_blr *a)
 
 static bool trans_bv(DisasContext *ctx, arg_bv *a)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
 
     if (a->x == 0) {
         dest = load_gpr(ctx, a->b);
     } else {
-        dest = get_temp(ctx);
-        tcg_gen_shli_reg(dest, load_gpr(ctx, a->x), 3);
-        tcg_gen_add_reg(dest, dest, load_gpr(ctx, a->b));
+        dest = tcg_temp_new_i64();
+        tcg_gen_shli_i64(dest, load_gpr(ctx, a->x), 3);
+        tcg_gen_add_i64(dest, dest, load_gpr(ctx, a->b));
     }
     dest = do_ibranch_priv(ctx, dest);
     return do_ibranch(ctx, dest, 0, a->n);
@@ -3513,7 +3863,7 @@ static bool trans_bv(DisasContext *ctx, arg_bv *a)
 
 static bool trans_bve(DisasContext *ctx, arg_bve *a)
 {
-    TCGv_reg dest;
+    TCGv_i64 dest;
 
 #ifdef CONFIG_USER_ONLY
     dest = do_ibranch_priv(ctx, load_gpr(ctx, a->b));
@@ -3522,14 +3872,14 @@ static bool trans_bve(DisasContext *ctx, arg_bve *a)
     nullify_over(ctx);
     dest = do_ibranch_priv(ctx, load_gpr(ctx, a->b));
 
-    copy_iaoq_entry(cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
+    copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b);
     if (ctx->iaoq_b == -1) {
         tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b);
     }
-    copy_iaoq_entry(cpu_iaoq_b, -1, dest);
+    copy_iaoq_entry(ctx, cpu_iaoq_b, -1, dest);
     tcg_gen_mov_i64(cpu_iasq_b, space_select(ctx, 0, dest));
     if (a->l) {
-        copy_iaoq_entry(cpu_gr[a->l], ctx->iaoq_n, ctx->iaoq_n_var);
+        copy_iaoq_entry(ctx, cpu_gr[a->l], ctx->iaoq_n, ctx->iaoq_n_var);
     }
     nullify_set(ctx, a->n);
     tcg_gen_lookup_and_goto_ptr();
@@ -3538,6 +3888,12 @@ static bool trans_bve(DisasContext *ctx, arg_bve *a)
 #endif
 }
 
+static bool trans_nopbts(DisasContext *ctx, arg_nopbts *a)
+{
+    /* All branch target stack instructions are implemented as nops. */
+    return ctx->is_pa20;
+}
+
 /*
  * Float class 0
  */
@@ -3551,7 +3907,7 @@ static bool trans_fid_f(DisasContext *ctx, arg_fid_f *a)
 {
     uint64_t ret;
 
-    if (TARGET_REGISTER_BITS == 64) {
+    if (ctx->is_pa20) {
         ret = 0x13080000000000ULL; /* PA8700 (PCX-W2) */
     } else {
         ret = 0x0f080000000000ULL; /* PA7300LC (PCX-L2) */
@@ -3830,12 +4186,12 @@ static bool trans_fcmp_d(DisasContext *ctx, arg_fclass2 *a)
 
 static bool trans_ftest(DisasContext *ctx, arg_ftest *a)
 {
-    TCGv_reg t;
+    TCGv_i64 t;
 
     nullify_over(ctx);
 
-    t = get_temp(ctx);
-    tcg_gen_ld32u_reg(t, tcg_env, offsetof(CPUHPPAState, fr0_shadow));
+    t = tcg_temp_new_i64();
+    tcg_gen_ld32u_i64(t, tcg_env, offsetof(CPUHPPAState, fr0_shadow));
 
     if (a->y == 1) {
         int mask;
@@ -3843,7 +4199,7 @@ static bool trans_ftest(DisasContext *ctx, arg_ftest *a)
 
         switch (a->c) {
         case 0: /* simple */
-            tcg_gen_andi_reg(t, t, 0x4000000);
+            tcg_gen_andi_i64(t, t, 0x4000000);
             ctx->null_cond = cond_make_0(TCG_COND_NE, t);
             goto done;
         case 2: /* rej */
@@ -3872,17 +4228,17 @@ static bool trans_ftest(DisasContext *ctx, arg_ftest *a)
             return true;
         }
         if (inv) {
-            TCGv_reg c = load_const(ctx, mask);
-            tcg_gen_or_reg(t, t, c);
+            TCGv_i64 c = tcg_constant_i64(mask);
+            tcg_gen_or_i64(t, t, c);
             ctx->null_cond = cond_make(TCG_COND_EQ, t, c);
         } else {
-            tcg_gen_andi_reg(t, t, mask);
+            tcg_gen_andi_i64(t, t, mask);
             ctx->null_cond = cond_make_0(TCG_COND_EQ, t);
         }
     } else {
         unsigned cbit = (a->y ^ 1) - 1;
 
-        tcg_gen_extract_reg(t, t, 21 - cbit, 1);
+        tcg_gen_extract_i64(t, t, 21 - cbit, 1);
         ctx->null_cond = cond_make_0(TCG_COND_NE, t);
     }
 
@@ -4062,6 +4418,7 @@ static void hppa_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
 
     ctx->cs = cs;
     ctx->tb_flags = ctx->base.tb->flags;
+    ctx->is_pa20 = hppa_is_pa20(cpu_env(cs));
 
 #ifdef CONFIG_USER_ONLY
     ctx->privilege = MMU_IDX_TO_PRIV(MMU_USER_IDX);
@@ -4071,8 +4428,9 @@ static void hppa_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->unalign = (ctx->tb_flags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN);
 #else
     ctx->privilege = (ctx->tb_flags >> TB_FLAG_PRIV_SHIFT) & 3;
-    ctx->mmu_idx = (ctx->tb_flags & PSW_D ?
-                    PRIV_TO_MMU_IDX(ctx->privilege) : MMU_PHYS_IDX);
+    ctx->mmu_idx = (ctx->tb_flags & PSW_D
+                    ? PRIV_P_TO_MMU_IDX(ctx->privilege, ctx->tb_flags & PSW_P)
+                    : MMU_PHYS_IDX);
 
     /* Recover the IAOQ values from the GVA + PRIV.  */
     uint64_t cs_base = ctx->base.tb->cs_base;
@@ -4085,14 +4443,11 @@ static void hppa_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
     ctx->iaoq_n = -1;
     ctx->iaoq_n_var = NULL;
 
+    ctx->zero = tcg_constant_i64(0);
+
     /* Bound the number of instructions by those left on the page.  */
     bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
     ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
-
-    ctx->ntempr = 0;
-    ctx->ntempl = 0;
-    memset(ctx->tempr, 0, sizeof(ctx->tempr));
-    memset(ctx->templ, 0, sizeof(ctx->templ));
 }
 
 static void hppa_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
@@ -4113,7 +4468,8 @@ static void hppa_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
-    tcg_gen_insn_start(ctx->iaoq_f, ctx->iaoq_b);
+    tcg_gen_insn_start(ctx->iaoq_f, ctx->iaoq_b, 0);
+    ctx->insn_start = tcg_last_op();
 }
 
 static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
@@ -4121,7 +4477,6 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPUHPPAState *env = cpu_env(cs);
     DisasJumpType ret;
-    int i, n;
 
     /* Execute one insn.  */
 #ifdef CONFIG_USER_ONLY
@@ -4140,8 +4495,8 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
            This will be overwritten by a branch.  */
         if (ctx->iaoq_b == -1) {
             ctx->iaoq_n = -1;
-            ctx->iaoq_n_var = get_temp(ctx);
-            tcg_gen_addi_reg(ctx->iaoq_n_var, cpu_iaoq_b, 4);
+            ctx->iaoq_n_var = tcg_temp_new_i64();
+            tcg_gen_addi_i64(ctx->iaoq_n_var, cpu_iaoq_b, 4);
         } else {
             ctx->iaoq_n = ctx->iaoq_b + 4;
             ctx->iaoq_n_var = NULL;
@@ -4160,16 +4515,6 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
         }
     }
 
-    /* Forget any temporaries allocated.  */
-    for (i = 0, n = ctx->ntempr; i < n; ++i) {
-        ctx->tempr[i] = NULL;
-    }
-    for (i = 0, n = ctx->ntempl; i < n; ++i) {
-        ctx->templ[i] = NULL;
-    }
-    ctx->ntempr = 0;
-    ctx->ntempl = 0;
-
     /* Advance the insn queue.  Note that this check also detects
        a priority change within the instruction queue.  */
     if (ret == DISAS_NEXT && ctx->iaoq_b != ctx->iaoq_f + 4) {
@@ -4197,8 +4542,8 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     case DISAS_IAQ_N_STALE:
     case DISAS_IAQ_N_STALE_EXIT:
         if (ctx->iaoq_f == -1) {
-            tcg_gen_mov_reg(cpu_iaoq_f, cpu_iaoq_b);
-            copy_iaoq_entry(cpu_iaoq_b, ctx->iaoq_n, ctx->iaoq_n_var);
+            copy_iaoq_entry(ctx, cpu_iaoq_f, -1, cpu_iaoq_b);
+            copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_n, ctx->iaoq_n_var);
 #ifndef CONFIG_USER_ONLY
             tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b);
 #endif
@@ -4207,7 +4552,7 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
                                 ? DISAS_EXIT
                                 : DISAS_IAQ_N_UPDATED);
         } else if (ctx->iaoq_b == -1) {
-            tcg_gen_mov_reg(cpu_iaoq_b, ctx->iaoq_n_var);
+            copy_iaoq_entry(ctx, cpu_iaoq_b, -1, ctx->iaoq_n_var);
         }
         break;
 
@@ -4227,8 +4572,8 @@ static void hppa_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
     case DISAS_TOO_MANY:
     case DISAS_IAQ_N_STALE:
     case DISAS_IAQ_N_STALE_EXIT:
-        copy_iaoq_entry(cpu_iaoq_f, ctx->iaoq_f, cpu_iaoq_f);
-        copy_iaoq_entry(cpu_iaoq_b, ctx->iaoq_b, cpu_iaoq_b);
+        copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_f, cpu_iaoq_f);
+        copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_b, cpu_iaoq_b);
         nullify_save(ctx);
         /* FALLTHRU */
     case DISAS_IAQ_N_UPDATED:
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 770e81d56e..11b8177eff 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1837,6 +1837,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
                 c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
                 c->ebx = cs->cpu_index;
             }
+
+            if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
+                c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
+            }
         }
 
         r = kvm_xen_init_vcpu(cs);
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index 76348f9d5d..c0631f9cf4 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -28,6 +28,7 @@
 #include "hw/i386/kvm/xen_overlay.h"
 #include "hw/i386/kvm/xen_evtchn.h"
 #include "hw/i386/kvm/xen_gnttab.h"
+#include "hw/i386/kvm/xen_primary_console.h"
 #include "hw/i386/kvm/xen_xenstore.h"
 
 #include "hw/xen/interface/version.h"
@@ -182,7 +183,8 @@ int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
         return ret;
     }
 
-    /* The page couldn't be overlaid until KVM was initialized */
+    /* The pages couldn't be overlaid until KVM was initialized */
+    xen_primary_console_reset();
     xen_xenstore_reset();
 
     return 0;
@@ -267,7 +269,6 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
             fi.submap |= 1 << XENFEAT_writable_page_tables |
                          1 << XENFEAT_writable_descriptor_tables |
                          1 << XENFEAT_auto_translated_physmap |
-                         1 << XENFEAT_supervisor_mode_kernel |
                          1 << XENFEAT_hvm_callback_vector |
                          1 << XENFEAT_hvm_safe_pvclock |
                          1 << XENFEAT_hvm_pirqs;
@@ -307,7 +308,7 @@ static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
 
     trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
 
-    return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
+    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
 }
 
 static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
@@ -425,6 +426,13 @@ void kvm_xen_set_callback_asserted(void)
     }
 }
 
+bool kvm_xen_has_vcpu_callback_vector(void)
+{
+    CPUState *cs = qemu_get_cpu(0);
+
+    return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
+}
+
 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
 {
     CPUState *cs = qemu_get_cpu(vcpu_id);
@@ -441,7 +449,8 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
          * deliver it as an MSI.
          */
         MSIMessage msg = {
-            .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
+            .address = APIC_DEFAULT_ADDRESS |
+                       (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
             .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
         };
         kvm_irqchip_send_msi(kvm_state, msg);
@@ -805,11 +814,23 @@ static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
     case HVM_PARAM_STORE_EVTCHN:
         hp.value = xen_xenstore_get_port();
         break;
+    case HVM_PARAM_CONSOLE_PFN:
+        hp.value = xen_primary_console_get_pfn();
+        if (!hp.value) {
+            err = -EINVAL;
+        }
+        break;
+    case HVM_PARAM_CONSOLE_EVTCHN:
+        hp.value = xen_primary_console_get_port();
+        if (!hp.value) {
+            err = -EINVAL;
+        }
+        break;
     default:
         return false;
     }
 
-    if (kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
+    if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
         err = -EFAULT;
     }
 out:
@@ -850,8 +871,7 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
     int ret = -ENOSYS;
     switch (cmd) {
     case HVMOP_set_evtchn_upcall_vector:
-        ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
-                                                 exit->u.hcall.params[0]);
+        ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
         break;
 
     case HVMOP_pagetable_dying:
@@ -1071,17 +1091,13 @@ static int vcpuop_stop_periodic_timer(CPUState *target)
  * Must always be called with xen_timers_lock held.
  */
 static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
-                                   bool future, bool linux_wa)
+                                   bool linux_wa)
 {
     CPUX86State *env = &X86_CPU(cs)->env;
     int64_t now = kvm_get_current_ns();
     int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     int64_t delta = timeout_abs - now;
 
-    if (future && timeout_abs < now) {
-        return -ETIME;
-    }
-
     if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
                              (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
         /*
@@ -1123,9 +1139,13 @@ static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
     }
 
     QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
-    return do_set_singleshot_timer(cs, sst.timeout_abs_ns,
-                                   !!(sst.flags & VCPU_SSHOTTMR_future),
-                                   false);
+
+    /*
+     * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
+     * The only guest that ever used it got it wrong.
+     * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
+     */
+    return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
 }
 
 static int vcpuop_stop_singleshot_timer(CPUState *cs)
@@ -1150,7 +1170,7 @@ static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
         err = vcpuop_stop_singleshot_timer(CPU(cpu));
     } else {
         QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
-        err = do_set_singleshot_timer(CPU(cpu), timeout, false, true);
+        err = do_set_singleshot_timer(CPU(cpu), timeout, true);
     }
     exit->u.hcall.result = err;
     return true;
@@ -1421,6 +1441,11 @@ int kvm_xen_soft_reset(void)
         return err;
     }
 
+    err = xen_primary_console_reset();
+    if (err) {
+        return err;
+    }
+
     err = xen_xenstore_reset();
     if (err) {
         return err;
@@ -1838,7 +1863,7 @@ int kvm_put_xen_state(CPUState *cs)
         QEMU_LOCK_GUARD(&env->xen_timers_lock);
         if (env->xen_singleshot_timer_ns) {
             ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
-                                    false, false);
+                                          false);
             if (ret < 0) {
                 return ret;
             }
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ac4a6c7eec..02db2760d1 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -34,6 +34,7 @@
 #include "sysemu/kvm.h"
 #include "sysemu/tcg.h"
 #include "kvm/kvm_riscv.h"
+#include "tcg/tcg-cpu.h"
 #include "tcg/tcg.h"
 
 /* RISC-V CPU definitions */
@@ -76,13 +77,15 @@ const uint32_t misa_bits[] = {RVI, RVE, RVM, RVA, RVF, RVD, RVV,
  * instead.
  */
 const RISCVIsaExtData isa_edata_arr[] = {
-    ISA_EXT_DATA_ENTRY(zicbom, PRIV_VERSION_1_12_0, ext_icbom),
-    ISA_EXT_DATA_ENTRY(zicboz, PRIV_VERSION_1_12_0, ext_icboz),
+    ISA_EXT_DATA_ENTRY(zicbom, PRIV_VERSION_1_12_0, ext_zicbom),
+    ISA_EXT_DATA_ENTRY(zicboz, PRIV_VERSION_1_12_0, ext_zicboz),
     ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
-    ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
-    ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
+    ISA_EXT_DATA_ENTRY(zicntr, PRIV_VERSION_1_12_0, ext_zicntr),
+    ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_zicsr),
+    ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_zifencei),
     ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl),
     ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause),
+    ISA_EXT_DATA_ENTRY(zihpm, PRIV_VERSION_1_12_0, ext_zihpm),
     ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
     ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
     ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa),
@@ -124,16 +127,24 @@ const RISCVIsaExtData isa_edata_arr[] = {
     ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
     ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
     ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin),
+    ISA_EXT_DATA_ENTRY(zvkb, PRIV_VERSION_1_12_0, ext_zvkb),
     ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg),
+    ISA_EXT_DATA_ENTRY(zvkn, PRIV_VERSION_1_12_0, ext_zvkn),
+    ISA_EXT_DATA_ENTRY(zvknc, PRIV_VERSION_1_12_0, ext_zvknc),
     ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned),
+    ISA_EXT_DATA_ENTRY(zvkng, PRIV_VERSION_1_12_0, ext_zvkng),
     ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha),
     ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb),
+    ISA_EXT_DATA_ENTRY(zvks, PRIV_VERSION_1_12_0, ext_zvks),
+    ISA_EXT_DATA_ENTRY(zvksc, PRIV_VERSION_1_12_0, ext_zvksc),
     ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed),
+    ISA_EXT_DATA_ENTRY(zvksg, PRIV_VERSION_1_12_0, ext_zvksg),
     ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh),
+    ISA_EXT_DATA_ENTRY(zvkt, PRIV_VERSION_1_12_0, ext_zvkt),
     ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
     ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
     ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia),
-    ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp),
+    ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, ext_smepmp),
     ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen),
     ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia),
     ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf),
@@ -382,8 +393,8 @@ static void riscv_any_cpu_init(Object *obj)
     env->priv_ver = PRIV_VERSION_LATEST;
 
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.mmu = true;
     cpu->cfg.pmp = true;
 }
@@ -430,8 +441,8 @@ static void rv64_sifive_u_cpu_init(Object *obj)
 #endif
 
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.mmu = true;
     cpu->cfg.pmp = true;
 }
@@ -448,8 +459,8 @@ static void rv64_sifive_e_cpu_init(Object *obj)
 #endif
 
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.pmp = true;
 }
 
@@ -494,13 +505,13 @@ static void rv64_veyron_v1_cpu_init(Object *obj)
 
     /* Enable ISA extensions */
     cpu->cfg.mmu = true;
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.pmp = true;
-    cpu->cfg.ext_icbom = true;
+    cpu->cfg.ext_zicbom = true;
     cpu->cfg.cbom_blocksize = 64;
     cpu->cfg.cboz_blocksize = 64;
-    cpu->cfg.ext_icboz = true;
+    cpu->cfg.ext_zicboz = true;
     cpu->cfg.ext_smaia = true;
     cpu->cfg.ext_ssaia = true;
     cpu->cfg.ext_sscofpmf = true;
@@ -566,8 +577,8 @@ static void rv32_sifive_u_cpu_init(Object *obj)
 #endif
 
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.mmu = true;
     cpu->cfg.pmp = true;
 }
@@ -584,8 +595,8 @@ static void rv32_sifive_e_cpu_init(Object *obj)
 #endif
 
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.pmp = true;
 }
 
@@ -595,16 +606,15 @@ static void rv32_ibex_cpu_init(Object *obj)
     RISCVCPU *cpu = RISCV_CPU(obj);
 
     riscv_cpu_set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU);
-    env->priv_ver = PRIV_VERSION_1_11_0;
+    env->priv_ver = PRIV_VERSION_1_12_0;
 #ifndef CONFIG_USER_ONLY
     set_satp_mode_max_supported(cpu, VM_1_10_MBARE);
 #endif
-    cpu->cfg.epmp = true;
-
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.pmp = true;
+    cpu->cfg.ext_smepmp = true;
 }
 
 static void rv32_imafcu_nommu_cpu_init(Object *obj)
@@ -619,8 +629,8 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj)
 #endif
 
     /* inherited from parent obj via riscv_cpu_init() */
-    cpu->cfg.ext_ifencei = true;
-    cpu->cfg.ext_icsr = true;
+    cpu->cfg.ext_zifencei = true;
+    cpu->cfg.ext_zicsr = true;
     cpu->cfg.pmp = true;
 }
 #endif
@@ -813,7 +823,9 @@ static bool riscv_cpu_has_work(CPUState *cs)
      * Definition of the WFI instruction requires it to ignore the privilege
      * mode and delegation registers, but respect individual enables
      */
-    return riscv_cpu_all_pending(env) != 0;
+    return riscv_cpu_all_pending(env) != 0 ||
+        riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE ||
+        riscv_cpu_vsirq_pending(env) != RISCV_EXCP_NONE;
 #else
     return true;
 #endif
@@ -882,6 +894,17 @@ static void riscv_cpu_reset_hold(Object *obj)
     }
     /* mmte is supposed to have pm.current hardwired to 1 */
     env->mmte |= (EXT_STATUS_INITIAL | MMTE_M_PM_CURRENT);
+
+    /*
+     * Clear mseccfg and unlock all the PMP entries upon reset.
+     * This is allowed as per the priv and smepmp specifications
+     * and is needed to clear stale entries across reboots.
+     */
+    if (riscv_cpu_cfg(env)->ext_smepmp) {
+        env->mseccfg = 0;
+    }
+
+    pmp_unlock_entries(env);
 #endif
     env->xl = riscv_cpu_mxl(env);
     riscv_cpu_update_mask(env);
@@ -996,11 +1019,24 @@ static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp)
 }
 #endif
 
-static void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
+void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
 {
-#ifndef CONFIG_USER_ONLY
     Error *local_err = NULL;
 
+    /*
+     * KVM accel does not have a specialized finalize()
+     * callback because its extensions are validated
+     * in the get()/set() callbacks of each property.
+     */
+    if (tcg_enabled()) {
+        riscv_tcg_cpu_finalize_features(cpu, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+#ifndef CONFIG_USER_ONLY
     riscv_cpu_satp_mode_finalize(cpu, &local_err);
     if (local_err != NULL) {
         error_propagate(errp, local_err);
@@ -1047,6 +1083,15 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
     mcc->parent_realize(dev, errp);
 }
 
+bool riscv_cpu_accelerator_compatible(RISCVCPU *cpu)
+{
+    if (tcg_enabled()) {
+        return riscv_cpu_tcg_compatible(cpu);
+    }
+
+    return true;
+}
+
 #ifndef CONFIG_USER_ONLY
 static void cpu_riscv_get_satp(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
@@ -1173,6 +1218,16 @@ static void riscv_cpu_init(Object *obj)
     qdev_init_gpio_in(DEVICE(obj), riscv_cpu_set_irq,
                       IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX);
 #endif /* CONFIG_USER_ONLY */
+
+    /*
+     * The timer and performance counters extensions were supported
+     * in QEMU before they were added as discrete extensions in the
+     * ISA. To keep compatibility we'll always default them to 'true'
+     * for all CPUs. Each accelerator will decide what to do when
+     * users disable them.
+     */
+    RISCV_CPU(obj)->cfg.ext_zicntr = true;
+    RISCV_CPU(obj)->cfg.ext_zihpm = true;
 }
 
 typedef struct misa_ext_info {
@@ -1242,8 +1297,8 @@ const char *riscv_get_misa_ext_description(uint32_t bit)
 const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     /* Defaults for standard extensions */
     MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false),
-    MULTI_EXT_CFG_BOOL("zifencei", ext_ifencei, true),
-    MULTI_EXT_CFG_BOOL("zicsr", ext_icsr, true),
+    MULTI_EXT_CFG_BOOL("zifencei", ext_zifencei, true),
+    MULTI_EXT_CFG_BOOL("zicsr", ext_zicsr, true),
     MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true),
     MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true),
     MULTI_EXT_CFG_BOOL("zawrs", ext_zawrs, true),
@@ -1255,12 +1310,16 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
     MULTI_EXT_CFG_BOOL("sstc", ext_sstc, true),
 
+    MULTI_EXT_CFG_BOOL("smepmp", ext_smepmp, false),
     MULTI_EXT_CFG_BOOL("smstateen", ext_smstateen, false),
     MULTI_EXT_CFG_BOOL("svadu", ext_svadu, true),
     MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false),
     MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false),
     MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false),
 
+    MULTI_EXT_CFG_BOOL("zicntr", ext_zicntr, true),
+    MULTI_EXT_CFG_BOOL("zihpm", ext_zihpm, true),
+
     MULTI_EXT_CFG_BOOL("zba", ext_zba, true),
     MULTI_EXT_CFG_BOOL("zbb", ext_zbb, true),
     MULTI_EXT_CFG_BOOL("zbc", ext_zbc, true),
@@ -1284,8 +1343,8 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     MULTI_EXT_CFG_BOOL("zhinx", ext_zhinx, false),
     MULTI_EXT_CFG_BOOL("zhinxmin", ext_zhinxmin, false),
 
-    MULTI_EXT_CFG_BOOL("zicbom", ext_icbom, true),
-    MULTI_EXT_CFG_BOOL("zicboz", ext_icboz, true),
+    MULTI_EXT_CFG_BOOL("zicbom", ext_zicbom, true),
+    MULTI_EXT_CFG_BOOL("zicboz", ext_zicboz, true),
 
     MULTI_EXT_CFG_BOOL("zmmul", ext_zmmul, false),
 
@@ -1298,6 +1357,24 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
     MULTI_EXT_CFG_BOOL("zcmt", ext_zcmt, false),
     MULTI_EXT_CFG_BOOL("zicond", ext_zicond, false),
 
+    /* Vector cryptography extensions (each name maps to its own cfg flag) */
+    MULTI_EXT_CFG_BOOL("zvbb", ext_zvbb, false),
+    MULTI_EXT_CFG_BOOL("zvbc", ext_zvbc, false),
+    MULTI_EXT_CFG_BOOL("zvkb", ext_zvkb, false),
+    MULTI_EXT_CFG_BOOL("zvkg", ext_zvkg, false),
+    MULTI_EXT_CFG_BOOL("zvkned", ext_zvkned, false),
+    MULTI_EXT_CFG_BOOL("zvknha", ext_zvknha, false),
+    MULTI_EXT_CFG_BOOL("zvknhb", ext_zvknhb, false),
+    MULTI_EXT_CFG_BOOL("zvksed", ext_zvksed, false),
+    MULTI_EXT_CFG_BOOL("zvksh", ext_zvksh, false),
+    MULTI_EXT_CFG_BOOL("zvkt", ext_zvkt, false),
+    MULTI_EXT_CFG_BOOL("zvkn", ext_zvkn, false),
+    MULTI_EXT_CFG_BOOL("zvknc", ext_zvknc, false),
+    MULTI_EXT_CFG_BOOL("zvkng", ext_zvkng, false),
+    MULTI_EXT_CFG_BOOL("zvks", ext_zvks, false),
+    MULTI_EXT_CFG_BOOL("zvksc", ext_zvksc, false),
+    MULTI_EXT_CFG_BOOL("zvksg", ext_zvksg, false),
+
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1320,8 +1397,6 @@ const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
 
 /* These are experimental so mark with 'x-' */
 const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
-    /* ePMP 0.9.3 */
-    MULTI_EXT_CFG_BOOL("x-epmp", epmp, false),
     MULTI_EXT_CFG_BOOL("x-smaia", ext_smaia, false),
     MULTI_EXT_CFG_BOOL("x-ssaia", ext_ssaia, false),
 
@@ -1332,23 +1407,13 @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
     MULTI_EXT_CFG_BOOL("x-zvfbfmin", ext_zvfbfmin, false),
     MULTI_EXT_CFG_BOOL("x-zvfbfwma", ext_zvfbfwma, false),
 
-    /* Vector cryptography extensions */
-    MULTI_EXT_CFG_BOOL("x-zvbb", ext_zvbb, false),
-    MULTI_EXT_CFG_BOOL("x-zvbc", ext_zvbc, false),
-    MULTI_EXT_CFG_BOOL("x-zvkg", ext_zvkg, false),
-    MULTI_EXT_CFG_BOOL("x-zvkned", ext_zvkned, false),
-    MULTI_EXT_CFG_BOOL("x-zvknha", ext_zvknha, false),
-    MULTI_EXT_CFG_BOOL("x-zvknhb", ext_zvknhb, false),
-    MULTI_EXT_CFG_BOOL("x-zvksed", ext_zvksed, false),
-    MULTI_EXT_CFG_BOOL("x-zvksh", ext_zvksh, false),
-
     DEFINE_PROP_END_OF_LIST(),
 };
 
 /* Deprecated entries marked for future removal */
 const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = {
-    MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true),
-    MULTI_EXT_CFG_BOOL("Zicsr", ext_icsr, true),
+    MULTI_EXT_CFG_BOOL("Zifencei", ext_zifencei, true),
+    MULTI_EXT_CFG_BOOL("Zicsr", ext_zicsr, true),
     MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true),
     MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true),
     MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true),
@@ -1362,8 +1427,46 @@ const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+/* Setter for the deprecated "pmu-num" property; it fills cfg.pmu_mask. */
+static void prop_pmu_num_set(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    uint8_t pmu_num;
+
+    /* On a failed visit pmu_num is unset and errp already holds the error */
+    if (!visit_type_uint8(v, name, &pmu_num, errp)) {
+        return;
+    }
+    if (pmu_num > (RV_MAX_MHPMCOUNTERS - 3)) {
+        error_setg(errp, "Number of counters exceeds maximum available");
+        return;
+    }
+
+    /* Zero counters means an empty mask; otherwise pmu_num bits from bit 3 */
+    cpu->cfg.pmu_mask = pmu_num ? MAKE_64BIT_MASK(3, pmu_num) : 0;
+
+    warn_report("\"pmu-num\" property is deprecated; use \"pmu-mask\"");
+}
+
+/* Getter for "pmu-num": reports the number of bits set in cfg.pmu_mask. */
+static void prop_pmu_num_get(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    uint8_t nr_counters = ctpop32(RISCV_CPU(obj)->cfg.pmu_mask);
+
+    visit_type_uint8(v, name, &nr_counters, errp);
+}
+
+const PropertyInfo prop_pmu_num = {
+    .name = "pmu-num", /* deprecated; the setter warns to use "pmu-mask" */
+    .get = prop_pmu_num_get,
+    .set = prop_pmu_num_set,
+};
+
 Property riscv_cpu_options[] = {
-    DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
+    DEFINE_PROP_UINT32("pmu-mask", RISCVCPU, cfg.pmu_mask, MAKE_64BIT_MASK(3, 16)),
+    {.name = "pmu-num", .info = &prop_pmu_num}, /* Deprecated */
 
     DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
     DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index f8ffa5ee38..8efc4d83ec 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -202,6 +202,18 @@ struct CPUArchState {
     uint64_t mie;
     uint64_t mideleg;
 
+    /*
+     * When mideleg[i]=0 and mvien[i]=1, sie[i] is no longer an
+     * alias of mie[i] and needs to be maintained separately.
+     */
+    uint64_t sie;
+
+    /*
+     * When hideleg[i]=0 and hvien[i]=1, vsie[i] is no longer an
+     * alias of sie[i] (mie[i]) and needs to be maintained separately.
+     */
+    uint64_t vsie;
+
     target_ulong satp;   /* since: priv-1.10.0 */
     target_ulong stval;
     target_ulong medeleg;
@@ -222,6 +234,8 @@ struct CPUArchState {
     /* AIA CSRs */
     target_ulong miselect;
     target_ulong siselect;
+    uint64_t mvien;
+    uint64_t mvip;
 
     /* Hypervisor CSRs */
     target_ulong hstatus;
@@ -234,6 +248,14 @@ struct CPUArchState {
     target_ulong hgeie;
     target_ulong hgeip;
     uint64_t htimedelta;
+    uint64_t hvien;
+
+    /*
+     * Bits VSSIP, VSTIP and VSEIP in hvip are maintained in mip. Other bits
+     * from 0:12 are reserved. Bits 13:63 are not aliased and must be
+     * maintained separately in hvip.
+     */
+    uint64_t hvip;
 
     /* Hypervisor controlled virtual interrupt priorities */
     target_ulong hvictl;
@@ -463,6 +485,7 @@ void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env);
 int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts);
 uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask,
                               uint64_t value);
+void riscv_cpu_interrupt(CPURISCVState *env);
 #define BOOL_TO_MASK(x) (-!!(x)) /* helper for riscv_cpu_update_mip value */
 void riscv_cpu_set_rdtime_fn(CPURISCVState *env, uint64_t (*fn)(void *),
                              void *arg);
@@ -733,7 +756,9 @@ typedef struct isa_ext_data {
 extern const RISCVIsaExtData isa_edata_arr[];
 char *riscv_cpu_get_name(RISCVCPU *cpu);
 
+void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp);
 void riscv_add_satp_mode_properties(Object *obj);
+bool riscv_cpu_accelerator_compatible(RISCVCPU *cpu);
 
 /* CSR function table */
 extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 3d6ffaabc7..ebd7917d49 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -735,6 +735,12 @@ typedef enum RISCVException {
 #define MIE_SSIE                           (1 << IRQ_S_SOFT)
 #define MIE_USIE                           (1 << IRQ_U_SOFT)
 
+/* Machine constants */
+#define M_MODE_INTERRUPTS  ((uint64_t)(MIP_MSIP | MIP_MTIP | MIP_MEIP))
+#define S_MODE_INTERRUPTS  ((uint64_t)(MIP_SSIP | MIP_STIP | MIP_SEIP))
+#define VS_MODE_INTERRUPTS ((uint64_t)(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP))
+#define HS_MODE_INTERRUPTS ((uint64_t)(MIP_SGEIP | VS_MODE_INTERRUPTS))
+
 /* General PointerMasking CSR bits */
 #define PM_ENABLE       0x00000001ULL
 #define PM_CURRENT      0x00000002ULL
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index 0e6a0f245c..f4605fb190 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -61,13 +61,15 @@ struct RISCVCPUConfig {
     bool ext_zksed;
     bool ext_zksh;
     bool ext_zkt;
-    bool ext_ifencei;
-    bool ext_icsr;
-    bool ext_icbom;
-    bool ext_icboz;
+    bool ext_zifencei;
+    bool ext_zicntr;
+    bool ext_zicsr;
+    bool ext_zicbom;
+    bool ext_zicboz;
     bool ext_zicond;
     bool ext_zihintntl;
     bool ext_zihintpause;
+    bool ext_zihpm;
     bool ext_smstateen;
     bool ext_sstc;
     bool ext_svadu;
@@ -88,12 +90,20 @@ struct RISCVCPUConfig {
     bool ext_zve64d;
     bool ext_zvbb;
     bool ext_zvbc;
+    bool ext_zvkb;
     bool ext_zvkg;
     bool ext_zvkned;
     bool ext_zvknha;
     bool ext_zvknhb;
     bool ext_zvksed;
     bool ext_zvksh;
+    bool ext_zvkt;
+    bool ext_zvkn;
+    bool ext_zvknc;
+    bool ext_zvkng;
+    bool ext_zvks;
+    bool ext_zvksc;
+    bool ext_zvksg;
     bool ext_zmmul;
     bool ext_zvfbfmin;
     bool ext_zvfbfwma;
@@ -102,6 +112,7 @@ struct RISCVCPUConfig {
     bool ext_smaia;
     bool ext_ssaia;
     bool ext_sscofpmf;
+    bool ext_smepmp;
     bool rvv_ta_all_1s;
     bool rvv_ma_all_1s;
 
@@ -123,7 +134,7 @@ struct RISCVCPUConfig {
     bool ext_xtheadsync;
     bool ext_XVentanaCondOps;
 
-    uint8_t pmu_num;
+    uint32_t pmu_mask;
     char *priv_spec;
     char *user_spec;
     char *bext_spec;
@@ -134,7 +145,6 @@ struct RISCVCPUConfig {
     uint16_t cboz_blocksize;
     bool mmu;
     bool pmp;
-    bool epmp;
     bool debug;
     bool misa_w;
 
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 8c28241c18..b7af69de53 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -376,6 +376,11 @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
     return best_irq;
 }
 
+/*
+ * Doesn't report interrupts inserted using mvip from M-mode firmware or
+ * using hvip bits 13:63 from HS-mode. Those are returned in
+ * riscv_cpu_sirq_pending() and riscv_cpu_vsirq_pending().
+ */
 uint64_t riscv_cpu_all_pending(CPURISCVState *env)
 {
     uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
@@ -398,24 +403,32 @@ int riscv_cpu_sirq_pending(CPURISCVState *env)
 {
     uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg &
                     ~(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP);
+    uint64_t irqs_f = env->mvip & env->mvien & ~env->mideleg & env->sie;
 
     return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-                                    irqs, env->siprio);
+                                    irqs | irqs_f, env->siprio);
 }
 
 int riscv_cpu_vsirq_pending(CPURISCVState *env)
 {
-    uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg &
-                    (MIP_VSSIP | MIP_VSTIP | MIP_VSEIP);
+    uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg & env->hideleg;
+    uint64_t irqs_f_vs = env->hvip & env->hvien & ~env->hideleg & env->vsie;
+    uint64_t vsbits;
+
+    /* Bring VS-level bits to correct position */
+    vsbits = irqs & VS_MODE_INTERRUPTS;
+    irqs &= ~VS_MODE_INTERRUPTS;
+    irqs |= vsbits >> 1;
 
     return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-                                    irqs >> 1, env->hviprio);
+                                    (irqs | irqs_f_vs), env->hviprio);
 }
 
 static int riscv_cpu_local_irq_pending(CPURISCVState *env)
 {
+    uint64_t irqs, pending, mie, hsie, vsie, irqs_f, irqs_f_vs;
+    uint64_t vsbits, irq_delegated;
     int virq;
-    uint64_t irqs, pending, mie, hsie, vsie;
 
     /* Determine interrupt enable state of all privilege modes */
     if (env->virt_enabled) {
@@ -441,19 +454,36 @@ static int riscv_cpu_local_irq_pending(CPURISCVState *env)
                                         irqs, env->miprio);
     }
 
+    /* Check for virtual S-mode interrupts. */
+    irqs_f = env->mvip & (env->mvien & ~env->mideleg) & env->sie;
+
     /* Check HS-mode interrupts */
-    irqs = pending & env->mideleg & ~env->hideleg & -hsie;
+    irqs =  ((pending & env->mideleg & ~env->hideleg) | irqs_f) & -hsie;
     if (irqs) {
         return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
                                         irqs, env->siprio);
     }
 
+    /* Check for virtual VS-mode interrupts. */
+    irqs_f_vs = env->hvip & env->hvien & ~env->hideleg & env->vsie;
+
     /* Check VS-mode interrupts */
-    irqs = pending & env->mideleg & env->hideleg & -vsie;
+    irq_delegated = pending & env->mideleg & env->hideleg;
+
+    /* Bring VS-level bits to correct position */
+    vsbits = irq_delegated & VS_MODE_INTERRUPTS;
+    irq_delegated &= ~VS_MODE_INTERRUPTS;
+    irq_delegated |= vsbits >> 1;
+
+    irqs = (irq_delegated | irqs_f_vs) & -vsie;
     if (irqs) {
         virq = riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-                                        irqs >> 1, env->hviprio);
-        return (virq <= 0) ? virq : virq + 1;
+                                        irqs, env->hviprio);
+        if (virq <= 0 || (virq > 12 && virq <= 63)) {
+            return virq;
+        } else {
+            return virq + 1;
+        }
     }
 
     /* Indicate no pending interrupt */
@@ -620,28 +650,42 @@ int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts)
     }
 }
 
-uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask,
-                              uint64_t value)
+void riscv_cpu_interrupt(CPURISCVState *env)
 {
+    uint64_t gein, vsgein = 0, vstip = 0, irqf = 0;
     CPUState *cs = env_cpu(env);
-    uint64_t gein, vsgein = 0, vstip = 0, old = env->mip;
+
+    QEMU_IOTHREAD_LOCK_GUARD();
 
     if (env->virt_enabled) {
         gein = get_field(env->hstatus, HSTATUS_VGEIN);
         vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
+        irqf = env->hvien & env->hvip & env->vsie;
+    } else {
+        irqf = env->mvien & env->mvip & env->sie;
     }
 
     vstip = env->vstime_irq ? MIP_VSTIP : 0;
 
-    QEMU_IOTHREAD_LOCK_GUARD();
-
-    env->mip = (env->mip & ~mask) | (value & mask);
-
-    if (env->mip | vsgein | vstip) {
+    if (env->mip | vsgein | vstip | irqf) {
         cpu_interrupt(cs, CPU_INTERRUPT_HARD);
     } else {
         cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
     }
+}
+
+uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask, uint64_t value)
+{
+    uint64_t old = env->mip;
+
+    /* No need to update mip for VSTIP */
+    mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask;
+
+    QEMU_IOTHREAD_LOCK_GUARD();
+
+    env->mip = (env->mip & ~mask) | (value & mask);
+
+    riscv_cpu_interrupt(env);
 
     return old;
 }
@@ -1600,20 +1644,22 @@ void riscv_cpu_do_interrupt(CPUState *cs)
     bool async = !!(cs->exception_index & RISCV_EXCP_INT_FLAG);
     target_ulong cause = cs->exception_index & RISCV_EXCP_INT_MASK;
     uint64_t deleg = async ? env->mideleg : env->medeleg;
+    uint64_t irq_bit = cause < 64 ? 1ULL << cause : 0; /* no int-width shift */
+    bool not_in_mip = !(env->mip & irq_bit);
+    bool s_injected = (env->mvip & env->mvien & irq_bit) && not_in_mip;
+    bool vs_injected = (env->hvip & env->hvien & irq_bit) && not_in_mip;
     target_ulong tval = 0;
     target_ulong tinst = 0;
     target_ulong htval = 0;
     target_ulong mtval2 = 0;
 
-    if  (cause == RISCV_EXCP_SEMIHOST) {
-        do_common_semihosting(cs);
-        env->pc += 4;
-        return;
-    }
-
     if (!async) {
         /* set tval to badaddr for traps with address information */
         switch (cause) {
+        case RISCV_EXCP_SEMIHOST:
+            do_common_semihosting(cs);
+            env->pc += 4;
+            return;
         case RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT:
         case RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT:
         case RISCV_EXCP_LOAD_ADDR_MIS:
@@ -1690,13 +1736,14 @@ void riscv_cpu_do_interrupt(CPUState *cs)
                   __func__, env->mhartid, async, cause, env->pc, tval,
                   riscv_cpu_get_trap_name(cause, async));
 
-    if (env->priv <= PRV_S &&
-            cause < TARGET_LONG_BITS && ((deleg >> cause) & 1)) {
+    if (env->priv <= PRV_S && cause < 64 &&
+        (((deleg >> cause) & 1) || s_injected || vs_injected)) {
         /* handle the trap in S-mode */
         if (riscv_has_ext(env, RVH)) {
             uint64_t hdeleg = async ? env->hideleg : env->hedeleg;
 
-            if (env->virt_enabled && ((hdeleg >> cause) & 1)) {
+            if (env->virt_enabled &&
+                (((hdeleg >> cause) & 1) || vs_injected)) {
                 /* Trap to VS mode */
                 /*
                  * See if we need to adjust cause. Yes if its VS mode interrupt
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 4b4ab56c40..fde7ce1a53 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -30,6 +30,7 @@
 #include "qemu/guest-random.h"
 #include "qapi/error.h"
 
+
 /* CSR function table public API */
 void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops)
 {
@@ -121,6 +122,10 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 
     if ((csrno >= CSR_CYCLE && csrno <= CSR_INSTRET) ||
         (csrno >= CSR_CYCLEH && csrno <= CSR_INSTRETH)) {
+        if (!riscv_cpu_cfg(env)->ext_zicntr) {
+            return RISCV_EXCP_ILLEGAL_INST;
+        }
+
         goto skip_ext_pmu_check;
     }
 
@@ -183,7 +188,8 @@ static RISCVException zcmt(CPURISCVState *env, int csrno)
 #if !defined(CONFIG_USER_ONLY)
 static RISCVException mctr(CPURISCVState *env, int csrno)
 {
-    int pmu_num = riscv_cpu_cfg(env)->pmu_num;
+    RISCVCPU *cpu = env_archcpu(env);
+    uint32_t pmu_avail_ctrs = cpu->pmu_avail_ctrs;
     int ctr_index;
     int base_csrno = CSR_MHPMCOUNTER3;
 
@@ -192,7 +198,7 @@ static RISCVException mctr(CPURISCVState *env, int csrno)
         base_csrno += 0x80;
     }
     ctr_index = csrno - base_csrno;
-    if (!pmu_num || ctr_index >= pmu_num) {
+    if ((BIT(ctr_index) & pmu_avail_ctrs >> 3) == 0) {
         /* The PMU is not enabled or counter is out of range */
         return RISCV_EXCP_ILLEGAL_INST;
     }
@@ -523,9 +529,12 @@ static RISCVException pmp(CPURISCVState *env, int csrno)
     return RISCV_EXCP_ILLEGAL_INST;
 }
 
-static RISCVException epmp(CPURISCVState *env, int csrno)
+static RISCVException have_mseccfg(CPURISCVState *env, int csrno)
 {
-    if (riscv_cpu_cfg(env)->epmp) {
+    if (riscv_cpu_cfg(env)->ext_smepmp) {
+        return RISCV_EXCP_NONE;
+    }
+    if (riscv_cpu_cfg(env)->ext_zkr) {
         return RISCV_EXCP_NONE;
     }
 
@@ -1117,21 +1126,16 @@ static RISCVException write_stimecmph(CPURISCVState *env, int csrno,
     return RISCV_EXCP_NONE;
 }
 
-/* Machine constants */
-
-#define M_MODE_INTERRUPTS  ((uint64_t)(MIP_MSIP | MIP_MTIP | MIP_MEIP))
-#define S_MODE_INTERRUPTS  ((uint64_t)(MIP_SSIP | MIP_STIP | MIP_SEIP | \
-                                      MIP_LCOFIP))
-#define VS_MODE_INTERRUPTS ((uint64_t)(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP))
-#define HS_MODE_INTERRUPTS ((uint64_t)(MIP_SGEIP | VS_MODE_INTERRUPTS))
-
 #define VSTOPI_NUM_SRCS 5
 
-static const uint64_t delegable_ints = S_MODE_INTERRUPTS |
-                                           VS_MODE_INTERRUPTS;
-static const uint64_t vs_delegable_ints = VS_MODE_INTERRUPTS;
+#define LOCAL_INTERRUPTS (~0x1FFF)
+
+static const uint64_t delegable_ints =
+    S_MODE_INTERRUPTS | VS_MODE_INTERRUPTS | MIP_LCOFIP;
+static const uint64_t vs_delegable_ints =
+    (VS_MODE_INTERRUPTS | LOCAL_INTERRUPTS) & ~MIP_LCOFIP;
 static const uint64_t all_ints = M_MODE_INTERRUPTS | S_MODE_INTERRUPTS |
-                                     HS_MODE_INTERRUPTS;
+                                     HS_MODE_INTERRUPTS | LOCAL_INTERRUPTS;
 #define DELEGABLE_EXCPS ((1ULL << (RISCV_EXCP_INST_ADDR_MIS)) | \
                          (1ULL << (RISCV_EXCP_INST_ACCESS_FAULT)) | \
                          (1ULL << (RISCV_EXCP_ILLEGAL_INST)) | \
@@ -1162,12 +1166,32 @@ static const target_ulong vs_delegable_excps = DELEGABLE_EXCPS &
 static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE |
     SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS |
     SSTATUS_SUM | SSTATUS_MXR | SSTATUS_VS;
-static const target_ulong sip_writable_mask = SIP_SSIP | MIP_USIP | MIP_UEIP |
-                                              SIP_LCOFIP;
+
+/*
+ * Spec allows for bits 13:63 to be either read-only or writable.
+ * So far we have interrupt LCOFIP in that region which is writable.
+ *
+ * Also, spec allows to inject virtual interrupts in this region even
+ * without any hardware interrupts for that interrupt number.
+ *
+ * For now, interrupts in the 13:63 region are all kept writable, 13 being
+ * LCOFIP and 14:63 being virtual only. Change this in future if we
+ * introduce more interrupts that are not writable.
+ */
+
+/* Bit STIP can be an alias of mip.STIP that's why it's writable in mvip. */
+static const uint64_t mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP |
+                                    LOCAL_INTERRUPTS;
+static const uint64_t mvien_writable_mask = MIP_SSIP | MIP_SEIP |
+                                    LOCAL_INTERRUPTS;
+/* Kept 64 bits wide so interrupt bits 63:32 stay writable on 32-bit targets */
+static const uint64_t sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS;
 static const target_ulong hip_writable_mask = MIP_VSSIP;
 static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP |
-                                               MIP_VSEIP;
-static const target_ulong vsip_writable_mask = MIP_VSSIP;
+                                    MIP_VSEIP | LOCAL_INTERRUPTS;
+static const target_ulong hvien_writable_mask = LOCAL_INTERRUPTS;
+
+static const target_ulong vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS;
 
 const bool valid_vm_1_10_32[16] = {
     [VM_1_10_MBARE] = true,
@@ -1525,7 +1549,7 @@ static RISCVException rmw_mie64(CPURISCVState *env, int csrno,
     env->mie = (env->mie & ~mask) | (new_val & mask);
 
     if (!riscv_has_ext(env, RVH)) {
-        env->mie &= ~((uint64_t)MIP_SGEIP);
+        env->mie &= ~((uint64_t)HS_MODE_INTERRUPTS);
     }
 
     return RISCV_EXCP_NONE;
@@ -1562,6 +1586,52 @@ static RISCVException rmw_mieh(CPURISCVState *env, int csrno,
     return ret;
 }
 
+/* Read-modify-write mvien; only bits in mvien_writable_mask can change. */
+static RISCVException rmw_mvien64(CPURISCVState *env, int csrno,
+                                uint64_t *ret_val,
+                                uint64_t new_val, uint64_t wr_mask)
+{
+    const uint64_t prev = env->mvien;
+    const uint64_t writable = wr_mask & mvien_writable_mask;
+
+    if (ret_val != NULL) {
+        *ret_val = prev;
+    }
+    env->mvien = (prev & ~writable) | (new_val & writable);
+    return RISCV_EXCP_NONE;
+}
+
+/* target_ulong-wide wrapper around rmw_mvien64(); returns the old value. */
+static RISCVException rmw_mvien(CPURISCVState *env, int csrno,
+                              target_ulong *ret_val,
+                              target_ulong new_val, target_ulong wr_mask)
+{
+    uint64_t old64;
+    RISCVException excp = rmw_mvien64(env, csrno, &old64, new_val, wr_mask);
+
+    if (ret_val != NULL) {
+        *ret_val = old64;
+    }
+
+    return excp;
+}
+
+/* High-half wrapper: applies the operands to bits 63:32 via rmw_mvien64(). */
+static RISCVException rmw_mvienh(CPURISCVState *env, int csrno,
+                                target_ulong *ret_val,
+                                target_ulong new_val, target_ulong wr_mask)
+{
+    const uint64_t val_hi = (uint64_t)new_val << 32;
+    const uint64_t mask_hi = (uint64_t)wr_mask << 32;
+    uint64_t old64;
+    RISCVException excp = rmw_mvien64(env, csrno, &old64, val_hi, mask_hi);
+
+    if (ret_val != NULL) {
+        *ret_val = old64 >> 32;
+    }
+    return excp;
+}
+
 static int read_mtopi(CPURISCVState *env, int csrno, target_ulong *val)
 {
     int irq;
@@ -1703,6 +1773,11 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val,
         priv = PRV_M;
         break;
     case CSR_SIREG:
+        if (env->priv == PRV_S && env->mvien & MIP_SEIP &&
+            env->siselect >= ISELECT_IMSIC_EIDELIVERY &&
+            env->siselect <= ISELECT_IMSIC_EIE63) {
+            goto done;
+        }
         iprio = env->siprio;
         isel = env->siselect;
         priv = PRV_S;
@@ -1769,6 +1844,9 @@ static int rmw_xtopei(CPURISCVState *env, int csrno, target_ulong *val,
         priv = PRV_M;
         break;
     case CSR_STOPEI:
+        if (env->mvien & MIP_SEIP && env->priv == PRV_S) {
+            goto done;
+        }
         priv = PRV_S;
         break;
     case CSR_VSTOPEI:
@@ -2360,6 +2438,143 @@ static RISCVException rmw_miph(CPURISCVState *env, int csrno,
     return ret;
 }
 
+/*
+ * The function is written for two use-cases:
+ * 1- To access the mvip csr as is for m-mode access.
+ * 2- To access sip as a combination of mip and mvip for s-mode.
+ *
+ * Both report bits 1, 5, 9 and 13:63 but with the exception of
+ * STIP being read-only zero in case of mvip when the sstc extension
+ * is present.
+ * Also, sip needs to be read-only zero when both mideleg[i] and
+ * mvien[i] are zero but mvip needs to be an alias of mip.
+ */
+static RISCVException rmw_mvip64(CPURISCVState *env, int csrno,
+                                uint64_t *ret_val,
+                                uint64_t new_val, uint64_t wr_mask)
+{
+    RISCVCPU *cpu = env_archcpu(env);
+    target_ulong ret_mip = 0; /* NOTE(review): narrower than 64 bit on RV32? */
+    RISCVException ret;
+    uint64_t old_mvip;
+
+    /*
+     * mideleg[i]  mvien[i]
+     *   0           0      No delegation. mvip[i] is alias of mip[i].
+     *   0           1      mvip[i] becomes source of interrupt, mip bypassed.
+     *   1           X      mip[i] is source of interrupt and mvip[i] aliases
+     *                      mip[i].
+     *
+     *   So alias condition would be for bits:
+     *      ((S_MODE_INTERRUPTS | LOCAL_INTERRUPTS) & (mideleg | ~mvien)) |
+     *          (!sstc & MIP_STIP)
+     *
+     *   Non-alias condition will be for bits:
+     *      (S_MODE_INTERRUPTS | LOCAL_INTERRUPTS) & (~mideleg & mvien)
+     *
+     *  alias_mask denotes the bits that come from mip; nalias_mask denotes
+     *  the bits that come from mvip.
+     */
+    uint64_t alias_mask = ((S_MODE_INTERRUPTS | LOCAL_INTERRUPTS) &
+        (env->mideleg | ~env->mvien)) | MIP_STIP;
+    uint64_t nalias_mask = (S_MODE_INTERRUPTS | LOCAL_INTERRUPTS) &
+        (~env->mideleg & env->mvien);
+    uint64_t wr_mask_mvip;
+    uint64_t wr_mask_mip;
+
+    /*
+     * mideleg[i]  mvien[i]
+     *   0           0      sip[i] read-only zero.
+     *   0           1      sip[i] alias of mvip[i].
+     *   1           X      sip[i] alias of mip[i].
+     *
+     *  Both alias and non-alias mask remain same for sip except for bits
+     *  which are zero in both mideleg and mvien.
+     */
+    if (csrno == CSR_SIP) {
+        /* Remove bits that are zero in both mideleg and mvien. */
+        alias_mask &= (env->mideleg | env->mvien);
+        nalias_mask &= (env->mideleg | env->mvien);
+    }
+
+    /*
+     * If sstc is present, mvip.STIP is not an alias of mip.STIP so clear
+     * that bit out of the value returned from mip.
+     */
+    if (cpu->cfg.ext_sstc && (env->priv == PRV_M) &&
+        get_field(env->menvcfg, MENVCFG_STCE)) {
+        alias_mask &= ~MIP_STIP;
+    }
+
+    wr_mask_mip = wr_mask & alias_mask & mvip_writable_mask;
+    wr_mask_mvip = wr_mask & nalias_mask & mvip_writable_mask;
+
+    /*
+     * For bits set in alias_mask, mvip needs to be alias of mip, so forward
+     * this to rmw_mip.
+     */
+    ret = rmw_mip(env, CSR_MIP, &ret_mip, new_val, wr_mask_mip);
+    if (ret != RISCV_EXCP_NONE) {
+        return ret;
+    }
+
+    old_mvip = env->mvip;
+
+    /*
+     * Write to mvip. Update only non-alias bits. Alias bits were updated
+     * in mip in rmw_mip above.
+     */
+    if (wr_mask_mvip) {
+        env->mvip = (env->mvip & ~wr_mask_mvip) | (new_val & wr_mask_mvip);
+
+        /*
+         * Given mvip is separate source from mip, we need to trigger interrupt
+         * from here separately. Normally this happens from riscv_cpu_update_mip.
+         */
+        riscv_cpu_interrupt(env);
+    }
+
+    if (ret_val) {
+        ret_mip &= alias_mask;
+        old_mvip &= nalias_mask;
+
+        *ret_val = old_mvip | ret_mip;
+    }
+
+    return RISCV_EXCP_NONE;
+}
+
+/* target_ulong-wide wrapper around rmw_mvip64(); returns the old value. */
+static RISCVException rmw_mvip(CPURISCVState *env, int csrno,
+                              target_ulong *ret_val,
+                              target_ulong new_val, target_ulong wr_mask)
+{
+    uint64_t old64;
+    RISCVException excp = rmw_mvip64(env, csrno, &old64, new_val, wr_mask);
+
+    if (ret_val != NULL) {
+        *ret_val = old64;
+    }
+
+    return excp;
+}
+
+/* High-half wrapper: applies the operands to bits 63:32 via rmw_mvip64(). */
+static RISCVException rmw_mviph(CPURISCVState *env, int csrno,
+                               target_ulong *ret_val,
+                               target_ulong new_val, target_ulong wr_mask)
+{
+    const uint64_t val_hi = (uint64_t)new_val << 32;
+    const uint64_t mask_hi = (uint64_t)wr_mask << 32;
+    uint64_t old64;
+    RISCVException excp = rmw_mvip64(env, csrno, &old64, val_hi, mask_hi);
+
+    if (ret_val != NULL) {
+        *ret_val = old64 >> 32;
+    }
+    return excp;
+}
+
 /* Supervisor Trap Setup */
 static RISCVException read_sstatus_i128(CPURISCVState *env, int csrno,
                                         Int128 *val)
@@ -2404,16 +2619,36 @@ static RISCVException rmw_vsie64(CPURISCVState *env, int csrno,
                                  uint64_t *ret_val,
                                  uint64_t new_val, uint64_t wr_mask)
 {
+    uint64_t alias_mask = (LOCAL_INTERRUPTS | VS_MODE_INTERRUPTS) &
+                            env->hideleg;
+    uint64_t nalias_mask = LOCAL_INTERRUPTS & (~env->hideleg & env->hvien);
+    uint64_t rval, rval_vs, vsbits;
+    uint64_t wr_mask_vsie;
+    uint64_t wr_mask_mie;
     RISCVException ret;
-    uint64_t rval, mask = env->hideleg & VS_MODE_INTERRUPTS;
 
     /* Bring VS-level bits to correct position */
-    new_val = (new_val & (VS_MODE_INTERRUPTS >> 1)) << 1;
-    wr_mask = (wr_mask & (VS_MODE_INTERRUPTS >> 1)) << 1;
+    vsbits = new_val & (VS_MODE_INTERRUPTS >> 1);
+    new_val &= ~(VS_MODE_INTERRUPTS >> 1);
+    new_val |= vsbits << 1;
+
+    vsbits = wr_mask & (VS_MODE_INTERRUPTS >> 1);
+    wr_mask &= ~(VS_MODE_INTERRUPTS >> 1);
+    wr_mask |= vsbits << 1;
+
+    wr_mask_mie = wr_mask & alias_mask;
+    wr_mask_vsie = wr_mask & nalias_mask;
+
+    ret = rmw_mie64(env, csrno, &rval, new_val, wr_mask_mie);
+
+    rval_vs = env->vsie & nalias_mask;
+    env->vsie = (env->vsie & ~wr_mask_vsie) | (new_val & wr_mask_vsie);
 
-    ret = rmw_mie64(env, csrno, &rval, new_val, wr_mask & mask);
     if (ret_val) {
-        *ret_val = (rval & mask) >> 1;
+        rval &= alias_mask;
+        vsbits = rval & VS_MODE_INTERRUPTS;
+        rval &= ~VS_MODE_INTERRUPTS;
+        *ret_val = rval | (vsbits >> 1) | rval_vs;
     }
 
     return ret;
@@ -2454,20 +2689,37 @@ static RISCVException rmw_sie64(CPURISCVState *env, int csrno,
                                 uint64_t *ret_val,
                                 uint64_t new_val, uint64_t wr_mask)
 {
+    uint64_t nalias_mask = (S_MODE_INTERRUPTS | LOCAL_INTERRUPTS) &
+        (~env->mideleg & env->mvien);
+    uint64_t alias_mask = (S_MODE_INTERRUPTS | LOCAL_INTERRUPTS) & env->mideleg;
+    uint64_t sie_mask = wr_mask & nalias_mask;
     RISCVException ret;
-    uint64_t mask = env->mideleg & S_MODE_INTERRUPTS;
 
+    /*
+     * mideleg[i]  mvien[i]
+     *   0           0      sie[i] read-only zero.
+     *   0           1      sie[i] is a separate writable bit.
+     *   1           X      sie[i] alias of mie[i].
+     *
+     *  Both alias and non-alias masks remain the same for sip, except for
+     *  bits which are zero in both mideleg and mvien.
+     */
     if (env->virt_enabled) {
         if (env->hvictl & HVICTL_VTI) {
             return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
         }
         ret = rmw_vsie64(env, CSR_VSIE, ret_val, new_val, wr_mask);
+        if (ret_val) {
+            *ret_val &= alias_mask;
+        }
     } else {
-        ret = rmw_mie64(env, csrno, ret_val, new_val, wr_mask & mask);
-    }
+        ret = rmw_mie64(env, csrno, ret_val, new_val, wr_mask & alias_mask);
+        if (ret_val) {
+            *ret_val &= alias_mask;
+            *ret_val |= env->sie & nalias_mask;
+        }
 
-    if (ret_val) {
-        *ret_val &= mask;
+        env->sie = (env->sie & ~sie_mask) | (new_val & sie_mask);
     }
 
     return ret;
@@ -2609,21 +2861,36 @@ static RISCVException write_stval(CPURISCVState *env, int csrno,
     return RISCV_EXCP_NONE;
 }
 
+static RISCVException rmw_hvip64(CPURISCVState *env, int csrno,
+                                 uint64_t *ret_val,
+                                 uint64_t new_val, uint64_t wr_mask);
+
 static RISCVException rmw_vsip64(CPURISCVState *env, int csrno,
                                  uint64_t *ret_val,
                                  uint64_t new_val, uint64_t wr_mask)
 {
     RISCVException ret;
     uint64_t rval, mask = env->hideleg & VS_MODE_INTERRUPTS;
+    uint64_t vsbits;
 
-    /* Bring VS-level bits to correct position */
-    new_val = (new_val & (VS_MODE_INTERRUPTS >> 1)) << 1;
-    wr_mask = (wr_mask & (VS_MODE_INTERRUPTS >> 1)) << 1;
+    /* Add virtualized bits into vsip mask. */
+    mask |= env->hvien & ~env->hideleg;
 
-    ret = rmw_mip64(env, csrno, &rval, new_val,
-                    wr_mask & mask & vsip_writable_mask);
+    /* Bring VS-level bits to correct position */
+    vsbits = new_val & (VS_MODE_INTERRUPTS >> 1);
+    new_val &= ~(VS_MODE_INTERRUPTS >> 1);
+    new_val |= vsbits << 1;
+    vsbits = wr_mask & (VS_MODE_INTERRUPTS >> 1);
+    wr_mask &= ~(VS_MODE_INTERRUPTS >> 1);
+    wr_mask |= vsbits << 1;
+
+    ret = rmw_hvip64(env, csrno, &rval, new_val,
+                     wr_mask & mask & vsip_writable_mask);
     if (ret_val) {
-        *ret_val = (rval & mask) >> 1;
+        rval &= mask;
+        vsbits = rval & VS_MODE_INTERRUPTS;
+        rval &= ~VS_MODE_INTERRUPTS;
+        *ret_val = rval | (vsbits >> 1);
     }
 
     return ret;
@@ -2665,7 +2932,7 @@ static RISCVException rmw_sip64(CPURISCVState *env, int csrno,
                                 uint64_t new_val, uint64_t wr_mask)
 {
     RISCVException ret;
-    uint64_t mask = env->mideleg & sip_writable_mask;
+    uint64_t mask = (env->mideleg | env->mvien) & sip_writable_mask;
 
     if (env->virt_enabled) {
         if (env->hvictl & HVICTL_VTI) {
@@ -2673,11 +2940,12 @@ static RISCVException rmw_sip64(CPURISCVState *env, int csrno,
         }
         ret = rmw_vsip64(env, CSR_VSIP, ret_val, new_val, wr_mask);
     } else {
-        ret = rmw_mip64(env, csrno, ret_val, new_val, wr_mask & mask);
+        ret = rmw_mvip64(env, csrno, ret_val, new_val, wr_mask & mask);
     }
 
     if (ret_val) {
-        *ret_val &= env->mideleg & S_MODE_INTERRUPTS;
+        *ret_val &= (env->mideleg | env->mvien) &
+            (S_MODE_INTERRUPTS | LOCAL_INTERRUPTS);
     }
 
     return ret;
@@ -2842,6 +3110,7 @@ static int read_vstopi(CPURISCVState *env, int csrno, target_ulong *val)
 
     *val = (iid & TOPI_IID_MASK) << TOPI_IID_SHIFT;
     *val |= iprio;
+
     return RISCV_EXCP_NONE;
 }
 
@@ -2913,6 +3182,52 @@ static RISCVException write_hedeleg(CPURISCVState *env, int csrno,
     return RISCV_EXCP_NONE;
 }
 
+static RISCVException rmw_hvien64(CPURISCVState *env, int csrno,
+                                    uint64_t *ret_val,
+                                    uint64_t new_val, uint64_t wr_mask)
+{
+    uint64_t mask = wr_mask & hvien_writable_mask;
+
+    if (ret_val) {
+        *ret_val = env->hvien;
+    }
+
+    env->hvien = (env->hvien & ~mask) | (new_val & mask);
+
+    return RISCV_EXCP_NONE;
+}
+
+static RISCVException rmw_hvien(CPURISCVState *env, int csrno,
+                               target_ulong *ret_val,
+                               target_ulong new_val, target_ulong wr_mask)
+{
+    uint64_t rval;
+    RISCVException ret;
+
+    ret = rmw_hvien64(env, csrno, &rval, new_val, wr_mask);
+    if (ret_val) {
+        *ret_val = rval;
+    }
+
+    return ret;
+}
+
+static RISCVException rmw_hvienh(CPURISCVState *env, int csrno,
+                                   target_ulong *ret_val,
+                                   target_ulong new_val, target_ulong wr_mask)
+{
+    uint64_t rval;
+    RISCVException ret;
+
+    ret = rmw_hvien64(env, csrno, &rval,
+        ((uint64_t)new_val) << 32, ((uint64_t)wr_mask) << 32);
+    if (ret_val) {
+        *ret_val = rval >> 32;
+    }
+
+    return ret;
+}
+
 static RISCVException rmw_hideleg64(CPURISCVState *env, int csrno,
                                     uint64_t *ret_val,
                                     uint64_t new_val, uint64_t wr_mask)
@@ -2958,16 +3273,94 @@ static RISCVException rmw_hidelegh(CPURISCVState *env, int csrno,
     return ret;
 }
 
+/*
+ * The function is written for two use-cases:
+ * 1- To access hvip csr as is for HS-mode access.
+ * 2- To access vsip as a combination of hvip and mip for VS-mode.
+ *
+ * Both report bits 2, 6, 10 and 13:63.
+ * vsip needs to be read-only zero when both hideleg[i] and
+ * hvien[i] are zero.
+ */
 static RISCVException rmw_hvip64(CPURISCVState *env, int csrno,
                                  uint64_t *ret_val,
                                  uint64_t new_val, uint64_t wr_mask)
 {
     RISCVException ret;
+    uint64_t old_hvip;
+    uint64_t ret_mip;
+
+    /*
+     * For bits 10, 6 and 2, vsip[i] is an alias of hip[i]. These bits are
+     * present in hip, hvip and mip. Where mip[i] is alias of hip[i] and hvip[i]
+     * is OR'ed in hip[i] to inject virtual interrupts from hypervisor. These
+     * bits are actually being maintained in mip so we read them from there.
+     * This gives us a single source of truth and allows for an easier
+     * implementation.
+     *
+     * For bits 13:63 we have:
+     *
+     * hideleg[i]  hvien[i]
+     *   0           0      No delegation. vsip[i] readonly zero.
+     *   0           1      vsip[i] is alias of hvip[i], sip bypassed.
+     *   1           X      vsip[i] is alias of sip[i], hvip bypassed.
+     *
+     *  alias_mask denotes the bits that come from sip (mip here given we
+     *  maintain all bits there). nalias_mask denotes bits that come from
+     *  hvip.
+     */
+    uint64_t alias_mask = (env->hideleg | ~env->hvien) | VS_MODE_INTERRUPTS;
+    uint64_t nalias_mask = (~env->hideleg & env->hvien);
+    uint64_t wr_mask_hvip;
+    uint64_t wr_mask_mip;
+
+    /*
+     * Both alias and non-alias masks remain the same for vsip except:
+     *  1- For VS* bits if they are zero in hideleg.
+     *  2- For 13:63 bits if they are zero in both hideleg and hvien.
+     */
+    if (csrno == CSR_VSIP) {
+        /* zero-out VS* bits that are not delegated to VS mode. */
+        alias_mask &= (env->hideleg | ~VS_MODE_INTERRUPTS);
+
+        /*
+         * zero-out 13:63 bits that are zero in both hideleg and hvien.
+         * nalias_mask cannot contain any VS* bits, so only the second
+         * condition applies to it.
+         */
+        nalias_mask &= (env->hideleg | env->hvien);
+        alias_mask &= (env->hideleg | env->hvien);
+    }
+
+    wr_mask_hvip = wr_mask & nalias_mask & hvip_writable_mask;
+    wr_mask_mip = wr_mask & alias_mask & hvip_writable_mask;
+
+    /* Aliased bits, bits 10, 6, 2 need to come from mip. */
+    ret = rmw_mip64(env, csrno, &ret_mip, new_val, wr_mask_mip);
+    if (ret != RISCV_EXCP_NONE) {
+        return ret;
+    }
+
+    old_hvip = env->hvip;
+
+    if (wr_mask_hvip) {
+        env->hvip = (env->hvip & ~wr_mask_hvip) | (new_val & wr_mask_hvip);
+
+        /*
+         * hvip is a separate source from mip, so the interrupt must be raised
+         * from here. Normally this happens from riscv_cpu_update_mip.
+         */
+        riscv_cpu_interrupt(env);
+    }
 
-    ret = rmw_mip64(env, csrno, ret_val, new_val,
-                    wr_mask & hvip_writable_mask);
     if (ret_val) {
-        *ret_val &= VS_MODE_INTERRUPTS;
+        /* Take the aliased bits from mip. */
+        ret_mip &= alias_mask;
+
+        /* Take in non-delegated 13:63 bits from hvip. */
+        old_hvip &= nalias_mask;
+
+        *ret_val = ret_mip | old_hvip;
     }
 
     return ret;
@@ -3858,7 +4251,7 @@ static inline RISCVException riscv_csrrw_check(CPURISCVState *env,
     int csr_min_priv = csr_ops[csrno].min_priv_ver;
 
     /* ensure the CSR extension is enabled */
-    if (!riscv_cpu_cfg(env)->ext_icsr) {
+    if (!riscv_cpu_cfg(env)->ext_zicsr) {
         return RISCV_EXCP_ILLEGAL_INST;
     }
 
@@ -4165,14 +4558,14 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
     [CSR_MTOPI]    = { "mtopi",    aia_any, read_mtopi },
 
     /* Virtual Interrupts for Supervisor Level (AIA) */
-    [CSR_MVIEN]    = { "mvien",    aia_any, read_zero, write_ignore },
-    [CSR_MVIP]     = { "mvip",     aia_any, read_zero, write_ignore },
+    [CSR_MVIEN]    = { "mvien",    aia_any, NULL, NULL, rmw_mvien   },
+    [CSR_MVIP]     = { "mvip",     aia_any, NULL, NULL, rmw_mvip    },
 
     /* Machine-Level High-Half CSRs (AIA) */
     [CSR_MIDELEGH] = { "midelegh", aia_any32, NULL, NULL, rmw_midelegh },
     [CSR_MIEH]     = { "mieh",     aia_any32, NULL, NULL, rmw_mieh     },
-    [CSR_MVIENH]   = { "mvienh",   aia_any32, read_zero,  write_ignore },
-    [CSR_MVIPH]    = { "mviph",    aia_any32, read_zero,  write_ignore },
+    [CSR_MVIENH]   = { "mvienh",   aia_any32, NULL, NULL, rmw_mvienh   },
+    [CSR_MVIPH]    = { "mviph",    aia_any32, NULL, NULL, rmw_mviph    },
     [CSR_MIPH]     = { "miph",     aia_any32, NULL, NULL, rmw_miph     },
 
     /* Execution environment configuration */
@@ -4346,14 +4739,13 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
                           .min_priv_ver = PRIV_VERSION_1_12_0                },
 
     /* Virtual Interrupts and Interrupt Priorities (H-extension with AIA) */
-    [CSR_HVIEN]       = { "hvien",       aia_hmode, read_zero, write_ignore },
+    [CSR_HVIEN]       = { "hvien",       aia_hmode, NULL, NULL, rmw_hvien },
     [CSR_HVICTL]      = { "hvictl",      aia_hmode, read_hvictl,
                           write_hvictl                                      },
     [CSR_HVIPRIO1]    = { "hviprio1",    aia_hmode, read_hviprio1,
                           write_hviprio1                                    },
     [CSR_HVIPRIO2]    = { "hviprio2",    aia_hmode, read_hviprio2,
                           write_hviprio2                                    },
-
     /*
      * VS-Level Window to Indirectly Accessed Registers (H-extension with AIA)
      */
@@ -4368,8 +4760,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
     /* Hypervisor and VS-Level High-Half CSRs (H-extension with AIA) */
     [CSR_HIDELEGH]    = { "hidelegh",    aia_hmode32, NULL, NULL,
                           rmw_hidelegh                                      },
-    [CSR_HVIENH]      = { "hvienh",      aia_hmode32, read_zero,
-                          write_ignore                                      },
+    [CSR_HVIENH]      = { "hvienh",      aia_hmode32, NULL, NULL, rmw_hvienh },
     [CSR_HVIPH]       = { "hviph",       aia_hmode32, NULL, NULL, rmw_hviph },
     [CSR_HVIPRIO1H]   = { "hviprio1h",   aia_hmode32, read_hviprio1h,
                           write_hviprio1h                                   },
@@ -4379,7 +4770,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
     [CSR_VSIPH]       = { "vsiph",       aia_hmode32, NULL, NULL, rmw_vsiph },
 
     /* Physical Memory Protection */
-    [CSR_MSECCFG]    = { "mseccfg",  epmp, read_mseccfg, write_mseccfg,
+    [CSR_MSECCFG]    = { "mseccfg",   have_mseccfg, read_mseccfg, write_mseccfg,
                          .min_priv_ver = PRIV_VERSION_1_11_0           },
     [CSR_PMPCFG0]    = { "pmpcfg0",   pmp, read_pmpcfg,  write_pmpcfg  },
     [CSR_PMPCFG1]    = { "pmpcfg1",   pmp, read_pmpcfg,  write_pmpcfg  },
diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
index 524bede865..58b3ace0fe 100644
--- a/target/riscv/gdbstub.c
+++ b/target/riscv/gdbstub.c
@@ -342,7 +342,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
         g_assert_not_reached();
     }
 
-    if (cpu->cfg.ext_icsr) {
+    if (cpu->cfg.ext_zicsr) {
         int base_reg = cs->gdb_num_regs;
         gdb_register_coprocessor(cs, riscv_gdb_get_csr, riscv_gdb_set_csr,
                                  riscv_gen_dynamic_csr_xml(cs, base_reg),
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index 25cb60558a..faf6d65064 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -799,7 +799,7 @@ static bool trans_fence(DisasContext *ctx, arg_fence *a)
 
 static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a)
 {
-    if (!ctx->cfg_ptr->ext_ifencei) {
+    if (!ctx->cfg_ptr->ext_zifencei) {
         return false;
     }
 
diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc
index e691519ed7..3801c16829 100644
--- a/target/riscv/insn_trans/trans_rvvk.c.inc
+++ b/target/riscv/insn_trans/trans_rvvk.c.inc
@@ -112,24 +112,27 @@ GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check)
         return false;                                            \
     }
 
-static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a)
+static bool zvkb_vv_check(DisasContext *s, arg_rmrr *a)
 {
-    return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true;
+    return opivv_check(s, a) &&
+           (s->cfg_ptr->ext_zvbb == true || s->cfg_ptr->ext_zvkb == true);
 }
 
-static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a)
+static bool zvkb_vx_check(DisasContext *s, arg_rmrr *a)
 {
-    return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true;
+    return opivx_check(s, a) &&
+           (s->cfg_ptr->ext_zvbb == true || s->cfg_ptr->ext_zvkb == true);
 }
 
 /* vrol.v[vx] */
-GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check)
-GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check)
+GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvkb_vv_check)
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvkb_vx_check)
 
 /* vror.v[vxi] */
-GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check)
-GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check)
-GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check)
+GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvkb_vv_check)
+GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvkb_vx_check)
+GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri,
+                           zvkb_vx_check)
 
 #define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK)                     \
     static bool trans_##NAME(DisasContext *s, arg_rmrr *a)               \
@@ -147,8 +150,8 @@ GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check
     }
 
 /* vandn.v[vx] */
-GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check)
-GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check)
+GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvkb_vv_check)
+GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvkb_vx_check)
 
 #define GEN_OPIV_TRANS(NAME, CHECK)                                        \
     static bool trans_##NAME(DisasContext *s, arg_rmr *a)                  \
@@ -188,8 +191,16 @@ static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a)
            vext_check_ss(s, a->rd, a->rs2, a->vm);
 }
 
-GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check)
-GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check)
+static bool zvkb_opiv_check(DisasContext *s, arg_rmr *a)
+{
+    return (s->cfg_ptr->ext_zvbb == true || s->cfg_ptr->ext_zvkb == true) &&
+           require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_ss(s, a->rd, a->rs2, a->vm);
+}
+
+GEN_OPIV_TRANS(vbrev8_v, zvkb_opiv_check)
+GEN_OPIV_TRANS(vrev8_v, zvkb_opiv_check)
 GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check)
 GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check)
 GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check)
diff --git a/target/riscv/insn_trans/trans_rvzicbo.c.inc b/target/riscv/insn_trans/trans_rvzicbo.c.inc
index e5a7704f54..d5d7095903 100644
--- a/target/riscv/insn_trans/trans_rvzicbo.c.inc
+++ b/target/riscv/insn_trans/trans_rvzicbo.c.inc
@@ -16,16 +16,16 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define REQUIRE_ZICBOM(ctx) do {    \
-    if (!ctx->cfg_ptr->ext_icbom) { \
-        return false;               \
-    }                               \
+#define REQUIRE_ZICBOM(ctx) do {     \
+    if (!ctx->cfg_ptr->ext_zicbom) { \
+        return false;                \
+    }                                \
 } while (0)
 
-#define REQUIRE_ZICBOZ(ctx) do {    \
-    if (!ctx->cfg_ptr->ext_icboz) { \
-        return false;               \
-    }                               \
+#define REQUIRE_ZICBOZ(ctx) do {     \
+    if (!ctx->cfg_ptr->ext_zicboz) { \
+        return false;                \
+    }                                \
 } while (0)
 
 static bool trans_cbo_clean(DisasContext *ctx, arg_cbo_clean *a)
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 26e68c7ab4..78fa1fa162 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -140,6 +140,19 @@ static KVMCPUConfig kvm_misa_ext_cfgs[] = {
     KVM_MISA_CFG(RVM, KVM_RISCV_ISA_EXT_M),
 };
 
+static void kvm_cpu_get_misa_ext_cfg(Object *obj, Visitor *v,
+                                     const char *name,
+                                     void *opaque, Error **errp)
+{
+    KVMCPUConfig *misa_ext_cfg = opaque;
+    target_ulong misa_bit = misa_ext_cfg->offset;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    CPURISCVState *env = &cpu->env;
+    bool value = env->misa_ext_mask & misa_bit;
+
+    visit_type_bool(v, name, &value, errp);
+}
+
 static void kvm_cpu_set_misa_ext_cfg(Object *obj, Visitor *v,
                                      const char *name,
                                      void *opaque, Error **errp)
@@ -213,13 +226,20 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs)
      .kvm_reg_id = _reg_id}
 
 static KVMCPUConfig kvm_multi_ext_cfgs[] = {
-    KVM_EXT_CFG("zicbom", ext_icbom, KVM_RISCV_ISA_EXT_ZICBOM),
-    KVM_EXT_CFG("zicboz", ext_icboz, KVM_RISCV_ISA_EXT_ZICBOZ),
+    KVM_EXT_CFG("zicbom", ext_zicbom, KVM_RISCV_ISA_EXT_ZICBOM),
+    KVM_EXT_CFG("zicboz", ext_zicboz, KVM_RISCV_ISA_EXT_ZICBOZ),
+    KVM_EXT_CFG("zicntr", ext_zicntr, KVM_RISCV_ISA_EXT_ZICNTR),
+    KVM_EXT_CFG("zicsr", ext_zicsr, KVM_RISCV_ISA_EXT_ZICSR),
+    KVM_EXT_CFG("zifencei", ext_zifencei, KVM_RISCV_ISA_EXT_ZIFENCEI),
     KVM_EXT_CFG("zihintpause", ext_zihintpause, KVM_RISCV_ISA_EXT_ZIHINTPAUSE),
+    KVM_EXT_CFG("zihpm", ext_zihpm, KVM_RISCV_ISA_EXT_ZIHPM),
+    KVM_EXT_CFG("zba", ext_zba, KVM_RISCV_ISA_EXT_ZBA),
     KVM_EXT_CFG("zbb", ext_zbb, KVM_RISCV_ISA_EXT_ZBB),
+    KVM_EXT_CFG("zbs", ext_zbs, KVM_RISCV_ISA_EXT_ZBS),
     KVM_EXT_CFG("ssaia", ext_ssaia, KVM_RISCV_ISA_EXT_SSAIA),
     KVM_EXT_CFG("sstc", ext_sstc, KVM_RISCV_ISA_EXT_SSTC),
     KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL),
+    KVM_EXT_CFG("svnapot", ext_svnapot, KVM_RISCV_ISA_EXT_SVNAPOT),
     KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT),
 };
 
@@ -244,6 +264,17 @@ static uint32_t kvm_cpu_cfg_get(RISCVCPU *cpu,
     return *ext_enabled;
 }
 
+static void kvm_cpu_get_multi_ext_cfg(Object *obj, Visitor *v,
+                                      const char *name,
+                                      void *opaque, Error **errp)
+{
+    KVMCPUConfig *multi_ext_cfg = opaque;
+    RISCVCPU *cpu = RISCV_CPU(obj);
+    bool value = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
+
+    visit_type_bool(v, name, &value, errp);
+}
+
 static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v,
                                       const char *name,
                                       void *opaque, Error **errp)
@@ -346,6 +377,15 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
     }
 }
 
+static void cpu_get_cfg_unavailable(Object *obj, Visitor *v,
+                                    const char *name,
+                                    void *opaque, Error **errp)
+{
+    bool value = false;
+
+    visit_type_bool(v, name, &value, errp);
+}
+
 static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
                                     const char *name,
                                     void *opaque, Error **errp)
@@ -376,7 +416,8 @@ static void riscv_cpu_add_kvm_unavail_prop(Object *obj, const char *prop_name)
      * to enable any of them.
      */
     object_property_add(obj, prop_name, "bool",
-                        NULL, cpu_set_cfg_unavailable,
+                        cpu_get_cfg_unavailable,
+                        cpu_set_cfg_unavailable,
                         NULL, (void *)prop_name);
 }
 
@@ -406,7 +447,7 @@ static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj)
         misa_cfg->description = riscv_get_misa_ext_description(bit);
 
         object_property_add(cpu_obj, misa_cfg->name, "bool",
-                            NULL,
+                            kvm_cpu_get_misa_ext_cfg,
                             kvm_cpu_set_misa_ext_cfg,
                             NULL, misa_cfg);
         object_property_set_description(cpu_obj, misa_cfg->name,
@@ -422,7 +463,7 @@ static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj)
         KVMCPUConfig *multi_cfg = &kvm_multi_ext_cfgs[i];
 
         object_property_add(cpu_obj, multi_cfg->name, "bool",
-                            NULL,
+                            kvm_cpu_get_multi_ext_cfg,
                             kvm_cpu_set_multi_ext_cfg,
                             NULL, multi_cfg);
     }
@@ -804,11 +845,11 @@ static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu,
         kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
     }
 
-    if (cpu->cfg.ext_icbom) {
+    if (cpu->cfg.ext_zicbom) {
         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
     }
 
-    if (cpu->cfg.ext_icboz) {
+    if (cpu->cfg.ext_zicboz) {
         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
     }
 }
@@ -897,11 +938,11 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
         kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
     }
 
-    if (cpu->cfg.ext_icbom) {
+    if (cpu->cfg.ext_zicbom) {
         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
     }
 
-    if (cpu->cfg.ext_icboz) {
+    if (cpu->cfg.ext_zicboz) {
         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
     }
 }
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index c7c862cdd3..fdde243e04 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -79,8 +79,8 @@ static bool hyper_needed(void *opaque)
 
 static const VMStateDescription vmstate_hyper = {
     .name = "cpu/hyper",
-    .version_id = 2,
-    .minimum_version_id = 2,
+    .version_id = 3,
+    .minimum_version_id = 3,
     .needed = hyper_needed,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL(env.hstatus, RISCVCPU),
@@ -92,6 +92,8 @@ static const VMStateDescription vmstate_hyper = {
         VMSTATE_UINTTL(env.hgatp, RISCVCPU),
         VMSTATE_UINTTL(env.hgeie, RISCVCPU),
         VMSTATE_UINTTL(env.hgeip, RISCVCPU),
+        VMSTATE_UINT64(env.hvien, RISCVCPU),
+        VMSTATE_UINT64(env.hvip, RISCVCPU),
         VMSTATE_UINT64(env.htimedelta, RISCVCPU),
         VMSTATE_UINT64(env.vstimecmp, RISCVCPU),
 
@@ -106,6 +108,7 @@ static const VMStateDescription vmstate_hyper = {
         VMSTATE_UINTTL(env.vstval, RISCVCPU),
         VMSTATE_UINTTL(env.vsatp, RISCVCPU),
         VMSTATE_UINTTL(env.vsiselect, RISCVCPU),
+        VMSTATE_UINT64(env.vsie, RISCVCPU),
 
         VMSTATE_UINTTL(env.mtval2, RISCVCPU),
         VMSTATE_UINTTL(env.mtinst, RISCVCPU),
@@ -313,7 +316,7 @@ static bool pmu_needed(void *opaque)
 {
     RISCVCPU *cpu = opaque;
 
-    return cpu->cfg.pmu_num;
+    return (cpu->cfg.pmu_mask > 0);
 }
 
 static const VMStateDescription vmstate_pmu_ctr_state = {
@@ -351,8 +354,8 @@ static const VMStateDescription vmstate_jvt = {
 
 const VMStateDescription vmstate_riscv_cpu = {
     .name = "cpu",
-    .version_id = 8,
-    .minimum_version_id = 8,
+    .version_id = 9,
+    .minimum_version_id = 9,
     .post_load = riscv_cpu_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINTTL_ARRAY(env.gpr, RISCVCPU, 32),
@@ -379,6 +382,9 @@ const VMStateDescription vmstate_riscv_cpu = {
         VMSTATE_UINT64(env.mip, RISCVCPU),
         VMSTATE_UINT64(env.miclaim, RISCVCPU),
         VMSTATE_UINT64(env.mie, RISCVCPU),
+        VMSTATE_UINT64(env.mvien, RISCVCPU),
+        VMSTATE_UINT64(env.mvip, RISCVCPU),
+        VMSTATE_UINT64(env.sie, RISCVCPU),
         VMSTATE_UINT64(env.mideleg, RISCVCPU),
         VMSTATE_UINTTL(env.satp, RISCVCPU),
         VMSTATE_UINTTL(env.stval, RISCVCPU),
diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index 5e60c26031..162e88a90a 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -91,7 +91,7 @@ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val)
     if (pmp_index < MAX_RISCV_PMPS) {
         bool locked = true;
 
-        if (riscv_cpu_cfg(env)->epmp) {
+        if (riscv_cpu_cfg(env)->ext_smepmp) {
             /* mseccfg.RLB is set */
             if (MSECCFG_RLB_ISSET(env)) {
                 locked = false;
@@ -123,6 +123,11 @@ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val)
         if (locked) {
             qemu_log_mask(LOG_GUEST_ERROR, "ignoring pmpcfg write - locked\n");
         } else if (env->pmp_state.pmp[pmp_index].cfg_reg != val) {
+            /* If !mseccfg.MML then ignore writes with encoding RW=01 */
+            if ((val & PMP_WRITE) && !(val & PMP_READ) &&
+                !MSECCFG_MML_ISSET(env)) {
+                val &= ~(PMP_WRITE | PMP_READ);
+            }
             env->pmp_state.pmp[pmp_index].cfg_reg = val;
             pmp_update_rule_addr(env, pmp_index);
             return true;
@@ -135,6 +140,16 @@ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val)
     return false;
 }
 
+void pmp_unlock_entries(CPURISCVState *env)
+{
+    uint32_t pmp_num = pmp_get_num_rules(env);
+    int i;
+
+    for (i = 0; i < pmp_num; i++) {
+        env->pmp_state.pmp[i].cfg_reg &= ~(PMP_LOCK | PMP_AMATCH);
+    }
+}
+
 static void pmp_decode_napot(target_ulong a, target_ulong *sa,
                              target_ulong *ea)
 {
@@ -340,9 +355,9 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr,
 
         /*
          * Convert the PMP permissions to match the truth table in the
-         * ePMP spec.
+         * Smepmp spec.
          */
-        const uint8_t epmp_operation =
+        const uint8_t smepmp_operation =
             ((env->pmp_state.pmp[i].cfg_reg & PMP_LOCK) >> 4) |
             ((env->pmp_state.pmp[i].cfg_reg & PMP_READ) << 2) |
             (env->pmp_state.pmp[i].cfg_reg & PMP_WRITE) |
@@ -367,7 +382,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr,
                  * If mseccfg.MML Bit set, do the enhanced pmp priv check
                  */
                 if (mode == PRV_M) {
-                    switch (epmp_operation) {
+                    switch (smepmp_operation) {
                     case 0:
                     case 1:
                     case 4:
@@ -398,7 +413,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr,
                         g_assert_not_reached();
                     }
                 } else {
-                    switch (epmp_operation) {
+                    switch (smepmp_operation) {
                     case 0:
                     case 8:
                     case 9:
@@ -574,7 +589,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val)
         }
     }
 
-    if (riscv_cpu_cfg(env)->epmp) {
+    if (riscv_cpu_cfg(env)->ext_smepmp) {
         /* Sticky bits */
         val |= (env->mseccfg & (MSECCFG_MMWP | MSECCFG_MML));
         if ((val ^ env->mseccfg) & (MSECCFG_MMWP | MSECCFG_MML)) {
diff --git a/target/riscv/pmp.h b/target/riscv/pmp.h
index cf5c99f8e6..9af8614cd4 100644
--- a/target/riscv/pmp.h
+++ b/target/riscv/pmp.h
@@ -28,6 +28,7 @@ typedef enum {
     PMP_READ  = 1 << 0,
     PMP_WRITE = 1 << 1,
     PMP_EXEC  = 1 << 2,
+    PMP_AMATCH = (3 << 3),
     PMP_LOCK  = 1 << 7
 } pmp_priv_t;
 
@@ -81,6 +82,7 @@ void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index);
 void pmp_update_rule_nums(CPURISCVState *env);
 uint32_t pmp_get_num_rules(CPURISCVState *env);
 int pmp_priv_to_page_prot(pmp_priv_t pmp_priv);
+void pmp_unlock_entries(CPURISCVState *env);
 
 #define MSECCFG_MML_ISSET(env) get_field(env->mseccfg, MSECCFG_MML)
 #define MSECCFG_MMWP_ISSET(env) get_field(env->mseccfg, MSECCFG_MMWP)
diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
index 36f6307d28..0e7d58b8a5 100644
--- a/target/riscv/pmu.c
+++ b/target/riscv/pmu.c
@@ -18,14 +18,13 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/error-report.h"
 #include "cpu.h"
 #include "pmu.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/device_tree.h"
 
 #define RISCV_TIMEBASE_FREQ 1000000000 /* 1Ghz */
-#define MAKE_32BIT_MASK(shift, length) \
-        (((uint32_t)(~0UL) >> (32 - (length))) << (shift))
 
 /*
  * To keep it simple, any event can be mapped to any programmable counters in
@@ -34,13 +33,9 @@
  * to provide the correct value as well. Heterogeneous PMU per hart is not
  * supported yet. Thus, number of counters are same across all harts.
  */
-void riscv_pmu_generate_fdt_node(void *fdt, int num_ctrs, char *pmu_name)
+void riscv_pmu_generate_fdt_node(void *fdt, uint32_t cmask, char *pmu_name)
 {
     uint32_t fdt_event_ctr_map[15] = {};
-    uint32_t cmask;
-
-    /* All the programmable counters can map to any event */
-    cmask = MAKE_32BIT_MASK(3, num_ctrs);
 
    /*
     * The event encoding is specified in the SBI specification
@@ -188,7 +183,7 @@ int riscv_pmu_incr_ctr(RISCVCPU *cpu, enum riscv_pmu_event_idx event_idx)
     CPURISCVState *env = &cpu->env;
     gpointer value;
 
-    if (!cpu->cfg.pmu_num) {
+    if (!cpu->cfg.pmu_mask) {
         return 0;
     }
     value = g_hash_table_lookup(cpu->pmu_event_ctr_map,
@@ -434,22 +429,23 @@ int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value, uint32_t ctr_idx)
 }
 
 
-int riscv_pmu_init(RISCVCPU *cpu, int num_counters)
+void riscv_pmu_init(RISCVCPU *cpu, Error **errp)
 {
-    if (num_counters > (RV_MAX_MHPMCOUNTERS - 3)) {
-        return -1;
+    if (cpu->cfg.pmu_mask & (COUNTEREN_CY | COUNTEREN_TM | COUNTEREN_IR)) {
+        error_setg(errp, "\"pmu-mask\" contains invalid bits (0-2) set");
+        return;
+    }
+
+    if (ctpop32(cpu->cfg.pmu_mask) > (RV_MAX_MHPMCOUNTERS - 3)) {
+        error_setg(errp, "Number of counters exceeds maximum available");
+        return;
     }
 
     cpu->pmu_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
     if (!cpu->pmu_event_ctr_map) {
-        /* PMU support can not be enabled */
-        qemu_log_mask(LOG_UNIMP, "PMU events can't be supported\n");
-        cpu->cfg.pmu_num = 0;
-        return -1;
+        error_setg(errp, "Unable to allocate PMU event hash table");
+        return;
     }
 
-    /* Create a bitmask of available programmable counters */
-    cpu->pmu_avail_ctrs = MAKE_32BIT_MASK(3, num_counters);
-
-    return 0;
+    cpu->pmu_avail_ctrs = cpu->cfg.pmu_mask;
 }
diff --git a/target/riscv/pmu.h b/target/riscv/pmu.h
index 2bfb71ba87..505fc850d3 100644
--- a/target/riscv/pmu.h
+++ b/target/riscv/pmu.h
@@ -17,16 +17,17 @@
  */
 
 #include "cpu.h"
+#include "qapi/error.h"
 
 bool riscv_pmu_ctr_monitor_instructions(CPURISCVState *env,
                                         uint32_t target_ctr);
 bool riscv_pmu_ctr_monitor_cycles(CPURISCVState *env,
                                   uint32_t target_ctr);
 void riscv_pmu_timer_cb(void *priv);
-int riscv_pmu_init(RISCVCPU *cpu, int num_counters);
+void riscv_pmu_init(RISCVCPU *cpu, Error **errp);
 int riscv_pmu_update_event_map(CPURISCVState *env, uint64_t value,
                                uint32_t ctr_idx);
 int riscv_pmu_incr_ctr(RISCVCPU *cpu, enum riscv_pmu_event_idx event_idx);
-void riscv_pmu_generate_fdt_node(void *fdt, int num_counters, char *pmu_name);
+void riscv_pmu_generate_fdt_node(void *fdt, uint32_t cmask, char *pmu_name);
 int riscv_pmu_setup_timer(CPURISCVState *env, uint64_t value,
                           uint32_t ctr_idx);
diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c
index 5ecff1afb3..2f2dbae7c8 100644
--- a/target/riscv/riscv-qmp-cmds.c
+++ b/target/riscv/riscv-qmp-cmds.c
@@ -24,8 +24,17 @@
 
 #include "qemu/osdep.h"
 
+#include "qapi/error.h"
 #include "qapi/qapi-commands-machine-target.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/visitor.h"
+#include "qom/qom-qobject.h"
+#include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
 #include "cpu-qom.h"
+#include "cpu.h"
 
 static void riscv_cpu_add_definition(gpointer data, gpointer user_data)
 {
@@ -55,3 +64,154 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
 
     return cpu_list;
 }
+
+/* Set @errp if riscv_cpu_accelerator_compatible() rejects @cpu. */
+static void riscv_check_if_cpu_available(RISCVCPU *cpu, Error **errp)
+{
+    if (!riscv_cpu_accelerator_compatible(cpu)) {
+        g_autofree char *cpu_name = riscv_cpu_get_name(cpu);
+
+        error_setg(errp, "'%s' CPU not available with %s", cpu_name,
+                   kvm_enabled() ? "kvm" : "tcg");
+    }
+}
+
+/* Store @obj's property @name in @qdict_out, if @obj has such a property. */
+static void riscv_obj_add_qdict_prop(Object *obj, QDict *qdict_out,
+                                     const char *name)
+{
+    ObjectProperty *found = object_property_find(obj, name);
+
+    if (!found) {
+        return;
+    }
+    assert(found->get);
+    qdict_put_obj(qdict_out, name,
+                  object_property_get_qobject(obj, name, &error_abort));
+}
+
+/* Add to @qdict_out each property named in @arr (ends at a NULL name). */
+static void riscv_obj_add_multiext_props(Object *obj, QDict *qdict_out,
+                                         const RISCVCPUMultiExtConfig *arr)
+{
+    for (; arr->name != NULL; arr++) {
+        riscv_obj_add_qdict_prop(obj, qdict_out, arr->name);
+    }
+}
+
+/*
+ * Set each property listed in @qdict_in on @obj through an input visitor
+ * over @props, then run riscv_cpu_finalize_features().  Processing stops
+ * at the first failure, which is handed to @errp.
+ */
+static void riscv_cpuobj_validate_qdict_in(Object *obj, QObject *props,
+                                           const QDict *qdict_in,
+                                           Error **errp)
+{
+    Visitor *v = qobject_input_visitor_new(props);
+    const QDictEntry *entry;
+    Error *err = NULL;
+
+    if (!visit_start_struct(v, NULL, NULL, 0, &err)) {
+        goto out;
+    }
+
+    entry = qdict_first(qdict_in);
+    while (entry) {
+        object_property_find_err(obj, entry->key, &err);
+        if (!err) {
+            object_property_set(obj, entry->key, v, &err);
+        }
+        if (err) {
+            goto out;
+        }
+        entry = qdict_next(qdict_in, entry);
+    }
+
+    visit_check_struct(v, &err);
+    if (!err) {
+        riscv_cpu_finalize_features(RISCV_CPU(obj), &err);
+    }
+    if (!err) {
+        visit_end_struct(v, NULL);
+    }
+
+out:
+    error_propagate(errp, err);
+    visit_free(v);
+}
+
+/*
+ * QMP query-cpu-model-expansion handler: instantiate the CPU model named
+ * by @model, apply @model->props if given, and report the properties
+ * named in the three multi-extension tables plus "mmu" and "pmp".
+ * Only CPU_MODEL_EXPANSION_TYPE_FULL is accepted; on failure NULL is
+ * returned and @errp is set.
+ */
+CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
+                                                     CpuModelInfo *model,
+                                                     Error **errp)
+{
+    CpuModelExpansionInfo *result = NULL;
+    const QDict *props_in = NULL;
+    QDict *props_out;
+    ObjectClass *klass;
+    Object *cpu_obj;
+    Error *err = NULL;
+
+    if (type != CPU_MODEL_EXPANSION_TYPE_FULL) {
+        error_setg(errp, "The requested expansion type is not supported");
+        return NULL;
+    }
+
+    klass = cpu_class_by_name(TYPE_RISCV_CPU, model->name);
+    if (!klass) {
+        error_setg(errp, "The CPU type '%s' is not a known RISC-V CPU type",
+                   model->name);
+        return NULL;
+    }
+
+    if (model->props) {
+        props_in = qobject_to(QDict, model->props);
+        if (!props_in) {
+            error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict");
+            return NULL;
+        }
+    }
+
+    cpu_obj = object_new(object_class_get_name(klass));
+
+    riscv_check_if_cpu_available(RISCV_CPU(cpu_obj), &err);
+    if (!err && props_in) {
+        riscv_cpuobj_validate_qdict_in(cpu_obj, model->props, props_in, &err);
+    }
+    if (err) {
+        error_propagate(errp, err);
+        goto out;
+    }
+
+    result = g_new0(CpuModelExpansionInfo, 1);
+    result->model = g_malloc0(sizeof(*result->model));
+    result->model->name = g_strdup(model->name);
+
+    props_out = qdict_new();
+
+    riscv_obj_add_multiext_props(cpu_obj, props_out, riscv_cpu_extensions);
+    riscv_obj_add_multiext_props(cpu_obj, props_out,
+                                 riscv_cpu_experimental_exts);
+    riscv_obj_add_multiext_props(cpu_obj, props_out, riscv_cpu_vendor_exts);
+
+    /* Add our CPU boolean options too */
+    riscv_obj_add_qdict_prop(cpu_obj, props_out, "mmu");
+    riscv_obj_add_qdict_prop(cpu_obj, props_out, "pmp");
+
+    if (qdict_size(props_out)) {
+        result->model->props = QOBJECT(props_out);
+    } else {
+        qobject_unref(props_out);
+    }
+
+out:
+    object_unref(cpu_obj);
+    return result;
+}
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index a28918ab30..08adad304d 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -278,23 +278,23 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         !(riscv_has_ext(env, RVI) && riscv_has_ext(env, RVM) &&
           riscv_has_ext(env, RVA) && riscv_has_ext(env, RVF) &&
           riscv_has_ext(env, RVD) &&
-          cpu->cfg.ext_icsr && cpu->cfg.ext_ifencei)) {
+          cpu->cfg.ext_zicsr && cpu->cfg.ext_zifencei)) {
 
-        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_icsr)) &&
-            !cpu->cfg.ext_icsr) {
+        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zicsr)) &&
+            !cpu->cfg.ext_zicsr) {
             error_setg(errp, "RVG requires Zicsr but user set Zicsr to false");
             return;
         }
 
-        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_ifencei)) &&
-            !cpu->cfg.ext_ifencei) {
+        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zifencei)) &&
+            !cpu->cfg.ext_zifencei) {
             error_setg(errp, "RVG requires Zifencei but user set "
                        "Zifencei to false");
             return;
         }
 
-        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_icsr), true);
-        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_ifencei), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zicsr), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zifencei), true);
 
         env->misa_ext |= RVI | RVM | RVA | RVF | RVD;
         env->misa_ext_mask |= RVI | RVM | RVA | RVF | RVD;
@@ -329,7 +329,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
-    if (riscv_has_ext(env, RVF) && !cpu->cfg.ext_icsr) {
+    if (riscv_has_ext(env, RVF) && !cpu->cfg.ext_zicsr) {
         error_setg(errp, "F extension requires Zicsr");
         return;
     }
@@ -434,7 +434,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
     }
 
     if (cpu->cfg.ext_zfinx) {
-        if (!cpu->cfg.ext_icsr) {
+        if (!cpu->cfg.ext_zicsr) {
             error_setg(errp, "Zfinx extension requires Zicsr");
             return;
         }
@@ -494,18 +494,60 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         return;
     }
 
-    if (cpu->cfg.ext_zcmt && !cpu->cfg.ext_icsr) {
+    if (cpu->cfg.ext_zcmt && !cpu->cfg.ext_zicsr) {
         error_setg(errp, "Zcmt extension requires Zicsr extension");
         return;
     }
 
     /*
+     * Shorthand vector crypto extensions
+     */
+    if (cpu->cfg.ext_zvknc) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
+    }
+
+    if (cpu->cfg.ext_zvkng) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true);
+    }
+
+    if (cpu->cfg.ext_zvkn) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkned), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvknhb), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true);
+    }
+
+    if (cpu->cfg.ext_zvksc) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
+    }
+
+    if (cpu->cfg.ext_zvksg) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true);
+    }
+
+    if (cpu->cfg.ext_zvks) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksed), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksh), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true);
+    }
+
+    if (cpu->cfg.ext_zvkt) {
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbb), true);
+        cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
+    }
+
+    /*
      * In principle Zve*x would also suffice here, were they supported
      * in qemu
      */
-    if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned ||
-         cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) &&
-        !cpu->cfg.ext_zve32f) {
+    if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkb || cpu->cfg.ext_zvkg ||
+         cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed ||
+         cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
         error_setg(errp,
                    "Vector crypto extensions require V or Zve* extensions");
         return;
@@ -541,6 +583,27 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
         cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksh), true);
     }
 
+    if (cpu->cfg.ext_zicntr && !cpu->cfg.ext_zicsr) {
+        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zicntr))) {
+            error_setg(errp, "zicntr requires zicsr");
+            return;
+        }
+        cpu->cfg.ext_zicntr = false;
+    }
+
+    if (cpu->cfg.ext_zihpm && !cpu->cfg.ext_zicsr) {
+        if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zihpm))) {
+            error_setg(errp, "zihpm requires zicsr");
+            return;
+        }
+        cpu->cfg.ext_zihpm = false;
+    }
+
+    if (!cpu->cfg.ext_zihpm) {
+        cpu->cfg.pmu_mask = 0;
+        cpu->pmu_avail_ctrs = 0;
+    }
+
     /*
      * Disable isa extensions based on priv spec after we
      * validated and set everything we need.
@@ -548,6 +611,44 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
     riscv_cpu_disable_priv_spec_isa_exts(cpu);
 }
 
+/*
+ * Finalize the TCG feature set of @cpu: run riscv_cpu_validate_priv_spec()
+ * and riscv_cpu_validate_misa_priv(), reject Smepmp without PMP, then run
+ * riscv_cpu_validate_set_extensions().  The first failure sets @errp and
+ * skips the remaining steps.
+ */
+void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
+{
+    Error *err = NULL;
+
+    riscv_cpu_validate_priv_spec(cpu, &err);
+    if (err) {
+        goto fail;
+    }
+
+    riscv_cpu_validate_misa_priv(&cpu->env, &err);
+    if (err) {
+        goto fail;
+    }
+
+    /* Enhanced PMP should only be available on harts with PMP support */
+    if (cpu->cfg.ext_smepmp && !cpu->cfg.pmp) {
+        error_setg(errp, "Invalid configuration: Smepmp requires PMP support");
+        return;
+    }
+
+    riscv_cpu_validate_set_extensions(cpu, &err);
+
+fail:
+    error_propagate(errp, err);
+}
+
+/* True unless @cpu is an instance of TYPE_RISCV_CPU_HOST. */
+bool riscv_cpu_tcg_compatible(RISCVCPU *cpu)
+{
+    return !object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST);
+}
+
 static bool riscv_cpu_is_generic(Object *cpu_obj)
 {
     return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL;
@@ -563,10 +664,9 @@ static bool riscv_cpu_is_generic(Object *cpu_obj)
 static bool tcg_cpu_realize(CPUState *cs, Error **errp)
 {
     RISCVCPU *cpu = RISCV_CPU(cs);
-    CPURISCVState *env = &cpu->env;
     Error *local_err = NULL;
 
-    if (object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) {
+    if (!riscv_cpu_tcg_compatible(cpu)) {
         g_autofree char *name = riscv_cpu_get_name(cpu);
         error_setg(errp, "'%s' CPU is not compatible with TCG acceleration",
                    name);
@@ -579,46 +679,32 @@ static bool tcg_cpu_realize(CPUState *cs, Error **errp)
         return false;
     }
 
-    riscv_cpu_validate_priv_spec(cpu, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return false;
-    }
-
-    riscv_cpu_validate_misa_priv(env, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return false;
-    }
-
-    if (cpu->cfg.epmp && !cpu->cfg.pmp) {
-        /*
-         * Enhanced PMP should only be available
-         * on harts with PMP support
-         */
-        error_setg(errp, "Invalid configuration: EPMP requires PMP support");
-        return false;
-    }
-
-    riscv_cpu_validate_set_extensions(cpu, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return false;
-    }
-
 #ifndef CONFIG_USER_ONLY
+    CPURISCVState *env = &cpu->env;
+
     CPU(cs)->tcg_cflags |= CF_PCREL;
 
     if (cpu->cfg.ext_sstc) {
         riscv_timer_init(cpu);
     }
 
-    if (cpu->cfg.pmu_num) {
-        if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) && cpu->cfg.ext_sscofpmf) {
+    if (cpu->cfg.pmu_mask) {
+        riscv_pmu_init(cpu, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return false;
+        }
+
+        if (cpu->cfg.ext_sscofpmf) {
             cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           riscv_pmu_timer_cb, cpu);
         }
-     }
+    }
+
+    /* With H-Ext, VSSIP, VSTIP, VSEIP and SGEIP are hardwired to one. */
+    if (riscv_has_ext(env, RVH)) {
+        env->mideleg = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP | MIP_SGEIP;
+    }
 #endif
 
     return true;
diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h
index 630184759d..f7b32417f8 100644
--- a/target/riscv/tcg/tcg-cpu.h
+++ b/target/riscv/tcg/tcg-cpu.h
@@ -23,5 +23,7 @@
 #include "cpu.h"
 
 void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp);
+void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp);
+bool riscv_cpu_tcg_compatible(RISCVCPU *cpu);
 
 #endif
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 98727ea53b..33f15a564a 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -71,7 +71,6 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_andc_i32         1
 #define TCG_TARGET_HAS_orc_i32          1
@@ -85,7 +84,6 @@ typedef enum {
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     1
 #define TCG_TARGET_HAS_extract2_i32     1
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -108,7 +106,6 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
 #define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_andc_i64         1
 #define TCG_TARGET_HAS_orc_i64          1
@@ -122,7 +119,6 @@ typedef enum {
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     1
 #define TCG_TARGET_HAS_extract2_i64     1
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 311a985209..a712cc80ad 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -101,7 +101,6 @@ extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_andc_i32         1
 #define TCG_TARGET_HAS_orc_i32          0
@@ -115,7 +114,6 @@ extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_extract_i32      use_armv7_instructions
 #define TCG_TARGET_HAS_sextract_i32     use_armv7_instructions
 #define TCG_TARGET_HAS_extract2_i32     1
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   1
 #define TCG_TARGET_HAS_mulu2_i32        1
 #define TCG_TARGET_HAS_muls2_i32        1
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 8417ea4899..fa34deec47 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -135,7 +135,6 @@ typedef enum {
 #define TCG_TARGET_HAS_ext16u_i32       1
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_andc_i32         have_bmi1
 #define TCG_TARGET_HAS_orc_i32          0
@@ -149,7 +148,6 @@ typedef enum {
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     1
 #define TCG_TARGET_HAS_extract2_i32     1
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -172,7 +170,6 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap16_i64      1
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_not_i64          1
 #define TCG_TARGET_HAS_andc_i64         have_bmi1
 #define TCG_TARGET_HAS_orc_i64          0
@@ -186,7 +183,6 @@ typedef enum {
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_extract2_i64     1
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index 77d62e38e7..cae6c2aad6 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -38,4 +38,4 @@ C_O1_I2(w, w, wM)
 C_O1_I2(w, w, wA)
 C_O1_I3(w, w, w, w)
 C_O1_I4(r, rZ, rJ, rZ, rZ)
-C_O2_I1(r, r, r)
+C_N2_I1(r, r, r)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index ccf133db4b..a588fb3085 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -32,8 +32,6 @@
 #include "../tcg-ldst.c.inc"
 #include <asm/hwcap.h>
 
-bool use_lsx_instructions;
-
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "zero",
@@ -1103,13 +1101,18 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi
         }
     } else {
         /* Otherwise use a pair of LD/ST. */
-        tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index);
+        TCGReg base = h.base;
+        if (h.index != TCG_REG_ZERO) {
+            base = TCG_REG_TMP0;
+            tcg_out_opc_add_d(s, base, h.base, h.index);
+        }
         if (is_ld) {
-            tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0);
-            tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8);
+            tcg_debug_assert(base != data_lo);
+            tcg_out_opc_ld_d(s, data_lo, base, 0);
+            tcg_out_opc_ld_d(s, data_hi, base, 8);
         } else {
-            tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0);
-            tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8);
+            tcg_out_opc_st_d(s, data_lo, base, 0);
+            tcg_out_opc_st_d(s, data_hi, base, 8);
         }
     }
 
@@ -1438,6 +1441,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    case INDEX_op_neg_i32:
+        tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1);
+        break;
+    case INDEX_op_neg_i64:
+        tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1);
+        break;
+
     case INDEX_op_mul_i32:
         tcg_out_opc_mul_w(s, a0, a1, a2);
         break;
@@ -2049,7 +2059,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_qemu_ld_a32_i128:
     case INDEX_op_qemu_ld_a64_i128:
-        return C_O2_I1(r, r, r);
+        return C_N2_I1(r, r, r);
 
     case INDEX_op_qemu_st_a32_i128:
     case INDEX_op_qemu_st_a64_i128:
@@ -2073,6 +2083,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_extrl_i64_i32:
     case INDEX_op_extrh_i64_i32:
     case INDEX_op_ext_i32_i64:
+    case INDEX_op_neg_i32:
+    case INDEX_op_neg_i64:
     case INDEX_op_not_i32:
     case INDEX_op_not_i64:
     case INDEX_op_extract_i32:
@@ -2309,10 +2321,6 @@ static void tcg_target_init(TCGContext *s)
         exit(EXIT_FAILURE);
     }
 
-    if (hwcap & HWCAP_LOONGARCH_LSX) {
-        use_lsx_instructions = 1;
-    }
-
     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
     tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
 
@@ -2328,7 +2336,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
 
-    if (use_lsx_instructions) {
+    if (cpuinfo & CPUINFO_LSX) {
         tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
         tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
         tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 03017672f6..9c70ebfefc 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -29,6 +29,8 @@
 #ifndef LOONGARCH_TCG_TARGET_H
 #define LOONGARCH_TCG_TARGET_H
 
+#include "host/cpuinfo.h"
+
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 64
 
@@ -85,8 +87,6 @@ typedef enum {
     TCG_VEC_TMP0 = TCG_REG_V23,
 } TCGReg;
 
-extern bool use_lsx_instructions;
-
 /* used for function call generation */
 #define TCG_REG_CALL_STACK              TCG_REG_SP
 #define TCG_TARGET_STACK_ALIGN          16
@@ -97,7 +97,6 @@ extern bool use_lsx_instructions;
 #define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL
 
 /* optional instructions */
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   0
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_rem_i32          1
@@ -120,7 +119,6 @@ extern bool use_lsx_instructions;
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i32          0
 #define TCG_TARGET_HAS_andc_i32         1
 #define TCG_TARGET_HAS_orc_i32          1
 #define TCG_TARGET_HAS_eqv_i32          0
@@ -134,7 +132,6 @@ extern bool use_lsx_instructions;
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
 /* 64-bit operations */
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   0
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          1
@@ -155,7 +152,6 @@ extern bool use_lsx_instructions;
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
 #define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_neg_i64          0
 #define TCG_TARGET_HAS_andc_i64         1
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_eqv_i64          0
@@ -171,10 +167,10 @@ extern bool use_lsx_instructions;
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_qemu_ldst_i128   use_lsx_instructions
+#define TCG_TARGET_HAS_qemu_ldst_i128   (cpuinfo & CPUINFO_LSX)
 
 #define TCG_TARGET_HAS_v64              0
-#define TCG_TARGET_HAS_v128             use_lsx_instructions
+#define TCG_TARGET_HAS_v128             (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v256             0
 
 #define TCG_TARGET_HAS_not_vec          1
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 328984ccff..8328dbdecc 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -871,71 +871,83 @@ static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al,
     }
 }
 
-/* Bit 0 set if inversion required; bit 1 set if swapping required.  */
-#define MIPS_CMP_INV  1
-#define MIPS_CMP_SWAP 2
-
-static const uint8_t mips_cmp_map[16] = {
-    [TCG_COND_LT]  = 0,
-    [TCG_COND_LTU] = 0,
-    [TCG_COND_GE]  = MIPS_CMP_INV,
-    [TCG_COND_GEU] = MIPS_CMP_INV,
-    [TCG_COND_LE]  = MIPS_CMP_INV | MIPS_CMP_SWAP,
-    [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP,
-    [TCG_COND_GT]  = MIPS_CMP_SWAP,
-    [TCG_COND_GTU] = MIPS_CMP_SWAP,
-};
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2)
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                               TCGReg arg1, TCGReg arg2)
 {
-    MIPSInsn s_opc = OPC_SLTU;
-    int cmp_map;
+    int flags = 0;
 
     switch (cond) {
-    case TCG_COND_EQ:
-        if (arg2 != 0) {
-            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
-            arg1 = ret;
-        }
-        tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1);
+    case TCG_COND_EQ:    /* -> NE  */
+    case TCG_COND_GE:    /* -> LT  */
+    case TCG_COND_GEU:   /* -> LTU */
+    case TCG_COND_LE:    /* -> GT  */
+    case TCG_COND_LEU:   /* -> GTU */
+        cond = tcg_invert_cond(cond);
+        flags ^= SETCOND_INV;
         break;
+    default:
+        break;
+    }
 
+    switch (cond) {
     case TCG_COND_NE:
-        if (arg2 != 0) {
-            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
-            arg1 = ret;
+        flags |= SETCOND_NEZ;
+        if (arg2 == 0) {
+            return arg1 | flags;
         }
-        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1);
+        tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
         break;
-
     case TCG_COND_LT:
-    case TCG_COND_GE:
-    case TCG_COND_LE:
-    case TCG_COND_GT:
-        s_opc = OPC_SLT;
-        /* FALLTHRU */
-
+        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+        break;
     case TCG_COND_LTU:
-    case TCG_COND_GEU:
-    case TCG_COND_LEU:
+        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+        break;
+    case TCG_COND_GT:
+        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
+        break;
     case TCG_COND_GTU:
-        cmp_map = mips_cmp_map[cond];
-        if (cmp_map & MIPS_CMP_SWAP) {
-            TCGReg t = arg1;
-            arg1 = arg2;
-            arg2 = t;
-        }
-        tcg_out_opc_reg(s, s_opc, ret, arg1, arg2);
-        if (cmp_map & MIPS_CMP_INV) {
-            tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        }
+        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
         break;
+    default:
+        g_assert_not_reached();
+    }
+    return ret | flags;
+}
+
+static void tcg_out_setcond_end(TCGContext *s, TCGReg ret, int tmpflags)
+{
+    if (tmpflags != ret) {
+        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
 
-     default:
-         g_assert_not_reached();
-         break;
-     }
+        switch (tmpflags & SETCOND_FLAGS) {
+        case SETCOND_INV:
+            /* Intermediate result is boolean: simply invert. */
+            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
+            break;
+        case SETCOND_NEZ:
+            /* Intermediate result is zero/non-zero: test != 0. */
+            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
+            break;
+        case SETCOND_NEZ | SETCOND_INV:
+            /* Intermediate result is zero/non-zero: test == 0. */
+            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg arg1, TCGReg arg2)
+{
+    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2);
+    tcg_out_setcond_end(s, ret, tmpflags);
 }
 
 static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
@@ -948,9 +960,7 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
         [TCG_COND_GE] = OPC_BGEZ,
     };
 
-    MIPSInsn s_opc = OPC_SLTU;
-    MIPSInsn b_opc;
-    int cmp_map;
+    MIPSInsn b_opc = 0;
 
     switch (cond) {
     case TCG_COND_EQ:
@@ -959,7 +969,6 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
     case TCG_COND_NE:
         b_opc = OPC_BNE;
         break;
-
     case TCG_COND_LT:
     case TCG_COND_GT:
     case TCG_COND_LE:
@@ -968,133 +977,76 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
             b_opc = b_zero[cond];
             arg2 = arg1;
             arg1 = 0;
-            break;
         }
-        s_opc = OPC_SLT;
-        /* FALLTHRU */
-
-    case TCG_COND_LTU:
-    case TCG_COND_GTU:
-    case TCG_COND_LEU:
-    case TCG_COND_GEU:
-        cmp_map = mips_cmp_map[cond];
-        if (cmp_map & MIPS_CMP_SWAP) {
-            TCGReg t = arg1;
-            arg1 = arg2;
-            arg2 = t;
-        }
-        tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2);
-        b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE);
-        arg1 = TCG_TMP0;
-        arg2 = TCG_REG_ZERO;
         break;
-
     default:
-        g_assert_not_reached();
         break;
     }
 
-    tcg_out_opc_br(s, b_opc, arg1, arg2);
-    tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
-    tcg_out_nop(s);
-}
+    if (b_opc == 0) {
+        int tmpflags = tcg_out_setcond_int(s, cond, TCG_TMP0, arg1, arg2);
 
-static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1,
-                                 TCGReg al, TCGReg ah,
-                                 TCGReg bl, TCGReg bh)
-{
-    /* Merge highpart comparison into AH.  */
-    if (bh != 0) {
-        if (ah != 0) {
-            tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh);
-            ah = tmp0;
-        } else {
-            ah = bh;
-        }
-    }
-    /* Merge lowpart comparison into AL.  */
-    if (bl != 0) {
-        if (al != 0) {
-            tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl);
-            al = tmp1;
-        } else {
-            al = bl;
-        }
-    }
-    /* Merge high and low part comparisons into AL.  */
-    if (ah != 0) {
-        if (al != 0) {
-            tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al);
-            al = tmp0;
-        } else {
-            al = ah;
-        }
+        arg2 = TCG_REG_ZERO;
+        arg1 = tmpflags & ~SETCOND_FLAGS;
+        b_opc = tmpflags & SETCOND_INV ? OPC_BEQ : OPC_BNE;
     }
-    return al;
+
+    tcg_out_reloc(s, s->code_ptr, R_MIPS_PC16, l, 0);
+    tcg_out_opc_br(s, b_opc, arg1, arg2);
+    tcg_out_nop(s);
 }
 
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
-                             TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                                TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
 {
-    TCGReg tmp0 = TCG_TMP0;
-    TCGReg tmp1 = ret;
-
-    tcg_debug_assert(ret != TCG_TMP0);
-    if (ret == ah || ret == bh) {
-        tcg_debug_assert(ret != TCG_TMP1);
-        tmp1 = TCG_TMP1;
-    }
+    int flags = 0;
 
     switch (cond) {
     case TCG_COND_EQ:
+        flags |= SETCOND_INV;
+        /* fall through */
     case TCG_COND_NE:
-        tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh);
-        tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO);
+        flags |= SETCOND_NEZ;
+        tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, al, bl);
+        tcg_out_opc_reg(s, OPC_XOR, TCG_TMP1, ah, bh);
+        tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1);
         break;
 
     default:
-        tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh);
-        tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl);
-        tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0);
-        tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh);
-        tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0);
+        tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP0, ah, bh);
+        tcg_out_setcond(s, tcg_unsigned_cond(cond), TCG_TMP1, al, bl);
+        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP0);
+        tcg_out_setcond(s, tcg_high_cond(cond), TCG_TMP0, ah, bh);
+        tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1);
         break;
     }
+    return ret | flags;
+}
+
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+                             TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+{
+    int tmpflags = tcg_out_setcond2_int(s, cond, ret, al, ah, bl, bh);
+    tcg_out_setcond_end(s, ret, tmpflags);
 }
 
 static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                             TCGReg bl, TCGReg bh, TCGLabel *l)
 {
-    TCGCond b_cond = TCG_COND_NE;
-    TCGReg tmp = TCG_TMP1;
-
-    /* With branches, we emit between 4 and 9 insns with 2 or 3 branches.
-       With setcond, we emit between 3 and 10 insns and only 1 branch,
-       which ought to get better branch prediction.  */
-     switch (cond) {
-     case TCG_COND_EQ:
-     case TCG_COND_NE:
-        b_cond = cond;
-        tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh);
-        break;
-
-    default:
-        /* Minimize code size by preferring a compare not requiring INV.  */
-        if (mips_cmp_map[cond] & MIPS_CMP_INV) {
-            cond = tcg_invert_cond(cond);
-            b_cond = TCG_COND_EQ;
-        }
-        tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh);
-        break;
-    }
+    int tmpflags = tcg_out_setcond2_int(s, cond, TCG_TMP0, al, ah, bl, bh);
+    TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
+    MIPSInsn b_opc = tmpflags & SETCOND_INV ? OPC_BEQ : OPC_BNE;
 
-    tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l);
+    tcg_out_reloc(s, s->code_ptr, R_MIPS_PC16, l, 0);
+    tcg_out_opc_br(s, b_opc, tmp, TCG_REG_ZERO);
+    tcg_out_nop(s);
 }
 
 static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
                             TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2)
 {
-    bool eqz = false;
+    int tmpflags;
+    bool eqz;
 
     /* If one of the values is zero, put it last to match SEL*Z instructions */
     if (use_mips32r6_instructions && v1 == 0) {
@@ -1103,27 +1055,9 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
         cond = tcg_invert_cond(cond);
     }
 
-    switch (cond) {
-    case TCG_COND_EQ:
-        eqz = true;
-        /* FALLTHRU */
-    case TCG_COND_NE:
-        if (c2 != 0) {
-            tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2);
-            c1 = TCG_TMP0;
-        }
-        break;
-
-    default:
-        /* Minimize code size by preferring a compare not requiring INV.  */
-        if (mips_cmp_map[cond] & MIPS_CMP_INV) {
-            cond = tcg_invert_cond(cond);
-            eqz = true;
-        }
-        tcg_out_setcond(s, cond, TCG_TMP0, c1, c2);
-        c1 = TCG_TMP0;
-        break;
-    }
+    tmpflags = tcg_out_setcond_int(s, cond, TCG_TMP0, c1, c2);
+    c1 = tmpflags & ~SETCOND_FLAGS;
+    eqz = tmpflags & SETCOND_INV;
 
     if (use_mips32r6_instructions) {
         MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ;
@@ -1136,13 +1070,22 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
         if (v2 != 0) {
             tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
         }
-    } else {
-        MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN;
+        return;
+    }
 
-        tcg_out_opc_reg(s, m_opc, ret, v1, c1);
+    /* This should be guaranteed via constraints */
+    tcg_debug_assert(v2 == ret);
 
-        /* This should be guaranteed via constraints */
-        tcg_debug_assert(v2 == ret);
+    if (use_movnz_instructions) {
+        MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN;
+        tcg_out_opc_reg(s, m_opc, ret, v1, c1);
+    } else {
+        /* Invert the condition in order to branch over the move. */
+        MIPSInsn b_opc = eqz ? OPC_BNE : OPC_BEQ;
+        tcg_out_opc_imm(s, b_opc, c1, TCG_REG_ZERO, 2);
+        tcg_out_nop(s);
+        /* Open-code tcg_out_mov, without the nop-move check. */
+        tcg_out_opc_reg(s, OPC_OR, ret, v1, TCG_REG_ZERO);
     }
 }
 
@@ -1977,6 +1920,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0);
         break;
 
+    case INDEX_op_neg_i32:
+        i1 = OPC_SUBU;
+        goto do_unary;
+    case INDEX_op_neg_i64:
+        i1 = OPC_DSUBU;
+        goto do_unary;
     case INDEX_op_not_i32:
     case INDEX_op_not_i64:
         i1 = OPC_NOR;
@@ -2201,6 +2150,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ld16u_i32:
     case INDEX_op_ld16s_i32:
     case INDEX_op_ld_i32:
+    case INDEX_op_neg_i32:
     case INDEX_op_not_i32:
     case INDEX_op_bswap16_i32:
     case INDEX_op_bswap32_i32:
@@ -2214,6 +2164,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ld32s_i64:
     case INDEX_op_ld32u_i64:
     case INDEX_op_ld_i64:
+    case INDEX_op_neg_i64:
     case INDEX_op_not_i64:
     case INDEX_op_bswap16_i64:
     case INDEX_op_bswap32_i64:
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c0576f66d7..b98ffae1d0 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -154,7 +154,6 @@ extern bool use_mips32r2_instructions;
 #endif
 
 /* optional instructions detected at runtime */
-#define TCG_TARGET_HAS_movcond_i32      use_movnz_instructions
 #define TCG_TARGET_HAS_bswap16_i32      use_mips32r2_instructions
 #define TCG_TARGET_HAS_deposit_i32      use_mips32r2_instructions
 #define TCG_TARGET_HAS_extract_i32      use_mips32r2_instructions
@@ -169,7 +168,6 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_movcond_i64      use_movnz_instructions
 #define TCG_TARGET_HAS_bswap16_i64      use_mips32r2_instructions
 #define TCG_TARGET_HAS_bswap32_i64      use_mips32r2_instructions
 #define TCG_TARGET_HAS_bswap64_i64      use_mips32r2_instructions
@@ -186,12 +184,10 @@ extern bool use_mips32r2_instructions;
 #endif
 
 /* optional instructions automatically implemented */
-#define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
 #define TCG_TARGET_HAS_ext16u_i32       0 /* andi rt, rs, 0xffff */
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_neg_i64          0 /* sub  rd, zero, rt   */
 #define TCG_TARGET_HAS_ext8u_i64        0 /* andi rt, rs, 0xff   */
 #define TCG_TARGET_HAS_ext16u_i64       0 /* andi rt, rs, 0xffff */
 #endif
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2db5177c32..f2d01654c5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -25,6 +25,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/int128.h"
+#include "qemu/interval-tree.h"
 #include "tcg/tcg-op-common.h"
 #include "tcg-internal.h"
 
@@ -37,10 +38,18 @@
         glue(glue(case INDEX_op_, x), _i64):    \
         glue(glue(case INDEX_op_, x), _vec)
 
+typedef struct MemCopyInfo {
+    IntervalTreeNode itree;
+    QSIMPLEQ_ENTRY (MemCopyInfo) next;
+    TCGTemp *ts;
+    TCGType type;
+} MemCopyInfo;
+
 typedef struct TempOptInfo {
     bool is_const;
     TCGTemp *prev_copy;
     TCGTemp *next_copy;
+    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
     uint64_t val;
     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
     uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
@@ -51,6 +60,9 @@ typedef struct OptContext {
     TCGOp *prev_mb;
     TCGTempSet temps_used;
 
+    IntervalTreeRoot mem_copy;
+    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
+
     /* In flight values from optimization. */
     uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
@@ -122,25 +134,9 @@ static inline bool ts_is_copy(TCGTemp *ts)
     return ts_info(ts)->next_copy != ts;
 }
 
-/* Reset TEMP's state, possibly removing the temp for the list of copies.  */
-static void reset_ts(TCGTemp *ts)
-{
-    TempOptInfo *ti = ts_info(ts);
-    TempOptInfo *pi = ts_info(ti->prev_copy);
-    TempOptInfo *ni = ts_info(ti->next_copy);
-
-    ni->prev_copy = ti->prev_copy;
-    pi->next_copy = ti->next_copy;
-    ti->next_copy = ts;
-    ti->prev_copy = ts;
-    ti->is_const = false;
-    ti->z_mask = -1;
-    ti->s_mask = 0;
-}
-
-static void reset_temp(TCGArg arg)
+static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
 {
-    reset_ts(arg_temp(arg));
+    return a->kind < b->kind ? b : a;
 }
 
 /* Initialize and activate a temporary.  */
@@ -162,6 +158,7 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
 
     ti->next_copy = ts;
     ti->prev_copy = ts;
+    QSIMPLEQ_INIT(&ti->mem_copy);
     if (ts->kind == TEMP_CONST) {
         ti->is_const = true;
         ti->val = ts->val;
@@ -174,30 +171,133 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
     }
 }
 
-static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
+static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
+{
+    IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
+    return r ? container_of(r, MemCopyInfo, itree) : NULL;
+}
+
+static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
+{
+    IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
+    return r ? container_of(r, MemCopyInfo, itree) : NULL;
+}
+
+static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
+{
+    TCGTemp *ts = mc->ts;
+    TempOptInfo *ti = ts_info(ts);
+
+    interval_tree_remove(&mc->itree, &ctx->mem_copy);
+    QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
+    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
+}
+
+static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
+{
+    while (true) {
+        MemCopyInfo *mc = mem_copy_first(ctx, s, l);
+        if (!mc) {
+            break;
+        }
+        remove_mem_copy(ctx, mc);
+    }
+}
+
+static void remove_mem_copy_all(OptContext *ctx)
+{
+    remove_mem_copy_in(ctx, 0, -1);
+    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
+}
+
+static TCGTemp *find_better_copy(TCGTemp *ts)
 {
-    TCGTemp *i, *g, *l;
+    TCGTemp *i, *ret;
 
     /* If this is already readonly, we can't do better. */
     if (temp_readonly(ts)) {
         return ts;
     }
 
-    g = l = NULL;
+    ret = ts;
     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
-        if (temp_readonly(i)) {
-            return i;
-        } else if (i->kind > ts->kind) {
-            if (i->kind == TEMP_GLOBAL) {
-                g = i;
-            } else if (i->kind == TEMP_TB) {
-                l = i;
+        ret = cmp_better_copy(ret, i);
+    }
+    return ret;
+}
+
+static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
+{
+    TempOptInfo *si = ts_info(src_ts);
+    TempOptInfo *di = ts_info(dst_ts);
+    MemCopyInfo *mc;
+
+    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
+        tcg_debug_assert(mc->ts == src_ts);
+        mc->ts = dst_ts;
+    }
+    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
+}
+
+/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
+static void reset_ts(OptContext *ctx, TCGTemp *ts)
+{
+    TempOptInfo *ti = ts_info(ts);
+    TCGTemp *pts = ti->prev_copy;
+    TCGTemp *nts = ti->next_copy;
+    TempOptInfo *pi = ts_info(pts);
+    TempOptInfo *ni = ts_info(nts);
+
+    ni->prev_copy = ti->prev_copy;
+    pi->next_copy = ti->next_copy;
+    ti->next_copy = ts;
+    ti->prev_copy = ts;
+    ti->is_const = false;
+    ti->z_mask = -1;
+    ti->s_mask = 0;
+
+    if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
+        if (ts == nts) {
+            /* Last temp copy is being removed, so its mem copies die. */
+            MemCopyInfo *mc;
+            QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
+                interval_tree_remove(&mc->itree, &ctx->mem_copy);
             }
+            QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
+        } else {
+            move_mem_copies(find_better_copy(nts), ts);
         }
     }
+}
 
-    /* If we didn't find a better representation, return the same temp. */
-    return g ? g : l ? l : ts;
+static void reset_temp(OptContext *ctx, TCGArg arg)
+{
+    reset_ts(ctx, arg_temp(arg));
+}
+
+static void record_mem_copy(OptContext *ctx, TCGType type,
+                            TCGTemp *ts, intptr_t start, intptr_t last)
+{
+    MemCopyInfo *mc;
+    TempOptInfo *ti;
+
+    mc = QSIMPLEQ_FIRST(&ctx->mem_free);
+    if (mc) {
+        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
+    } else {
+        mc = tcg_malloc(sizeof(*mc));
+    }
+
+    memset(mc, 0, sizeof(*mc));
+    mc->itree.start = start;
+    mc->itree.last = last;
+    mc->type = type;
+    interval_tree_insert(&mc->itree, &ctx->mem_copy);
+
+    ts = find_better_copy(ts);
+    ti = ts_info(ts);
+    mc->ts = ts;
+    QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
 }
 
 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
@@ -226,6 +326,33 @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
 }
 
+static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
+{
+    MemCopyInfo *mc;
+
+    for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
+        if (mc->itree.start == s && mc->type == type) {
+            return find_better_copy(mc->ts);
+        }
+    }
+    return NULL;
+}
+
+static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
+{
+    TCGType type = ctx->type;
+    TCGTemp *ts;
+
+    if (type == TCG_TYPE_I32) {
+        val = (int32_t)val;
+    }
+
+    ts = tcg_constant_internal(type, val);
+    init_ts_info(ctx, ts);
+
+    return temp_arg(ts);
+}
+
 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 {
     TCGTemp *dst_ts = arg_temp(dst);
@@ -239,7 +366,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
         return true;
     }
 
-    reset_ts(dst_ts);
+    reset_ts(ctx, dst_ts);
     di = ts_info(dst_ts);
     si = ts_info(src_ts);
 
@@ -275,6 +402,11 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
         si->next_copy = dst_ts;
         di->is_const = si->is_const;
         di->val = si->val;
+
+        if (!QSIMPLEQ_EMPTY(&si->mem_copy)
+            && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
+            move_mem_copies(dst_ts, src_ts);
+        }
     }
     return true;
 }
@@ -282,16 +414,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                              TCGArg dst, uint64_t val)
 {
-    TCGTemp *tv;
-
-    if (ctx->type == TCG_TYPE_I32) {
-        val = (int32_t)val;
-    }
-
     /* Convert movi to mov with constant temp. */
-    tv = tcg_constant_internal(ctx->type, val);
-    init_ts_info(ctx, tv);
-    return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
+    return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
 }
 
 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
@@ -672,12 +796,10 @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
 static void copy_propagate(OptContext *ctx, TCGOp *op,
                            int nb_oargs, int nb_iargs)
 {
-    TCGContext *s = ctx->tcg;
-
     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
         if (ts_is_copy(ts)) {
-            op->args[i] = temp_arg(find_better_copy(s, ts));
+            op->args[i] = temp_arg(find_better_copy(ts));
         }
     }
 }
@@ -695,6 +817,7 @@ static void finish_folding(OptContext *ctx, TCGOp *op)
         ctx->prev_mb = NULL;
         if (!(def->flags & TCG_OPF_COND_BRANCH)) {
             memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+            remove_mem_copy_all(ctx);
         }
         return;
     }
@@ -702,7 +825,7 @@ static void finish_folding(OptContext *ctx, TCGOp *op)
     nb_oargs = def->nb_oargs;
     for (i = 0; i < nb_oargs; i++) {
         TCGTemp *ts = arg_temp(op->args[i]);
-        reset_ts(ts);
+        reset_ts(ctx, ts);
         /*
          * Save the corresponding known-zero/sign bits mask for the
          * first output argument (only one supported so far).
@@ -921,8 +1044,10 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
 
 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
-    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
-        arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
+    bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
+    bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
+
+    if (a_const && b_const) {
         uint64_t al = arg_info(op->args[2])->val;
         uint64_t ah = arg_info(op->args[3])->val;
         uint64_t bl = arg_info(op->args[4])->val;
@@ -966,6 +1091,21 @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
         tcg_opt_gen_movi(ctx, op2, rh, ah);
         return true;
     }
+
+    /* Fold sub2 r,x,i to add2 r,x,-i */
+    if (!add && b_const) {
+        uint64_t bl = arg_info(op->args[4])->val;
+        uint64_t bh = arg_info(op->args[5])->val;
+
+        /* Negate the two parts without assembling and disassembling. */
+        bl = -bl;
+        bh = ~bh + !bl;
+
+        op->opc = (ctx->type == TCG_TYPE_I32
+                   ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
+        op->args[4] = arg_new_constant(ctx, bl);
+        op->args[5] = arg_new_constant(ctx, bh);
+    }
     return false;
 }
 
@@ -1215,14 +1355,19 @@ static bool fold_call(OptContext *ctx, TCGOp *op)
 
         for (i = 0; i < nb_globals; i++) {
             if (test_bit(i, ctx->temps_used.l)) {
-                reset_ts(&ctx->tcg->temps[i]);
+                reset_ts(ctx, &ctx->tcg->temps[i]);
             }
         }
     }
 
+    /* If the function has side effects, reset mem data. */
+    if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
+        remove_mem_copy_all(ctx);
+    }
+
     /* Reset temp data for outputs. */
     for (i = 0; i < nb_oargs; i++) {
-        reset_temp(op->args[i]);
+        reset_temp(ctx, op->args[i]);
     }
 
     /* Stop optimizing MB across calls. */
@@ -1310,7 +1455,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
 
         op->opc = and_opc;
         op->args[1] = op->args[2];
-        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
+        op->args[2] = arg_new_constant(ctx, mask);
         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
         return false;
     }
@@ -1321,7 +1466,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
         uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
 
         op->opc = and_opc;
-        op->args[2] = temp_arg(tcg_constant_internal(ctx->type, mask));
+        op->args[2] = arg_new_constant(ctx, mask);
         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
         return false;
     }
@@ -2001,11 +2146,11 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
     switch (ctx->type) {
     case TCG_TYPE_I32:
         neg_op = INDEX_op_neg_i32;
-        have_neg = TCG_TARGET_HAS_neg_i32;
+        have_neg = true;
         break;
     case TCG_TYPE_I64:
         neg_op = INDEX_op_neg_i64;
-        have_neg = TCG_TARGET_HAS_neg_i64;
+        have_neg = true;
         break;
     case TCG_TYPE_V64:
     case TCG_TYPE_V128:
@@ -2038,7 +2183,19 @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
 
 static bool fold_sub(OptContext *ctx, TCGOp *op)
 {
-    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
+    if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
+        return true;
+    }
+
+    /* Fold sub r,x,i to add r,x,-i */
+    if (arg_is_const(op->args[2])) {
+        uint64_t val = arg_info(op->args[2])->val;
+
+        op->opc = (ctx->type == TCG_TYPE_I32
+                   ? INDEX_op_add_i32 : INDEX_op_add_i64);
+        op->args[2] = arg_new_constant(ctx, -val);
+    }
+    return false;
 }
 
 static bool fold_sub2(OptContext *ctx, TCGOp *op)
@@ -2077,6 +2234,96 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
     return false;
 }
 
+static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
+{
+    TCGTemp *dst, *src;
+    intptr_t ofs;
+    TCGType type;
+
+    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+        return false;
+    }
+
+    type = ctx->type;
+    ofs = op->args[2];
+    dst = arg_temp(op->args[0]);
+    src = find_mem_copy_for(ctx, type, ofs);
+    if (src && src->base_type == type) {
+        return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
+    }
+
+    reset_ts(ctx, dst);
+    record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
+    return true;
+}
+
+static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
+{
+    intptr_t ofs = op->args[2];
+    intptr_t lm1;
+
+    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+        remove_mem_copy_all(ctx);
+        return false;
+    }
+
+    switch (op->opc) {
+    CASE_OP_32_64(st8):
+        lm1 = 0;
+        break;
+    CASE_OP_32_64(st16):
+        lm1 = 1;
+        break;
+    case INDEX_op_st32_i64:
+    case INDEX_op_st_i32:
+        lm1 = 3;
+        break;
+    case INDEX_op_st_i64:
+        lm1 = 7;
+        break;
+    case INDEX_op_st_vec:
+        lm1 = tcg_type_size(ctx->type) - 1;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    remove_mem_copy_in(ctx, ofs, ofs + lm1);
+    return false;
+}
+
+static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
+{
+    TCGTemp *src;
+    intptr_t ofs, last;
+    TCGType type;
+
+    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+        fold_tcg_st(ctx, op);
+        return false;
+    }
+
+    src = arg_temp(op->args[0]);
+    ofs = op->args[2];
+    type = ctx->type;
+
+    /*
+     * Eliminate duplicate stores of a constant.
+     * This happens frequently when the target ISA zero-extends.
+     */
+    if (ts_is_const(src)) {
+        TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
+        if (src == prev) {
+            tcg_op_remove(ctx->tcg, op);
+            return true;
+        }
+    }
+
+    last = ofs + tcg_type_size(type) - 1;
+    remove_mem_copy_in(ctx, ofs, last);
+    record_mem_copy(ctx, type, src, ofs, last);
+    return false;
+}
+
 static bool fold_xor(OptContext *ctx, TCGOp *op)
 {
     if (fold_const2_commutative(ctx, op) ||
@@ -2100,6 +2347,8 @@ void tcg_optimize(TCGContext *s)
     TCGOp *op, *op_next;
     OptContext ctx = { .tcg = s };
 
+    QSIMPLEQ_INIT(&ctx.mem_free);
+
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
        If this temp is a copy of other ones then the other copies are
@@ -2221,6 +2470,21 @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_ld32u_i64:
             done = fold_tcg_ld(&ctx, op);
             break;
+        case INDEX_op_ld_i32:
+        case INDEX_op_ld_i64:
+        case INDEX_op_ld_vec:
+            done = fold_tcg_ld_memcopy(&ctx, op);
+            break;
+        CASE_OP_32_64(st8):
+        CASE_OP_32_64(st16):
+        case INDEX_op_st32_i64:
+            done = fold_tcg_st(&ctx, op);
+            break;
+        case INDEX_op_st_i32:
+        case INDEX_op_st_i64:
+        case INDEX_op_st_vec:
+            done = fold_tcg_st_memcopy(&ctx, op);
+            break;
         case INDEX_op_mb:
             done = fold_mb(&ctx, op);
             break;
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 8bfb14998e..5295e4f9ab 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -83,7 +83,6 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap16_i32      1
 #define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_andc_i32         1
 #define TCG_TARGET_HAS_orc_i32          1
 #define TCG_TARGET_HAS_eqv_i32          1
@@ -96,7 +95,6 @@ typedef enum {
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_extract2_i32     0
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   1
 #define TCG_TARGET_HAS_mulu2_i32        0
 #define TCG_TARGET_HAS_muls2_i32        0
@@ -121,7 +119,6 @@ typedef enum {
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
 #define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_andc_i64         1
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_eqv_i64          1
@@ -134,7 +131,6 @@ typedef enum {
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_extract2_i64     0
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index c1132d178f..a4edc3dc74 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -87,7 +87,6 @@ extern bool have_zbb;
 #endif
 
 /* optional instructions */
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   1
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_rem_i32          1
@@ -110,7 +109,6 @@ extern bool have_zbb;
 #define TCG_TARGET_HAS_bswap16_i32      have_zbb
 #define TCG_TARGET_HAS_bswap32_i32      have_zbb
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_andc_i32         have_zbb
 #define TCG_TARGET_HAS_orc_i32          have_zbb
 #define TCG_TARGET_HAS_eqv_i32          have_zbb
@@ -123,7 +121,6 @@ extern bool have_zbb;
 #define TCG_TARGET_HAS_setcond2         1
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   1
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          1
@@ -144,7 +141,6 @@ extern bool have_zbb;
 #define TCG_TARGET_HAS_bswap32_i64      have_zbb
 #define TCG_TARGET_HAS_bswap64_i64      have_zbb
 #define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_andc_i64         have_zbb
 #define TCG_TARGET_HAS_orc_i64          have_zbb
 #define TCG_TARGET_HAS_eqv_i64          have_zbb
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 50e12ef9d6..e69b0d2ddd 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -82,7 +82,6 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_bswap16_i32    1
 #define TCG_TARGET_HAS_bswap32_i32    1
 #define TCG_TARGET_HAS_not_i32        HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_neg_i32        1
 #define TCG_TARGET_HAS_andc_i32       HAVE_FACILITY(MISC_INSN_EXT3)
 #define TCG_TARGET_HAS_orc_i32        HAVE_FACILITY(MISC_INSN_EXT3)
 #define TCG_TARGET_HAS_eqv_i32        HAVE_FACILITY(MISC_INSN_EXT3)
@@ -95,7 +94,6 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_extract_i32    1
 #define TCG_TARGET_HAS_sextract_i32   0
 #define TCG_TARGET_HAS_extract2_i32   0
-#define TCG_TARGET_HAS_movcond_i32    1
 #define TCG_TARGET_HAS_negsetcond_i32 1
 #define TCG_TARGET_HAS_add2_i32       1
 #define TCG_TARGET_HAS_sub2_i32       1
@@ -118,7 +116,6 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_bswap32_i64    1
 #define TCG_TARGET_HAS_bswap64_i64    1
 #define TCG_TARGET_HAS_not_i64        HAVE_FACILITY(MISC_INSN_EXT3)
-#define TCG_TARGET_HAS_neg_i64        1
 #define TCG_TARGET_HAS_andc_i64       HAVE_FACILITY(MISC_INSN_EXT3)
 #define TCG_TARGET_HAS_orc_i64        HAVE_FACILITY(MISC_INSN_EXT3)
 #define TCG_TARGET_HAS_eqv_i64        HAVE_FACILITY(MISC_INSN_EXT3)
@@ -131,7 +128,6 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_extract_i64    1
 #define TCG_TARGET_HAS_sextract_i64   0
 #define TCG_TARGET_HAS_extract2_i64   0
-#define TCG_TARGET_HAS_movcond_i64    1
 #define TCG_TARGET_HAS_negsetcond_i64 1
 #define TCG_TARGET_HAS_add2_i64       1
 #define TCG_TARGET_HAS_sub2_i64       1
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 19d9df4a09..a91defd0ac 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -529,6 +529,11 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
     tcg_out_ext32u(s, rd, rs);
 }
 
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_out_ext32u(s, rd, rs);
+}
+
 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
 {
     return false;
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
index 5cfc4b4679..f8cf145266 100644
--- a/tcg/sparc64/tcg-target.h
+++ b/tcg/sparc64/tcg-target.h
@@ -91,7 +91,6 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_ext16u_i32       0
 #define TCG_TARGET_HAS_bswap16_i32      0
 #define TCG_TARGET_HAS_bswap32_i32      0
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_andc_i32         1
 #define TCG_TARGET_HAS_orc_i32          1
@@ -105,7 +104,6 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_extract_i32      0
 #define TCG_TARGET_HAS_sextract_i32     0
 #define TCG_TARGET_HAS_extract2_i32     0
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   1
 #define TCG_TARGET_HAS_add2_i32         1
 #define TCG_TARGET_HAS_sub2_i32         1
@@ -128,7 +126,6 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_bswap16_i64      0
 #define TCG_TARGET_HAS_bswap32_i64      0
 #define TCG_TARGET_HAS_bswap64_i64      0
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_not_i64          1
 #define TCG_TARGET_HAS_andc_i64         1
 #define TCG_TARGET_HAS_orc_i64          1
@@ -142,7 +139,6 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_extract_i64      0
 #define TCG_TARGET_HAS_sextract_i64     0
 #define TCG_TARGET_HAS_extract2_i64     0
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   1
 #define TCG_TARGET_HAS_add2_i64         1
 #define TCG_TARGET_HAS_sub2_i64         1
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index 40a69e6e6e..6c9d9e48db 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -83,4 +83,22 @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
 
 bool tcg_target_has_memory_bswap(MemOp memop);
 
+/*
+ * Locate or create a read-only temporary that is a constant.
+ * This kind of temporary need not be freed, but for convenience
+ * will be silently ignored by tcg_temp_free_*.
+ */
+TCGTemp *tcg_constant_internal(TCGType type, int64_t val);
+
+void tcg_gen_op1(TCGOpcode, TCGArg);
+void tcg_gen_op2(TCGOpcode, TCGArg, TCGArg);
+void tcg_gen_op3(TCGOpcode, TCGArg, TCGArg, TCGArg);
+void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
+void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
+
+void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
+void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
+void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
+
 #endif /* TCG_INTERNAL_H */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index feb2d3686b..bb88943f79 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -561,7 +561,6 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
             tcg_gen_dupi_vec(vece, t_vec, in_c);
         }
         do_dup_store(type, dofs, oprsz, maxsz, t_vec);
-        tcg_temp_free_vec(t_vec);
         return;
     }
 
@@ -1024,11 +1023,10 @@ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          bool load_dest,
                          void (*fni)(unsigned, TCGv_vec, TCGv_vec))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
         if (load_dest) {
             tcg_gen_ld_vec(t1, tcg_env, dofs + i);
@@ -1036,8 +1034,6 @@ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
         fni(vece, t1, t0);
         tcg_gen_st_vec(t1, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t0);
-    tcg_temp_free_vec(t1);
 }
 
 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
@@ -1047,11 +1043,10 @@ static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, bool load_dest,
                           void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
         if (load_dest) {
             tcg_gen_ld_vec(t1, tcg_env, dofs + i);
@@ -1059,8 +1054,6 @@ static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
         fni(vece, t1, t0, c);
         tcg_gen_st_vec(t1, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t0);
-    tcg_temp_free_vec(t1);
 }
 
 static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -1068,11 +1061,10 @@ static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           TCGv_vec c, bool scalar_first,
                           void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
         if (scalar_first) {
             fni(vece, t1, c, t0);
@@ -1081,8 +1073,6 @@ static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
         }
         tcg_gen_st_vec(t1, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t0);
-    tcg_temp_free_vec(t1);
 }
 
 /* Expand OPSZ bytes worth of three-operand operations using host vectors.  */
@@ -1091,12 +1081,11 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          uint32_t tysz, TCGType type, bool load_dest,
                          void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    TCGv_vec t2 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
         tcg_gen_ld_vec(t1, tcg_env, bofs + i);
         if (load_dest) {
@@ -1105,9 +1094,6 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
         fni(vece, t2, t0, t1);
         tcg_gen_st_vec(t2, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t2);
-    tcg_temp_free_vec(t1);
-    tcg_temp_free_vec(t0);
 }
 
 /*
@@ -1120,12 +1106,11 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec,
                                       int64_t))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    TCGv_vec t2 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
         tcg_gen_ld_vec(t1, tcg_env, bofs + i);
         if (load_dest) {
@@ -1134,9 +1119,6 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
         fni(vece, t2, t0, t1, c);
         tcg_gen_st_vec(t2, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t0);
-    tcg_temp_free_vec(t1);
-    tcg_temp_free_vec(t2);
 }
 
 /* Expand OPSZ bytes worth of four-operand operations using host vectors.  */
@@ -1146,13 +1128,12 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          void (*fni)(unsigned, TCGv_vec, TCGv_vec,
                                      TCGv_vec, TCGv_vec))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    TCGv_vec t2 = tcg_temp_new_vec(type);
-    TCGv_vec t3 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
+        TCGv_vec t3 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t1, tcg_env, aofs + i);
         tcg_gen_ld_vec(t2, tcg_env, bofs + i);
         tcg_gen_ld_vec(t3, tcg_env, cofs + i);
@@ -1162,10 +1143,6 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
             tcg_gen_st_vec(t1, tcg_env, aofs + i);
         }
     }
-    tcg_temp_free_vec(t3);
-    tcg_temp_free_vec(t2);
-    tcg_temp_free_vec(t1);
-    tcg_temp_free_vec(t0);
 }
 
 /*
@@ -1178,23 +1155,18 @@ static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           void (*fni)(unsigned, TCGv_vec, TCGv_vec,
                                      TCGv_vec, TCGv_vec, int64_t))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    TCGv_vec t2 = tcg_temp_new_vec(type);
-    TCGv_vec t3 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
+        TCGv_vec t3 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t1, tcg_env, aofs + i);
         tcg_gen_ld_vec(t2, tcg_env, bofs + i);
         tcg_gen_ld_vec(t3, tcg_env, cofs + i);
         fni(vece, t0, t1, t2, t3, c);
         tcg_gen_st_vec(t0, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t3);
-    tcg_temp_free_vec(t2);
-    tcg_temp_free_vec(t1);
-    tcg_temp_free_vec(t0);
 }
 
 /* Expand a vector two-operand operation.  */
@@ -1732,7 +1704,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
             TCGv_vec t_vec = tcg_temp_new_vec(type);
             tcg_gen_dup_mem_vec(vece, t_vec, tcg_env, aofs);
             do_dup_store(type, dofs, oprsz, maxsz, t_vec);
-            tcg_temp_free_vec(t_vec);
         } else if (vece <= MO_32) {
             TCGv_i32 in = tcg_temp_ebb_new_i32();
             switch (vece) {
@@ -1766,7 +1737,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
             for (i = (aofs == dofs) * 16; i < oprsz; i += 16) {
                 tcg_gen_st_vec(in, tcg_env, dofs + i);
             }
-            tcg_temp_free_vec(in);
         } else {
             TCGv_i64 in0 = tcg_temp_ebb_new_i64();
             TCGv_i64 in1 = tcg_temp_ebb_new_i64();
@@ -1796,7 +1766,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
             for (i = (aofs == dofs) * 32; i < oprsz; i += 32) {
                 tcg_gen_st_vec(in, tcg_env, dofs + i);
             }
-            tcg_temp_free_vec(in);
         } else if (TCG_TARGET_HAS_v128) {
             TCGv_vec in0 = tcg_temp_new_vec(TCG_TYPE_V128);
             TCGv_vec in1 = tcg_temp_new_vec(TCG_TYPE_V128);
@@ -1807,8 +1776,6 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
                 tcg_gen_st_vec(in0, tcg_env, dofs + i);
                 tcg_gen_st_vec(in1, tcg_env, dofs + i + 16);
             }
-            tcg_temp_free_vec(in0);
-            tcg_temp_free_vec(in1);
         } else {
             TCGv_i64 in[4];
             int j;
@@ -3136,15 +3103,14 @@ static void expand_2sh_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                            TCGv_i32 shift,
                            void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32))
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
-        fni(vece, t0, t0, shift);
-        tcg_gen_st_vec(t0, tcg_env, dofs + i);
+        fni(vece, t1, t0, shift);
+        tcg_gen_st_vec(t1, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t0);
 }
 
 static void
@@ -3720,18 +3686,16 @@ static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                            uint32_t bofs, uint32_t oprsz, uint32_t tysz,
                            TCGType type, TCGCond cond)
 {
-    TCGv_vec t0 = tcg_temp_new_vec(type);
-    TCGv_vec t1 = tcg_temp_new_vec(type);
-    uint32_t i;
+    for (uint32_t i = 0; i < oprsz; i += tysz) {
+        TCGv_vec t0 = tcg_temp_new_vec(type);
+        TCGv_vec t1 = tcg_temp_new_vec(type);
+        TCGv_vec t2 = tcg_temp_new_vec(type);
 
-    for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, tcg_env, aofs + i);
         tcg_gen_ld_vec(t1, tcg_env, bofs + i);
-        tcg_gen_cmp_vec(cond, vece, t0, t0, t1);
-        tcg_gen_st_vec(t0, tcg_env, dofs + i);
+        tcg_gen_cmp_vec(cond, vece, t2, t0, t1);
+        tcg_gen_st_vec(t2, tcg_env, dofs + i);
     }
-    tcg_temp_free_vec(t1);
-    tcg_temp_free_vec(t0);
 }
 
 void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 828eb9ee46..aa6bc6f57d 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -31,20 +31,26 @@
 #include "tcg-internal.h"
 
 
-void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
+/*
+ * Encourage the compiler to tail-call to a function, rather than inlining.
+ * This minimizes code size across the many call sites of these functions.
+ */
+#define NI  __attribute__((noinline))
+
+void NI tcg_gen_op1(TCGOpcode opc, TCGArg a1)
 {
     TCGOp *op = tcg_emit_op(opc, 1);
     op->args[0] = a1;
 }
 
-void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
+void NI tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
 {
     TCGOp *op = tcg_emit_op(opc, 2);
     op->args[0] = a1;
     op->args[1] = a2;
 }
 
-void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
+void NI tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
 {
     TCGOp *op = tcg_emit_op(opc, 3);
     op->args[0] = a1;
@@ -52,7 +58,7 @@ void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
     op->args[2] = a3;
 }
 
-void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
+void NI tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
 {
     TCGOp *op = tcg_emit_op(opc, 4);
     op->args[0] = a1;
@@ -61,8 +67,8 @@ void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
     op->args[3] = a4;
 }
 
-void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
-                 TCGArg a4, TCGArg a5)
+void NI tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
+                     TCGArg a4, TCGArg a5)
 {
     TCGOp *op = tcg_emit_op(opc, 5);
     op->args[0] = a1;
@@ -72,8 +78,8 @@ void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
     op->args[4] = a5;
 }
 
-void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
-                 TCGArg a4, TCGArg a5, TCGArg a6)
+void NI tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
+                     TCGArg a4, TCGArg a5, TCGArg a6)
 {
     TCGOp *op = tcg_emit_op(opc, 6);
     op->args[0] = a1;
@@ -84,8 +90,195 @@ void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
     op->args[5] = a6;
 }
 
+/*
+ * With CONFIG_DEBUG_TCG, tcgv_*_temp (reached via tcgv_*_arg) is an
+ * out-of-line assertion check.  Force tail calls to limit code expansion.
+ */
+#ifdef CONFIG_DEBUG_TCG
+# define DNI NI
+#else
+# define DNI
+#endif
+
+static void DNI tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
+{
+    tcg_gen_op1(opc, tcgv_i32_arg(a1));
+}
+
+static void DNI tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
+{
+    tcg_gen_op1(opc, tcgv_i64_arg(a1));
+}
+
+static void DNI tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
+{
+    tcg_gen_op1(opc, a1);
+}
+
+static void DNI tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
+{
+    tcg_gen_op2(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2));
+}
+
+static void DNI tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2)
+{
+    tcg_gen_op2(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2));
+}
+
+static void DNI tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1,
+                                TCGv_i32 a2, TCGv_i32 a3)
+{
+    tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), tcgv_i32_arg(a3));
+}
+
+static void DNI tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1,
+                                TCGv_i64 a2, TCGv_i64 a3)
+{
+    tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), tcgv_i64_arg(a3));
+}
+
+static void DNI tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1,
+                                 TCGv_i32 a2, TCGArg a3)
+{
+    tcg_gen_op3(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3);
+}
+
+static void DNI tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1,
+                                 TCGv_i64 a2, TCGArg a3)
+{
+    tcg_gen_op3(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3);
+}
+
+static void DNI tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
+                                    TCGv_ptr base, TCGArg offset)
+{
+    tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset);
+}
+
+static void DNI tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
+                                    TCGv_ptr base, TCGArg offset)
+{
+    tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_ptr_arg(base), offset);
+}
+
+static void DNI tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                TCGv_i32 a3, TCGv_i32 a4)
+{
+    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), tcgv_i32_arg(a4));
+}
+
+static void DNI tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                TCGv_i64 a3, TCGv_i64 a4)
+{
+    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+                tcgv_i64_arg(a3), tcgv_i64_arg(a4));
+}
+
+static void DNI tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                 TCGv_i32 a3, TCGArg a4)
+{
+    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), a4);
+}
+
+static void DNI tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                 TCGv_i64 a3, TCGArg a4)
+{
+    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+                tcgv_i64_arg(a3), a4);
+}
+
+static void DNI tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                  TCGArg a3, TCGArg a4)
+{
+    tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
+}
+
+static void DNI tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                  TCGArg a3, TCGArg a4)
+{
+    tcg_gen_op4(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2), a3, a4);
+}
+
+static void DNI tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5)
+{
+    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5));
+}
+
+static void DNI tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5)
+{
+    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+                tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5));
+}
+
+static void DNI tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                  TCGv_i32 a3, TCGArg a4, TCGArg a5)
+{
+    tcg_gen_op5(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), a4, a5);
+}
+
+static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                  TCGv_i64 a3, TCGArg a4, TCGArg a5)
+{
+    tcg_gen_op5(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+                tcgv_i64_arg(a3), a4, a5);
+}
+
+static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                TCGv_i32 a3, TCGv_i32 a4,
+                                TCGv_i32 a5, TCGv_i32 a6)
+{
+    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5),
+                tcgv_i32_arg(a6));
+}
+
+static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                TCGv_i64 a3, TCGv_i64 a4,
+                                TCGv_i64 a5, TCGv_i64 a6)
+{
+    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+                tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5),
+                tcgv_i64_arg(a6));
+}
+
+static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                 TCGv_i32 a3, TCGv_i32 a4,
+                                 TCGv_i32 a5, TCGArg a6)
+{
+    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), a6);
+}
+
+static void DNI tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+                                 TCGv_i64 a3, TCGv_i64 a4,
+                                 TCGv_i64 a5, TCGArg a6)
+{
+    tcg_gen_op6(opc, tcgv_i64_arg(a1), tcgv_i64_arg(a2),
+                tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), a6);
+}
+
+static void DNI tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+                                  TCGv_i32 a3, TCGv_i32 a4,
+                                  TCGArg a5, TCGArg a6)
+{
+    tcg_gen_op6(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2),
+                tcgv_i32_arg(a3), tcgv_i32_arg(a4), a5, a6);
+}
+
 /* Generic ops.  */
 
+void gen_set_label(TCGLabel *l)
+{
+    l->present = 1;
+    tcg_gen_op1(INDEX_op_set_label, label_arg(l));
+}
+
 static void add_last_as_label_use(TCGLabel *l)
 {
     TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
@@ -119,13 +312,40 @@ void tcg_gen_mb(TCGBar mb_type)
     }
 }
 
+void tcg_gen_plugin_cb_start(unsigned from, unsigned type, unsigned wr)
+{
+    tcg_gen_op3(INDEX_op_plugin_cb_start, from, type, wr);
+}
+
+void tcg_gen_plugin_cb_end(void)
+{
+    tcg_emit_op(INDEX_op_plugin_cb_end, 0);
+}
+
 /* 32 bit ops */
 
+void tcg_gen_discard_i32(TCGv_i32 arg)
+{
+    tcg_gen_op1_i32(INDEX_op_discard, arg);
+}
+
+void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    if (ret != arg) {
+        tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg);
+    }
+}
+
 void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
 {
     tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
 }
 
+void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     /* some cases can be optimized here */
@@ -136,11 +356,15 @@ void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
+void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
 {
-    if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
-        /* Don't recurse with tcg_gen_neg_i32.  */
-        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
+    if (arg1 == 0) {
+        tcg_gen_neg_i32(ret, arg2);
     } else {
         tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
     }
@@ -148,12 +372,17 @@ void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
 
 void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    /* some cases can be optimized here */
-    if (arg2 == 0) {
-        tcg_gen_mov_i32(ret, arg1);
-    } else {
-        tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
-    }
+    tcg_gen_addi_i32(ret, arg1, -arg2);
+}
+
+void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg);
+}
+
+void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2);
 }
 
 void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
@@ -184,6 +413,11 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
 }
 
+void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     /* Some cases can be optimized here.  */
@@ -196,6 +430,11 @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
+void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     /* Some cases can be optimized here.  */
@@ -209,6 +448,20 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
+void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_HAS_not_i32) {
+        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg);
+    } else {
+        tcg_gen_xori_i32(ret, arg, -1);
+    }
+}
+
+void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
@@ -219,6 +472,11 @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
+void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
@@ -229,6 +487,11 @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
+void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
@@ -297,6 +560,11 @@ void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret,
     tcg_gen_negsetcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
 }
 
+void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2);
+}
+
 void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     if (arg2 == 0) {
@@ -864,17 +1132,8 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
         tcg_gen_mov_i32(ret, v1);
     } else if (cond == TCG_COND_NEVER) {
         tcg_gen_mov_i32(ret, v2);
-    } else if (TCG_TARGET_HAS_movcond_i32) {
-        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
     } else {
-        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
-        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
-        tcg_gen_negsetcond_i32(cond, t0, c1, c2);
-        tcg_gen_and_i32(t1, v1, t0);
-        tcg_gen_andc_i32(ret, v2, t0);
-        tcg_gen_or_i32(ret, ret, t1);
-        tcg_temp_free_i32(t0);
-        tcg_temp_free_i32(t1);
+        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
     }
 }
 
@@ -1155,154 +1414,281 @@ void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
     tcg_temp_free_i32(t);
 }
 
-/* 64-bit ops */
+void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset);
+}
+
+void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset);
+}
+
+void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset);
+}
+
+void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset);
+}
+
+void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset);
+}
+
+void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset);
+}
+
+void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset);
+}
 
-#if TCG_TARGET_REG_BITS == 32
-/* These are all inline for TCG_TARGET_REG_BITS == 64.  */
+void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset);
+}
+
+
+/* 64-bit ops */
 
 void tcg_gen_discard_i64(TCGv_i64 arg)
 {
-    tcg_gen_discard_i32(TCGV_LOW(arg));
-    tcg_gen_discard_i32(TCGV_HIGH(arg));
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op1_i64(INDEX_op_discard, arg);
+    } else {
+        tcg_gen_discard_i32(TCGV_LOW(arg));
+        tcg_gen_discard_i32(TCGV_HIGH(arg));
+    }
 }
 
 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
-    TCGTemp *ts = tcgv_i64_temp(arg);
-
-    /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
-    if (ts->kind == TEMP_CONST) {
-        tcg_gen_movi_i64(ret, ts->val);
+    if (ret == arg) {
+        return;
+    }
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg);
     } else {
-        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
-        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+        TCGTemp *ts = tcgv_i64_temp(arg);
+
+        /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
+        if (ts->kind == TEMP_CONST) {
+            tcg_gen_movi_i64(ret, ts->val);
+        } else {
+            tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+            tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+        }
     }
 }
 
 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
 {
-    tcg_gen_movi_i32(TCGV_LOW(ret), arg);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
+    } else {
+        tcg_gen_movi_i32(TCGV_LOW(ret), arg);
+        tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
+    }
 }
 
 void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset);
+    } else {
+        tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    }
 }
 
 void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset);
+    } else {
+        tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+    }
 }
 
 void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset);
+    } else {
+        tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    }
 }
 
 void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset);
+    } else {
+        tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+    }
 }
 
 void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset);
+    } else {
+        tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    }
 }
 
 void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset);
+    } else {
+        tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+    }
 }
 
 void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
 {
-    /* Since arg2 and ret have different types,
-       they cannot be the same temporary */
-#if HOST_BIG_ENDIAN
-    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
-    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
-#else
-    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
-    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
-#endif
+    /*
+     * For 32-bit host, since arg2 and ret have different types,
+     * they cannot be the same temporary -- no chance of overlap.
+     */
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset);
+    } else if (HOST_BIG_ENDIAN) {
+        tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
+        tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
+    } else {
+        tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+        tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
+    }
 }
 
 void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset);
+    } else {
+        tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
+    }
 }
 
 void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset);
+    } else {
+        tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
+    }
 }
 
 void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
 {
-    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset);
+    } else {
+        tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+    }
 }
 
 void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
 {
-#if HOST_BIG_ENDIAN
-    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
-    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
-#else
-    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
-    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
-#endif
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset);
+    } else if (HOST_BIG_ENDIAN) {
+        tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
+        tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
+    } else {
+        tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+        tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
+    }
 }
 
 void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
-                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
+                         TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
+    }
 }
 
 void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
-                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
+                         TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
+    }
 }
 
 void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
-    tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+    }
 }
 
 void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
-    tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+    }
 }
 
 void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
-    tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2);
+    } else {
+        tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+    }
 }
 
 void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    gen_helper_shl_i64(ret, arg1, arg2);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2);
+    } else {
+        gen_helper_shl_i64(ret, arg1, arg2);
+    }
 }
 
 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    gen_helper_shr_i64(ret, arg1, arg2);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2);
+    } else {
+        gen_helper_shr_i64(ret, arg1, arg2);
+    }
 }
 
 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    gen_helper_sar_i64(ret, arg1, arg2);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2);
+    } else {
+        gen_helper_sar_i64(ret, arg1, arg2);
+    }
 }
 
 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1310,6 +1696,12 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     TCGv_i64 t0;
     TCGv_i32 t1;
 
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2);
+        return;
+    }
+
+
     t0 = tcg_temp_ebb_new_i64();
     t1 = tcg_temp_ebb_new_i32();
 
@@ -1326,15 +1718,6 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     tcg_temp_free_i32(t1);
 }
 
-#else
-
-void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
-{
-    tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
-}
-
-#endif /* TCG_TARGET_REG_SIZE == 32 */
-
 void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
     /* some cases can be optimized here */
@@ -1351,9 +1734,8 @@ void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 
 void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
 {
-    if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
-        /* Don't recurse with tcg_gen_neg_i64.  */
-        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
+    if (arg1 == 0) {
+        tcg_gen_neg_i64(ret, arg2);
     } else if (TCG_TARGET_REG_BITS == 64) {
         tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
     } else {
@@ -1365,15 +1747,17 @@ void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
 
 void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    /* some cases can be optimized here */
-    if (arg2 == 0) {
-        tcg_gen_mov_i64(ret, arg1);
-    } else if (TCG_TARGET_REG_BITS == 64) {
-        tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
+    tcg_gen_addi_i64(ret, arg1, -arg2);
+}
+
+void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg);
     } else {
+        TCGv_i32 zero = tcg_constant_i32(0);
         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
-                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                         tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+                         zero, zero, TCGV_LOW(arg), TCGV_HIGH(arg));
     }
 }
 
@@ -2600,43 +2984,22 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
         tcg_gen_mov_i64(ret, v1);
     } else if (cond == TCG_COND_NEVER) {
         tcg_gen_mov_i64(ret, v2);
-    } else if (TCG_TARGET_REG_BITS == 32) {
+    } else if (TCG_TARGET_REG_BITS == 64) {
+        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+    } else {
         TCGv_i32 t0 = tcg_temp_ebb_new_i32();
-        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
+        TCGv_i32 zero = tcg_constant_i32(0);
+
         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
                          TCGV_LOW(c1), TCGV_HIGH(c1),
                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
 
-        if (TCG_TARGET_HAS_movcond_i32) {
-            tcg_gen_movi_i32(t1, 0);
-            tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
-                                TCGV_LOW(v1), TCGV_LOW(v2));
-            tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
-                                TCGV_HIGH(v1), TCGV_HIGH(v2));
-        } else {
-            tcg_gen_neg_i32(t0, t0);
-
-            tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
-            tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
-            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
+                            TCGV_LOW(v1), TCGV_LOW(v2));
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
+                            TCGV_HIGH(v1), TCGV_HIGH(v2));
 
-            tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
-            tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
-            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
-        }
         tcg_temp_free_i32(t0);
-        tcg_temp_free_i32(t1);
-    } else if (TCG_TARGET_HAS_movcond_i64) {
-        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
-    } else {
-        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
-        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
-        tcg_gen_negsetcond_i64(cond, t0, c1, c2);
-        tcg_gen_and_i64(t1, v1, t0);
-        tcg_gen_andc_i64(ret, v2, t0);
-        tcg_gen_or_i64(ret, ret, t1);
-        tcg_temp_free_i64(t0);
-        tcg_temp_free_i64(t1);
     }
 }
 
@@ -2872,6 +3235,11 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
     tcg_gen_shri_i64(hi, arg, 32);
 }
 
+void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
+{
+    tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
+}
+
 void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg)
 {
     tcg_gen_mov_i64(lo, TCGV128_LOW(arg));
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 35158a0846..d2ea22b397 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -653,6 +653,7 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
 
 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
+#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
 
 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
@@ -675,6 +676,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
 #undef C_O1_I3
 #undef C_O1_I4
 #undef C_N1_I2
+#undef C_N2_I1
 #undef C_O2_I1
 #undef C_O2_I2
 #undef C_O2_I3
@@ -694,6 +696,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
 
 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
+#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
 
 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
@@ -715,6 +718,7 @@ static const TCGTargetOpDef constraint_sets[] = {
 #undef C_O1_I3
 #undef C_O1_I4
 #undef C_N1_I2
+#undef C_N2_I1
 #undef C_O2_I1
 #undef C_O2_I2
 #undef C_O2_I3
@@ -734,6 +738,7 @@ static const TCGTargetOpDef constraint_sets[] = {
 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
 
 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
+#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
 
 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
@@ -1567,8 +1572,8 @@ void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
 }
 
-TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
-                                     intptr_t offset, const char *name)
+static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
+                                            const char *name, TCGType type)
 {
     TCGContext *s = tcg_ctx;
     TCGTemp *base_ts = tcgv_ptr_temp(base);
@@ -1627,7 +1632,25 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
     return ts;
 }
 
-TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
+TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
+{
+    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
+    return temp_tcgv_i32(ts);
+}
+
+TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
+{
+    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
+    return temp_tcgv_i64(ts);
+}
+
+TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
+{
+    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
+    return temp_tcgv_ptr(ts);
+}
+
+static TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
 {
     TCGContext *s = tcg_ctx;
     TCGTemp *ts;
@@ -1691,6 +1714,46 @@ TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
     return ts;
 }
 
+TCGv_i32 tcg_temp_new_i32(void)
+{
+    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
+}
+
+TCGv_i32 tcg_temp_ebb_new_i32(void)
+{
+    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
+}
+
+TCGv_i64 tcg_temp_new_i64(void)
+{
+    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
+}
+
+TCGv_i64 tcg_temp_ebb_new_i64(void)
+{
+    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
+}
+
+TCGv_ptr tcg_temp_new_ptr(void)
+{
+    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
+}
+
+TCGv_ptr tcg_temp_ebb_new_ptr(void)
+{
+    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
+}
+
+TCGv_i128 tcg_temp_new_i128(void)
+{
+    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
+}
+
+TCGv_i128 tcg_temp_ebb_new_i128(void)
+{
+    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
+}
+
 TCGv_vec tcg_temp_new_vec(TCGType type)
 {
     TCGTemp *t;
@@ -1746,6 +1809,31 @@ void tcg_temp_free_internal(TCGTemp *ts)
     }
 }
 
+void tcg_temp_free_i32(TCGv_i32 arg)
+{
+    tcg_temp_free_internal(tcgv_i32_temp(arg));
+}
+
+void tcg_temp_free_i64(TCGv_i64 arg)
+{
+    tcg_temp_free_internal(tcgv_i64_temp(arg));
+}
+
+void tcg_temp_free_i128(TCGv_i128 arg)
+{
+    tcg_temp_free_internal(tcgv_i128_temp(arg));
+}
+
+void tcg_temp_free_ptr(TCGv_ptr arg)
+{
+    tcg_temp_free_internal(tcgv_ptr_temp(arg));
+}
+
+void tcg_temp_free_vec(TCGv_vec arg)
+{
+    tcg_temp_free_internal(tcgv_vec_temp(arg));
+}
+
 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
 {
     TCGContext *s = tcg_ctx;
@@ -1801,6 +1889,21 @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
     return ts;
 }
 
+TCGv_i32 tcg_constant_i32(int32_t val)
+{
+    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
+}
+
+TCGv_i64 tcg_constant_i64(int64_t val)
+{
+    return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
+}
+
+TCGv_ptr tcg_constant_ptr_int(intptr_t val)
+{
+    return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
+}
+
 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
 {
     val = dup_const(vece, val);
@@ -1874,6 +1977,7 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_mov_i32:
     case INDEX_op_setcond_i32:
     case INDEX_op_brcond_i32:
+    case INDEX_op_movcond_i32:
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8s_i32:
     case INDEX_op_ld16u_i32:
@@ -1884,6 +1988,7 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_st_i32:
     case INDEX_op_add_i32:
     case INDEX_op_sub_i32:
+    case INDEX_op_neg_i32:
     case INDEX_op_mul_i32:
     case INDEX_op_and_i32:
     case INDEX_op_or_i32:
@@ -1895,8 +2000,6 @@ bool tcg_op_supported(TCGOpcode op)
 
     case INDEX_op_negsetcond_i32:
         return TCG_TARGET_HAS_negsetcond_i32;
-    case INDEX_op_movcond_i32:
-        return TCG_TARGET_HAS_movcond_i32;
     case INDEX_op_div_i32:
     case INDEX_op_divu_i32:
         return TCG_TARGET_HAS_div_i32;
@@ -1943,8 +2046,6 @@ bool tcg_op_supported(TCGOpcode op)
         return TCG_TARGET_HAS_bswap32_i32;
     case INDEX_op_not_i32:
         return TCG_TARGET_HAS_not_i32;
-    case INDEX_op_neg_i32:
-        return TCG_TARGET_HAS_neg_i32;
     case INDEX_op_andc_i32:
         return TCG_TARGET_HAS_andc_i32;
     case INDEX_op_orc_i32:
@@ -1969,6 +2070,7 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_mov_i64:
     case INDEX_op_setcond_i64:
     case INDEX_op_brcond_i64:
+    case INDEX_op_movcond_i64:
     case INDEX_op_ld8u_i64:
     case INDEX_op_ld8s_i64:
     case INDEX_op_ld16u_i64:
@@ -1982,6 +2084,7 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_st_i64:
     case INDEX_op_add_i64:
     case INDEX_op_sub_i64:
+    case INDEX_op_neg_i64:
     case INDEX_op_mul_i64:
     case INDEX_op_and_i64:
     case INDEX_op_or_i64:
@@ -1995,8 +2098,6 @@ bool tcg_op_supported(TCGOpcode op)
 
     case INDEX_op_negsetcond_i64:
         return TCG_TARGET_HAS_negsetcond_i64;
-    case INDEX_op_movcond_i64:
-        return TCG_TARGET_HAS_movcond_i64;
     case INDEX_op_div_i64:
     case INDEX_op_divu_i64:
         return TCG_TARGET_HAS_div_i64;
@@ -2040,8 +2141,6 @@ bool tcg_op_supported(TCGOpcode op)
         return TCG_TARGET_HAS_bswap64_i64;
     case INDEX_op_not_i64:
         return TCG_TARGET_HAS_not_i64;
-    case INDEX_op_neg_i64:
-        return TCG_TARGET_HAS_neg_i64;
     case INDEX_op_andc_i64:
         return TCG_TARGET_HAS_andc_i64;
     case INDEX_op_orc_i64:
@@ -5927,11 +6026,6 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
 }
 
-void tcg_dump_op_count(GString *buf)
-{
-    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
-}
-
 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
 {
     int i, start_words, num_insns;
@@ -6128,11 +6222,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
     return tcg_current_code_size(s);
 }
 
-void tcg_dump_info(GString *buf)
-{
-    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
-}
-
 #ifdef ELF_HOST_MACHINE
 /* In order to use this feature, the backend needs to do three things:
 
diff --git a/tcg/tci.c b/tcg/tci.c
index 4640902c88..3cc851b7bd 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -733,12 +733,10 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
             regs[r0] = ~regs[r1];
             break;
 #endif
-#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64
         CASE_32_64(neg)
             tci_args_rr(insn, &r0, &r1);
             regs[r0] = -regs[r1];
             break;
-#endif
 #if TCG_TARGET_REG_BITS == 64
             /* Load/store operations (64 bit). */
 
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 91ca33b616..2a13816c8e 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -65,11 +65,9 @@
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
 #define TCG_TARGET_HAS_ctpop_i32        1
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_orc_i32          1
 #define TCG_TARGET_HAS_rot_i32          1
-#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_negsetcond_i32   0
 #define TCG_TARGET_HAS_muls2_i32        1
 #define TCG_TARGET_HAS_muluh_i32        0
@@ -100,11 +98,9 @@
 #define TCG_TARGET_HAS_clz_i64          1
 #define TCG_TARGET_HAS_ctz_i64          1
 #define TCG_TARGET_HAS_ctpop_i64        1
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_not_i64          1
 #define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_rot_i64          1
-#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_negsetcond_i64   0
 #define TCG_TARGET_HAS_muls2_i64        1
 #define TCG_TARGET_HAS_add2_i32         1
diff --git a/tests/data/acpi/virt/DBG2 b/tests/data/acpi/virt/DBG2
index 86e6314f7b..0a05e1a47f 100644
--- a/tests/data/acpi/virt/DBG2
+++ b/tests/data/acpi/virt/DBG2
diff --git a/tests/data/acpi/virt/SPCR b/tests/data/acpi/virt/SPCR
index 24e0a579e7..cf0f2b7522 100644
--- a/tests/data/acpi/virt/SPCR
+++ b/tests/data/acpi/virt/SPCR
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
index 73a670e8fa..2c15f60958 100644
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -45,6 +45,7 @@ static int query_error_class(const char *cmd)
         { "query-acpi-ospm-status", ERROR_CLASS_GENERIC_ERROR },
         { "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
         { "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
+        { "query-hv-balloon-status-report", ERROR_CLASS_GENERIC_ERROR },
         { "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
         /* Only valid with a USB bus added */
         { "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index f33ae64b8d..e6c51e7a86 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -21,6 +21,7 @@ tests = {
   'test-opts-visitor': [testqapi],
   'test-visitor-serialization': [testqapi],
   'test-bitmap': [],
+  'test-resv-mem': [],
   # all code tested by test-x86-cpuid is inside topology.h
   'test-x86-cpuid': [],
   'test-cutils': [],
diff --git a/tests/unit/test-resv-mem.c b/tests/unit/test-resv-mem.c
new file mode 100644
index 0000000000..5963274e2c
--- /dev/null
+++ b/tests/unit/test-resv-mem.c
@@ -0,0 +1,316 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * reserved-region/range.c unit-tests.
+ *
+ * Copyright (C) 2023, Red Hat, Inc.
+ *
+ * Author: Eric Auger <eric.auger@redhat.com>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "qemu/reserved-region.h"
+
+#define DEBUG 0
+
+#if DEBUG
+static void print_ranges(const char *prefix, GList *ranges)
+{
+    GList *l;
+    int i = 0;
+
+    if (!g_list_length(ranges)) {
+        printf("%s is void\n", prefix);
+        return;
+    }
+    for (l = ranges; l; l = l->next) {
+        Range *r = (Range *)l->data;
+
+        printf("%s rev[%i] = [0x%"PRIx64",0x%"PRIx64"]\n",
+               prefix, i, range_lob(r), range_upb(r));
+        i++;
+    }
+}
+#endif
+
+static void compare_ranges(const char *prefix, GList *ranges,
+                           GList *expected)
+{
+    GList *l, *e;
+
+#if DEBUG
+    print_ranges("out", ranges);
+    print_ranges("expected", expected);
+#endif
+    g_assert_cmpint(g_list_length(ranges), ==, g_list_length(expected));
+    for (l = ranges, e = expected; l ; l = l->next, e = e->next) {
+        Range *r = (Range *)l->data;
+        Range *er = (Range *)e->data;
+
+        g_assert_true(range_lob(r) == range_lob(er) &&
+                      range_upb(r) == range_upb(er));
+    }
+}
+
+static GList *insert_sorted_range(GList *list, uint64_t lob, uint64_t upb)
+{
+    Range *new = g_new0(Range, 1);
+
+    range_set_bounds(new, lob, upb);
+    return range_list_insert(list, new);
+}
+
+static void reset(GList **in, GList **out, GList **expected)
+{
+    g_list_free_full(*in, g_free);
+    g_list_free_full(*out, g_free);
+    g_list_free_full(*expected, g_free);
+    *in = NULL;
+    *out = NULL;
+    *expected = NULL;
+}
+
+static void
+run_range_inverse_array(const char *prefix, GList **in, GList **expected,
+                        uint64_t low, uint64_t high)
+{
+    GList *out = NULL;
+    range_inverse_array(*in, &out, low, high);
+    compare_ranges(prefix, out, *expected);
+    reset(in, &out, expected);
+}
+
+static void check_range_reverse_array(void)
+{
+    GList *in = NULL, *expected = NULL;
+
+    /* test 1: one range reaching UINT64_MAX leaves a single hole below it */
+
+    in = insert_sorted_range(in, 0x10000, UINT64_MAX);
+    expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+    run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+
+    /* test 2: one range in the middle leaves a hole on each side */
+
+    in = insert_sorted_range(in, 0x10000, 0xFFFFFFFFFFFF);
+    expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+    expected = insert_sorted_range(expected, 0x1000000000000, UINT64_MAX);
+    run_range_inverse_array("test2", &in, &expected, 0x0, UINT64_MAX);
+
+    /* test 3: two touching ranges starting at 0 leave one hole above them */
+
+    in = insert_sorted_range(in, 0x0, 0xFFFF);
+    in = insert_sorted_range(in, 0x10000, 0x2FFFF);
+    expected = insert_sorted_range(expected, 0x30000, UINT64_MAX);
+    run_range_inverse_array("test3", &in, &expected, 0x0, UINT64_MAX);
+
+    /* test 4: two touching ranges in the middle leave a hole on each side */
+
+    in = insert_sorted_range(in, 0x50000, 0x5FFFF);
+    in = insert_sorted_range(in, 0x60000, 0xFFFFFFFFFFFF);
+    expected = insert_sorted_range(expected, 0x0, 0x4FFFF);
+    expected = insert_sorted_range(expected, 0x1000000000000, UINT64_MAX);
+    run_range_inverse_array("test4", &in, &expected, 0x0, UINT64_MAX);
+
+    /* test 5: a range covering the whole span leaves no hole */
+
+    in = insert_sorted_range(in, 0x0, UINT64_MAX);
+    run_range_inverse_array("test5", &in, &expected, 0x0, UINT64_MAX);
+
+    /* test 6: three disjoint ranges, the last reaching UINT64_MAX */
+    in = insert_sorted_range(in, 0x10000, 0x1FFFF);
+    in = insert_sorted_range(in, 0x30000, 0x6FFFF);
+    in = insert_sorted_range(in, 0x90000, UINT64_MAX);
+    expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+    expected = insert_sorted_range(expected, 0x20000, 0x2FFFF);
+    expected = insert_sorted_range(expected, 0x70000, 0x8FFFF);
+    run_range_inverse_array("test6", &in, &expected, 0x0, UINT64_MAX);
+}
+
+static void check_range_reverse_array_low_end(void)
+{
+    GList *in = NULL, *expected = NULL;
+
+    /* test 1 */
+    in = insert_sorted_range(in,  0x0, UINT64_MAX);
+    run_range_inverse_array("test1", &in, &expected, 0x10000, 0xFFFFFF);
+
+    /* test 2 */
+
+    in = insert_sorted_range(in,  0x0, 0xFFFF);
+    in = insert_sorted_range(in,  0x20000, 0x2FFFF);
+    expected = insert_sorted_range(expected, 0x40000, 0xFFFFFFFFFFFF);
+    run_range_inverse_array("test2", &in, &expected, 0x40000, 0xFFFFFFFFFFFF);
+
+    /* test 3 */
+    in = insert_sorted_range(in,  0x0, 0xFFFF);
+    in = insert_sorted_range(in,  0x20000, 0x2FFFF);
+    in = insert_sorted_range(in,  0x1000000000000, UINT64_MAX);
+    expected = insert_sorted_range(expected, 0x40000, 0xFFFFFFFFFFFF);
+    run_range_inverse_array("test3", &in, &expected, 0x40000, 0xFFFFFFFFFFFF);
+
+    /* test 4 */
+
+    in = insert_sorted_range(in,  0x0, 0xFFFF);
+    in = insert_sorted_range(in,  0x20000, 0x2FFFF);
+    in = insert_sorted_range(in,  0x1000000000000, UINT64_MAX);
+    expected = insert_sorted_range(expected, 0x30000, 0xFFFFFFFFFFFF);
+    run_range_inverse_array("test4", &in, &expected, 0x20000, 0xFFFFFFFFFFFF);
+
+    /* test 5 */
+
+    in = insert_sorted_range(in,  0x2000, 0xFFFF);
+    in = insert_sorted_range(in,  0x20000, 0x2FFFF);
+    in = insert_sorted_range(in,  0x100000000, 0x1FFFFFFFF);
+    expected = insert_sorted_range(expected, 0x1000, 0x1FFF);
+    expected = insert_sorted_range(expected, 0x10000, 0x1FFFF);
+    expected = insert_sorted_range(expected, 0x30000, 0xFFFFFFFF);
+    expected = insert_sorted_range(expected, 0x200000000, 0xFFFFFFFFFFFF);
+    run_range_inverse_array("test5", &in, &expected, 0x1000, 0xFFFFFFFFFFFF);
+
+    /* test 6 */
+
+    in = insert_sorted_range(in,  0x10000000 , 0x1FFFFFFF);
+    in = insert_sorted_range(in,  0x100000000, 0x1FFFFFFFF);
+    expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+    run_range_inverse_array("test6", &in, &expected, 0x0, 0xFFFF);
+}
+
+static ReservedRegion *alloc_resv_mem(unsigned type, uint64_t lob, uint64_t upb)
+{
+    ReservedRegion *r;
+
+    r = g_new0(ReservedRegion, 1);
+    r->type = type;
+    range_set_bounds(&r->range, lob, upb);
+    return r;
+}
+
+static void print_resv_region_list(const char *prefix, GList *list,
+                                   uint32_t expected_length)
+{
+    int i = g_list_length(list);
+
+    g_assert_cmpint(i, ==, expected_length);
+#if DEBUG
+    i = 0;
+    for (GList *l = list; l; l = l->next) {
+        ReservedRegion *r = (ReservedRegion *)l->data;
+        Range *range = &r->range;
+
+        printf("%s item[%d]=[0x%x, 0x%"PRIx64", 0x%"PRIx64"]\n",
+               prefix, i++, r->type, range_lob(range), range_upb(range));
+    }
+#endif
+}
+
+static void free_resv_region(gpointer data)
+{
+    ReservedRegion *reg = (ReservedRegion *)data;
+
+    g_free(reg);
+}
+
+static void check_resv_region_list_insert(void)
+{
+    ReservedRegion *r[10];
+    GList *l = NULL;
+
+    r[0] = alloc_resv_mem(0xA, 0, 0xFFFF);
+    r[1] = alloc_resv_mem(0xA, 0x20000, 0x2FFFF);
+    l = resv_region_list_insert(l, r[0]);
+    l = resv_region_list_insert(l, r[1]);
+    print_resv_region_list("test1", l, 2);
+
+    /* adjacent on left */
+    r[2] = alloc_resv_mem(0xB, 0x0, 0xFFF);
+    l = resv_region_list_insert(l, r[2]);
+    /* adjacent on right */
+    r[3] = alloc_resv_mem(0xC, 0x21000, 0x2FFFF);
+    l = resv_region_list_insert(l, r[3]);
+    print_resv_region_list("test2", l, 4);
+
+    /* exact overlap of D into C */
+    r[4] = alloc_resv_mem(0xD, 0x21000, 0x2FFFF);
+    l = resv_region_list_insert(l, r[4]);
+    print_resv_region_list("test3", l, 4);
+
+    /* in the middle */
+    r[5] = alloc_resv_mem(0xE, 0x22000, 0x23FFF);
+    l = resv_region_list_insert(l, r[5]);
+    print_resv_region_list("test4", l, 6);
+
+    /* overwrites several existing ones */
+    r[6] = alloc_resv_mem(0xF, 0x10000, 0x2FFFF);
+    l = resv_region_list_insert(l, r[6]);
+    print_resv_region_list("test5", l, 3);
+
+    /* contiguous at the end */
+    r[7] = alloc_resv_mem(0x0, 0x30000, 0x40000);
+    l = resv_region_list_insert(l, r[7]);
+    print_resv_region_list("test6", l, 4);
+
+    g_list_free_full(l, free_resv_region);
+    l = NULL;
+
+    r[0] = alloc_resv_mem(0x0, 0x10000, 0x1FFFF);
+    l = resv_region_list_insert(l, r[0]);
+    /* insertion before the 1st item */
+    r[1] = alloc_resv_mem(0x1, 0x0, 0xFF);
+    l = resv_region_list_insert(l, r[1]);
+    print_resv_region_list("test8", l, 2);
+
+    /* collision on the left side */
+    r[2] = alloc_resv_mem(0xA, 0x1200, 0x11FFF);
+    l = resv_region_list_insert(l, r[2]);
+    print_resv_region_list("test9", l, 3);
+
+    /* collision on the right side */
+    r[3] = alloc_resv_mem(0xA, 0x1F000, 0x2FFFF);
+    l = resv_region_list_insert(l, r[3]);
+    print_resv_region_list("test10", l, 4);
+
+    /* override everything */
+    r[4] = alloc_resv_mem(0xF, 0x0, UINT64_MAX);
+    l = resv_region_list_insert(l, r[4]);
+    print_resv_region_list("test11", l, 1);
+
+    g_list_free_full(l, free_resv_region);
+    l = NULL;
+
+    r[0] = alloc_resv_mem(0xF, 0x1000000000000, UINT64_MAX);
+    l = resv_region_list_insert(l, r[0]);
+    print_resv_region_list("test12", l, 1);
+
+    r[1] = alloc_resv_mem(0xA, 0x0, 0xFFFFFFF);
+    l = resv_region_list_insert(l, r[1]);
+    print_resv_region_list("test13", l, 2);
+
+    r[2] = alloc_resv_mem(0xB, 0x100000000, 0x1FFFFFFFF);
+    l = resv_region_list_insert(l, r[2]);
+    print_resv_region_list("test14", l, 3);
+
+    r[3] = alloc_resv_mem(0x0, 0x010000000, 0x2FFFFFFFF);
+    l = resv_region_list_insert(l, r[3]);
+    print_resv_region_list("test15", l, 3);
+
+    g_list_free_full(l, free_resv_region);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    /* Register the range inversion and reserved-region insertion cases. */
+    g_test_add_func("/resv-mem/range_reverse_array",
+                    check_range_reverse_array);
+    g_test_add_func("/resv-mem/range_reverse_array_low_end",
+                    check_range_reverse_array_low_end);
+    g_test_add_func("/resv-mem/resv_region_list_insert",
+                    check_resv_region_list_insert);
+
+    /* Exit with GLib's verdict instead of a hard-coded 0. */
+    return g_test_run();
+}
diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
index aedc125ae9..739b91583c 100644
--- a/tests/unit/test-uuid.c
+++ b/tests/unit/test-uuid.c
@@ -145,7 +145,7 @@ static void test_uuid_unparse(void)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(uuid_test_data); i++) {
-        char out[37];
+        char out[UUID_STR_LEN];
 
         if (!uuid_test_data[i].check_unparse) {
             continue;
diff --git a/ui/cocoa.m b/ui/cocoa.m
index d95276013c..cd069da696 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1247,7 +1247,6 @@ static CGEventRef handleTapEvent(CGEventTapProxy proxy, CGEventType type, CGEven
         [normalWindow makeKeyAndOrderFront:self];
         [normalWindow center];
         [normalWindow setDelegate: self];
-        stretch_video = false;
 
         /* Used for displaying pause on the screen */
         pauseLabel = [NSTextField new];
@@ -1671,7 +1670,9 @@ static void create_initial_menus(void)
     // View menu
     menu = [[NSMenu alloc] initWithTitle:@"View"];
     [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"Enter Fullscreen" action:@selector(doToggleFullScreen:) keyEquivalent:@"f"] autorelease]]; // Fullscreen
-    [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"Zoom To Fit" action:@selector(zoomToFit:) keyEquivalent:@""] autorelease]];
+    menuItem = [[[NSMenuItem alloc] initWithTitle:@"Zoom To Fit" action:@selector(zoomToFit:) keyEquivalent:@""] autorelease];
+    [menuItem setState: stretch_video ? NSControlStateValueOn : NSControlStateValueOff];
+    [menu addItem: menuItem];
     menuItem = [[[NSMenuItem alloc] initWithTitle:@"View" action:nil keyEquivalent:@""] autorelease];
     [menuItem setSubmenu:menu];
     [[NSApp mainMenu] addItem:menuItem];
@@ -2041,18 +2042,6 @@ static void cocoa_display_init(DisplayState *ds, DisplayOptions *opts)
 
     [QemuApplication sharedApplication];
 
-    create_initial_menus();
-
-    /*
-     * Create the menu entries which depend on QEMU state (for consoles
-     * and removable devices). These make calls back into QEMU functions,
-     * which is OK because at this point we know that the second thread
-     * holds the iothread lock and is synchronously waiting for us to
-     * finish.
-     */
-    add_console_menu_entries();
-    addRemovableDevicesMenuItems();
-
     // Create an Application controller
     QemuCocoaAppController *controller = [[QemuCocoaAppController alloc] init];
     [NSApp setDelegate:controller];
@@ -2077,6 +2066,21 @@ static void cocoa_display_init(DisplayState *ds, DisplayOptions *opts)
         left_command_key_enabled = 0;
     }
 
+    if (opts->u.cocoa.has_zoom_to_fit && opts->u.cocoa.zoom_to_fit) {
+        stretch_video = true;
+    }
+
+    create_initial_menus();
+    /*
+     * Create the menu entries which depend on QEMU state (for consoles
+     * and removable devices). These make calls back into QEMU functions,
+     * which is OK because at this point we know that the second thread
+     * holds the iothread lock and is synchronously waiting for us to
+     * finish.
+     */
+    add_console_menu_entries();
+    addRemovableDevicesMenuItems();
+
     // register vga output callbacks
     register_displaychangelistener(&dcl);
 
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index a1060fd80f..cd2f176502 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -69,15 +69,16 @@ void gd_egl_draw(VirtualConsole *vc)
 #ifdef CONFIG_GBM
     QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf;
 #endif
-    int ww, wh;
+    int ww, wh, ws;
 
     if (!vc->gfx.gls) {
         return;
     }
 
     window = gtk_widget_get_window(vc->gfx.drawing_area);
-    ww = gdk_window_get_width(window);
-    wh = gdk_window_get_height(window);
+    ws = gdk_window_get_scale_factor(window);
+    ww = gdk_window_get_width(window) * ws;
+    wh = gdk_window_get_height(window) * ws;
 
     if (vc->gfx.scanout_mode) {
 #ifdef CONFIG_GBM
@@ -243,12 +244,19 @@ void gd_egl_scanout_texture(DisplayChangeListener *dcl,
     vc->gfx.h = h;
     vc->gfx.y0_top = backing_y_0_top;
 
+    if (!vc->gfx.esurface) {
+        gd_egl_init(vc);
+        if (!vc->gfx.esurface) {
+            return;
+        }
+    }
+
     eglMakeCurrent(qemu_egl_display, vc->gfx.esurface,
                    vc->gfx.esurface, vc->gfx.ectx);
 
     gtk_egl_set_scanout_mode(vc, true);
     egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height,
                          backing_id, false);
 }
 
 void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
@@ -312,7 +320,7 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
 {
     VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
     GdkWindow *window;
-    int ww, wh;
+    int ww, wh, ws;
 
     if (!vc->gfx.scanout_mode) {
         return;
@@ -325,8 +333,9 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
                    vc->gfx.esurface, vc->gfx.ectx);
 
     window = gtk_widget_get_window(vc->gfx.drawing_area);
-    ww = gdk_window_get_width(window);
-    wh = gdk_window_get_height(window);
+    ws = gdk_window_get_scale_factor(window);
+    ww = gdk_window_get_width(window) * ws;
+    wh = gdk_window_get_height(window) * ws;
     egl_fb_setup_default(&vc->gfx.win_fb, ww, wh);
     if (vc->gfx.cursor_fb.texture) {
         egl_texture_blit(vc->gfx.gls, &vc->gfx.win_fb, &vc->gfx.guest_fb,
diff --git a/ui/gtk.c b/ui/gtk.c
index 935de1209b..be047a41ad 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -1400,7 +1400,7 @@ static void gd_menu_untabify(GtkMenuItem *item, void *opaque)
             eglDestroySurface(qemu_egl_display, vc->gfx.esurface);
             vc->gfx.esurface = NULL;
         }
-        if (vc->gfx.esurface) {
+        if (vc->gfx.ectx) {
             eglDestroyContext(qemu_egl_display, vc->gfx.ectx);
             vc->gfx.ectx = NULL;
         }
@@ -2371,6 +2371,7 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions *opts)
     GdkDisplay *window_display;
     GtkIconTheme *theme;
     char *dir;
+    int idx;
 
     if (!gtkinit) {
         fprintf(stderr, "gtk initialization failed\n");
@@ -2434,6 +2435,15 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions *opts)
     gtk_container_add(GTK_CONTAINER(s->window), s->vbox);
 
     gtk_widget_show_all(s->window);
+
+    for (idx = 0;; idx++) {
+        QemuConsole *con = qemu_console_lookup_by_index(idx);
+        if (!con) {
+            break;
+        }
+        gtk_widget_realize(s->vc[idx].gfx.drawing_area);
+    }
+
     if (opts->u.gtk.has_show_menubar &&
         !opts->u.gtk.show_menubar) {
         gtk_widget_hide(s->menu_bar);
diff --git a/util/cpuinfo-loongarch.c b/util/cpuinfo-loongarch.c
new file mode 100644
index 0000000000..08b6d7460c
--- /dev/null
+++ b/util/cpuinfo-loongarch.c
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for LoongArch.
+ */
+
+#include "qemu/osdep.h"
+#include "host/cpuinfo.h"
+
+#ifdef CONFIG_GETAUXVAL
+# include <sys/auxv.h>
+#else
+# include "elf.h"
+#endif
+#include <asm/hwcap.h>
+
+unsigned cpuinfo;
+
+/* Called both as constructor and (possibly) via other constructors. */
+unsigned __attribute__((constructor)) cpuinfo_init(void)
+{
+    unsigned info = cpuinfo;
+    unsigned long hwcap;
+
+    if (info) {
+        return info;
+    }
+
+    hwcap = qemu_getauxval(AT_HWCAP);
+
+    info = CPUINFO_ALWAYS;
+    info |= (hwcap & HWCAP_LOONGARCH_LSX ? CPUINFO_LSX : 0);
+
+    cpuinfo = info;
+    return info;
+}
diff --git a/util/filemonitor-inotify.c b/util/filemonitor-inotify.c
index 2c45f7f176..2121111f38 100644
--- a/util/filemonitor-inotify.c
+++ b/util/filemonitor-inotify.c
@@ -81,16 +81,25 @@ static void qemu_file_monitor_watch(void *arg)
 
     /* Loop over all events in the buffer */
     while (used < len) {
-        struct inotify_event *ev =
-            (struct inotify_event *)(buf + used);
-        const char *name = ev->len ? ev->name : "";
-        QFileMonitorDir *dir = g_hash_table_lookup(mon->idmap,
-                                                   GINT_TO_POINTER(ev->wd));
-        uint32_t iev = ev->mask &
-            (IN_CREATE | IN_MODIFY | IN_DELETE | IN_IGNORED |
-             IN_MOVED_TO | IN_MOVED_FROM | IN_ATTRIB);
+        const char *name;
+        QFileMonitorDir *dir;
+        uint32_t iev;
         int qev;
         gsize i;
+        struct inotify_event *ev = (struct inotify_event *)(buf + used);
+
+        /*
+         * We trust the kernel to provide a valid buffer with complete event
+         * records.
+         */
+        assert(len - used >= sizeof(struct inotify_event));
+        assert(len - used - sizeof(struct inotify_event) >= ev->len);
+
+        name = ev->len ? ev->name : "";
+        dir = g_hash_table_lookup(mon->idmap, GINT_TO_POINTER(ev->wd));
+        iev = ev->mask &
+            (IN_CREATE | IN_MODIFY | IN_DELETE | IN_IGNORED |
+             IN_MOVED_TO | IN_MOVED_FROM | IN_ATTRIB);
 
         used += sizeof(struct inotify_event) + ev->len;
 
diff --git a/util/meson.build b/util/meson.build
index 769b24f2e0..c2322ef6e7 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -52,6 +52,7 @@ util_ss.add(files('qdist.c'))
 util_ss.add(files('qht.c'))
 util_ss.add(files('qsp.c'))
 util_ss.add(files('range.c'))
+util_ss.add(files('reserved-region.c'))
 util_ss.add(files('stats64.c'))
 util_ss.add(files('systemd.c'))
 util_ss.add(files('transactions.c'))
@@ -113,6 +114,8 @@ if cpu == 'aarch64'
   util_ss.add(files('cpuinfo-aarch64.c'))
 elif cpu in ['x86', 'x86_64']
   util_ss.add(files('cpuinfo-i386.c'))
+elif cpu == 'loongarch64'
+  util_ss.add(files('cpuinfo-loongarch.c'))
 elif cpu in ['ppc', 'ppc64']
   util_ss.add(files('cpuinfo-ppc.c'))
 endif
diff --git a/util/range.c b/util/range.c
index 098d9d2dc0..9605ccfcbe 100644
--- a/util/range.c
+++ b/util/range.c
@@ -20,11 +20,7 @@
 #include "qemu/osdep.h"
 #include "qemu/range.h"
 
-/*
- * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
- * Both @a and @b must not be empty.
- */
-static inline int range_compare(Range *a, Range *b)
+int range_compare(Range *a, Range *b)
 {
     assert(!range_is_empty(a) && !range_is_empty(b));
 
@@ -70,3 +66,58 @@ GList *range_list_insert(GList *list, Range *data)
 
     return list;
 }
+
+static inline
+GList *append_new_range(GList *list, uint64_t lob, uint64_t upb)
+{
+    Range *new = g_new0(Range, 1);
+
+    range_set_bounds(new, lob, upb);
+    return g_list_append(list, new);
+}
+
+/* Append to @rev the gaps between the sorted @in ranges within [low, high]. */
+void range_inverse_array(GList *in, GList **rev,
+                         uint64_t low, uint64_t high)
+{
+    Range *r, *rn;
+    GList *l = in, *out = *rev;
+
+    for (l = in; l && range_upb(l->data) < low; l = l->next) {
+        continue;
+    }
+
+    if (!l) {
+        /* no range reaches low: the whole [low, high] span is a hole */
+        out = append_new_range(out, low, high);
+        goto exit;
+    }
+    r = (Range *)l->data;
+
+    /* first range lob is greater than low, insert a first range */
+    if (range_lob(r) > low) {
+        out = append_new_range(out, low, MIN(range_lob(r) - 1, high));
+    }
+
+    /* insert a range in between each original range until we reach high */
+    for (; l->next; l = l->next) {
+        r = (Range *)l->data;
+        rn = (Range *)l->next->data;
+        /* r reaches high: no hole follows, and upb + 1 would pass high */
+        if (range_upb(r) >= high) {
+            goto exit;
+        }
+        if (range_compare(r, rn)) {
+            out = append_new_range(out, range_upb(r) + 1,
+                                   MIN(range_lob(rn) - 1, high));
+        }
+    }
+
+    /* last range: anything between its upb and high is a trailing hole */
+    r = (Range *)l->data;
+    if (range_upb(r) < high) {
+        out = append_new_range(out, range_upb(r) + 1, high);
+    }
+exit:
+    *rev = out;
+}
diff --git a/util/reserved-region.c b/util/reserved-region.c
new file mode 100644
index 0000000000..18f83eb4c6
--- /dev/null
+++ b/util/reserved-region.c
@@ -0,0 +1,91 @@
+/*
+ * QEMU ReservedRegion helpers
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+#include "qemu/reserved-region.h"
+
+GList *resv_region_list_insert(GList *list, ReservedRegion *reg)
+{
+    ReservedRegion *resv_iter, *new_reg;
+    Range *r = &reg->range;
+    Range *range_iter;
+    GList *l;
+
+    for (l = list; l ; ) {
+        resv_iter = (ReservedRegion *)l->data;
+        range_iter = &resv_iter->range;
+
+        /* Skip the list elements lying strictly below the range to add */
+        if (range_compare(range_iter, r) < 0) {
+            l = l->next;
+        } else if (range_compare(range_iter, r) > 0) {
+            return g_list_insert_before(list, l, reg);
+        } else { /* the two ranges touch or overlap */
+            if (range_contains_range(r, range_iter)) {
+                /* new range covers the current item: free and unlink it */
+                GList *prev = l->prev;
+                g_free(l->data);
+                list = g_list_delete_link(list, l);
+                if (prev) {
+                    l = prev->next;
+                } else {
+                    l = list;
+                }
+            } else if (range_contains_range(range_iter, r)) {
+                /* new region lies entirely within the current region */
+                if (range_lob(range_iter) == range_lob(r)) {
+                    /* same lower bound: keep only the part above reg */
+                    range_set_bounds(range_iter, range_upb(r) + 1,
+                                     range_upb(range_iter));
+                    return g_list_insert_before(list, l, reg);
+                } else if (range_upb(range_iter) == range_upb(r)) {
+                    /* same upper bound: keep only the part below reg */
+                    range_set_bounds(range_iter, range_lob(range_iter),
+                                     range_lob(r) - 1);
+                    l = l->next; /* reg is linked in later */
+                } else {
+                    uint64_t lob = range_lob(range_iter);
+                    /*
+                     * the new range is in the middle of an existing one:
+                     * split the latter into 3 regions
+                     */
+                    range_set_bounds(range_iter, range_upb(r) + 1,
+                                     range_upb(range_iter));
+                    new_reg = g_new0(ReservedRegion, 1);
+                    new_reg->type = resv_iter->type;
+                    range_set_bounds(&new_reg->range,
+                                     lob, range_lob(r) - 1);
+                    list = g_list_insert_before(list, l, new_reg);
+                    return g_list_insert_before(list, l, reg);
+                }
+            } else if (range_lob(r) < range_lob(range_iter)) {
+                range_set_bounds(range_iter, range_upb(r) + 1,
+                                 range_upb(range_iter));
+                return g_list_insert_before(list, l, reg);
+            } else { /* reg overlaps the upper end of the current region */
+                range_set_bounds(range_iter, range_lob(range_iter),
+                                 range_lob(r) - 1);
+                l = l->next;
+            }
+        } /* touch or overlap */
+    }
+    return g_list_append(list, reg);
+}
+
diff --git a/util/uuid.c b/util/uuid.c
index d71aa79e5e..234619dd5e 100644
--- a/util/uuid.c
+++ b/util/uuid.c
@@ -51,7 +51,7 @@ int qemu_uuid_is_equal(const QemuUUID *lhv, const QemuUUID *rhv)
 void qemu_uuid_unparse(const QemuUUID *uuid, char *out)
 {
     const unsigned char *uu = &uuid->data[0];
-    snprintf(out, UUID_FMT_LEN + 1, UUID_FMT,
+    snprintf(out, UUID_STR_LEN, UUID_FMT,
              uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
              uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
 }