From 808832277af11dafee5a55da2b9e41d019b879ca Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 23 Jun 2016 21:54:28 -0700
Subject: target-sparc: Use overalignment flags for twinx and block asis

This allows us to enforce 16 and 64-byte alignment
without any extra overhead.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <1466744068-6615-1-git-send-email-rth@twiddle.net>
---
 target-sparc/translate.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index e7691e4458..ec4a73accf 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2385,20 +2385,23 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
     case GET_ASI_BLOCK:
         /* Valid for lddfa on aligned registers only.  */
         if (size == 8 && (rd & 7) == 0) {
+            TCGMemOp memop;
             TCGv eight;
             int i;
 
-            gen_check_align(addr, 0x3f);
             gen_address_mask(dc, addr);
 
+            /* The first operation checks required alignment.  */
+            memop = da.memop | MO_ALIGN_64;
             eight = tcg_const_tl(8);
             for (i = 0; ; ++i) {
                 tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr,
-                                    da.mem_idx, da.memop);
+                                    da.mem_idx, memop);
                 if (i == 7) {
                     break;
                 }
                 tcg_gen_add_tl(addr, addr, eight);
+                memop = da.memop;
             }
             tcg_temp_free(eight);
         } else {
@@ -2488,20 +2491,23 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
     case GET_ASI_BLOCK:
         /* Valid for stdfa on aligned registers only.  */
         if (size == 8 && (rd & 7) == 0) {
+            TCGMemOp memop;
             TCGv eight;
             int i;
 
-            gen_check_align(addr, 0x3f);
             gen_address_mask(dc, addr);
 
+            /* The first operation checks required alignment.  */
+            memop = da.memop | MO_ALIGN_64;
             eight = tcg_const_tl(8);
             for (i = 0; ; ++i) {
                 tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr,
-                                    da.mem_idx, da.memop);
+                                    da.mem_idx, memop);
                 if (i == 7) {
                     break;
                 }
                 tcg_gen_add_tl(addr, addr, eight);
+                memop = da.memop;
             }
             tcg_temp_free(eight);
         } else {
@@ -2539,9 +2545,8 @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd)
         return;
 
     case GET_ASI_DTWINX:
-        gen_check_align(addr, 15);
         gen_address_mask(dc, addr);
-        tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop);
+        tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16);
         tcg_gen_addi_tl(addr, addr, 8);
         tcg_gen_qemu_ld_i64(lo, addr, da.mem_idx, da.memop);
         break;
@@ -2594,9 +2599,8 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
         break;
 
     case GET_ASI_DTWINX:
-        gen_check_align(addr, 15);
         gen_address_mask(dc, addr);
-        tcg_gen_qemu_st_i64(hi, addr, da.mem_idx, da.memop);
+        tcg_gen_qemu_st_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16);
         tcg_gen_addi_tl(addr, addr, 8);
         tcg_gen_qemu_st_i64(lo, addr, da.mem_idx, da.memop);
         break;
@@ -5468,7 +5472,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                         if (gen_trap_ifnofpu(dc)) {
                             goto jmp_insn;
                         }
-                        gen_check_align(cpu_addr, 7);
                         gen_stf_asi(dc, cpu_addr, insn, 16, QFPREG(rd));
                     }
                     break;
-- 
cgit v1.2.3


From 2f9d35fc4006122bad33f9ae3e2e51d2263e98ee Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 13:12:50 -0700
Subject: target-sparc: Introduce cpu_raise_exception_ra

Several helpers call helper_raise_exception directly, which requires
in turn that their callers have performed save_state.  The new function
allows a TCG return address to be passed in so that we can restore
PC + NPC + flags data from that.

This fixes a bug in the usage of helper_check_align, whose callers had
not been calling save_state.  It fixes another bug in which the divide
helpers used GETPC at a level other than the direct callee from TCG.

This allows the translator to avoid save_state prior to SAVE, RESTORE,
and FLUSHW instructions.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/cpu.h         |  1 +
 target-sparc/helper.c      | 52 +++++++++++++---------------
 target-sparc/ldst_helper.c | 85 +++++++++++++++++++++-------------------------
 target-sparc/translate.c   |  3 --
 target-sparc/win_helper.c  | 37 ++++++++++----------
 5 files changed, 82 insertions(+), 96 deletions(-)

diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 646a103513..3ae3a12e19 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -549,6 +549,7 @@ void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
                                                  MMUAccessType access_type,
                                                  int mmu_idx,
                                                  uintptr_t retaddr);
+void cpu_raise_exception_ra(CPUSPARCState *, int, uintptr_t) QEMU_NORETURN;
 
 #ifndef NO_CPU_IO_DEFS
 /* cpu_init.c */
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index bedc6722a1..359b0b15ed 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -24,6 +24,14 @@
 #include "exec/helper-proto.h"
 #include "sysemu/sysemu.h"
 
+void cpu_raise_exception_ra(CPUSPARCState *env, int tt, uintptr_t ra)
+{
+    CPUState *cs = CPU(sparc_env_get_cpu(env));
+
+    cs->exception_index = tt;
+    cpu_loop_exit_restore(cs, ra);
+}
+
 void helper_raise_exception(CPUSPARCState *env, int tt)
 {
     CPUState *cs = CPU(sparc_env_get_cpu(env));
@@ -59,7 +67,7 @@ uint64_t helper_tick_get_count(CPUSPARCState *env, void *opaque, int mem_idx)
     CPUTimer *timer = opaque;
 
     if (timer->npt && mem_idx < MMU_KERNEL_IDX) {
-        helper_raise_exception(env, TT_PRIV_INSN);
+        cpu_raise_exception_ra(env, TT_PRIV_INSN, GETPC());
     }
 
     return cpu_tick_get_count(timer);
@@ -76,10 +84,9 @@ void helper_tick_set_limit(void *opaque, uint64_t limit)
 }
 #endif
 
-static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a,
-                                       target_ulong b, int cc)
+static target_ulong do_udiv(CPUSPARCState *env, target_ulong a,
+                            target_ulong b, int cc, uintptr_t ra)
 {
-    SPARCCPU *cpu = sparc_env_get_cpu(env);
     int overflow = 0;
     uint64_t x0;
     uint32_t x1;
@@ -88,8 +95,7 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
-        cpu_restore_state(CPU(cpu), GETPC());
-        helper_raise_exception(env, TT_DIV_ZERO);
+        cpu_raise_exception_ra(env, TT_DIV_ZERO, ra);
     }
 
     x0 = x0 / x1;
@@ -108,18 +114,17 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a,
 
 target_ulong helper_udiv(CPUSPARCState *env, target_ulong a, target_ulong b)
 {
-    return helper_udiv_common(env, a, b, 0);
+    return do_udiv(env, a, b, 0, GETPC());
 }
 
 target_ulong helper_udiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b)
 {
-    return helper_udiv_common(env, a, b, 1);
+    return do_udiv(env, a, b, 1, GETPC());
 }
 
-static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a,
-                                       target_ulong b, int cc)
+static target_ulong do_sdiv(CPUSPARCState *env, target_ulong a,
+                            target_ulong b, int cc, uintptr_t ra)
 {
-    SPARCCPU *cpu = sparc_env_get_cpu(env);
     int overflow = 0;
     int64_t x0;
     int32_t x1;
@@ -128,8 +133,7 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
-        cpu_restore_state(CPU(cpu), GETPC());
-        helper_raise_exception(env, TT_DIV_ZERO);
+        cpu_raise_exception_ra(env, TT_DIV_ZERO, ra);
     } else if (x1 == -1 && x0 == INT64_MIN) {
         x0 = INT32_MAX;
         overflow = 1;
@@ -151,12 +155,12 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a,
 
 target_ulong helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b)
 {
-    return helper_sdiv_common(env, a, b, 0);
+    return do_sdiv(env, a, b, 0, GETPC());
 }
 
 target_ulong helper_sdiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b)
 {
-    return helper_sdiv_common(env, a, b, 1);
+    return do_sdiv(env, a, b, 1, GETPC());
 }
 
 #ifdef TARGET_SPARC64
@@ -164,10 +168,7 @@ int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b)
 {
     if (b == 0) {
         /* Raise divide by zero trap.  */
-        SPARCCPU *cpu = sparc_env_get_cpu(env);
-
-        cpu_restore_state(CPU(cpu), GETPC());
-        helper_raise_exception(env, TT_DIV_ZERO);
+        cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC());
     } else if (b == -1) {
         /* Avoid overflow trap with i386 divide insn.  */
         return -a;
@@ -180,10 +181,7 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
 {
     if (b == 0) {
         /* Raise divide by zero trap.  */
-        SPARCCPU *cpu = sparc_env_get_cpu(env);
-
-        cpu_restore_state(CPU(cpu), GETPC());
-        helper_raise_exception(env, TT_DIV_ZERO);
+        cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC());
     }
     return a / b;
 }
@@ -192,7 +190,6 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
 target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1,
                              target_ulong src2)
 {
-    SPARCCPU *cpu = sparc_env_get_cpu(env);
     target_ulong dst;
 
     /* Tag overflow occurs if either input has bits 0 or 1 set.  */
@@ -215,14 +212,12 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1,
     return dst;
 
  tag_overflow:
-    cpu_restore_state(CPU(cpu), GETPC());
-    helper_raise_exception(env, TT_TOVF);
+    cpu_raise_exception_ra(env, TT_TOVF, GETPC());
 }
 
 target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
                              target_ulong src2)
 {
-    SPARCCPU *cpu = sparc_env_get_cpu(env);
     target_ulong dst;
 
     /* Tag overflow occurs if either input has bits 0 or 1 set.  */
@@ -245,8 +240,7 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
     return dst;
 
  tag_overflow:
-    cpu_restore_state(CPU(cpu), GETPC());
-    helper_raise_exception(env, TT_TOVF);
+    cpu_raise_exception_ra(env, TT_TOVF, GETPC());
 }
 
 #ifndef TARGET_SPARC64
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 6ce5ccc37f..3c70766adf 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -301,17 +301,23 @@ static inline target_ulong asi_address_mask(CPUSPARCState *env,
 }
 #endif
 
-void helper_check_align(CPUSPARCState *env, target_ulong addr, uint32_t align)
+static void do_check_align(CPUSPARCState *env, target_ulong addr,
+                           uint32_t align, uintptr_t ra)
 {
     if (addr & align) {
 #ifdef DEBUG_UNALIGNED
         printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
                "\n", addr, env->pc);
 #endif
-        helper_raise_exception(env, TT_UNALIGNED);
+        cpu_raise_exception_ra(env, TT_UNALIGNED, ra);
     }
 }
 
+void helper_check_align(CPUSPARCState *env, target_ulong addr, uint32_t align)
+{
+    do_check_align(env, addr, align, GETPC());
+}
+
 #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) &&   \
     defined(DEBUG_MXCC)
 static void dump_mxcc(CPUSPARCState *env)
@@ -440,7 +446,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
     uint32_t last_addr = addr;
 #endif
 
-    helper_check_align(env, addr, size - 1);
+    do_check_align(env, addr, size - 1, GETPC());
     switch (asi) {
     case ASI_M_MXCC: /* SuperSparc MXCC registers, or... */
     /* case ASI_LEON_CACHEREGS:  Leon3 cache control */
@@ -708,7 +714,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val,
     SPARCCPU *cpu = sparc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
 
-    helper_check_align(env, addr, size - 1);
+    do_check_align(env, addr, size - 1, GETPC());
     switch (asi) {
     case ASI_M_MXCC: /* SuperSparc MXCC registers, or... */
     /* case ASI_LEON_CACHEREGS:  Leon3 cache control */
@@ -1112,10 +1118,10 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
 #endif
 
     if (asi < 0x80) {
-        helper_raise_exception(env, TT_PRIV_ACT);
+        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
 
-    helper_check_align(env, addr, size - 1);
+    do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
     switch (asi) {
@@ -1218,10 +1224,9 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     dump_asi("write", addr, asi, size, val);
 #endif
     if (asi < 0x80) {
-        helper_raise_exception(env, TT_PRIV_ACT);
+        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
-
-    helper_check_align(env, addr, size - 1);
+    do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
     /* Convert to little endian */
@@ -1276,7 +1281,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     case ASI_PNFL: /* Primary no-fault LE, RO */
     case ASI_SNFL: /* Secondary no-fault LE, RO */
     default:
-        helper_raise_exception(env, TT_DATA_ACCESS);
+        cpu_raise_exception_ra(env, TT_DATA_ACCESS, GETPC());
         return;
     }
 }
@@ -1300,10 +1305,10 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
         || (cpu_has_hypervisor(env)
             && asi >= 0x30 && asi < 0x80
             && !(env->hpstate & HS_PRIV))) {
-        helper_raise_exception(env, TT_PRIV_ACT);
+        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
 
-    helper_check_align(env, addr, size - 1);
+    do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
     /* process nonfaulting loads first */
@@ -1322,7 +1327,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
             dump_asi("read ", last_addr, asi, size, ret);
 #endif
             /* env->exception_index is set in get_physical_address_data(). */
-            helper_raise_exception(env, cs->exception_index);
+            cpu_raise_exception_ra(env, cs->exception_index, GETPC());
         }
 
         /* convert nonfaulting load ASIs to normal load ASIs */
@@ -1614,7 +1619,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
     case ASI_TWINX_S:  /* Secondary, twinx */
     case ASI_TWINX_SL: /* Secondary, twinx, LE */
         /* These are all 128-bit atomic; only ldda (now ldtxa) allowed */
-        helper_raise_exception(env, TT_ILL_INSN);
+        cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC());
         return 0;
     }
 
@@ -1683,10 +1688,10 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
         || (cpu_has_hypervisor(env)
             && asi >= 0x30 && asi < 0x80
             && !(env->hpstate & HS_PRIV))) {
-        helper_raise_exception(env, TT_PRIV_ACT);
+        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
 
-    helper_check_align(env, addr, size - 1);
+    do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
     /* Convert to little endian */
@@ -2032,7 +2037,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     case ASI_TWINX_S:  /* Secondary, twinx */
     case ASI_TWINX_SL: /* Secondary, twinx, LE */
         /* Only stda allowed */
-        helper_raise_exception(env, TT_ILL_INSN);
+        cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC());
         return;
     case ASI_DCACHE_DATA: /* D-cache data */
     case ASI_DCACHE_TAG: /* D-cache tag access */
@@ -2076,7 +2081,7 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
         || (cpu_has_hypervisor(env)
             && asi >= 0x30 && asi < 0x80
             && !(env->hpstate & HS_PRIV))) {
-        helper_raise_exception(env, TT_PRIV_ACT);
+        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
 
     addr = asi_address_mask(env, asi, addr);
@@ -2085,19 +2090,19 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
 #if !defined(CONFIG_USER_ONLY)
     case ASI_TWINX_AIUP:   /* As if user primary, twinx */
     case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */
-        helper_check_align(env, addr, 0xf);
+        do_check_align(env, addr, 0xf, GETPC());
         h = cpu_ldq_user(env, addr);
         l = cpu_ldq_user(env, addr + 8);
         break;
     case ASI_TWINX_AIUS:   /* As if user secondary, twinx */
     case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */
-        helper_check_align(env, addr, 0xf);
+        do_check_align(env, addr, 0xf, GETPC());
         h = cpu_ldq_user_secondary(env, addr);
         l = cpu_ldq_user_secondary(env, addr + 8);
         break;
     case ASI_TWINX_REAL:   /* Real address, twinx */
     case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
-        helper_check_align(env, addr, 0xf);
+        do_check_align(env, addr, 0xf, GETPC());
         {
             CPUState *cs = CPU(sparc_env_get_cpu(env));
             h = ldq_phys(cs->as, addr);
@@ -2108,14 +2113,14 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
     case ASI_NUCLEUS_QUAD_LDD_L:
     case ASI_TWINX_N:  /* Nucleus, twinx */
     case ASI_TWINX_NL: /* Nucleus, twinx, LE */
-        helper_check_align(env, addr, 0xf);
+        do_check_align(env, addr, 0xf, GETPC());
         h = cpu_ldq_nucleus(env, addr);
         l = cpu_ldq_nucleus(env, addr + 8);
         break;
     case ASI_TWINX_S: /* Secondary, twinx */
     case ASI_TWINX_SL: /* Secondary, twinx, LE */
         if (!cpu_hypervisor_mode(env)) {
-            helper_check_align(env, addr, 0xf);
+            do_check_align(env, addr, 0xf, GETPC());
             if (env->pstate & PS_PRIV) {
                 h = cpu_ldq_kernel_secondary(env, addr);
                 l = cpu_ldq_kernel_secondary(env, addr + 8);
@@ -2128,7 +2133,7 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
         /* fallthru */
     case ASI_TWINX_P:  /* Primary, twinx */
     case ASI_TWINX_PL: /* Primary, twinx, LE */
-        helper_check_align(env, addr, 0xf);
+        do_check_align(env, addr, 0xf, GETPC());
         h = cpu_ldq_data(env, addr);
         l = cpu_ldq_data(env, addr + 8);
         break;
@@ -2139,7 +2144,7 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
     case ASI_TWINX_SL: /* Primary, twinx, LE */
         /* ??? Should be available, but we need to implement
            an atomic 128-bit load.  */
-        helper_raise_exception(env, TT_PRIV_ACT);
+        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
 #endif
     default:
         /* Non-twinx asi, so this is the legacy ldda insn, which
@@ -2147,7 +2152,7 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
         /* ??? The UA2011 manual recommends emulating this with
            a single 64-bit load.  However, LE asis *are* treated
            as two 32-bit loads individually byte swapped.  */
-        helper_check_align(env, addr, 0x7);
+        do_check_align(env, addr, 7, GETPC());
         QT0.high = (uint32_t)helper_ld_asi(env, addr, asi, MO_UL);
         QT0.low = (uint32_t)helper_ld_asi(env, addr + 4, asi, MO_UL);
         return;
@@ -2196,7 +2201,7 @@ void helper_ldqf(CPUSPARCState *env, target_ulong addr, int mem_idx)
     /* XXX add 128 bit load */
     CPU_QuadU u;
 
-    helper_check_align(env, addr, 7);
+    do_check_align(env, addr, 7, GETPC());
 #if !defined(CONFIG_USER_ONLY)
     switch (mem_idx) {
     case MMU_USER_IDX:
@@ -2232,7 +2237,7 @@ void helper_stqf(CPUSPARCState *env, target_ulong addr, int mem_idx)
     /* XXX add 128 bit store */
     CPU_QuadU u;
 
-    helper_check_align(env, addr, 7);
+    do_check_align(env, addr, 7, GETPC());
 #if !defined(CONFIG_USER_ONLY)
     switch (mem_idx) {
     case MMU_USER_IDX:
@@ -2314,11 +2319,8 @@ void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr,
     }
 
     if ((env->mmuregs[0] & MMU_E) && !(env->mmuregs[0] & MMU_NF)) {
-        if (is_exec) {
-            helper_raise_exception(env, TT_CODE_ACCESS);
-        } else {
-            helper_raise_exception(env, TT_DATA_ACCESS);
-        }
+        int tt = is_exec ? TT_CODE_ACCESS : TT_DATA_ACCESS;
+        cpu_raise_exception_ra(env, tt, GETPC());
     }
 
     /* flush neverland mappings created during no-fault mode,
@@ -2334,17 +2336,14 @@ void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr,
 {
     SPARCCPU *cpu = SPARC_CPU(cs);
     CPUSPARCState *env = &cpu->env;
+    int tt = is_exec ? TT_CODE_ACCESS : TT_DATA_ACCESS;
 
 #ifdef DEBUG_UNASSIGNED
     printf("Unassigned mem access to " TARGET_FMT_plx " from " TARGET_FMT_lx
            "\n", addr, env->pc);
 #endif
 
-    if (is_exec) {
-        helper_raise_exception(env, TT_CODE_ACCESS);
-    } else {
-        helper_raise_exception(env, TT_DATA_ACCESS);
-    }
+    cpu_raise_exception_ra(env, tt, GETPC());
 }
 #endif
 #endif
@@ -2362,10 +2361,7 @@ void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
     printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
            "\n", addr, env->pc);
 #endif
-    if (retaddr) {
-        cpu_restore_state(CPU(cpu), retaddr);
-    }
-    helper_raise_exception(env, TT_UNALIGNED);
+    cpu_raise_exception_ra(env, TT_UNALIGNED, retaddr);
 }
 
 /* try to fill the TLB and return an exception if error. If retaddr is
@@ -2379,10 +2375,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
 
     ret = sparc_cpu_handle_mmu_fault(cs, addr, access_type, mmu_idx);
     if (ret) {
-        if (retaddr) {
-            cpu_restore_state(cs, retaddr);
-        }
-        cpu_loop_exit(cs);
+        cpu_loop_exit_restore(cs, retaddr);
     }
 }
 #endif
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index ec4a73accf..ee7bbc40b5 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -3458,7 +3458,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 break;
             } else if (xop == 0x2b) { /* rdtbr / V9 flushw */
 #ifdef TARGET_SPARC64
-                save_state(dc);
                 gen_helper_flushw(cpu_env);
 #else
                 if (!supervisor(dc))
@@ -5062,12 +5061,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     /* nop */
                     break;
                 case 0x3c:      /* save */
-                    save_state(dc);
                     gen_helper_save(cpu_env);
                     gen_store_gpr(dc, rd, cpu_tmp0);
                     break;
                 case 0x3d:      /* restore */
-                    save_state(dc);
                     gen_helper_restore(cpu_env);
                     gen_store_gpr(dc, rd, cpu_tmp0);
                     break;
diff --git a/target-sparc/win_helper.c b/target-sparc/win_helper.c
index a8a6c0cfc4..2d5b5469a9 100644
--- a/target-sparc/win_helper.c
+++ b/target-sparc/win_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "trace.h"
 
@@ -111,13 +112,13 @@ void helper_rett(CPUSPARCState *env)
     unsigned int cwp;
 
     if (env->psret == 1) {
-        helper_raise_exception(env, TT_ILL_INSN);
+        cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC());
     }
 
     env->psret = 1;
     cwp = cpu_cwp_inc(env, env->cwp + 1) ;
     if (env->wim & (1 << cwp)) {
-        helper_raise_exception(env, TT_WIN_UNF);
+        cpu_raise_exception_ra(env, TT_WIN_UNF, GETPC());
     }
     cpu_set_cwp(env, cwp);
     env->psrs = env->psrps;
@@ -131,7 +132,7 @@ void helper_save(CPUSPARCState *env)
 
     cwp = cpu_cwp_dec(env, env->cwp - 1);
     if (env->wim & (1 << cwp)) {
-        helper_raise_exception(env, TT_WIN_OVF);
+        cpu_raise_exception_ra(env, TT_WIN_OVF, GETPC());
     }
     cpu_set_cwp(env, cwp);
 }
@@ -142,7 +143,7 @@ void helper_restore(CPUSPARCState *env)
 
     cwp = cpu_cwp_inc(env, env->cwp + 1);
     if (env->wim & (1 << cwp)) {
-        helper_raise_exception(env, TT_WIN_UNF);
+        cpu_raise_exception_ra(env, TT_WIN_UNF, GETPC());
     }
     cpu_set_cwp(env, cwp);
 }
@@ -150,7 +151,7 @@ void helper_restore(CPUSPARCState *env)
 void helper_wrpsr(CPUSPARCState *env, target_ulong new_psr)
 {
     if ((new_psr & PSR_CWP) >= env->nwindows) {
-        helper_raise_exception(env, TT_ILL_INSN);
+        cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC());
     } else {
         cpu_put_psr(env, new_psr);
     }
@@ -170,14 +171,14 @@ void helper_save(CPUSPARCState *env)
 
     cwp = cpu_cwp_dec(env, env->cwp - 1);
     if (env->cansave == 0) {
-        helper_raise_exception(env, TT_SPILL | (env->otherwin != 0 ?
-                                                (TT_WOTHER |
-                                                 ((env->wstate & 0x38) >> 1)) :
-                                                ((env->wstate & 0x7) << 2)));
+        int tt = TT_SPILL | (env->otherwin != 0
+                             ? (TT_WOTHER | ((env->wstate & 0x38) >> 1))
+                             : ((env->wstate & 0x7) << 2));
+        cpu_raise_exception_ra(env, tt, GETPC());
     } else {
         if (env->cleanwin - env->canrestore == 0) {
             /* XXX Clean windows without trap */
-            helper_raise_exception(env, TT_CLRWIN);
+            cpu_raise_exception_ra(env, TT_CLRWIN, GETPC());
         } else {
             env->cansave--;
             env->canrestore++;
@@ -192,10 +193,10 @@ void helper_restore(CPUSPARCState *env)
 
     cwp = cpu_cwp_inc(env, env->cwp + 1);
     if (env->canrestore == 0) {
-        helper_raise_exception(env, TT_FILL | (env->otherwin != 0 ?
-                                               (TT_WOTHER |
-                                                ((env->wstate & 0x38) >> 1)) :
-                                               ((env->wstate & 0x7) << 2)));
+        int tt = TT_FILL | (env->otherwin != 0
+                            ? (TT_WOTHER | ((env->wstate & 0x38) >> 1))
+                            : ((env->wstate & 0x7) << 2));
+        cpu_raise_exception_ra(env, tt, GETPC());
     } else {
         env->cansave++;
         env->canrestore--;
@@ -206,10 +207,10 @@ void helper_restore(CPUSPARCState *env)
 void helper_flushw(CPUSPARCState *env)
 {
     if (env->cansave != env->nwindows - 2) {
-        helper_raise_exception(env, TT_SPILL | (env->otherwin != 0 ?
-                                                (TT_WOTHER |
-                                                 ((env->wstate & 0x38) >> 1)) :
-                                                ((env->wstate & 0x7) << 2)));
+        int tt = TT_SPILL | (env->otherwin != 0
+                             ? (TT_WOTHER | ((env->wstate & 0x38) >> 1))
+                             : ((env->wstate & 0x7) << 2));
+        cpu_raise_exception_ra(env, tt, GETPC());
     }
 }
 
-- 
cgit v1.2.3


From af7a06bac7d3abb2da48ef3277d2a415772d2ae8 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 21:01:29 -0700
Subject: target-sparc: Add MMU_PHYS_IDX

It's handy to have a mmu idx for physical addresses, so
that mmu disabled and physical access asis can use the
same path as normal accesses.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/cpu.h         | 25 +++++++++++++++++-------
 target-sparc/ldst_helper.c | 27 ++++++--------------------
 target-sparc/mmu_helper.c  | 47 ++++++++++++++++++++++++++++------------------
 3 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 3ae3a12e19..e94b8f19d6 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -225,9 +225,9 @@ enum {
 #define MAX_NWINDOWS 32
 
 #if !defined(TARGET_SPARC64)
-#define NB_MMU_MODES 2
+#define NB_MMU_MODES 3
 #else
-#define NB_MMU_MODES 6
+#define NB_MMU_MODES 7
 typedef struct trap_state {
     uint64_t tpc;
     uint64_t tnpc;
@@ -649,11 +649,13 @@ int cpu_sparc_signal_handler(int host_signum, void *pinfo, void *puc);
 #define MMU_MODE4_SUFFIX _nucleus
 #define MMU_HYPV_IDX   5
 #define MMU_MODE5_SUFFIX _hypv
+#define MMU_PHYS_IDX   6
 #else
 #define MMU_USER_IDX   0
 #define MMU_MODE0_SUFFIX _user
 #define MMU_KERNEL_IDX 1
 #define MMU_MODE1_SUFFIX _kernel
+#define MMU_PHYS_IDX   2
 #endif
 
 #if defined (TARGET_SPARC64)
@@ -673,18 +675,27 @@ static inline int cpu_supervisor_mode(CPUSPARCState *env1)
 }
 #endif
 
-static inline int cpu_mmu_index(CPUSPARCState *env1, bool ifetch)
+static inline int cpu_mmu_index(CPUSPARCState *env, bool ifetch)
 {
 #if defined(CONFIG_USER_ONLY)
     return MMU_USER_IDX;
 #elif !defined(TARGET_SPARC64)
-    return env1->psrs;
+    if ((env->mmuregs[0] & MMU_E) == 0) { /* MMU disabled */
+        return MMU_PHYS_IDX;
+    } else {
+        return env->psrs;
+    }
 #else
-    if (env1->tl > 0) {
+    /* IMMU or DMMU disabled.  */
+    if (ifetch
+        ? (env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0
+        : (env->lsu & DMMU_E) == 0) {
+        return MMU_PHYS_IDX;
+    } else if (env->tl > 0) {
         return MMU_NUCLEUS_IDX;
-    } else if (cpu_hypervisor_mode(env1)) {
+    } else if (cpu_hypervisor_mode(env)) {
         return MMU_HYPV_IDX;
-    } else if (cpu_supervisor_mode(env1)) {
+    } else if (cpu_supervisor_mode(env)) {
         return MMU_KERNEL_IDX;
     } else {
         return MMU_USER_IDX;
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 3c70766adf..667b962afe 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -887,10 +887,10 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val,
             case 0: /* Control Register */
                 env->mmuregs[reg] = (env->mmuregs[reg] & 0xff000000) |
                     (val & 0x00ffffff);
-                /* Mappings generated during no-fault mode or MMU
-                   disabled mode are invalid in normal mode */
-                if ((oldreg & (MMU_E | MMU_NF | env->def->mmu_bm)) !=
-                    (env->mmuregs[reg] & (MMU_E | MMU_NF | env->def->mmu_bm))) {
+                /* Mappings generated during no-fault mode
+                   are invalid in normal mode.  */
+                if ((oldreg ^ env->mmuregs[reg])
+                    & (MMU_NF | env->def->mmu_bm)) {
                     tlb_flush(CPU(cpu), 1);
                 }
                 break;
@@ -1866,23 +1866,8 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
         /* XXX */
         return;
     case ASI_LSU_CONTROL: /* LSU */
-        {
-            uint64_t oldreg;
-
-            oldreg = env->lsu;
-            env->lsu = val & (DMMU_E | IMMU_E);
-            /* Mappings generated during D/I MMU disabled mode are
-               invalid in normal mode */
-            if (oldreg != env->lsu) {
-                DPRINTF_MMU("LSU change: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
-                            oldreg, env->lsu);
-#ifdef DEBUG_MMU
-                dump_mmu(stdout, fprintf, env);
-#endif
-                tlb_flush(CPU(cpu), 1);
-            }
-            return;
-        }
+        env->lsu = val & (DMMU_E | IMMU_E);
+        return;
     case ASI_IMMU: /* I-MMU regs */
         {
             int reg = (addr >> 3) & 0xf;
diff --git a/target-sparc/mmu_helper.c b/target-sparc/mmu_helper.c
index 32b629fb0d..044e88c4c5 100644
--- a/target-sparc/mmu_helper.c
+++ b/target-sparc/mmu_helper.c
@@ -92,7 +92,7 @@ static int get_physical_address(CPUSPARCState *env, hwaddr *physical,
 
     is_user = mmu_idx == MMU_USER_IDX;
 
-    if ((env->mmuregs[0] & MMU_E) == 0) { /* MMU disabled */
+    if (mmu_idx == MMU_PHYS_IDX) {
         *page_size = TARGET_PAGE_SIZE;
         /* Boot mode: instruction fetches are taken from PROM */
         if (rw == 2 && (env->mmuregs[0] & env->def->mmu_bm)) {
@@ -494,23 +494,21 @@ static int get_physical_address_data(CPUSPARCState *env,
     unsigned int i;
     uint64_t context;
     uint64_t sfsr = 0;
-
-    int is_user = (mmu_idx == MMU_USER_IDX ||
-                   mmu_idx == MMU_USER_SECONDARY_IDX);
-
-    if ((env->lsu & DMMU_E) == 0) { /* DMMU disabled */
-        *physical = ultrasparc_truncate_physical(address);
-        *prot = PAGE_READ | PAGE_WRITE;
-        return 0;
-    }
+    bool is_user = false;
 
     switch (mmu_idx) {
+    case MMU_PHYS_IDX:
+        g_assert_not_reached();
     case MMU_USER_IDX:
+        is_user = true;
+        /* fallthru */
     case MMU_KERNEL_IDX:
         context = env->dmmu.mmu_primary_context & 0x1fff;
         sfsr |= SFSR_CT_PRIMARY;
         break;
     case MMU_USER_SECONDARY_IDX:
+        is_user = true;
+        /* fallthru */
     case MMU_KERNEL_SECONDARY_IDX:
         context = env->dmmu.mmu_secondary_context & 0x1fff;
         sfsr |= SFSR_CT_SECONDARY;
@@ -613,15 +611,22 @@ static int get_physical_address_code(CPUSPARCState *env,
     CPUState *cs = CPU(sparc_env_get_cpu(env));
     unsigned int i;
     uint64_t context;
+    bool is_user = false;
 
-    int is_user = (mmu_idx == MMU_USER_IDX ||
-                   mmu_idx == MMU_USER_SECONDARY_IDX);
-
-    if ((env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0) {
-        /* IMMU disabled */
-        *physical = ultrasparc_truncate_physical(address);
-        *prot = PAGE_EXEC;
-        return 0;
+    switch (mmu_idx) {
+    case MMU_PHYS_IDX:
+    case MMU_USER_SECONDARY_IDX:
+    case MMU_KERNEL_SECONDARY_IDX:
+        g_assert_not_reached();
+    case MMU_USER_IDX:
+        is_user = true;
+        /* fallthru */
+    case MMU_KERNEL_IDX:
+        context = env->dmmu.mmu_primary_context & 0x1fff;
+        break;
+    default:
+        context = 0;
+        break;
     }
 
     if (env->tl == 0) {
@@ -700,6 +705,12 @@ static int get_physical_address(CPUSPARCState *env, hwaddr *physical,
         }
     }
 
+    if (mmu_idx == MMU_PHYS_IDX) {
+        *physical = ultrasparc_truncate_physical(address);
+        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        return 0;
+    }
+
     if (rw == 2) {
         return get_physical_address_code(env, physical, prot, address,
                                          mmu_idx);
-- 
cgit v1.2.3


From 7f87c90527d7363e8cecf1c6b5ad3d4cc85d3d28 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 21:29:44 -0700
Subject: target-sparc: Use MMU_PHYS_IDX for bypass asis

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index ee7bbc40b5..86432accc5 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2046,6 +2046,11 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
             mem_idx = MMU_KERNEL_IDX;
             type = GET_ASI_DIRECT;
             break;
+        case ASI_M_BYPASS:    /* MMU passthrough */
+        case ASI_LEON_BYPASS: /* LEON MMU passthrough */
+            mem_idx = MMU_PHYS_IDX;
+            type = GET_ASI_DIRECT;
+            break;
         }
     } else {
         gen_exception(dc, TT_PRIV_INSN);
@@ -2066,6 +2071,14 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
         type = GET_ASI_EXCP;
     } else {
         switch (asi) {
+        case ASI_REAL:      /* Bypass */
+        case ASI_REAL_IO:   /* Bypass, non-cacheable */
+        case ASI_REAL_L:    /* Bypass LE */
+        case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
+        case ASI_TWINX_REAL:   /* Real address, twinx */
+        case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
+            mem_idx = MMU_PHYS_IDX;
+            break;
         case ASI_N:  /* Nucleus */
         case ASI_NL: /* Nucleus LE */
         case ASI_TWINX_N:
@@ -2123,6 +2136,10 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
             break;
         }
         switch (asi) {
+        case ASI_REAL:
+        case ASI_REAL_IO:
+        case ASI_REAL_L:
+        case ASI_REAL_IO_L:
         case ASI_N:
         case ASI_NL:
         case ASI_AIUP:
@@ -2135,6 +2152,8 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
         case ASI_PL:
             type = GET_ASI_DIRECT;
             break;
+        case ASI_TWINX_REAL:
+        case ASI_TWINX_REAL_L:
         case ASI_TWINX_N:
         case ASI_TWINX_NL:
         case ASI_TWINX_AIUP:
-- 
cgit v1.2.3


From 34a6e13da70b2c798630a8dbd03d09f201c0198f Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Wed, 13 Jul 2016 00:43:05 -0700
Subject: target-sparc: Handle more twinx asis

As used by HelenOS, presumably for ultra 2 and 3,
prior to the sun4v platform and the current twinx names.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 86432accc5..e7e07de03b 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2077,12 +2077,16 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
         case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
         case ASI_TWINX_REAL:   /* Real address, twinx */
         case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
+        case ASI_QUAD_LDD_PHYS:
+        case ASI_QUAD_LDD_PHYS_L:
             mem_idx = MMU_PHYS_IDX;
             break;
         case ASI_N:  /* Nucleus */
         case ASI_NL: /* Nucleus LE */
         case ASI_TWINX_N:
         case ASI_TWINX_NL:
+        case ASI_NUCLEUS_QUAD_LDD:
+        case ASI_NUCLEUS_QUAD_LDD_L:
             mem_idx = MMU_NUCLEUS_IDX;
             break;
         case ASI_AIUP:  /* As if user primary */
@@ -2164,6 +2168,10 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
         case ASI_TWINX_PL:
         case ASI_TWINX_S:
         case ASI_TWINX_SL:
+        case ASI_QUAD_LDD_PHYS:
+        case ASI_QUAD_LDD_PHYS_L:
+        case ASI_NUCLEUS_QUAD_LDD:
+        case ASI_NUCLEUS_QUAD_LDD_L:
             type = GET_ASI_DTWINX;
             break;
         case ASI_BLK_COMMIT_P:
-- 
cgit v1.2.3


From 4fb554bc6c88eb45270a3ad3cf6e6e2ad476aede Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 15:29:18 -0700
Subject: target-sparc: Implement swap_asi inline

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 49 +++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index e7e07de03b..8cd8bb6047 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2001,6 +2001,18 @@ static inline void gen_ne_fop_QD(DisasContext *dc, int rd, int rs,
     gen_update_fprs_dirty(dc, QFPREG(rd));
 }
 
+static void gen_swap(DisasContext *dc, TCGv dst, TCGv src,
+                     TCGv addr, int mmu_idx, TCGMemOp memop)
+{
+    /* ??? Should be atomic.  */
+    TCGv t0 = tcg_temp_new();
+    gen_address_mask(dc, addr);
+    tcg_gen_qemu_ld_tl(t0, addr, mmu_idx, memop);
+    tcg_gen_qemu_st_tl(src, addr, mmu_idx, memop);
+    tcg_gen_mov_tl(dst, t0);
+    tcg_temp_free(t0);
+}
+
 /* asi moves */
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
 typedef enum {
@@ -2302,26 +2314,12 @@ static void gen_swap_asi(DisasContext *dc, TCGv dst, TCGv src,
     switch (da.type) {
     case GET_ASI_EXCP:
         break;
+    case GET_ASI_DIRECT:
+        gen_swap(dc, dst, src, addr, da.mem_idx, da.memop);
+        break;
     default:
-        {
-            TCGv_i32 r_asi = tcg_const_i32(da.asi);
-            TCGv_i32 r_mop = tcg_const_i32(MO_UL);
-            TCGv_i64 s64, t64;
-
-            save_state(dc);
-            t64 = tcg_temp_new_i64();
-            gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop);
-
-            s64 = tcg_temp_new_i64();
-            tcg_gen_extu_tl_i64(s64, src);
-            gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop);
-            tcg_temp_free_i64(s64);
-            tcg_temp_free_i32(r_mop);
-            tcg_temp_free_i32(r_asi);
-
-            tcg_gen_trunc_i64_tl(dst, t64);
-            tcg_temp_free_i64(t64);
-        }
+        /* ??? Should be DAE_invalid_asi.  */
+        gen_exception(dc, TT_DATA_ACCESS);
         break;
     }
 }
@@ -5207,15 +5205,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     break;
                 case 0x0f:
                     /* swap, swap register with memory. Also atomically */
-                    {
-                        TCGv t0 = get_temp_tl(dc);
-                        CHECK_IU_FEATURE(dc, SWAP);
-                        cpu_src1 = gen_load_gpr(dc, rd);
-                        gen_address_mask(dc, cpu_addr);
-                        tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
-                        tcg_gen_qemu_st32(cpu_src1, cpu_addr, dc->mem_idx);
-                        tcg_gen_mov_tl(cpu_val, t0);
-                    }
+                    CHECK_IU_FEATURE(dc, SWAP);
+                    cpu_src1 = gen_load_gpr(dc, rd);
+                    gen_swap(dc, cpu_val, cpu_src1, cpu_addr,
+                             dc->mem_idx, MO_TEUL);
                     break;
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
                 case 0x10:      /* lda, V9 lduwa, load word alternate */
-- 
cgit v1.2.3


From fbb4bbb62e5603c991b880e25dc4bb30d342b944 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 15:38:13 -0700
Subject: target-sparc: Implement ldstub_asi inline

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 52 +++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 8cd8bb6047..8d879a9985 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2013,6 +2013,20 @@ static void gen_swap(DisasContext *dc, TCGv dst, TCGv src,
     tcg_temp_free(t0);
 }
 
+static void gen_ldstub(DisasContext *dc, TCGv dst, TCGv addr, int mmu_idx)
+{
+    /* ??? Should be atomic.  */
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_const_i32(0xff);
+
+    gen_address_mask(dc, addr);
+    tcg_gen_qemu_ld_i32(t0, addr, mmu_idx, MO_UB);
+    tcg_gen_qemu_st_i32(t1, addr, mmu_idx, MO_UB);
+    tcg_gen_extu_i32_tl(dst, t0);
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+}
+
 /* asi moves */
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
 typedef enum {
@@ -2351,25 +2365,12 @@ static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn)
     switch (da.type) {
     case GET_ASI_EXCP:
         break;
+    case GET_ASI_DIRECT:
+        gen_ldstub(dc, dst, addr, da.mem_idx);
+        break;
     default:
-        {
-            TCGv_i32 r_asi = tcg_const_i32(da.asi);
-            TCGv_i32 r_mop = tcg_const_i32(MO_UB);
-            TCGv_i64 s64, t64;
-
-            save_state(dc);
-            t64 = tcg_temp_new_i64();
-            gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop);
-
-            s64 = tcg_const_i64(0xff);
-            gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop);
-            tcg_temp_free_i64(s64);
-            tcg_temp_free_i32(r_mop);
-            tcg_temp_free_i32(r_asi);
-
-            tcg_gen_trunc_i64_tl(dst, t64);
-            tcg_temp_free_i64(t64);
-        }
+        /* ??? Should be DAE_invalid_asi.  */
+        gen_exception(dc, TT_DATA_ACCESS);
         break;
     }
 }
@@ -5189,19 +5190,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_address_mask(dc, cpu_addr);
                     tcg_gen_qemu_ld16s(cpu_val, cpu_addr, dc->mem_idx);
                     break;
-                case 0xd:       /* ldstub -- XXX: should be atomically */
-                    {
-                        TCGv r_const;
-                        TCGv tmp = tcg_temp_new();
-
-                        gen_address_mask(dc, cpu_addr);
-                        tcg_gen_qemu_ld8u(tmp, cpu_addr, dc->mem_idx);
-                        r_const = tcg_const_tl(0xff);
-                        tcg_gen_qemu_st8(r_const, cpu_addr, dc->mem_idx);
-                        tcg_gen_mov_tl(cpu_val, tmp);
-                        tcg_temp_free(r_const);
-                        tcg_temp_free(tmp);
-                    }
+                case 0xd:       /* ldstub */
+                    gen_ldstub(dc, cpu_val, cpu_addr, dc->mem_idx);
                     break;
                 case 0x0f:
                     /* swap, swap register with memory. Also atomically */
-- 
cgit v1.2.3


From 7268adebfda6548b8ae6865dc8337f116a5d266d Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 16:10:32 -0700
Subject: target-sparc: Implement cas_asi/casx_asi inline

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/helper.h      |  4 ---
 target-sparc/ldst_helper.c | 29 -------------------
 target-sparc/translate.c   | 70 +++++++++++++++++++++++++++++++---------------
 3 files changed, 47 insertions(+), 56 deletions(-)

diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index caa2a895d0..435c65e97a 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -18,7 +18,6 @@ DEF_HELPER_2(wrcwp, void, env, tl)
 DEF_HELPER_FLAGS_2(array8, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_FLAGS_1(popc, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_3(ldda_asi, TCG_CALL_NO_WG, void, env, tl, int)
-DEF_HELPER_FLAGS_5(casx_asi, TCG_CALL_NO_WG, tl, env, tl, tl, tl, i32)
 DEF_HELPER_FLAGS_2(set_softint, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_2(clear_softint, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_2(write_softint, TCG_CALL_NO_RWG, void, env, i64)
@@ -26,9 +25,6 @@ DEF_HELPER_FLAGS_2(tick_set_count, TCG_CALL_NO_RWG, void, ptr, i64)
 DEF_HELPER_FLAGS_3(tick_get_count, TCG_CALL_NO_WG, i64, env, ptr, int)
 DEF_HELPER_FLAGS_2(tick_set_limit, TCG_CALL_NO_RWG, void, ptr, i64)
 #endif
-#if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
-DEF_HELPER_FLAGS_5(cas_asi, TCG_CALL_NO_WG, tl, env, tl, tl, tl, i32)
-#endif
 DEF_HELPER_FLAGS_3(check_align, TCG_CALL_NO_WG, void, env, tl, i32)
 DEF_HELPER_1(debug, void, env)
 DEF_HELPER_1(save, void, env)
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 667b962afe..d90748ea5a 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -2150,37 +2150,8 @@ void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
     QT0.high = h;
     QT0.low = l;
 }
-
-target_ulong helper_casx_asi(CPUSPARCState *env, target_ulong addr,
-                             target_ulong val1, target_ulong val2,
-                             uint32_t asi)
-{
-    target_ulong ret;
-
-    ret = helper_ld_asi(env, addr, asi, MO_Q);
-    if (val2 == ret) {
-        helper_st_asi(env, addr, val1, asi, MO_Q);
-    }
-    return ret;
-}
 #endif /* TARGET_SPARC64 */
 
-#if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
-target_ulong helper_cas_asi(CPUSPARCState *env, target_ulong addr,
-                            target_ulong val1, target_ulong val2, uint32_t asi)
-{
-    target_ulong ret;
-
-    val2 &= 0xffffffffUL;
-    ret = helper_ld_asi(env, addr, asi, MO_UL);
-    ret &= 0xffffffffUL;
-    if (val2 == ret) {
-        helper_st_asi(env, addr, val1 & 0xffffffffUL, asi, MO_UL);
-    }
-    return ret;
-}
-#endif /* !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) */
-
 void helper_ldqf(CPUSPARCState *env, target_ulong addr, int mem_idx)
 {
     /* XXX add 128 bit load */
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 8d879a9985..0fb361a8bb 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2338,24 +2338,37 @@ static void gen_swap_asi(DisasContext *dc, TCGv dst, TCGv src,
     }
 }
 
-static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv val2,
+static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv cmpr,
                         int insn, int rd)
 {
     DisasASI da = get_asi(dc, insn, MO_TEUL);
-    TCGv val1, dst;
-    TCGv_i32 r_asi;
+    TCGv cmpv, oldv, tmpv;
 
-    if (da.type == GET_ASI_EXCP) {
+    switch (da.type) {
+    case GET_ASI_EXCP:
         return;
+    case GET_ASI_DIRECT:
+        cmpv = tcg_temp_new();
+        oldv = tcg_temp_new();
+        tmpv = tcg_temp_new();
+        tcg_gen_ext32u_tl(cmpv, cmpr);
+
+        /* ??? Should be atomic.  */
+        tcg_gen_qemu_ld_tl(oldv, addr, da.mem_idx, da.memop);
+        tcg_gen_movcond_tl(TCG_COND_EQ, tmpv, oldv, cmpv,
+                           gen_load_gpr(dc, rd), oldv);
+        tcg_gen_qemu_st_tl(tmpv, addr, da.mem_idx, da.memop);
+
+        gen_store_gpr(dc, rd, oldv);
+        tcg_temp_free(cmpv);
+        tcg_temp_free(oldv);
+        tcg_temp_free(tmpv);
+        break;
+    default:
+        /* ??? Should be DAE_invalid_asi.  */
+        gen_exception(dc, TT_DATA_ACCESS);
+        break;
     }
-
-    save_state(dc);
-    val1 = gen_load_gpr(dc, rd);
-    dst = gen_dest_gpr(dc, rd);
-    r_asi = tcg_const_i32(da.asi);
-    gen_helper_cas_asi(dst, cpu_env, addr, val1, val2, r_asi);
-    tcg_temp_free_i32(r_asi);
-    gen_store_gpr(dc, rd, dst);
 }
 
 static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn)
@@ -2668,23 +2681,34 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
     }
 }
 
-static void gen_casx_asi(DisasContext *dc, TCGv addr, TCGv val2,
+static void gen_casx_asi(DisasContext *dc, TCGv addr, TCGv cmpv,
                          int insn, int rd)
 {
     DisasASI da = get_asi(dc, insn, MO_TEQ);
-    TCGv val1 = gen_load_gpr(dc, rd);
-    TCGv dst = gen_dest_gpr(dc, rd);
-    TCGv_i32 r_asi;
+    TCGv oldv, tmpv;
 
-    if (da.type == GET_ASI_EXCP) {
+    switch (da.type) {
+    case GET_ASI_EXCP:
         return;
+    case GET_ASI_DIRECT:
+        oldv = tcg_temp_new();
+        tmpv = tcg_temp_new();
+
+        /* ??? Should be atomic.  */
+        tcg_gen_qemu_ld_tl(oldv, addr, da.mem_idx, da.memop);
+        tcg_gen_movcond_tl(TCG_COND_EQ, tmpv, oldv, cmpv,
+                           gen_load_gpr(dc, rd), oldv);
+        tcg_gen_qemu_st_tl(tmpv, addr, da.mem_idx, da.memop);
+
+        gen_store_gpr(dc, rd, oldv);
+        tcg_temp_free(oldv);
+        tcg_temp_free(tmpv);
+        break;
+    default:
+        /* ??? Should be DAE_invalid_asi.  */
+        gen_exception(dc, TT_DATA_ACCESS);
+        break;
     }
-
-    save_state(dc);
-    r_asi = tcg_const_i32(da.asi);
-    gen_helper_casx_asi(dst, cpu_env, addr, val1, val2, r_asi);
-    tcg_temp_free_i32(r_asi);
-    gen_store_gpr(dc, rd, dst);
 }
 
 #elif !defined(CONFIG_USER_ONLY)
-- 
cgit v1.2.3


From 34810610acbde7a0745be3a88e99f2ef9282260f Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 14 Jul 2016 12:46:19 -0700
Subject: target-sparc: Implement BCOPY/BFILL inline

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 0fb361a8bb..7950458a13 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2036,6 +2036,8 @@ typedef enum {
     GET_ASI_DTWINX,
     GET_ASI_BLOCK,
     GET_ASI_SHORT,
+    GET_ASI_BCOPY,
+    GET_ASI_BFILL,
 } ASIType;
 
 typedef struct {
@@ -2077,6 +2079,14 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop)
             mem_idx = MMU_PHYS_IDX;
             type = GET_ASI_DIRECT;
             break;
+        case ASI_M_BCOPY: /* Block copy, sta access */
+            mem_idx = MMU_KERNEL_IDX;
+            type = GET_ASI_BCOPY;
+            break;
+        case ASI_M_BFILL: /* Block fill, stda access */
+            mem_idx = MMU_KERNEL_IDX;
+            type = GET_ASI_BFILL;
+            break;
         }
     } else {
         gen_exception(dc, TT_PRIV_INSN);
@@ -2294,6 +2304,38 @@ static void gen_st_asi(DisasContext *dc, TCGv src, TCGv addr,
         gen_address_mask(dc, addr);
         tcg_gen_qemu_st_tl(src, addr, da.mem_idx, da.memop);
         break;
+#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY)
+    case GET_ASI_BCOPY:
+        /* Copy 32 bytes from the address in SRC to ADDR.  */
+        /* ??? The original qemu code suggests 4-byte alignment, dropping
+           the low bits, but the only place I can see this used is in the
+           Linux kernel with 32 byte alignment, which would make more sense
+           as a cacheline-style operation.  */
+        {
+            TCGv saddr = tcg_temp_new();
+            TCGv daddr = tcg_temp_new();
+            TCGv four = tcg_const_tl(4);
+            TCGv_i32 tmp = tcg_temp_new_i32();
+            int i;
+
+            tcg_gen_andi_tl(saddr, src, -4);
+            tcg_gen_andi_tl(daddr, addr, -4);
+            for (i = 0; i < 32; i += 4) {
+                /* Since the loads and stores are paired, allow the
+                   copy to happen in the host endianness.  */
+                tcg_gen_qemu_ld_i32(tmp, saddr, da.mem_idx, MO_UL);
+                tcg_gen_qemu_st_i32(tmp, daddr, da.mem_idx, MO_UL);
+                tcg_gen_add_tl(saddr, saddr, four);
+                tcg_gen_add_tl(daddr, daddr, four);
+            }
+
+            tcg_temp_free(saddr);
+            tcg_temp_free(daddr);
+            tcg_temp_free(four);
+            tcg_temp_free_i32(tmp);
+        }
+        break;
+#endif
     default:
         {
             TCGv_i32 r_asi = tcg_const_i32(da.asi);
@@ -2766,6 +2808,27 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
         gen_address_mask(dc, addr);
         tcg_gen_qemu_st_i64(t64, addr, da.mem_idx, da.memop);
         break;
+    case GET_ASI_BFILL:
+        /* Store 32 bytes of T64 to ADDR.  */
+        /* ??? The original qemu code suggests 8-byte alignment, dropping
+           the low bits, but the only place I can see this used is in the
+           Linux kernel with 32 byte alignment, which would make more sense
+           as a cacheline-style operation.  */
+        {
+            TCGv d_addr = tcg_temp_new();
+            TCGv eight = tcg_const_tl(8);
+            int i;
+
+            tcg_gen_andi_tl(d_addr, addr, -8);
+            for (i = 0; i < 32; i += 8) {
+                tcg_gen_qemu_st_i64(t64, d_addr, da.mem_idx, da.memop);
+                tcg_gen_add_tl(d_addr, d_addr, eight);
+            }
+
+            tcg_temp_free(d_addr);
+            tcg_temp_free(eight);
+        }
+        break;
     default:
         {
             TCGv_i32 r_asi = tcg_const_i32(da.asi);
-- 
cgit v1.2.3


From 918d9a2c9d36378a3cf6636018900a4731c83b9d Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Jul 2016 15:09:54 -0700
Subject: target-sparc: Remove asi helper code handled inline

Now that we never call out to helpers when direct accesses can
handle an asi, remove the corresponding code in those helpers.
For ldda, this removes the entire helper.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/helper.h      |   1 -
 target-sparc/ldst_helper.c | 811 ++++++++-------------------------------------
 target-sparc/translate.c   |  34 +-
 3 files changed, 156 insertions(+), 690 deletions(-)

diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index 435c65e97a..b7bff84efa 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -17,7 +17,6 @@ DEF_HELPER_1(rdcwp, tl, env)
 DEF_HELPER_2(wrcwp, void, env, tl)
 DEF_HELPER_FLAGS_2(array8, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_FLAGS_1(popc, TCG_CALL_NO_RWG_SE, tl, tl)
-DEF_HELPER_FLAGS_3(ldda_asi, TCG_CALL_NO_WG, void, env, tl, int)
 DEF_HELPER_FLAGS_2(set_softint, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_2(clear_softint, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_2(write_softint, TCG_CALL_NO_RWG, void, env, i64)
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index d90748ea5a..725bb49e9d 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -560,64 +560,11 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
             break;
         }
         break;
-    case ASI_USERDATA: /* User data access */
-        switch (size) {
-        case 1:
-            ret = cpu_ldub_user(env, addr);
-            break;
-        case 2:
-            ret = cpu_lduw_user(env, addr);
-            break;
-        default:
-        case 4:
-            ret = cpu_ldl_user(env, addr);
-            break;
-        case 8:
-            ret = cpu_ldq_user(env, addr);
-            break;
-        }
-        break;
-    case ASI_KERNELDATA: /* Supervisor data access */
-    case ASI_P: /* Implicit primary context data access (v9 only?) */
-        switch (size) {
-        case 1:
-            ret = cpu_ldub_kernel(env, addr);
-            break;
-        case 2:
-            ret = cpu_lduw_kernel(env, addr);
-            break;
-        default:
-        case 4:
-            ret = cpu_ldl_kernel(env, addr);
-            break;
-        case 8:
-            ret = cpu_ldq_kernel(env, addr);
-            break;
-        }
-        break;
     case ASI_M_TXTC_TAG:   /* SparcStation 5 I-cache tag */
     case ASI_M_TXTC_DATA:  /* SparcStation 5 I-cache data */
     case ASI_M_DATAC_TAG:  /* SparcStation 5 D-cache tag */
     case ASI_M_DATAC_DATA: /* SparcStation 5 D-cache data */
         break;
-    case ASI_M_BYPASS:    /* MMU passthrough */
-    case ASI_LEON_BYPASS: /* LEON MMU passthrough */
-        switch (size) {
-        case 1:
-            ret = ldub_phys(cs->as, addr);
-            break;
-        case 2:
-            ret = lduw_phys(cs->as, addr);
-            break;
-        default:
-        case 4:
-            ret = ldl_phys(cs->as, addr);
-            break;
-        case 8:
-            ret = ldq_phys(cs->as, addr);
-            break;
-        }
-        break;
     case 0x21 ... 0x2f: /* MMU passthrough, 0x100000000 to 0xfffffffff */
         switch (size) {
         case 1:
@@ -685,6 +632,14 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
         cpu_unassigned_access(cs, addr, false, false, asi, size);
         ret = 0;
         break;
+
+    case ASI_USERDATA: /* User data access */
+    case ASI_KERNELDATA: /* Supervisor data access */
+    case ASI_P: /* Implicit primary context data access (v9 only?) */
+    case ASI_M_BYPASS:    /* MMU passthrough */
+    case ASI_LEON_BYPASS: /* LEON MMU passthrough */
+        /* These are always handled inline.  */
+        g_assert_not_reached();
     }
     if (sign) {
         switch (size) {
@@ -935,41 +890,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val,
     case ASI_M_DIAGS:   /* Turbosparc DTLB Diagnostic */
     case ASI_M_IODIAG:  /* Turbosparc IOTLB Diagnostic */
         break;
-    case ASI_USERDATA: /* User data access */
-        switch (size) {
-        case 1:
-            cpu_stb_user(env, addr, val);
-            break;
-        case 2:
-            cpu_stw_user(env, addr, val);
-            break;
-        default:
-        case 4:
-            cpu_stl_user(env, addr, val);
-            break;
-        case 8:
-            cpu_stq_user(env, addr, val);
-            break;
-        }
-        break;
-    case ASI_KERNELDATA: /* Supervisor data access */
-    case ASI_P:
-        switch (size) {
-        case 1:
-            cpu_stb_kernel(env, addr, val);
-            break;
-        case 2:
-            cpu_stw_kernel(env, addr, val);
-            break;
-        default:
-        case 4:
-            cpu_stl_kernel(env, addr, val);
-            break;
-        case 8:
-            cpu_stq_kernel(env, addr, val);
-            break;
-        }
-        break;
     case ASI_M_TXTC_TAG:   /* I-cache tag */
     case ASI_M_TXTC_DATA:  /* I-cache data */
     case ASI_M_DATAC_TAG:  /* D-cache tag */
@@ -980,52 +900,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val,
     case ASI_M_FLUSH_CTX:    /* I/D-cache flush context */
     case ASI_M_FLUSH_USER:   /* I/D-cache flush user */
         break;
-    case ASI_M_BCOPY: /* Block copy, sta access */
-        {
-            /* val = src
-               addr = dst
-               copy 32 bytes */
-            unsigned int i;
-            uint32_t src = val & ~3, dst = addr & ~3, temp;
-
-            for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
-                temp = cpu_ldl_kernel(env, src);
-                cpu_stl_kernel(env, dst, temp);
-            }
-        }
-        break;
-    case ASI_M_BFILL: /* Block fill, stda access */
-        {
-            /* addr = dst
-               fill 32 bytes with val */
-            unsigned int i;
-            uint32_t dst = addr & ~7;
-
-            for (i = 0; i < 32; i += 8, dst += 8) {
-                cpu_stq_kernel(env, dst, val);
-            }
-        }
-        break;
-    case ASI_M_BYPASS:    /* MMU passthrough */
-    case ASI_LEON_BYPASS: /* LEON MMU passthrough */
-        {
-            switch (size) {
-            case 1:
-                stb_phys(cs->as, addr, val);
-                break;
-            case 2:
-                stw_phys(cs->as, addr, val);
-                break;
-            case 4:
-            default:
-                stl_phys(cs->as, addr, val);
-                break;
-            case 8:
-                stq_phys(cs->as, addr, val);
-                break;
-            }
-        }
-        break;
     case 0x21 ... 0x2f: /* MMU passthrough, 0x100000000 to 0xfffffffff */
         {
             switch (size) {
@@ -1097,6 +971,16 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val,
         cpu_unassigned_access(CPU(sparc_env_get_cpu(env)),
                               addr, true, false, asi, size);
         break;
+
+    case ASI_USERDATA: /* User data access */
+    case ASI_KERNELDATA: /* Supervisor data access */
+    case ASI_P:
+    case ASI_M_BYPASS:    /* MMU passthrough */
+    case ASI_LEON_BYPASS: /* LEON MMU passthrough */
+    case ASI_M_BCOPY: /* Block copy, sta access */
+    case ASI_M_BFILL: /* Block fill, stda access */
+        /* These are always handled inline.  */
+        g_assert_not_reached();
     }
 #ifdef DEBUG_ASI
     dump_asi("write", addr, asi, size, val);
@@ -1113,68 +997,54 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
     int size = 1 << (memop & MO_SIZE);
     int sign = memop & MO_SIGN;
     uint64_t ret = 0;
-#if defined(DEBUG_ASI)
-    target_ulong last_addr = addr;
-#endif
 
     if (asi < 0x80) {
         cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
-
     do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
     switch (asi) {
     case ASI_PNF:  /* Primary no-fault */
     case ASI_PNFL: /* Primary no-fault LE */
-        if (page_check_range(addr, size, PAGE_READ) == -1) {
-#ifdef DEBUG_ASI
-            dump_asi("read ", last_addr, asi, size, ret);
-#endif
-            return 0;
-        }
-        /* Fall through */
-    case ASI_P: /* Primary */
-    case ASI_PL: /* Primary LE */
-        {
-            switch (size) {
-            case 1:
-                ret = cpu_ldub_data(env, addr);
-                break;
-            case 2:
-                ret = cpu_lduw_data(env, addr);
-                break;
-            case 4:
-                ret = cpu_ldl_data(env, addr);
-                break;
-            default:
-            case 8:
-                ret = cpu_ldq_data(env, addr);
-                break;
-            }
-        }
-        break;
     case ASI_SNF:  /* Secondary no-fault */
     case ASI_SNFL: /* Secondary no-fault LE */
         if (page_check_range(addr, size, PAGE_READ) == -1) {
-#ifdef DEBUG_ASI
-            dump_asi("read ", last_addr, asi, size, ret);
-#endif
-            return 0;
+            ret = 0;
+            break;
+        }
+        switch (size) {
+        case 1:
+            ret = cpu_ldub_data(env, addr);
+            break;
+        case 2:
+            ret = cpu_lduw_data(env, addr);
+            break;
+        case 4:
+            ret = cpu_ldl_data(env, addr);
+            break;
+        case 8:
+            ret = cpu_ldq_data(env, addr);
+            break;
+        default:
+            g_assert_not_reached();
         }
-        /* Fall through */
+        break;
+        break;
+
+    case ASI_P: /* Primary */
+    case ASI_PL: /* Primary LE */
     case ASI_S:  /* Secondary */
     case ASI_SL: /* Secondary LE */
-        /* XXX */
-        break;
+        /* These are always handled inline.  */
+        g_assert_not_reached();
+
     default:
-        break;
+        cpu_raise_exception_ra(env, TT_DATA_ACCESS, GETPC());
     }
 
     /* Convert from little endian */
     switch (asi) {
-    case ASI_PL:   /* Primary LE */
-    case ASI_SL:   /* Secondary LE */
     case ASI_PNFL: /* Primary no-fault LE */
     case ASI_SNFL: /* Secondary no-fault LE */
         switch (size) {
@@ -1187,11 +1057,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
         case 8:
             ret = bswap64(ret);
             break;
-        default:
-            break;
         }
-    default:
-        break;
     }
 
     /* Convert to signed number */
@@ -1206,12 +1072,10 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
         case 4:
             ret = (int32_t) ret;
             break;
-        default:
-            break;
         }
     }
 #ifdef DEBUG_ASI
-    dump_asi("read ", last_addr, asi, size, ret);
+    dump_asi("read", addr, asi, size, ret);
 #endif
     return ret;
 }
@@ -1227,54 +1091,14 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
         cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
     }
     do_check_align(env, addr, size - 1, GETPC());
-    addr = asi_address_mask(env, asi, addr);
-
-    /* Convert to little endian */
-    switch (asi) {
-    case ASI_PL: /* Primary LE */
-    case ASI_SL: /* Secondary LE */
-        switch (size) {
-        case 2:
-            val = bswap16(val);
-            break;
-        case 4:
-            val = bswap32(val);
-            break;
-        case 8:
-            val = bswap64(val);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
 
     switch (asi) {
     case ASI_P:  /* Primary */
     case ASI_PL: /* Primary LE */
-        {
-            switch (size) {
-            case 1:
-                cpu_stb_data(env, addr, val);
-                break;
-            case 2:
-                cpu_stw_data(env, addr, val);
-                break;
-            case 4:
-                cpu_stl_data(env, addr, val);
-                break;
-            case 8:
-            default:
-                cpu_stq_data(env, addr, val);
-                break;
-            }
-        }
-        break;
     case ASI_S:  /* Secondary */
     case ASI_SL: /* Secondary LE */
-        /* XXX */
-        return;
+        /* These are always handled inline.  */
+        g_assert_not_reached();
 
     case ASI_PNF:  /* Primary no-fault, RO */
     case ASI_SNF:  /* Secondary no-fault, RO */
@@ -1282,7 +1106,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     case ASI_SNFL: /* Secondary no-fault LE, RO */
     default:
         cpu_raise_exception_ra(env, TT_DATA_ACCESS, GETPC());
-        return;
     }
 }
 
@@ -1311,170 +1134,88 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
     do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
-    /* process nonfaulting loads first */
-    if ((asi & 0xf6) == 0x82) {
-        int mmu_idx;
-
-        /* secondary space access has lowest asi bit equal to 1 */
-        if (env->pstate & PS_PRIV) {
-            mmu_idx = (asi & 1) ? MMU_KERNEL_SECONDARY_IDX : MMU_KERNEL_IDX;
-        } else {
-            mmu_idx = (asi & 1) ? MMU_USER_SECONDARY_IDX : MMU_USER_IDX;
-        }
+    switch (asi) {
+    case ASI_PNF:
+    case ASI_PNFL:
+    case ASI_SNF:
+    case ASI_SNFL:
+        {
+            TCGMemOpIdx oi;
+            int idx = (env->pstate & PS_PRIV
+                       ? (asi & 1 ? MMU_KERNEL_SECONDARY_IDX : MMU_KERNEL_IDX)
+                       : (asi & 1 ? MMU_USER_SECONDARY_IDX : MMU_USER_IDX));
 
-        if (cpu_get_phys_page_nofault(env, addr, mmu_idx) == -1ULL) {
+            if (cpu_get_phys_page_nofault(env, addr, idx) == -1ULL) {
 #ifdef DEBUG_ASI
-            dump_asi("read ", last_addr, asi, size, ret);
+                dump_asi("read ", last_addr, asi, size, ret);
 #endif
-            /* env->exception_index is set in get_physical_address_data(). */
-            cpu_raise_exception_ra(env, cs->exception_index, GETPC());
-        }
-
-        /* convert nonfaulting load ASIs to normal load ASIs */
-        asi &= ~0x02;
-    }
-
-    switch (asi) {
-    case ASI_AIUP:  /* As if user primary */
-    case ASI_AIUS:  /* As if user secondary */
-    case ASI_AIUPL: /* As if user primary LE */
-    case ASI_AIUSL: /* As if user secondary LE */
-    case ASI_P:  /* Primary */
-    case ASI_S:  /* Secondary */
-    case ASI_PL: /* Primary LE */
-    case ASI_SL: /* Secondary LE */
-        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
-            if (cpu_hypervisor_mode(env)) {
-                switch (size) {
-                case 1:
-                    ret = cpu_ldub_hypv(env, addr);
-                    break;
-                case 2:
-                    ret = cpu_lduw_hypv(env, addr);
-                    break;
-                case 4:
-                    ret = cpu_ldl_hypv(env, addr);
-                    break;
-                default:
-                case 8:
-                    ret = cpu_ldq_hypv(env, addr);
-                    break;
-                }
-            } else {
-                /* secondary space access has lowest asi bit equal to 1 */
-                if (asi & 1) {
-                    switch (size) {
-                    case 1:
-                        ret = cpu_ldub_kernel_secondary(env, addr);
-                        break;
-                    case 2:
-                        ret = cpu_lduw_kernel_secondary(env, addr);
-                        break;
-                    case 4:
-                        ret = cpu_ldl_kernel_secondary(env, addr);
-                        break;
-                    default:
-                    case 8:
-                        ret = cpu_ldq_kernel_secondary(env, addr);
-                        break;
-                    }
-                } else {
-                    switch (size) {
-                    case 1:
-                        ret = cpu_ldub_kernel(env, addr);
-                        break;
-                    case 2:
-                        ret = cpu_lduw_kernel(env, addr);
-                        break;
-                    case 4:
-                        ret = cpu_ldl_kernel(env, addr);
-                        break;
-                    default:
-                    case 8:
-                        ret = cpu_ldq_kernel(env, addr);
-                        break;
-                    }
-                }
-            }
-        } else {
-            /* secondary space access has lowest asi bit equal to 1 */
-            if (asi & 1) {
-                switch (size) {
-                case 1:
-                    ret = cpu_ldub_user_secondary(env, addr);
-                    break;
-                case 2:
-                    ret = cpu_lduw_user_secondary(env, addr);
-                    break;
-                case 4:
-                    ret = cpu_ldl_user_secondary(env, addr);
-                    break;
-                default:
-                case 8:
-                    ret = cpu_ldq_user_secondary(env, addr);
-                    break;
-                }
-            } else {
-                switch (size) {
-                case 1:
-                    ret = cpu_ldub_user(env, addr);
-                    break;
-                case 2:
-                    ret = cpu_lduw_user(env, addr);
-                    break;
-                case 4:
-                    ret = cpu_ldl_user(env, addr);
-                    break;
-                default:
-                case 8:
-                    ret = cpu_ldq_user(env, addr);
-                    break;
-                }
+                /* exception_index is set in get_physical_address_data. */
+                cpu_raise_exception_ra(env, cs->exception_index, GETPC());
             }
-        }
-        break;
-    case ASI_REAL:        /* Bypass */
-    case ASI_REAL_IO:   /* Bypass, non-cacheable */
-    case ASI_REAL_L:      /* Bypass LE */
-    case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
-        {
+            oi = make_memop_idx(memop, idx);
             switch (size) {
             case 1:
-                ret = ldub_phys(cs->as, addr);
+                ret = helper_ret_ldub_mmu(env, addr, oi, GETPC());
                 break;
             case 2:
-                ret = lduw_phys(cs->as, addr);
+                if (asi & 8) {
+                    ret = helper_le_lduw_mmu(env, addr, oi, GETPC());
+                } else {
+                    ret = helper_be_lduw_mmu(env, addr, oi, GETPC());
+                }
                 break;
             case 4:
-                ret = ldl_phys(cs->as, addr);
+                if (asi & 8) {
+                    ret = helper_le_ldul_mmu(env, addr, oi, GETPC());
+                } else {
+                    ret = helper_be_ldul_mmu(env, addr, oi, GETPC());
+                }
                 break;
-            default:
             case 8:
-                ret = ldq_phys(cs->as, addr);
+                if (asi & 8) {
+                    ret = helper_le_ldq_mmu(env, addr, oi, GETPC());
+                } else {
+                    ret = helper_be_ldq_mmu(env, addr, oi, GETPC());
+                }
                 break;
+            default:
+                g_assert_not_reached();
             }
-            break;
         }
+        break;
+
+    case ASI_AIUP:  /* As if user primary */
+    case ASI_AIUS:  /* As if user secondary */
+    case ASI_AIUPL: /* As if user primary LE */
+    case ASI_AIUSL: /* As if user secondary LE */
+    case ASI_P:  /* Primary */
+    case ASI_S:  /* Secondary */
+    case ASI_PL: /* Primary LE */
+    case ASI_SL: /* Secondary LE */
+    case ASI_REAL:      /* Bypass */
+    case ASI_REAL_IO:   /* Bypass, non-cacheable */
+    case ASI_REAL_L:    /* Bypass LE */
+    case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
     case ASI_N:  /* Nucleus */
     case ASI_NL: /* Nucleus Little Endian (LE) */
-        {
-            switch (size) {
-            case 1:
-                ret = cpu_ldub_nucleus(env, addr);
-                break;
-            case 2:
-                ret = cpu_lduw_nucleus(env, addr);
-                break;
-            case 4:
-                ret = cpu_ldl_nucleus(env, addr);
-                break;
-            default:
-            case 8:
-                ret = cpu_ldq_nucleus(env, addr);
-                break;
-            }
-            break;
-        }
+    case ASI_NUCLEUS_QUAD_LDD:   /* Nucleus quad LDD 128 bit atomic */
+    case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */
+    case ASI_TWINX_AIUP:   /* As if user primary, twinx */
+    case ASI_TWINX_AIUS:   /* As if user secondary, twinx */
+    case ASI_TWINX_REAL:   /* Real address, twinx */
+    case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */
+    case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */
+    case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
+    case ASI_TWINX_N:  /* Nucleus, twinx */
+    case ASI_TWINX_NL: /* Nucleus, twinx, LE */
+    /* ??? From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */
+    case ASI_TWINX_P:  /* Primary, twinx */
+    case ASI_TWINX_PL: /* Primary, twinx, LE */
+    case ASI_TWINX_S:  /* Secondary, twinx */
+    case ASI_TWINX_SL: /* Secondary, twinx, LE */
+        /* These are always handled inline.  */
+        g_assert_not_reached();
+
     case ASI_UPA_CONFIG: /* UPA config */
         /* XXX */
         break;
@@ -1602,51 +1343,6 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr,
         cpu_unassigned_access(cs, addr, false, false, 1, size);
         ret = 0;
         break;
-
-    case ASI_NUCLEUS_QUAD_LDD:   /* Nucleus quad LDD 128 bit atomic */
-    case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */
-    case ASI_TWINX_AIUP:   /* As if user primary, twinx */
-    case ASI_TWINX_AIUS:   /* As if user secondary, twinx */
-    case ASI_TWINX_REAL:   /* Real address, twinx */
-    case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */
-    case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */
-    case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
-    case ASI_TWINX_N:  /* Nucleus, twinx */
-    case ASI_TWINX_NL: /* Nucleus, twinx, LE */
-    /* ??? From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */
-    case ASI_TWINX_P:  /* Primary, twinx */
-    case ASI_TWINX_PL: /* Primary, twinx, LE */
-    case ASI_TWINX_S:  /* Secondary, twinx */
-    case ASI_TWINX_SL: /* Secondary, twinx, LE */
-        /* These are all 128-bit atomic; only ldda (now ldtxa) allowed */
-        cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC());
-        return 0;
-    }
-
-    /* Convert from little endian */
-    switch (asi) {
-    case ASI_NL: /* Nucleus Little Endian (LE) */
-    case ASI_AIUPL: /* As if user primary LE */
-    case ASI_AIUSL: /* As if user secondary LE */
-    case ASI_REAL_L:      /* Bypass LE */
-    case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
-    case ASI_PL: /* Primary LE */
-    case ASI_SL: /* Secondary LE */
-        switch(size) {
-        case 2:
-            ret = bswap16(ret);
-            break;
-        case 4:
-            ret = bswap32(ret);
-            break;
-        case 8:
-            ret = bswap64(ret);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
     }
 
     /* Convert to signed number */
@@ -1694,32 +1390,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     do_check_align(env, addr, size - 1, GETPC());
     addr = asi_address_mask(env, asi, addr);
 
-    /* Convert to little endian */
-    switch (asi) {
-    case ASI_NL: /* Nucleus Little Endian (LE) */
-    case ASI_AIUPL: /* As if user primary LE */
-    case ASI_AIUSL: /* As if user secondary LE */
-    case ASI_REAL_L: /* Bypass LE */
-    case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
-    case ASI_PL: /* Primary LE */
-    case ASI_SL: /* Secondary LE */
-        switch (size) {
-        case 2:
-            val = bswap16(val);
-            break;
-        case 4:
-            val = bswap32(val);
-            break;
-        case 8:
-            val = bswap64(val);
-            break;
-        default:
-            break;
-        }
-    default:
-        break;
-    }
-
     switch (asi) {
     case ASI_AIUP:  /* As if user primary */
     case ASI_AIUS:  /* As if user secondary */
@@ -1729,138 +1399,29 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     case ASI_S:  /* Secondary */
     case ASI_PL: /* Primary LE */
     case ASI_SL: /* Secondary LE */
-        if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
-            if (cpu_hypervisor_mode(env)) {
-                switch (size) {
-                case 1:
-                    cpu_stb_hypv(env, addr, val);
-                    break;
-                case 2:
-                    cpu_stw_hypv(env, addr, val);
-                    break;
-                case 4:
-                    cpu_stl_hypv(env, addr, val);
-                    break;
-                case 8:
-                default:
-                    cpu_stq_hypv(env, addr, val);
-                    break;
-                }
-            } else {
-                /* secondary space access has lowest asi bit equal to 1 */
-                if (asi & 1) {
-                    switch (size) {
-                    case 1:
-                        cpu_stb_kernel_secondary(env, addr, val);
-                        break;
-                    case 2:
-                        cpu_stw_kernel_secondary(env, addr, val);
-                        break;
-                    case 4:
-                        cpu_stl_kernel_secondary(env, addr, val);
-                        break;
-                    case 8:
-                    default:
-                        cpu_stq_kernel_secondary(env, addr, val);
-                        break;
-                    }
-                } else {
-                    switch (size) {
-                    case 1:
-                        cpu_stb_kernel(env, addr, val);
-                        break;
-                    case 2:
-                        cpu_stw_kernel(env, addr, val);
-                        break;
-                    case 4:
-                        cpu_stl_kernel(env, addr, val);
-                        break;
-                    case 8:
-                    default:
-                        cpu_stq_kernel(env, addr, val);
-                        break;
-                    }
-                }
-            }
-        } else {
-            /* secondary space access has lowest asi bit equal to 1 */
-            if (asi & 1) {
-                switch (size) {
-                case 1:
-                    cpu_stb_user_secondary(env, addr, val);
-                    break;
-                case 2:
-                    cpu_stw_user_secondary(env, addr, val);
-                    break;
-                case 4:
-                    cpu_stl_user_secondary(env, addr, val);
-                    break;
-                case 8:
-                default:
-                    cpu_stq_user_secondary(env, addr, val);
-                    break;
-                }
-            } else {
-                switch (size) {
-                case 1:
-                    cpu_stb_user(env, addr, val);
-                    break;
-                case 2:
-                    cpu_stw_user(env, addr, val);
-                    break;
-                case 4:
-                    cpu_stl_user(env, addr, val);
-                    break;
-                case 8:
-                default:
-                    cpu_stq_user(env, addr, val);
-                    break;
-                }
-            }
-        }
-        break;
     case ASI_REAL:      /* Bypass */
     case ASI_REAL_IO:   /* Bypass, non-cacheable */
     case ASI_REAL_L:    /* Bypass LE */
     case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */
-        {
-            switch (size) {
-            case 1:
-                stb_phys(cs->as, addr, val);
-                break;
-            case 2:
-                stw_phys(cs->as, addr, val);
-                break;
-            case 4:
-                stl_phys(cs->as, addr, val);
-                break;
-            case 8:
-            default:
-                stq_phys(cs->as, addr, val);
-                break;
-            }
-        }
-        return;
     case ASI_N:  /* Nucleus */
     case ASI_NL: /* Nucleus Little Endian (LE) */
-        {
-            switch (size) {
-            case 1:
-                cpu_stb_nucleus(env, addr, val);
-                break;
-            case 2:
-                cpu_stw_nucleus(env, addr, val);
-                break;
-            case 4:
-                cpu_stl_nucleus(env, addr, val);
-                break;
-            default:
-            case 8:
-                cpu_stq_nucleus(env, addr, val);
-                break;
-            }
-            break;
-        }
+    case ASI_NUCLEUS_QUAD_LDD:   /* Nucleus quad LDD 128 bit atomic */
+    case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */
+    case ASI_TWINX_AIUP:   /* As if user primary, twinx */
+    case ASI_TWINX_AIUS:   /* As if user secondary, twinx */
+    case ASI_TWINX_REAL:   /* Real address, twinx */
+    case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */
+    case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */
+    case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
+    case ASI_TWINX_N:  /* Nucleus, twinx */
+    case ASI_TWINX_NL: /* Nucleus, twinx, LE */
+    /* ??? From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */
+    case ASI_TWINX_P:  /* Primary, twinx */
+    case ASI_TWINX_PL: /* Primary, twinx, LE */
+    case ASI_TWINX_S:  /* Secondary, twinx */
+    case ASI_TWINX_SL: /* Secondary, twinx, LE */
+        /* These are always handled inline.  */
+        g_assert_not_reached();
 
     case ASI_UPA_CONFIG: /* UPA config */
         /* XXX */
@@ -2006,24 +1567,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     case ASI_INTR_RECEIVE: /* Interrupt data receive */
         env->ivec_status = val & 0x20;
         return;
-    case ASI_NUCLEUS_QUAD_LDD:   /* Nucleus quad LDD 128 bit atomic */
-    case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */
-    case ASI_TWINX_AIUP:   /* As if user primary, twinx */
-    case ASI_TWINX_AIUS:   /* As if user secondary, twinx */
-    case ASI_TWINX_REAL:   /* Real address, twinx */
-    case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */
-    case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */
-    case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
-    case ASI_TWINX_N:  /* Nucleus, twinx */
-    case ASI_TWINX_NL: /* Nucleus, twinx, LE */
-    /* ??? From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */
-    case ASI_TWINX_P:  /* Primary, twinx */
-    case ASI_TWINX_PL: /* Primary, twinx, LE */
-    case ASI_TWINX_S:  /* Secondary, twinx */
-    case ASI_TWINX_SL: /* Secondary, twinx, LE */
-        /* Only stda allowed */
-        cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC());
-        return;
     case ASI_DCACHE_DATA: /* D-cache data */
     case ASI_DCACHE_TAG: /* D-cache tag access */
     case ASI_ESTATE_ERROR_EN: /* E-cache error enable */
@@ -2056,100 +1599,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
     }
 }
 #endif /* CONFIG_USER_ONLY */
-
-/* 128-bit LDDA; result returned in QT0.  */
-void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi)
-{
-    uint64_t h, l;
-
-    if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-        || (cpu_has_hypervisor(env)
-            && asi >= 0x30 && asi < 0x80
-            && !(env->hpstate & HS_PRIV))) {
-        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
-    }
-
-    addr = asi_address_mask(env, asi, addr);
-
-    switch (asi) {
-#if !defined(CONFIG_USER_ONLY)
-    case ASI_TWINX_AIUP:   /* As if user primary, twinx */
-    case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */
-        do_check_align(env, addr, 0xf, GETPC());
-        h = cpu_ldq_user(env, addr);
-        l = cpu_ldq_user(env, addr + 8);
-        break;
-    case ASI_TWINX_AIUS:   /* As if user secondary, twinx */
-    case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */
-        do_check_align(env, addr, 0xf, GETPC());
-        h = cpu_ldq_user_secondary(env, addr);
-        l = cpu_ldq_user_secondary(env, addr + 8);
-        break;
-    case ASI_TWINX_REAL:   /* Real address, twinx */
-    case ASI_TWINX_REAL_L: /* Real address, twinx, LE */
-        do_check_align(env, addr, 0xf, GETPC());
-        {
-            CPUState *cs = CPU(sparc_env_get_cpu(env));
-            h = ldq_phys(cs->as, addr);
-            l = ldq_phys(cs->as, addr + 8);
-        }
-        break;
-    case ASI_NUCLEUS_QUAD_LDD:
-    case ASI_NUCLEUS_QUAD_LDD_L:
-    case ASI_TWINX_N:  /* Nucleus, twinx */
-    case ASI_TWINX_NL: /* Nucleus, twinx, LE */
-        do_check_align(env, addr, 0xf, GETPC());
-        h = cpu_ldq_nucleus(env, addr);
-        l = cpu_ldq_nucleus(env, addr + 8);
-        break;
-    case ASI_TWINX_S: /* Secondary, twinx */
-    case ASI_TWINX_SL: /* Secondary, twinx, LE */
-        if (!cpu_hypervisor_mode(env)) {
-            do_check_align(env, addr, 0xf, GETPC());
-            if (env->pstate & PS_PRIV) {
-                h = cpu_ldq_kernel_secondary(env, addr);
-                l = cpu_ldq_kernel_secondary(env, addr + 8);
-            } else {
-                h = cpu_ldq_user_secondary(env, addr);
-                l = cpu_ldq_user_secondary(env, addr + 8);
-            }
-            break;
-        }
-        /* fallthru */
-    case ASI_TWINX_P:  /* Primary, twinx */
-    case ASI_TWINX_PL: /* Primary, twinx, LE */
-        do_check_align(env, addr, 0xf, GETPC());
-        h = cpu_ldq_data(env, addr);
-        l = cpu_ldq_data(env, addr + 8);
-        break;
-#else
-    case ASI_TWINX_P:  /* Primary, twinx */
-    case ASI_TWINX_PL: /* Primary, twinx, LE */
-    case ASI_TWINX_S:  /* Primary, twinx */
-    case ASI_TWINX_SL: /* Primary, twinx, LE */
-        /* ??? Should be available, but we need to implement
-           an atomic 128-bit load.  */
-        cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC());
-#endif
-    default:
-        /* Non-twinx asi, so this is the legacy ldda insn, which
-           performs two word sized operations.  */
-        /* ??? The UA2011 manual recommends emulating this with
-           a single 64-bit load.  However, LE asis *are* treated
-           as two 32-bit loads individually byte swapped.  */
-        do_check_align(env, addr, 7, GETPC());
-        QT0.high = (uint32_t)helper_ld_asi(env, addr, asi, MO_UL);
-        QT0.low = (uint32_t)helper_ld_asi(env, addr + 4, asi, MO_UL);
-        return;
-    }
-
-    if (asi & 8) {
-        h = bswap64(h);
-        l = bswap64(l);
-    }
-    QT0.high = h;
-    QT0.low = l;
-}
 #endif /* TARGET_SPARC64 */
 
 void helper_ldqf(CPUSPARCState *env, target_ulong addr, int mem_idx)
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 7950458a13..ecdc7e1d8a 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2652,15 +2652,27 @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd)
         break;
 
     default:
+        /* ??? In theory we've handled all of the ASIs that are valid
+           for ldda, and this should raise DAE_invalid_asi.  However,
+           real hardware allows others.  This can be seen with e.g.
+           FreeBSD 10.3 wrt ASI_IC_TAG.  */
         {
             TCGv_i32 r_asi = tcg_const_i32(da.asi);
+            TCGv_i32 r_mop = tcg_const_i32(da.memop);
+            TCGv_i64 tmp = tcg_temp_new_i64();
 
             save_state(dc);
-            gen_helper_ldda_asi(cpu_env, addr, r_asi);
+            gen_helper_ld_asi(tmp, cpu_env, addr, r_asi, r_mop);
             tcg_temp_free_i32(r_asi);
+            tcg_temp_free_i32(r_mop);
 
-            tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUSPARCState, qt0.high));
-            tcg_gen_ld_i64(lo, cpu_env, offsetof(CPUSPARCState, qt0.low));
+            /* See above.  */
+            if ((da.memop & MO_BSWAP) == MO_TE) {
+                tcg_gen_extr32_i64(lo, hi, tmp);
+            } else {
+                tcg_gen_extr32_i64(hi, lo, tmp);
+            }
+            tcg_temp_free_i64(tmp);
         }
         break;
     }
@@ -2705,15 +2717,21 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
         break;
 
     default:
+        /* ??? In theory we've handled all of the ASIs that are valid
+           for stda, and this should raise DAE_invalid_asi.  */
         {
             TCGv_i32 r_asi = tcg_const_i32(da.asi);
-            TCGv_i32 r_mop = tcg_const_i32(MO_Q);
-            TCGv_i64 t64;
+            TCGv_i32 r_mop = tcg_const_i32(da.memop);
+            TCGv_i64 t64 = tcg_temp_new_i64();
 
-            save_state(dc);
+            /* See above.  */
+            if ((da.memop & MO_BSWAP) == MO_TE) {
+                tcg_gen_concat32_i64(t64, lo, hi);
+            } else {
+                tcg_gen_concat32_i64(t64, hi, lo);
+            }
 
-            t64 = tcg_temp_new_i64();
-            tcg_gen_concat_tl_i64(t64, lo, hi);
+            save_state(dc);
             gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_mop);
             tcg_temp_free_i32(r_mop);
             tcg_temp_free_i32(r_asi);
-- 
cgit v1.2.3


From f939ffe5a022a8798824e2720ed5a14186fca6b6 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 14 Jul 2016 11:47:08 -0700
Subject: target-sparc: Implement ldqf and stqf inline

At the same time, fix a problem with stqf_asi, when
a write might access two pages.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/helper.h      |  2 -
 target-sparc/ldst_helper.c | 97 +++++-----------------------------------------
 target-sparc/translate.c   | 79 ++++++++++++++++++++++++++-----------
 3 files changed, 67 insertions(+), 111 deletions(-)

diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index b7bff84efa..0cf1bfb73a 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -38,8 +38,6 @@ DEF_HELPER_3(tsubcctv, tl, env, tl, tl)
 DEF_HELPER_FLAGS_3(sdivx, TCG_CALL_NO_WG, s64, env, s64, s64)
 DEF_HELPER_FLAGS_3(udivx, TCG_CALL_NO_WG, i64, env, i64, i64)
 #endif
-DEF_HELPER_FLAGS_3(ldqf, TCG_CALL_NO_WG, void, env, tl, int)
-DEF_HELPER_FLAGS_3(stqf, TCG_CALL_NO_WG, void, env, tl, int)
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
 DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32)
 DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32)
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 725bb49e9d..de7d53ae20 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -254,18 +254,6 @@ static void replace_tlb_1bit_lru(SparcTLBEntry *tlb,
 
 #endif
 
-#if defined(TARGET_SPARC64) || defined(CONFIG_USER_ONLY)
-static inline target_ulong address_mask(CPUSPARCState *env1, target_ulong addr)
-{
-#ifdef TARGET_SPARC64
-    if (AM_CHECK(env1)) {
-        addr &= 0xffffffffULL;
-    }
-#endif
-    return addr;
-}
-#endif
-
 #ifdef TARGET_SPARC64
 /* returns true if access using this ASI is to have address translated by MMU
    otherwise access is to raw physical address */
@@ -290,14 +278,21 @@ static inline int is_translating_asi(int asi)
     }
 }
 
+static inline target_ulong address_mask(CPUSPARCState *env1, target_ulong addr)
+{
+    if (AM_CHECK(env1)) {
+        addr &= 0xffffffffULL;
+    }
+    return addr;
+}
+
 static inline target_ulong asi_address_mask(CPUSPARCState *env,
                                             int asi, target_ulong addr)
 {
     if (is_translating_asi(asi)) {
-        return address_mask(env, addr);
-    } else {
-        return addr;
+        addr = address_mask(env, addr);
     }
+    return addr;
 }
 #endif
 
@@ -1601,78 +1596,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
 #endif /* CONFIG_USER_ONLY */
 #endif /* TARGET_SPARC64 */
 
-void helper_ldqf(CPUSPARCState *env, target_ulong addr, int mem_idx)
-{
-    /* XXX add 128 bit load */
-    CPU_QuadU u;
-
-    do_check_align(env, addr, 7, GETPC());
-#if !defined(CONFIG_USER_ONLY)
-    switch (mem_idx) {
-    case MMU_USER_IDX:
-        u.ll.upper = cpu_ldq_user(env, addr);
-        u.ll.lower = cpu_ldq_user(env, addr + 8);
-        QT0 = u.q;
-        break;
-    case MMU_KERNEL_IDX:
-        u.ll.upper = cpu_ldq_kernel(env, addr);
-        u.ll.lower = cpu_ldq_kernel(env, addr + 8);
-        QT0 = u.q;
-        break;
-#ifdef TARGET_SPARC64
-    case MMU_HYPV_IDX:
-        u.ll.upper = cpu_ldq_hypv(env, addr);
-        u.ll.lower = cpu_ldq_hypv(env, addr + 8);
-        QT0 = u.q;
-        break;
-#endif
-    default:
-        DPRINTF_MMU("helper_ldqf: need to check MMU idx %d\n", mem_idx);
-        break;
-    }
-#else
-    u.ll.upper = cpu_ldq_data(env, address_mask(env, addr));
-    u.ll.lower = cpu_ldq_data(env, address_mask(env, addr + 8));
-    QT0 = u.q;
-#endif
-}
-
-void helper_stqf(CPUSPARCState *env, target_ulong addr, int mem_idx)
-{
-    /* XXX add 128 bit store */
-    CPU_QuadU u;
-
-    do_check_align(env, addr, 7, GETPC());
-#if !defined(CONFIG_USER_ONLY)
-    switch (mem_idx) {
-    case MMU_USER_IDX:
-        u.q = QT0;
-        cpu_stq_user(env, addr, u.ll.upper);
-        cpu_stq_user(env, addr + 8, u.ll.lower);
-        break;
-    case MMU_KERNEL_IDX:
-        u.q = QT0;
-        cpu_stq_kernel(env, addr, u.ll.upper);
-        cpu_stq_kernel(env, addr + 8, u.ll.lower);
-        break;
-#ifdef TARGET_SPARC64
-    case MMU_HYPV_IDX:
-        u.q = QT0;
-        cpu_stq_hypv(env, addr, u.ll.upper);
-        cpu_stq_hypv(env, addr + 8, u.ll.lower);
-        break;
-#endif
-    default:
-        DPRINTF_MMU("helper_stqf: need to check MMU idx %d\n", mem_idx);
-        break;
-    }
-#else
-    u.q = QT0;
-    cpu_stq_data(env, address_mask(env, addr), u.ll.upper);
-    cpu_stq_data(env, address_mask(env, addr + 8), u.ll.lower);
-#endif
-}
-
 #if !defined(CONFIG_USER_ONLY)
 #ifndef TARGET_SPARC64
 void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr,
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index ecdc7e1d8a..a869d7d6dc 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -242,7 +242,29 @@ static void gen_op_store_QT0_fpr(unsigned int dst)
                    offsetof(CPU_QuadU, ll.lower));
 }
 
+static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst,
+                            TCGv_i64 v1, TCGv_i64 v2)
+{
+    dst = QFPREG(dst);
+
+    tcg_gen_mov_i64(cpu_fpr[dst / 2], v1);
+    tcg_gen_mov_i64(cpu_fpr[dst / 2 + 1], v2);
+    gen_update_fprs_dirty(dc, dst);
+}
+
 #ifdef TARGET_SPARC64
+static TCGv_i64 gen_load_fpr_Q0(DisasContext *dc, unsigned int src)
+{
+    src = QFPREG(src);
+    return cpu_fpr[src / 2];
+}
+
+static TCGv_i64 gen_load_fpr_Q1(DisasContext *dc, unsigned int src)
+{
+    src = QFPREG(src);
+    return cpu_fpr[src / 2 + 1];
+}
+
 static void gen_move_Q(DisasContext *dc, unsigned int rd, unsigned int rs)
 {
     rd = QFPREG(rd);
@@ -2557,10 +2579,17 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
             tcg_gen_qemu_st_i32(d32, addr, da.mem_idx, da.memop);
             break;
         case 8:
+            /* ??? Only 4-byte alignment required.  However, it is legal
+               for the cpu to signal the alignment fault, and the OS trap
+               handler is required to fix it up.  */
             tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
             break;
         case 16:
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            /* Only 4-byte alignment required.  See above.  Requiring
+               16-byte alignment here avoids having to probe the second
+               page before performing the first write.  */
+            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_16);
             tcg_gen_addi_tl(addr, addr, 8);
             tcg_gen_qemu_st_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop);
             break;
@@ -5426,17 +5455,16 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_helper_ldfsr(cpu_fsr, cpu_env, cpu_fsr, cpu_dst_32);
                     break;
                 case 0x22:      /* ldqf, load quad fpreg */
-                    {
-                        TCGv_i32 r_const;
-
-                        CHECK_FPU_FEATURE(dc, FLOAT128);
-                        r_const = tcg_const_i32(dc->mem_idx);
-                        gen_address_mask(dc, cpu_addr);
-                        gen_helper_ldqf(cpu_env, cpu_addr, r_const);
-                        tcg_temp_free_i32(r_const);
-                        gen_op_store_QT0_fpr(QFPREG(rd));
-                        gen_update_fprs_dirty(dc, QFPREG(rd));
-                    }
+                    CHECK_FPU_FEATURE(dc, FLOAT128);
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_src1_64 = tcg_temp_new_i64();
+                    tcg_gen_qemu_ld64(cpu_src1_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_addi_tl(cpu_addr, cpu_addr, 8);
+                    cpu_src2_64 = tcg_temp_new_i64();
+                    tcg_gen_qemu_ld64(cpu_src2_64, cpu_addr, dc->mem_idx);
+                    gen_store_fpr_Q(dc, rd, cpu_src1_64, cpu_src2_64);
+                    tcg_temp_free_i64(cpu_src1_64);
+                    tcg_temp_free_i64(cpu_src2_64);
                     break;
                 case 0x23:      /* lddf, load double fpreg */
                     gen_address_mask(dc, cpu_addr);
@@ -5537,16 +5565,20 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0x26:
 #ifdef TARGET_SPARC64
                     /* V9 stqf, store quad fpreg */
-                    {
-                        TCGv_i32 r_const;
-
-                        CHECK_FPU_FEATURE(dc, FLOAT128);
-                        gen_op_load_fpr_QT0(QFPREG(rd));
-                        r_const = tcg_const_i32(dc->mem_idx);
-                        gen_address_mask(dc, cpu_addr);
-                        gen_helper_stqf(cpu_env, cpu_addr, r_const);
-                        tcg_temp_free_i32(r_const);
-                    }
+                    CHECK_FPU_FEATURE(dc, FLOAT128);
+                    gen_address_mask(dc, cpu_addr);
+                    /* ??? While stqf only requires 4-byte alignment, it is
+                       legal for the cpu to signal the unaligned exception.
+                       The OS trap handler is then required to fix it up.
+                       For qemu, this avoids having to probe the second page
+                       before performing the first write.  */
+                    cpu_src1_64 = gen_load_fpr_Q0(dc, rd);
+                    tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr,
+                                        dc->mem_idx, MO_TEQ | MO_ALIGN_16);
+                    tcg_gen_addi_tl(cpu_addr, cpu_addr, 8);
+                    cpu_src2_64 = gen_load_fpr_Q1(dc, rd);
+                    tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr,
+                                        dc->mem_idx, MO_TEQ);
                     break;
 #else /* !TARGET_SPARC64 */
                     /* stdfq, store floating point queue */
@@ -5562,6 +5594,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #endif
 #endif
                 case 0x27: /* stdf, store double fpreg */
+                    /* ??? Only 4-byte alignment required.  However, it is
+                       legal for the cpu to signal the alignment fault, and
+                       the OS trap handler is required to fix it up.  */
                     gen_address_mask(dc, cpu_addr);
                     cpu_src1_64 = gen_load_fpr_D(dc, rd);
                     tcg_gen_qemu_st64(cpu_src1_64, cpu_addr, dc->mem_idx);
-- 
cgit v1.2.3


From cb21b4da6cca1bb4e3f5fefb698fb9e4d00c8f66 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Wed, 31 Aug 2016 10:48:03 -0700
Subject: target-sparc: Allow 4-byte alignment on fp mem ops

The cpu is allowed to require stricter alignment on these 8- and 16-byte
operations, and the OS is required to fix up the accesses as necessary,
so the previous code was not wrong.

However, we can easily handle this misalignment for all direct 8-byte
operations and for direct 16-byte loads.

We must retain 16-byte alignment for 16-byte stores, so that we don't have
to probe for writability of a second page before performing the first of
two 8-byte stores.  We also retain 8-byte alignment for no-fault loads,
since they are rare and it's not worth extending the helpers for this.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 86 +++++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 42 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index a869d7d6dc..4920cf28e7 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2459,6 +2459,7 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
 {
     DisasASI da = get_asi(dc, insn, (size == 4 ? MO_TEUL : MO_TEQ));
     TCGv_i32 d32;
+    TCGv_i64 d64;
 
     switch (da.type) {
     case GET_ASI_EXCP:
@@ -2473,12 +2474,17 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
             gen_store_fpr_F(dc, rd, d32);
             break;
         case 8:
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_4);
             break;
         case 16:
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            d64 = tcg_temp_new_i64();
+            tcg_gen_qemu_ld_i64(d64, addr, da.mem_idx, da.memop | MO_ALIGN_4);
             tcg_gen_addi_tl(addr, addr, 8);
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop);
+            tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_4);
+            tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+            tcg_temp_free_i64(d64);
             break;
         default:
             g_assert_not_reached();
@@ -2534,22 +2540,23 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
                but we can just use the integer asi helper for them.  */
             switch (size) {
             case 4:
-                {
-                    TCGv d64 = tcg_temp_new_i64();
-                    gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop);
-                    d32 = gen_dest_fpr_F(dc);
-                    tcg_gen_extrl_i64_i32(d32, d64);
-                    tcg_temp_free_i64(d64);
-                    gen_store_fpr_F(dc, rd, d32);
-                }
+                d64 = tcg_temp_new_i64();
+                gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop);
+                d32 = gen_dest_fpr_F(dc);
+                tcg_gen_extrl_i64_i32(d32, d64);
+                tcg_temp_free_i64(d64);
+                gen_store_fpr_F(dc, rd, d32);
                 break;
             case 8:
                 gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop);
                 break;
             case 16:
-                gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop);
+                d64 = tcg_temp_new_i64();
+                gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop);
                 tcg_gen_addi_tl(addr, addr, 8);
                 gen_helper_ld_asi(cpu_fpr[rd/2+1], cpu_env, addr, r_asi, r_mop);
+                tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+                tcg_temp_free_i64(d64);
                 break;
             default:
                 g_assert_not_reached();
@@ -2579,15 +2586,15 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
             tcg_gen_qemu_st_i32(d32, addr, da.mem_idx, da.memop);
             break;
         case 8:
-            /* ??? Only 4-byte alignment required.  However, it is legal
-               for the cpu to signal the alignment fault, and the OS trap
-               handler is required to fix it up.  */
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_4);
             break;
         case 16:
-            /* Only 4-byte alignment required.  See above.  Requiring
-               16-byte alignment here avoids having to probe the second
-               page before performing the first write.  */
+            /* Only 4-byte alignment required.  However, it is legal for the
+               cpu to signal the alignment fault, and the OS trap handler is
+               required to fix it up.  Requiring 16-byte alignment here avoids
+               having to probe the second page before performing the first
+               write.  */
             tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
                                 da.memop | MO_ALIGN_16);
             tcg_gen_addi_tl(addr, addr, 8);
@@ -5423,18 +5430,15 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
             skip_move: ;
 #endif
             } else if (xop >= 0x20 && xop < 0x24) {
-                TCGv t0;
-
                 if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
                 }
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    t0 = get_temp_tl(dc);
-                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
                     cpu_dst_32 = gen_dest_fpr_F(dc);
-                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
+                    tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr,
+                                        dc->mem_idx, MO_TEUL);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x21:      /* ldfsr, V9 ldxfsr */
@@ -5442,26 +5446,28 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_address_mask(dc, cpu_addr);
                     if (rd == 1) {
                         TCGv_i64 t64 = tcg_temp_new_i64();
-                        tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
+                        tcg_gen_qemu_ld_i64(t64, cpu_addr,
+                                            dc->mem_idx, MO_TEQ);
                         gen_helper_ldxfsr(cpu_fsr, cpu_env, cpu_fsr, t64);
                         tcg_temp_free_i64(t64);
                         break;
                     }
 #endif
                     cpu_dst_32 = get_temp_i32(dc);
-                    t0 = get_temp_tl(dc);
-                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
-                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
+                    tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr,
+                                        dc->mem_idx, MO_TEUL);
                     gen_helper_ldfsr(cpu_fsr, cpu_env, cpu_fsr, cpu_dst_32);
                     break;
                 case 0x22:      /* ldqf, load quad fpreg */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
                     gen_address_mask(dc, cpu_addr);
                     cpu_src1_64 = tcg_temp_new_i64();
-                    tcg_gen_qemu_ld64(cpu_src1_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_ld_i64(cpu_src1_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     tcg_gen_addi_tl(cpu_addr, cpu_addr, 8);
                     cpu_src2_64 = tcg_temp_new_i64();
-                    tcg_gen_qemu_ld64(cpu_src2_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_ld_i64(cpu_src2_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     gen_store_fpr_Q(dc, rd, cpu_src1_64, cpu_src2_64);
                     tcg_temp_free_i64(cpu_src1_64);
                     tcg_temp_free_i64(cpu_src2_64);
@@ -5469,7 +5475,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0x23:      /* lddf, load double fpreg */
                     gen_address_mask(dc, cpu_addr);
                     cpu_dst_64 = gen_dest_fpr_D(dc, rd);
-                    tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_ld_i64(cpu_dst_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 default:
@@ -5542,13 +5549,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 }
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
-                    {
-                        TCGv t = get_temp_tl(dc);
-                        gen_address_mask(dc, cpu_addr);
-                        cpu_src1_32 = gen_load_fpr_F(dc, rd);
-                        tcg_gen_ext_i32_tl(t, cpu_src1_32);
-                        tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx);
-                    }
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rd);
+                    tcg_gen_qemu_st_i32(cpu_src1_32, cpu_addr,
+                                        dc->mem_idx, MO_TEUL);
                     break;
                 case 0x25: /* stfsr, V9 stxfsr */
                     {
@@ -5594,12 +5598,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #endif
 #endif
                 case 0x27: /* stdf, store double fpreg */
-                    /* ??? Only 4-byte alignment required.  However, it is
-                       legal for the cpu to signal the alignment fault, and
-                       the OS trap handler is required to fix it up.  */
                     gen_address_mask(dc, cpu_addr);
                     cpu_src1_64 = gen_load_fpr_D(dc, rd);
-                    tcg_gen_qemu_st64(cpu_src1_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     break;
                 default:
                     goto illegal_insn;
-- 
cgit v1.2.3


From 47b2696b975b794c6fa7b9fa8ae4699e749d662c Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 14 Jul 2016 13:21:11 -0700
Subject: target-sparc: Remove MMU_MODE*_SUFFIX

The functions that these generate are no longer used.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/cpu.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index e94b8f19d6..5fb0ed1aad 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -638,23 +638,15 @@ int cpu_sparc_signal_handler(int host_signum, void *pinfo, void *puc);
 /* MMU modes definitions */
 #if defined (TARGET_SPARC64)
 #define MMU_USER_IDX   0
-#define MMU_MODE0_SUFFIX _user
 #define MMU_USER_SECONDARY_IDX   1
-#define MMU_MODE1_SUFFIX _user_secondary
 #define MMU_KERNEL_IDX 2
-#define MMU_MODE2_SUFFIX _kernel
 #define MMU_KERNEL_SECONDARY_IDX 3
-#define MMU_MODE3_SUFFIX _kernel_secondary
 #define MMU_NUCLEUS_IDX 4
-#define MMU_MODE4_SUFFIX _nucleus
 #define MMU_HYPV_IDX   5
-#define MMU_MODE5_SUFFIX _hypv
 #define MMU_PHYS_IDX   6
 #else
 #define MMU_USER_IDX   0
-#define MMU_MODE0_SUFFIX _user
 #define MMU_KERNEL_IDX 1
-#define MMU_MODE1_SUFFIX _kernel
 #define MMU_PHYS_IDX   2
 #endif
 
-- 
cgit v1.2.3


From da1bcae65288bdd51e0a7203d1e6c9cde1be5b3d Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 12 Sep 2016 19:01:53 -0700
Subject: target-sparc: Use tcg_gen_atomic_xchg_tl

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 4920cf28e7..82f5e67f0f 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2026,27 +2026,16 @@ static inline void gen_ne_fop_QD(DisasContext *dc, int rd, int rs,
 static void gen_swap(DisasContext *dc, TCGv dst, TCGv src,
                      TCGv addr, int mmu_idx, TCGMemOp memop)
 {
-    /* ??? Should be atomic.  */
-    TCGv t0 = tcg_temp_new();
     gen_address_mask(dc, addr);
-    tcg_gen_qemu_ld_tl(t0, addr, mmu_idx, memop);
-    tcg_gen_qemu_st_tl(src, addr, mmu_idx, memop);
-    tcg_gen_mov_tl(dst, t0);
-    tcg_temp_free(t0);
+    tcg_gen_atomic_xchg_tl(dst, addr, src, mmu_idx, memop);
 }
 
 static void gen_ldstub(DisasContext *dc, TCGv dst, TCGv addr, int mmu_idx)
 {
-    /* ??? Should be atomic.  */
-    TCGv_i32 t0 = tcg_temp_new_i32();
-    TCGv_i32 t1 = tcg_const_i32(0xff);
-
+    TCGv m1 = tcg_const_tl(0xff);
     gen_address_mask(dc, addr);
-    tcg_gen_qemu_ld_i32(t0, addr, mmu_idx, MO_UB);
-    tcg_gen_qemu_st_i32(t1, addr, mmu_idx, MO_UB);
-    tcg_gen_extu_i32_tl(dst, t0);
-    tcg_temp_free_i32(t0);
-    tcg_temp_free_i32(t1);
+    tcg_gen_atomic_xchg_tl(dst, addr, m1, mmu_idx, MO_UB);
+    tcg_temp_free(m1);
 }
 
 /* asi moves */
-- 
cgit v1.2.3


From 5a7267b6a9e94c264ca77a7ca5a239e70dac81da Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 12 Sep 2016 19:11:18 -0700
Subject: target-sparc: Use tcg_gen_atomic_cmpxchg_tl

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/translate.c | 31 +++++++------------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 82f5e67f0f..a13b76ebd9 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2391,31 +2391,21 @@ static void gen_swap_asi(DisasContext *dc, TCGv dst, TCGv src,
     }
 }
 
-static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv cmpr,
+static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv cmpv,
                         int insn, int rd)
 {
     DisasASI da = get_asi(dc, insn, MO_TEUL);
-    TCGv cmpv, oldv, tmpv;
+    TCGv oldv;
 
     switch (da.type) {
     case GET_ASI_EXCP:
         return;
     case GET_ASI_DIRECT:
-        cmpv = tcg_temp_new();
         oldv = tcg_temp_new();
-        tmpv = tcg_temp_new();
-        tcg_gen_ext32u_tl(cmpv, cmpr);
-
-        /* ??? Should be atomic.  */
-        tcg_gen_qemu_ld_tl(oldv, addr, da.mem_idx, da.memop);
-        tcg_gen_movcond_tl(TCG_COND_EQ, tmpv, oldv, cmpv,
-                           gen_load_gpr(dc, rd), oldv);
-        tcg_gen_qemu_st_tl(tmpv, addr, da.mem_idx, da.memop);
-
+        tcg_gen_atomic_cmpxchg_tl(oldv, addr, cmpv, gen_load_gpr(dc, rd),
+                                  da.mem_idx, da.memop);
         gen_store_gpr(dc, rd, oldv);
-        tcg_temp_free(cmpv);
         tcg_temp_free(oldv);
-        tcg_temp_free(tmpv);
         break;
     default:
         /* ??? Should be DAE_invalid_asi.  */
@@ -2770,24 +2760,17 @@ static void gen_casx_asi(DisasContext *dc, TCGv addr, TCGv cmpv,
                          int insn, int rd)
 {
     DisasASI da = get_asi(dc, insn, MO_TEQ);
-    TCGv oldv, tmpv;
+    TCGv oldv;
 
     switch (da.type) {
     case GET_ASI_EXCP:
         return;
     case GET_ASI_DIRECT:
         oldv = tcg_temp_new();
-        tmpv = tcg_temp_new();
-
-        /* ??? Should be atomic.  */
-        tcg_gen_qemu_ld_tl(oldv, addr, da.mem_idx, da.memop);
-        tcg_gen_movcond_tl(TCG_COND_EQ, tmpv, oldv, cmpv,
-                           gen_load_gpr(dc, rd), oldv);
-        tcg_gen_qemu_st_tl(tmpv, addr, da.mem_idx, da.memop);
-
+        tcg_gen_atomic_cmpxchg_tl(oldv, addr, cmpv, gen_load_gpr(dc, rd),
+                                  da.mem_idx, da.memop);
         gen_store_gpr(dc, rd, oldv);
         tcg_temp_free(oldv);
-        tcg_temp_free(tmpv);
         break;
     default:
         /* ??? Should be DAE_invalid_asi.  */
-- 
cgit v1.2.3