target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate)

This adds support for [UF]RSQRTE instructions. It utilises the existing NEON helpers with some changes. The changes include an explicit passing of fpstatus (so the correct one is used between arm32 and aarch64), denormilzation, more correct error handling and also proper scaling of the fraction going into the estimate. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1394822294-14837-25-git-send-email-peter.maydell@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Alex Bennée <alex.bennee@linaro.org> 2014-03-17 16:31:53 +0000
committer: Peter Maydell <peter.maydell@linaro.org> 2014-03-17 16:31:53 +0000
commit: c2fb418e35be3eb1f60987174f94c029f7d4dd7d (patch)
tree: 1ed62625c2e5f4dbc921662d9eb29d6523e6391e /target-arm
parent: 5553955eb6ec890f324a2ff6c6cc1365b98b981f (diff)
4 files changed, 140 insertions, 37 deletions
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 535fc8f35e..55077ed1b6 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -4721,12 +4721,12 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
 /* The algorithm that must be used to calculate the estimate
  * is specified by the ARM ARM.
  */
-static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
+static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
 {
     /* These calculations mustn't set any fp exception flags,
      * so we use a local copy of the fp_status.
      */
-    float_status dummy_status = env->vfp.standard_fp_status;
+    float_status dummy_status = *real_fp_status;
     float_status *s = &dummy_status;
     float64 q;
     int64_t q_int;
@@ -4773,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
     return float64_div(int64_to_float64(q_int, s), float64_256, s);
 }
 
-float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
+float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
 {
-    float_status *s = &env->vfp.standard_fp_status;
+    float_status *s = fpstp;
+    float32 f32 = float32_squash_input_denormal(input, s);
+    uint32_t val = float32_val(f32);
+    uint32_t f32_sbit = 0x80000000 & val;
+    int32_t f32_exp = extract32(val, 23, 8);
+    uint32_t f32_frac = extract32(val, 0, 23);
+    uint64_t f64_frac;
+    uint64_t val64;
     int result_exp;
     float64 f64;
-    uint32_t val;
-    uint64_t val64;
 
-    val = float32_val(a);
-
-    if (float32_is_any_nan(a)) {
-        if (float32_is_signaling_nan(a)) {
+    if (float32_is_any_nan(f32)) {
+        float32 nan = f32;
+        if (float32_is_signaling_nan(f32)) {
             float_raise(float_flag_invalid, s);
+            nan = float32_maybe_silence_nan(f32);
         }
-        return float32_default_nan;
-    } else if (float32_is_zero_or_denormal(a)) {
-        if (!float32_is_zero(a)) {
-            float_raise(float_flag_input_denormal, s);
+        if (s->default_nan_mode) {
+            nan =  float32_default_nan;
         }
+        return nan;
+    } else if (float32_is_zero(f32)) {
         float_raise(float_flag_divbyzero, s);
-        return float32_set_sign(float32_infinity, float32_is_neg(a));
-    } else if (float32_is_neg(a)) {
+        return float32_set_sign(float32_infinity, float32_is_neg(f32));
+    } else if (float32_is_neg(f32)) {
         float_raise(float_flag_invalid, s);
         return float32_default_nan;
-    } else if (float32_is_infinity(a)) {
+    } else if (float32_is_infinity(f32)) {
         return float32_zero;
     }
 
-    /* Normalize to a double-precision value between 0.25 and 1.0,
+    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
      * preserving the parity of the exponent.  */
-    if ((val & 0x800000) == 0) {
-        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+
+    f64_frac = ((uint64_t) f32_frac) << 29;
+    if (f32_exp == 0) {
+        while (extract64(f64_frac, 51, 1) == 0) {
+            f64_frac = f64_frac << 1;
+            f32_exp = f32_exp-1;
+        }
+        f64_frac = extract64(f64_frac, 0, 51) << 1;
+    }
+
+    if (extract64(f32_exp, 0, 1) == 0) {
+        f64 = make_float64(((uint64_t) f32_sbit) << 32
                            | (0x3feULL << 52)
-                           | ((uint64_t)(val & 0x7fffff) << 29));
+                           | f64_frac);
     } else {
-        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+        f64 = make_float64(((uint64_t) f32_sbit) << 32
                            | (0x3fdULL << 52)
-                           | ((uint64_t)(val & 0x7fffff) << 29));
+                           | f64_frac);
     }
 
-    result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+    result_exp = (380 - f32_exp) / 2;
 
-    f64 = recip_sqrt_estimate(f64, env);
+    f64 = recip_sqrt_estimate(f64, s);
 
     val64 = float64_val(f64);
 
@@ -4824,6 +4839,69 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
     return make_float32(val);
 }
 
+float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
+{
+    float_status *s = fpstp;
+    float64 f64 = float64_squash_input_denormal(input, s);
+    uint64_t val = float64_val(f64);
+    uint64_t f64_sbit = 0x8000000000000000ULL & val;
+    int64_t f64_exp = extract64(val, 52, 11);
+    uint64_t f64_frac = extract64(val, 0, 52);
+    int64_t result_exp;
+    uint64_t result_frac;
+
+    if (float64_is_any_nan(f64)) {
+        float64 nan = f64;
+        if (float64_is_signaling_nan(f64)) {
+            float_raise(float_flag_invalid, s);
+            nan = float64_maybe_silence_nan(f64);
+        }
+        if (s->default_nan_mode) {
+            nan =  float64_default_nan;
+        }
+        return nan;
+    } else if (float64_is_zero(f64)) {
+        float_raise(float_flag_divbyzero, s);
+        return float64_set_sign(float64_infinity, float64_is_neg(f64));
+    } else if (float64_is_neg(f64)) {
+        float_raise(float_flag_invalid, s);
+        return float64_default_nan;
+    } else if (float64_is_infinity(f64)) {
+        return float64_zero;
+    }
+
+    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+     * preserving the parity of the exponent.  */
+
+    if (f64_exp == 0) {
+        while (extract64(f64_frac, 51, 1) == 0) {
+            f64_frac = f64_frac << 1;
+            f64_exp = f64_exp - 1;
+        }
+        f64_frac = extract64(f64_frac, 0, 51) << 1;
+    }
+
+    if (extract64(f64_exp, 0, 1) == 0) {
+        f64 = make_float64(f64_sbit
+                           | (0x3feULL << 52)
+                           | f64_frac);
+    } else {
+        f64 = make_float64(f64_sbit
+                           | (0x3fdULL << 52)
+                           | f64_frac);
+    }
+
+    result_exp = (3068 - f64_exp) / 2;
+
+    f64 = recip_sqrt_estimate(f64, s);
+
+    result_frac = extract64(float64_val(f64), 0, 52);
+
+    return make_float64(f64_sbit |
+                        ((result_exp & 0x7ff) << 52) |
+                        result_frac);
+}
+
 uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
 {
     float_status *s = fpstp;
@@ -4841,8 +4919,9 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
     return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
 }
 
-uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
+uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
 {
+    float_status *fpst = fpstp;
     float64 f64;
 
     if ((a & 0xc0000000) == 0) {
@@ -4857,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
                            | ((uint64_t)(a & 0x3fffffff) << 22));
     }
 
-    f64 = recip_sqrt_estimate(f64, env);
+    f64 = recip_sqrt_estimate(f64, fpst);
 
     return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
 }
diff --git a/target-arm/helper.h b/target-arm/helper.h
index f96a82415a..a3d6f32b06 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -169,9 +169,10 @@ DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
 DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_2(rsqrte_f32, f32, f32, env)
+DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
 DEF_HELPER_2(recpe_u32, i32, i32, ptr)
-DEF_HELPER_2(rsqrte_u32, i32, i32, env)
+DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
 DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
 DEF_HELPER_3(shl_cc, i32, env, i32, i32)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 235f880589..befffac2e3 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7146,6 +7146,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
             case 0x3f: /* FRECPX */
                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
                 break;
+            case 0x7d: /* FRSQRTE */
+                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
@@ -7181,6 +7184,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
             case 0x3f: /* FRECPX */
                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
                 break;
+            case 0x7d: /* FRSQRTE */
+                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
+                break;
             default:
                 g_assert_not_reached();
             }
@@ -7378,6 +7384,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
         }
         case 0x3d: /* FRECPE */
         case 0x3f: /* FRECPX */
+        case 0x7d: /* FRSQRTE */
             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
             return;
         case 0x1a: /* FCVTNS */
@@ -7404,9 +7411,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
             }
             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
             return;
-        case 0x7d: /* FRSQRTE */
-            unsupported_encoding(s, insn);
-            return;
         default:
             unallocated_encoding(s);
             return;
@@ -9255,6 +9259,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             }
             /* fall through */
         case 0x3d: /* FRECPE */
+        case 0x7d: /* FRSQRTE */
+            if (size == 3 && !is_q) {
+                unallocated_encoding(s);
+                return;
+            }
             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
             return;
         case 0x56: /* FCVTXN, FCVTXN2 */
@@ -9297,9 +9306,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             }
             break;
         case 0x7c: /* URSQRTE */
-        case 0x7d: /* FRSQRTE */
-            unsupported_encoding(s, insn);
-            return;
+            if (size == 3) {
+                unallocated_encoding(s);
+                return;
+            }
+            need_fpstatus = true;
+            break;
         default:
             unallocated_encoding(s);
             return;
@@ -9432,6 +9444,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                 case 0x59: /* FRINTX */
                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
                     break;
+                case 0x7c: /* URSQRTE */
+                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
+                    break;
                 default:
                     g_assert_not_reached();
                 }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 3771953e02..56e3b4bf7f 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6689,8 +6689,12 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                             break;
                         }
                         case NEON_2RM_VRSQRTE:
-                            gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VRECPE_F:
                         {
                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
@@ -6699,8 +6703,12 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                             break;
                         }
                         case NEON_2RM_VRSQRTE_F:
-                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                             break;
+                        }
                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
                             gen_vfp_sito(0, 1);
                             break;
author	Alex Bennée <alex.bennee@linaro.org>	2014-03-17 16:31:53 +0000
committer	Peter Maydell <peter.maydell@linaro.org>	2014-03-17 16:31:53 +0000
commit	c2fb418e35be3eb1f60987174f94c029f7d4dd7d (patch)
tree	1ed62625c2e5f4dbc921662d9eb29d6523e6391e /target-arm
parent	5553955eb6ec890f324a2ff6c6cc1365b98b981f (diff)