7 files changed, 257 insertions, 186 deletions
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 814d13e767..e479a4dbd7 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -73,7 +73,7 @@ static const char *ext3_feature_name[] = {
 };
 
 static const char *kvm_feature_name[] = {
-    "kvmclock", "kvm_nopiodelay", "kvm_mmu", NULL, "kvm_asyncpf", NULL, NULL, NULL,
+    "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", "kvm_asyncpf", NULL, NULL, NULL,
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -182,20 +182,22 @@ static int altcmp(const char *s, const char *e, const char *altstr)
 }
 
 /* search featureset for flag *[s..e), if found set corresponding bit in
- * *pval and return success, otherwise return zero
+ * *pval and return true, otherwise return false
  */
-static int lookup_feature(uint32_t *pval, const char *s, const char *e,
-    const char **featureset)
+static bool lookup_feature(uint32_t *pval, const char *s, const char *e,
+                           const char **featureset)
 {
     uint32_t mask;
     const char **ppc;
+    bool found = false;
 
-    for (mask = 1, ppc = featureset; mask; mask <<= 1, ++ppc)
+    for (mask = 1, ppc = featureset; mask; mask <<= 1, ++ppc) {
         if (*ppc && !altcmp(s, e, *ppc)) {
             *pval |= mask;
-            break;
+            found = true;
         }
-    return (mask ? 1 : 0);
+    }
+    return found;
 }
 
 static void add_flagname_to_bitmaps(const char *flagname, uint32_t *features,
diff --git a/target-i386/exec.h b/target-i386/exec.h
index 6f9f709d8a..ee36a7181a 100644
--- a/target-i386/exec.h
+++ b/target-i386/exec.h
@@ -110,11 +110,24 @@ static inline void svm_check_intercept(uint32_t type)
 #define float64_to_floatx float64_to_floatx80
 #define floatx_to_float32 floatx80_to_float32
 #define floatx_to_float64 floatx80_to_float64
+#define floatx_add floatx80_add
+#define floatx_div floatx80_div
+#define floatx_mul floatx80_mul
+#define floatx_sub floatx80_sub
+#define floatx_sqrt floatx80_sqrt
 #define floatx_abs floatx80_abs
 #define floatx_chs floatx80_chs
+#define floatx_scalbn floatx80_scalbn
 #define floatx_round_to_int floatx80_round_to_int
 #define floatx_compare floatx80_compare
 #define floatx_compare_quiet floatx80_compare_quiet
+#define floatx_is_any_nan floatx80_is_any_nan
+#define floatx_is_neg floatx80_is_neg
+#define floatx_is_zero floatx80_is_zero
+#define floatx_zero floatx80_zero
+#define floatx_one floatx80_one
+#define floatx_ln2 floatx80_ln2
+#define floatx_pi floatx80_pi
 #else
 #define floatx_to_int32 float64_to_int32
 #define floatx_to_int64 float64_to_int64
@@ -126,11 +139,24 @@ static inline void svm_check_intercept(uint32_t type)
 #define float64_to_floatx(x, e) (x)
 #define floatx_to_float32 float64_to_float32
 #define floatx_to_float64(x, e) (x)
+#define floatx_add float64_add
+#define floatx_div float64_div
+#define floatx_mul float64_mul
+#define floatx_sub float64_sub
+#define floatx_sqrt float64_sqrt
 #define floatx_abs float64_abs
 #define floatx_chs float64_chs
+#define floatx_scalbn float64_scalbn
 #define floatx_round_to_int float64_round_to_int
 #define floatx_compare float64_compare
 #define floatx_compare_quiet float64_compare_quiet
+#define floatx_is_any_nan float64_is_any_nan
+#define floatx_is_neg float64_is_neg
+#define floatx_is_zero float64_is_zero
+#define floatx_zero float64_zero
+#define floatx_one float64_one
+#define floatx_ln2 float64_ln2
+#define floatx_pi float64_pi
 #endif
 
 #define RC_MASK         0xc00
@@ -144,13 +170,7 @@ static inline void svm_check_intercept(uint32_t type)
 #ifdef USE_X86LDOUBLE
 
 /* only for x86 */
-typedef union {
-    long double d;
-    struct {
-        unsigned long long lower;
-        unsigned short upper;
-    } l;
-} CPU86_LDoubleU;
+typedef CPU_LDoubleU CPU86_LDoubleU;
 
 /* the following deal with x86 long double-precision numbers */
 #define MAXEXPD 0x7fff
@@ -162,24 +182,7 @@ typedef union {
 
 #else
 
-/* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */
-typedef union {
-    double d;
-#if !defined(HOST_WORDS_BIGENDIAN) && !defined(__arm__)
-    struct {
-        uint32_t lower;
-        int32_t upper;
-    } l;
-#else
-    struct {
-        int32_t upper;
-        uint32_t lower;
-    } l;
-#endif
-#ifndef __arm__
-    int64_t ll;
-#endif
-} CPU86_LDoubleU;
+typedef CPU_DoubleU CPU86_LDoubleU;
 
 /* the following deal with IEEE double-precision numbers */
 #define MAXEXPD 0x7ff
diff --git a/target-i386/helper.c b/target-i386/helper.c
index d15fca591e..89df997436 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -404,16 +404,10 @@ void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
                     env->mxcsr);
         for(i=0;i<8;i++) {
 #if defined(USE_X86LDOUBLE)
-            union {
-                long double d;
-                struct {
-                    uint64_t lower;
-                    uint16_t upper;
-                } l;
-            } tmp;
-            tmp.d = env->fpregs[i].d;
+            CPU_LDoubleU u;
+            u.d = env->fpregs[i].d;
             cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x",
-                        i, tmp.l.lower, tmp.l.upper);
+                        i, u.l.lower, u.l.upper);
 #else
             cpu_fprintf(f, "FPR%d=%016" PRIx64,
                         i, env->fpregs[i].mmx.q);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index a13599db81..faedc6c254 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -92,6 +92,35 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
     return cpuid;
 }
 
+#ifdef CONFIG_KVM_PARA
+struct kvm_para_features {
+    int cap;
+    int feature;
+} para_features[] = {
+    { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
+    { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
+    { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
+#ifdef KVM_CAP_ASYNC_PF
+    { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
+#endif
+    { -1, -1 }
+};
+
+static int get_para_features(CPUState *env)
+{
+    int i, features = 0;
+
+    for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
+        if (kvm_check_extension(env->kvm_state, para_features[i].cap)) {
+            features |= (1 << para_features[i].feature);
+        }
+    }
+
+    return features;
+}
+#endif
+
+
 uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
                                       uint32_t index, int reg)
 {
@@ -99,6 +128,9 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
     int i, max;
     uint32_t ret = 0;
     uint32_t cpuid_1_edx;
+#ifdef CONFIG_KVM_PARA
+    int has_kvm_features = 0;
+#endif
 
     max = 1;
     while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
@@ -108,6 +140,11 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
     for (i = 0; i < cpuid->nent; ++i) {
         if (cpuid->entries[i].function == function &&
             cpuid->entries[i].index == index) {
+#ifdef CONFIG_KVM_PARA
+            if (cpuid->entries[i].function == KVM_CPUID_FEATURES) {
+                has_kvm_features = 1;
+            }
+#endif
             switch (reg) {
             case R_EAX:
                 ret = cpuid->entries[i].eax;
@@ -140,38 +177,15 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function,
 
     qemu_free(cpuid);
 
-    return ret;
-}
-
 #ifdef CONFIG_KVM_PARA
-struct kvm_para_features {
-    int cap;
-    int feature;
-} para_features[] = {
-    { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
-    { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
-    { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
-#ifdef KVM_CAP_ASYNC_PF
-    { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
-#endif
-    { -1, -1 }
-};
-
-static int get_para_features(CPUState *env)
-{
-    int i, features = 0;
-
-    for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
-        if (kvm_check_extension(env->kvm_state, para_features[i].cap)) {
-            features |= (1 << para_features[i].feature);
-        }
+    /* fallback for older kernels */
+    if (!has_kvm_features && (function == KVM_CPUID_FEATURES)) {
+        ret = get_para_features(env);
     }
-#ifdef KVM_CAP_ASYNC_PF
-    has_msr_async_pf_en = features & (1 << KVM_FEATURE_ASYNC_PF);
 #endif
-    return features;
+
+    return ret;
 }
-#endif /* CONFIG_KVM_PARA */
 
 typedef struct HWPoisonPage {
     ram_addr_t ram_addr;
@@ -397,7 +411,13 @@ int kvm_arch_init_vcpu(CPUState *env)
     c = &cpuid_data.entries[cpuid_i++];
     memset(c, 0, sizeof(*c));
     c->function = KVM_CPUID_FEATURES;
-    c->eax = env->cpuid_kvm_features & get_para_features(env);
+    c->eax = env->cpuid_kvm_features & kvm_arch_get_supported_cpuid(env,
+                                                KVM_CPUID_FEATURES, 0, R_EAX);
+
+#ifdef KVM_CAP_ASYNC_PF
+    has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
+#endif
+
 #endif
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
@@ -552,7 +572,7 @@ static int kvm_get_supported_msrs(KVMState *s)
             }
         }
 
-        free(kvm_msr_list);
+        qemu_free(kvm_msr_list);
     }
 
     return ret;
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 43fbd0c778..3c539f37cf 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -17,6 +17,7 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <math.h>
 #include "exec.h"
 #include "exec-all.h"
 #include "host-utils.h"
@@ -94,15 +95,25 @@ static const uint8_t rclb_table[32] = {
     6, 7, 8, 0, 1, 2, 3, 4,
 };
 
+#if defined(CONFIG_SOFTFLOAT)
+# define floatx_lg2 make_floatx80( 0x3ffd, 0x9a209a84fbcff799LL )
+# define floatx_l2e make_floatx80( 0x3fff, 0xb8aa3b295c17f0bcLL )
+# define floatx_l2t make_floatx80( 0x4000, 0xd49a784bcd1b8afeLL )
+#else
+# define floatx_lg2 (0.30102999566398119523L)
+# define floatx_l2e (1.44269504088896340739L)
+# define floatx_l2t (3.32192809488736234781L)
+#endif
+
 static const CPU86_LDouble f15rk[7] =
 {
-    0.00000000000000000000L,
-    1.00000000000000000000L,
-    3.14159265358979323851L,  /*pi*/
-    0.30102999566398119523L,  /*lg2*/
-    0.69314718055994530943L,  /*ln2*/
-    1.44269504088896340739L,  /*l2e*/
-    3.32192809488736234781L,  /*l2t*/
+    floatx_zero,
+    floatx_one,
+    floatx_pi,
+    floatx_lg2,
+    floatx_ln2,
+    floatx_l2e,
+    floatx_l2t,
 };
 
 /* broken thread support */
@@ -3431,6 +3442,28 @@ void helper_verw(target_ulong selector1)
 
 /* x87 FPU helpers */
 
+static inline double CPU86_LDouble_to_double(CPU86_LDouble a)
+{
+    union {
+        float64 f64;
+        double d;
+    } u;
+
+    u.f64 = floatx_to_float64(a, &env->fp_status);
+    return u.d;
+}
+
+static inline CPU86_LDouble double_to_CPU86_LDouble(double a)
+{
+    union {
+        float64 f64;
+        double d;
+    } u;
+
+    u.d = a;
+    return float64_to_floatx(u.f64, &env->fp_status);
+}
+
 static void fpu_set_exception(int mask)
 {
     env->fpus |= mask;
@@ -3440,9 +3473,10 @@ static void fpu_set_exception(int mask)
 
 static inline CPU86_LDouble helper_fdiv(CPU86_LDouble a, CPU86_LDouble b)
 {
-    if (b == 0.0)
+    if (floatx_is_zero(b)) {
         fpu_set_exception(FPUS_ZE);
-    return a / b;
+    }
+    return floatx_div(a, b, &env->fp_status);
 }
 
 static void fpu_raise_exception(void)
@@ -3711,22 +3745,22 @@ void helper_fucomi_ST0_FT0(void)
 
 void helper_fadd_ST0_FT0(void)
 {
-    ST0 += FT0;
+    ST0 = floatx_add(ST0, FT0, &env->fp_status);
 }
 
 void helper_fmul_ST0_FT0(void)
 {
-    ST0 *= FT0;
+    ST0 = floatx_mul(ST0, FT0, &env->fp_status);
 }
 
 void helper_fsub_ST0_FT0(void)
 {
-    ST0 -= FT0;
+    ST0 = floatx_sub(ST0, FT0, &env->fp_status);
 }
 
 void helper_fsubr_ST0_FT0(void)
 {
-    ST0 = FT0 - ST0;
+    ST0 = floatx_sub(FT0, ST0, &env->fp_status);
 }
 
 void helper_fdiv_ST0_FT0(void)
@@ -3743,24 +3777,22 @@ void helper_fdivr_ST0_FT0(void)
 
 void helper_fadd_STN_ST0(int st_index)
 {
-    ST(st_index) += ST0;
+    ST(st_index) = floatx_add(ST(st_index), ST0, &env->fp_status);
 }
 
 void helper_fmul_STN_ST0(int st_index)
 {
-    ST(st_index) *= ST0;
+    ST(st_index) = floatx_mul(ST(st_index), ST0, &env->fp_status);
 }
 
 void helper_fsub_STN_ST0(int st_index)
 {
-    ST(st_index) -= ST0;
+    ST(st_index) = floatx_sub(ST(st_index), ST0, &env->fp_status);
 }
 
 void helper_fsubr_STN_ST0(int st_index)
 {
-    CPU86_LDouble *p;
-    p = &ST(st_index);
-    *p = ST0 - *p;
+    ST(st_index) = floatx_sub(ST0, ST(st_index), &env->fp_status);
 }
 
 void helper_fdiv_STN_ST0(int st_index)
@@ -3922,9 +3954,10 @@ void helper_fbld_ST0(target_ulong ptr)
         v = ldub(ptr + i);
         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
     }
-    tmp = val;
-    if (ldub(ptr + 9) & 0x80)
-        tmp = -tmp;
+    tmp = int64_to_floatx(val, &env->fp_status);
+    if (ldub(ptr + 9) & 0x80) {
+        floatx_chs(tmp);
+    }
     fpush();
     ST0 = tmp;
 }
@@ -3959,17 +3992,19 @@ void helper_fbst_ST0(target_ulong ptr)
 
 void helper_f2xm1(void)
 {
-    ST0 = pow(2.0,ST0) - 1.0;
+    double val = CPU86_LDouble_to_double(ST0);
+    val = pow(2.0, val) - 1.0;
+    ST0 = double_to_CPU86_LDouble(val);
 }
 
 void helper_fyl2x(void)
 {
-    CPU86_LDouble fptemp;
+    double fptemp = CPU86_LDouble_to_double(ST0);
 
-    fptemp = ST0;
     if (fptemp>0.0){
-        fptemp = log(fptemp)/log(2.0);	 /* log2(ST) */
-        ST1 *= fptemp;
+        fptemp = log(fptemp)/log(2.0);    /* log2(ST) */
+        fptemp *= CPU86_LDouble_to_double(ST1);
+        ST1 = double_to_CPU86_LDouble(fptemp);
         fpop();
     } else {
         env->fpus &= (~0x4700);
@@ -3979,15 +4014,15 @@ void helper_fyl2x(void)
 
 void helper_fptan(void)
 {
-    CPU86_LDouble fptemp;
+    double fptemp = CPU86_LDouble_to_double(ST0);
 
-    fptemp = ST0;
     if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
         env->fpus |= 0x400;
     } else {
-        ST0 = tan(fptemp);
+        fptemp = tan(fptemp);
+        ST0 = double_to_CPU86_LDouble(fptemp);
         fpush();
-        ST0 = 1.0;
+        ST0 = floatx_one;
         env->fpus &= (~0x400);  /* C2 <-- 0 */
         /* the above code is for  |arg| < 2**52 only */
     }
@@ -3995,45 +4030,57 @@ void helper_fptan(void)
 
 void helper_fpatan(void)
 {
-    CPU86_LDouble fptemp, fpsrcop;
+    double fptemp, fpsrcop;
 
-    fpsrcop = ST1;
-    fptemp = ST0;
-    ST1 = atan2(fpsrcop,fptemp);
+    fpsrcop = CPU86_LDouble_to_double(ST1);
+    fptemp = CPU86_LDouble_to_double(ST0);
+    ST1 = double_to_CPU86_LDouble(atan2(fpsrcop, fptemp));
     fpop();
 }
 
 void helper_fxtract(void)
 {
     CPU86_LDoubleU temp;
-    unsigned int expdif;
 
     temp.d = ST0;
-    expdif = EXPD(temp) - EXPBIAS;
-    /*DP exponent bias*/
-    ST0 = expdif;
-    fpush();
-    BIASEXPONENT(temp);
-    ST0 = temp.d;
+
+    if (floatx_is_zero(ST0)) {
+        /* Easy way to generate -inf and raising division by 0 exception */
+        ST0 = floatx_div(floatx_chs(floatx_one), floatx_zero, &env->fp_status);
+        fpush();
+        ST0 = temp.d;
+    } else {
+        int expdif;
+
+        expdif = EXPD(temp) - EXPBIAS;
+        /*DP exponent bias*/
+        ST0 = int32_to_floatx(expdif, &env->fp_status);
+        fpush();
+        BIASEXPONENT(temp);
+        ST0 = temp.d;
+    }
 }
 
 void helper_fprem1(void)
 {
-    CPU86_LDouble dblq, fpsrcop, fptemp;
+    double st0, st1, dblq, fpsrcop, fptemp;
     CPU86_LDoubleU fpsrcop1, fptemp1;
     int expdif;
     signed long long int q;
 
-    if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
-        ST0 = 0.0 / 0.0; /* NaN */
+    st0 = CPU86_LDouble_to_double(ST0);
+    st1 = CPU86_LDouble_to_double(ST1);
+
+    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
+        ST0 = double_to_CPU86_LDouble(0.0 / 0.0); /* NaN */
         env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
         return;
     }
 
-    fpsrcop = ST0;
-    fptemp = ST1;
-    fpsrcop1.d = fpsrcop;
-    fptemp1.d = fptemp;
+    fpsrcop = st0;
+    fptemp = st1;
+    fpsrcop1.d = ST0;
+    fptemp1.d = ST1;
     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 
     if (expdif < 0) {
@@ -4047,7 +4094,7 @@ void helper_fprem1(void)
         dblq = fpsrcop / fptemp;
         /* round dblq towards nearest integer */
         dblq = rint(dblq);
-        ST0 = fpsrcop - fptemp * dblq;
+        st0 = fpsrcop - fptemp * dblq;
 
         /* convert dblq to q by truncating towards zero */
         if (dblq < 0.0)
@@ -4063,31 +4110,35 @@ void helper_fprem1(void)
     } else {
         env->fpus |= 0x400;  /* C2 <-- 1 */
         fptemp = pow(2.0, expdif - 50);
-        fpsrcop = (ST0 / ST1) / fptemp;
+        fpsrcop = (st0 / st1) / fptemp;
         /* fpsrcop = integer obtained by chopping */
         fpsrcop = (fpsrcop < 0.0) ?
                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
-        ST0 -= (ST1 * fpsrcop * fptemp);
+        st0 -= (st1 * fpsrcop * fptemp);
     }
+    ST0 = double_to_CPU86_LDouble(st0);
 }
 
 void helper_fprem(void)
 {
-    CPU86_LDouble dblq, fpsrcop, fptemp;
+    double st0, st1, dblq, fpsrcop, fptemp;
     CPU86_LDoubleU fpsrcop1, fptemp1;
     int expdif;
     signed long long int q;
 
-    if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
-       ST0 = 0.0 / 0.0; /* NaN */
+    st0 = CPU86_LDouble_to_double(ST0);
+    st1 = CPU86_LDouble_to_double(ST1);
+
+    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
+       ST0 = double_to_CPU86_LDouble(0.0 / 0.0); /* NaN */
        env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
        return;
     }
 
-    fpsrcop = (CPU86_LDouble)ST0;
-    fptemp = (CPU86_LDouble)ST1;
-    fpsrcop1.d = fpsrcop;
-    fptemp1.d = fptemp;
+    fpsrcop = st0;
+    fptemp = st1;
+    fpsrcop1.d = ST0;
+    fptemp1.d = ST1;
     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 
     if (expdif < 0) {
@@ -4101,7 +4152,7 @@ void helper_fprem(void)
         dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/;
         /* round dblq towards zero */
         dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
-        ST0 = fpsrcop/*ST0*/ - fptemp * dblq;
+        st0 = fpsrcop/*ST0*/ - fptemp * dblq;
 
         /* convert dblq to q by truncating towards zero */
         if (dblq < 0.0)
@@ -4118,22 +4169,23 @@ void helper_fprem(void)
         int N = 32 + (expdif % 32); /* as per AMD docs */
         env->fpus |= 0x400;  /* C2 <-- 1 */
         fptemp = pow(2.0, (double)(expdif - N));
-        fpsrcop = (ST0 / ST1) / fptemp;
+        fpsrcop = (st0 / st1) / fptemp;
         /* fpsrcop = integer obtained by chopping */
         fpsrcop = (fpsrcop < 0.0) ?
                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
-        ST0 -= (ST1 * fpsrcop * fptemp);
+        st0 -= (st1 * fpsrcop * fptemp);
     }
+    ST0 = double_to_CPU86_LDouble(st0);
 }
 
 void helper_fyl2xp1(void)
 {
-    CPU86_LDouble fptemp;
+    double fptemp = CPU86_LDouble_to_double(ST0);
 
-    fptemp = ST0;
     if ((fptemp+1.0)>0.0) {
         fptemp = log(fptemp+1.0) / log(2.0); /* log2(ST+1.0) */
-        ST1 *= fptemp;
+        fptemp *= CPU86_LDouble_to_double(ST1);
+        ST1 = double_to_CPU86_LDouble(fptemp);
         fpop();
     } else {
         env->fpus &= (~0x4700);
@@ -4143,27 +4195,23 @@ void helper_fyl2xp1(void)
 
 void helper_fsqrt(void)
 {
-    CPU86_LDouble fptemp;
-
-    fptemp = ST0;
-    if (fptemp<0.0) {
+    if (floatx_is_neg(ST0)) {
         env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
         env->fpus |= 0x400;
     }
-    ST0 = sqrt(fptemp);
+    ST0 = floatx_sqrt(ST0, &env->fp_status);
 }
 
 void helper_fsincos(void)
 {
-    CPU86_LDouble fptemp;
+    double fptemp = CPU86_LDouble_to_double(ST0);
 
-    fptemp = ST0;
     if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
         env->fpus |= 0x400;
     } else {
-        ST0 = sin(fptemp);
+        ST0 = double_to_CPU86_LDouble(sin(fptemp));
         fpush();
-        ST0 = cos(fptemp);
+        ST0 = double_to_CPU86_LDouble(cos(fptemp));
         env->fpus &= (~0x400);  /* C2 <-- 0 */
         /* the above code is for  |arg| < 2**63 only */
     }
@@ -4176,18 +4224,22 @@ void helper_frndint(void)
 
 void helper_fscale(void)
 {
-    ST0 = ldexp (ST0, (int)(ST1));
+    if (floatx_is_any_nan(ST1)) {
+        ST0 = ST1;
+    } else {
+        int n = floatx_to_int32_round_to_zero(ST1, &env->fp_status);
+        ST0 = floatx_scalbn(ST0, n, &env->fp_status);
+    }
 }
 
 void helper_fsin(void)
 {
-    CPU86_LDouble fptemp;
+    double fptemp = CPU86_LDouble_to_double(ST0);
 
-    fptemp = ST0;
     if ((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
         env->fpus |= 0x400;
     } else {
-        ST0 = sin(fptemp);
+        ST0 = double_to_CPU86_LDouble(sin(fptemp));
         env->fpus &= (~0x400);  /* C2 <-- 0 */
         /* the above code is for  |arg| < 2**53 only */
     }
@@ -4195,13 +4247,12 @@ void helper_fsin(void)
 
 void helper_fcos(void)
 {
-    CPU86_LDouble fptemp;
+    double fptemp = CPU86_LDouble_to_double(ST0);
 
-    fptemp = ST0;
     if((fptemp > MAXTAN)||(fptemp < -MAXTAN)) {
         env->fpus |= 0x400;
     } else {
-        ST0 = cos(fptemp);
+        ST0 = double_to_CPU86_LDouble(cos(fptemp));
         env->fpus &= (~0x400);  /* C2 <-- 0 */
         /* the above code is for  |arg5 < 2**63 only */
     }
@@ -4783,16 +4834,6 @@ void helper_boundl(target_ulong a0, int v)
     }
 }
 
-static float approx_rsqrt(float a)
-{
-    return 1.0 / sqrt(a);
-}
-
-static float approx_rcp(float a)
-{
-    return 1.0 / a;
-}
-
 #if !defined(CONFIG_USER_ONLY)
 
 #define MMUSUFFIX _mmu
@@ -4837,7 +4878,7 @@ void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
             if (tb) {
                 /* the PC is inside the translated code. It means that we have
                    a virtual CPU fault */
-                cpu_restore_state(tb, env, pc, NULL);
+                cpu_restore_state(tb, env, pc);
             }
         }
         raise_exception_err(env->exception_index, env->error_code);
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 3232abd965..703be99cd2 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -778,28 +778,38 @@ int64_t helper_cvttsd2sq(XMMReg *s)
 
 void helper_rsqrtps(XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
-    d->XMM_S(1) = approx_rsqrt(s->XMM_S(1));
-    d->XMM_S(2) = approx_rsqrt(s->XMM_S(2));
-    d->XMM_S(3) = approx_rsqrt(s->XMM_S(3));
+    d->XMM_S(0) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(0), &env->sse_status),
+                              &env->sse_status);
+    d->XMM_S(1) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(1), &env->sse_status),
+                              &env->sse_status);
+    d->XMM_S(2) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(2), &env->sse_status),
+                              &env->sse_status);
+    d->XMM_S(3) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(3), &env->sse_status),
+                              &env->sse_status);
 }
 
 void helper_rsqrtss(XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rsqrt(s->XMM_S(0));
+    d->XMM_S(0) = float32_div(float32_one,
+                              float32_sqrt(s->XMM_S(0), &env->sse_status),
+                              &env->sse_status);
 }
 
 void helper_rcpps(XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
-    d->XMM_S(1) = approx_rcp(s->XMM_S(1));
-    d->XMM_S(2) = approx_rcp(s->XMM_S(2));
-    d->XMM_S(3) = approx_rcp(s->XMM_S(3));
+    d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status);
+    d->XMM_S(1) = float32_div(float32_one, s->XMM_S(1), &env->sse_status);
+    d->XMM_S(2) = float32_div(float32_one, s->XMM_S(2), &env->sse_status);
+    d->XMM_S(3) = float32_div(float32_one, s->XMM_S(3), &env->sse_status);
 }
 
 void helper_rcpss(XMMReg *d, XMMReg *s)
 {
-    d->XMM_S(0) = approx_rcp(s->XMM_S(0));
+    d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status);
 }
 
 static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
@@ -921,14 +931,14 @@ void helper_ ## name ## sd (Reg *d, Reg *s)\
     d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0));\
 }
 
-#define FPU_CMPEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? -1 : 0
+#define FPU_CMPEQ(size, a, b) float ## size ## _eq_quiet(a, b, &env->sse_status) ? -1 : 0
 #define FPU_CMPLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0
 #define FPU_CMPLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? -1 : 0
-#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? - 1 : 0
-#define FPU_CMPNEQ(size, a, b) float ## size ## _eq(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPUNORD(size, a, b) float ## size ## _unordered_quiet(a, b, &env->sse_status) ? - 1 : 0
+#define FPU_CMPNEQ(size, a, b) float ## size ## _eq_quiet(a, b, &env->sse_status) ? 0 : -1
 #define FPU_CMPNLT(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1
 #define FPU_CMPNLE(size, a, b) float ## size ## _le(a, b, &env->sse_status) ? 0 : -1
-#define FPU_CMPORD(size, a, b) float ## size ## _unordered(a, b, &env->sse_status) ? 0 : -1
+#define FPU_CMPORD(size, a, b) float ## size ## _unordered_quiet(a, b, &env->sse_status) ? 0 : -1
 
 SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
 SSE_HELPER_CMP(cmplt, FPU_CMPLT)
@@ -1216,8 +1226,8 @@ void helper_pfadd(MMXReg *d, MMXReg *s)
 
 void helper_pfcmpeq(MMXReg *d, MMXReg *s)
 {
-    d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
-    d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(0) = float32_eq_quiet(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
+    d->MMX_L(1) = float32_eq_quiet(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
 }
 
 void helper_pfcmpge(MMXReg *d, MMXReg *s)
@@ -1272,14 +1282,16 @@ void helper_pfpnacc(MMXReg *d, MMXReg *s)
 
 void helper_pfrcp(MMXReg *d, MMXReg *s)
 {
-    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
+    d->MMX_S(0) = float32_div(float32_one, s->MMX_S(0), &env->mmx_status);
     d->MMX_S(1) = d->MMX_S(0);
 }
 
 void helper_pfrsqrt(MMXReg *d, MMXReg *s)
 {
     d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
-    d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
+    d->MMX_S(1) = float32_div(float32_one,
+                              float32_sqrt(d->MMX_S(1), &env->mmx_status),
+                              &env->mmx_status);
     d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
     d->MMX_L(0) = d->MMX_L(1);
 }
diff --git a/target-i386/translate.c b/target-i386/translate.c
index c00845038a..199302e517 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2274,7 +2274,7 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
         /* jump to same page: we can use a direct jump */
         tcg_gen_goto_tb(tb_num);
         gen_jmp_im(eip);
-        tcg_gen_exit_tb((long)tb + tb_num);
+        tcg_gen_exit_tb((tcg_target_long)tb + tb_num);
     } else {
         /* jump to another page: currently not optimized */
         gen_jmp_im(eip);
@@ -7890,8 +7890,7 @@ void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
     gen_intermediate_code_internal(env, tb, 1);
 }
 
-void gen_pc_load(CPUState *env, TranslationBlock *tb,
-                unsigned long searched_pc, int pc_pos, void *puc)
+void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos)
 {
     int cc_op;
 #ifdef DEBUG_DISAS
@@ -7903,8 +7902,8 @@ void gen_pc_load(CPUState *env, TranslationBlock *tb,
                 qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
             }
         }
-        qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
+        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
+                pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
                 (uint32_t)tb->cs_base);
     }
 #endif