diff options
33 files changed, 1424 insertions, 1148 deletions
@@ -4047,11 +4047,14 @@ fi if test "$pie" = "no" ; then textseg_addr= case "$cpu" in - arm | hppa | i386 | m68k | ppc | ppc64 | s390* | sparc | sparc64 | x86_64 | x32) + arm | i386 | ppc* | s390* | sparc* | x86_64 | x32) + # ??? Rationale for choosing this address textseg_addr=0x60000000 ;; mips) - textseg_addr=0x400000 + # A 256M aligned address, high in the address space, with enough + # room for the code_gen_buffer above it before the stack. + textseg_addr=0x60000000 ;; esac if [ -n "$textseg_addr" ]; then diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index f6743659f7..979e532fdf 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -29,7 +29,6 @@ obj-$(CONFIG_NSERIES) += cbus.o obj-$(CONFIG_ECCMEMCTL) += eccmemctl.o obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o obj-$(CONFIG_IMX) += imx_ccm.o -obj-$(CONFIG_LM32) += lm32_sys.o obj-$(CONFIG_MILKYMIST) += milkymist-hpdmc.o obj-$(CONFIG_MILKYMIST) += milkymist-pfpu.o obj-$(CONFIG_MAINSTONE) += mst_fpga.o diff --git a/hw/misc/lm32_sys.c b/hw/misc/lm32_sys.c deleted file mode 100644 index 778eb6e042..0000000000 --- a/hw/misc/lm32_sys.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * QEMU model of the LatticeMico32 system control block. - * - * Copyright (c) 2010 Michael Walle <michael@walle.cc> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -/* - * This model is mainly intended for testing purposes and doesn't fit to any - * real hardware. On the one hand it provides a control register (R_CTRL) on - * the other hand it supports the lm32 tests. - * - * A write to the control register causes a system shutdown. - * Tests first write the pointer to a test name to the test name register - * (R_TESTNAME) and then write a zero to the pass/fail register (R_PASSFAIL) if - * the test is passed or any non-zero value to it if the test is failed. - */ - -#include "hw/hw.h" -#include "hw/sysbus.h" -#include "trace.h" -#include "qemu/log.h" -#include "qemu/error-report.h" -#include "sysemu/sysemu.h" - -enum { - R_CTRL = 0, - R_PASSFAIL, - R_TESTNAME, - R_MAX -}; - -#define MAX_TESTNAME_LEN 32 - -#define TYPE_LM32_SYS "lm32-sys" -#define LM32_SYS(obj) OBJECT_CHECK(LM32SysState, (obj), TYPE_LM32_SYS) - -struct LM32SysState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - uint32_t base; - uint32_t regs[R_MAX]; - uint8_t testname[MAX_TESTNAME_LEN]; -}; -typedef struct LM32SysState LM32SysState; - -static void copy_testname(LM32SysState *s) -{ - cpu_physical_memory_read(s->regs[R_TESTNAME], s->testname, - MAX_TESTNAME_LEN); - s->testname[MAX_TESTNAME_LEN - 1] = '\0'; -} - -static void sys_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size) -{ - LM32SysState *s = opaque; - char *testname; - - trace_lm32_sys_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTRL: - qemu_system_shutdown_request(); - break; - case R_PASSFAIL: - s->regs[addr] = value; - testname = (char *)s->testname; - fprintf(stderr, "TC %-*s %s\n", MAX_TESTNAME_LEN, - testname, (value) ? "FAILED" : "OK"); - if (value) { - cpu_dump_state(qemu_get_cpu(0), stderr, fprintf, 0); - } - break; - case R_TESTNAME: - s->regs[addr] = value; - copy_testname(s); - break; - - default: - error_report("lm32_sys: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static bool sys_ops_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write) -{ - return is_write && size == 4; -} - -static const MemoryRegionOps sys_ops = { - .write = sys_write, - .valid.accepts = sys_ops_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void sys_reset(DeviceState *d) -{ - LM32SysState *s = LM32_SYS(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - memset(s->testname, 0, MAX_TESTNAME_LEN); -} - -static int lm32_sys_init(SysBusDevice *dev) -{ - LM32SysState *s = LM32_SYS(dev); - - memory_region_init_io(&s->iomem, OBJECT(dev), &sys_ops , s, - "sys", R_MAX * 4); - sysbus_init_mmio(dev, &s->iomem); - - /* Note: This device is not created in the board initialization, - * instead it has to be added with the -device parameter. Therefore, - * the device maps itself. */ - sysbus_mmio_map(dev, 0, s->base); - - return 0; -} - -static const VMStateDescription vmstate_lm32_sys = { - .name = "lm32-sys", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, LM32SysState, R_MAX), - VMSTATE_BUFFER(testname, LM32SysState), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_sys_properties[] = { - DEFINE_PROP_UINT32("base", LM32SysState, base, 0xffff0000), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_sys_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - - k->init = lm32_sys_init; - dc->reset = sys_reset; - dc->vmsd = &vmstate_lm32_sys; - dc->props = lm32_sys_properties; -} - -static const TypeInfo lm32_sys_info = { - .name = TYPE_LM32_SYS, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32SysState), - .class_init = lm32_sys_class_init, -}; - -static void lm32_sys_register_types(void) -{ - type_register_static(&lm32_sys_info); -} - -type_init(lm32_sys_register_types) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 8bc2eb663e..c964ca4f0b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -131,7 +131,7 @@ static inline void tlb_flush(CPUState *cpu, int flush_global) #if defined(__arm__) || defined(_ARCH_PPC) \ || defined(__x86_64__) || defined(__i386__) \ || defined(__sparc__) || defined(__aarch64__) \ - || defined(__s390x__) \ + || defined(__s390x__) || defined(__mips__) \ || defined(CONFIG_TCG_INTERPRETER) #define USE_DIRECT_JUMP #endif @@ -268,7 +268,7 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); #endif } -#elif defined(__sparc__) +#elif defined(__sparc__) || defined(__mips__) void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr); #else #error tb_set_jmp_target1 is missing diff --git a/qapi-schema.json b/qapi-schema.json index 70ed7e3b45..7bc33ea717 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -691,7 +691,7 @@ # Information about current migration process. # # @status: #optional string describing the current migration status. -# As of 0.14.0 this can be 'active', 'completed', 'failed' or +# As of 0.14.0 this can be 'setup', 'active', 'completed', 'failed' or # 'cancelled'. If this field is not returned, no migration process # has been initiated # diff --git a/qemu-options.hx b/qemu-options.hx index c2c0823911..4011d46d62 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3071,7 +3071,8 @@ STEXI Set OpenBIOS nvram @var{variable} to given @var{value} (PPC, SPARC only). ETEXI DEF("semihosting", 0, QEMU_OPTION_semihosting, - "-semihosting semihosting mode\n", QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA) + "-semihosting semihosting mode\n", + QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32) STEXI @item -semihosting @findex -semihosting diff --git a/qmp-commands.hx b/qmp-commands.hx index 2c53c1ba2e..d8aa4edabe 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2940,7 +2940,7 @@ block migration status. The main json-object contains the following: - "status": migration status (json-string) - - Possible values: "active", "completed", "failed", "cancelled" + - Possible values: "setup", "active", "completed", "failed", "cancelled" - "total-time": total amount of ms since migration started. If migration has ended, it returns the total migration time (json-int) diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py index 386f17ef44..7d93d01ed2 100644 --- a/scripts/qapi-commands.py +++ b/scripts/qapi-commands.py @@ -111,7 +111,7 @@ bool has_%(argname)s = false; argname=c_var(argname), argtype=c_type(argtype)) else: ret += mcgen(''' -%(argtype)s %(argname)s; +%(argtype)s %(argname)s = {0}; ''', argname=c_var(argname), argtype=c_type(argtype)) diff --git a/scripts/qapi.py b/scripts/qapi.py index 0265b404dd..86e96089af 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -116,7 +116,7 @@ class QAPISchema: continue try: fobj = open(include_path, 'r') - except IOError as e: + except IOError, e: raise QAPIExprError(expr_info, '%s: %s' % (e.strerror, include)) exprs_include = QAPISchema(fobj, include, self.include_hist, diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 6c6f2b3d46..794dcb9fb7 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -370,8 +370,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) init_cpreg_list(cpu); - cpu_reset(cs); qemu_init_vcpu(cs); + cpu_reset(cs); acc->parent_realize(dev, errp); } diff --git a/target-lm32/Makefile.objs b/target-lm32/Makefile.objs index 40236876c8..c3e1bd6bd6 100644 --- a/target-lm32/Makefile.objs +++ b/target-lm32/Makefile.objs @@ -1,3 +1,4 @@ obj-y += translate.o op_helper.o helper.o cpu.o obj-y += gdbstub.o +obj-y += lm32-semi.o obj-$(CONFIG_SOFTMMU) += machine.o diff --git a/target-lm32/README b/target-lm32/README index a1c2c7eb1e..ba3508a711 100644 --- a/target-lm32/README +++ b/target-lm32/README @@ -16,14 +16,13 @@ This will make serial0 (the lm32_uart) and serial1 (the JTAG UART) available as virtual consoles. -Programmatically terminate the emulator ----------------------------------------- -Originally neither the LatticeMico32 nor its peripherals support a -mechanism to shut down the machine. Emulation aware programs can write to a -to a special register within the system control block to shut down the -virtual machine. For more details see hw/lm32_sys.c. The lm32-evr is the -first BSP which instantiate this model. A (32 bit) write to 0xfff0000 -causes a vm shutdown. +Semihosting +----------- +Semihosting on this target is supported. Some system calls like read, write +and exit are executed on the host if semihosting is enabled. See +target/lm32-semi.c for all supported system calls. Emulation aware programs +can use this mechanism to shut down the virtual machine and print to the +host console. See the tcg tests for an example. Special instructions diff --git a/target-lm32/cpu.h b/target-lm32/cpu.h index 24bde78502..70600aa47a 100644 --- a/target-lm32/cpu.h +++ b/target-lm32/cpu.h @@ -217,6 +217,7 @@ void lm32_breakpoint_remove(CPULM32State *env, int index); void lm32_watchpoint_insert(CPULM32State *env, int index, target_ulong address, lm32_wp_t wp_type); void lm32_watchpoint_remove(CPULM32State *env, int index); +bool lm32_cpu_do_semihosting(CPUState *cs); static inline CPULM32State *cpu_init(const char *cpu_model) { diff --git a/target-lm32/helper.c b/target-lm32/helper.c index 783aa16a45..1bca1961af 100644 --- a/target-lm32/helper.c +++ b/target-lm32/helper.c @@ -1,7 +1,7 @@ /* * LatticeMico32 helper routines. * - * Copyright (c) 2010 Michael Walle <michael@walle.cc> + * Copyright (c) 2010-2014 Michael Walle <michael@walle.cc> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -19,6 +19,7 @@ #include "cpu.h" #include "qemu/host-utils.h" +#include "sysemu/sysemu.h" int lm32_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw, int mmu_idx) @@ -159,11 +160,20 @@ void lm32_cpu_do_interrupt(CPUState *cs) "exception at pc=%x type=%x\n", env->pc, cs->exception_index); switch (cs->exception_index) { + case EXCP_SYSTEMCALL: + if (unlikely(semihosting_enabled)) { + /* do_semicall() returns true if call was handled. Otherwise + * do the normal exception handling. */ + if (lm32_cpu_do_semihosting(cs)) { + env->pc += 4; + break; + } + } + /* fall through */ case EXCP_INSN_BUS_ERROR: case EXCP_DATA_BUS_ERROR: case EXCP_DIVIDE_BY_ZERO: case EXCP_IRQ: - case EXCP_SYSTEMCALL: /* non-debug exceptions */ env->regs[R_EA] = env->pc; env->ie |= (env->ie & IE_IE) ? IE_EIE : 0; diff --git a/target-lm32/lm32-semi.c b/target-lm32/lm32-semi.c new file mode 100644 index 0000000000..fc9d2d1c73 --- /dev/null +++ b/target-lm32/lm32-semi.c @@ -0,0 +1,215 @@ +/* + * Lattice Mico32 semihosting syscall interface + * + * Copyright (c) 2014 Michael Walle <michael@walle.cc> + * + * Based on target-m68k/m68k-semi.c, which is + * Copyright (c) 2005-2007 CodeSourcery. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stddef.h> +#include "cpu.h" +#include "helper.h" +#include "qemu/log.h" +#include "exec/softmmu-semi.h" + +enum { + TARGET_SYS_exit = 1, + TARGET_SYS_open = 2, + TARGET_SYS_close = 3, + TARGET_SYS_read = 4, + TARGET_SYS_write = 5, + TARGET_SYS_lseek = 6, + TARGET_SYS_fstat = 10, + TARGET_SYS_stat = 15, +}; + +enum { + NEWLIB_O_RDONLY = 0x0, + NEWLIB_O_WRONLY = 0x1, + NEWLIB_O_RDWR = 0x2, + NEWLIB_O_APPEND = 0x8, + NEWLIB_O_CREAT = 0x200, + NEWLIB_O_TRUNC = 0x400, + NEWLIB_O_EXCL = 0x800, +}; + +static int translate_openflags(int flags) +{ + int hf; + + if (flags & NEWLIB_O_WRONLY) { + hf = O_WRONLY; + } else if (flags & NEWLIB_O_RDWR) { + hf = O_RDWR; + } else { + hf = O_RDONLY; + } + + if (flags & NEWLIB_O_APPEND) { + hf |= O_APPEND; + } + + if (flags & NEWLIB_O_CREAT) { + hf |= O_CREAT; + } + + if (flags & NEWLIB_O_TRUNC) { + hf |= O_TRUNC; + } + + if (flags & NEWLIB_O_EXCL) { + hf |= O_EXCL; + } + + return hf; +} + +struct newlib_stat { + int16_t newlib_st_dev; /* device */ + uint16_t newlib_st_ino; /* inode */ + uint16_t newlib_st_mode; /* protection */ + uint16_t newlib_st_nlink; /* number of hard links */ + uint16_t newlib_st_uid; /* user ID of owner */ + uint16_t newlib_st_gid; /* group ID of owner */ + int16_t newlib_st_rdev; /* device type (if inode device) */ + int32_t newlib_st_size; /* total size, in bytes */ + int32_t newlib_st_atime; /* time of last access */ + uint32_t newlib_st_spare1; + int32_t newlib_st_mtime; /* time of last modification */ + uint32_t newlib_st_spare2; + int32_t newlib_st_ctime; /* time of last change */ + uint32_t newlib_st_spare3; +} QEMU_PACKED; + +static int translate_stat(CPULM32State *env, target_ulong addr, + struct stat *s) +{ + struct newlib_stat *p; + + p = lock_user(VERIFY_WRITE, addr, sizeof(struct newlib_stat), 0); + if (!p) { + return 0; + } + p->newlib_st_dev = cpu_to_be16(s->st_dev); + p->newlib_st_ino = cpu_to_be16(s->st_ino); + p->newlib_st_mode = cpu_to_be16(s->st_mode); + p->newlib_st_nlink = cpu_to_be16(s->st_nlink); + p->newlib_st_uid = cpu_to_be16(s->st_uid); + p->newlib_st_gid = cpu_to_be16(s->st_gid); + p->newlib_st_rdev = cpu_to_be16(s->st_rdev); + p->newlib_st_size = cpu_to_be32(s->st_size); + p->newlib_st_atime = cpu_to_be32(s->st_atime); + p->newlib_st_mtime = cpu_to_be32(s->st_mtime); + p->newlib_st_ctime = cpu_to_be32(s->st_ctime); + unlock_user(p, addr, sizeof(struct newlib_stat)); + + return 1; +} + +bool lm32_cpu_do_semihosting(CPUState *cs) +{ + LM32CPU *cpu = LM32_CPU(cs); + CPULM32State *env = &cpu->env; + + int ret = -1; + target_ulong nr, arg0, arg1, arg2; + void *p; + struct stat s; + + nr = env->regs[R_R8]; + arg0 = env->regs[R_R1]; + arg1 = env->regs[R_R2]; + arg2 = env->regs[R_R3]; + + switch (nr) { + case TARGET_SYS_exit: + /* void _exit(int rc) */ + exit(arg0); + + case TARGET_SYS_open: + /* int open(const char *pathname, int flags) */ + p = lock_user_string(arg0); + if (!p) { + ret = -1; + } else { + ret = open(p, translate_openflags(arg2)); + unlock_user(p, arg0, 0); + } + break; + + case TARGET_SYS_read: + /* ssize_t read(int fd, const void *buf, size_t count) */ + p = lock_user(VERIFY_WRITE, arg1, arg2, 0); + if (!p) { + ret = -1; + } else { + ret = read(arg0, p, arg2); + unlock_user(p, arg1, arg2); + } + break; + + case TARGET_SYS_write: + /* ssize_t write(int fd, const void *buf, size_t count) */ + p = lock_user(VERIFY_READ, arg1, arg2, 1); + if (!p) { + ret = -1; + } else { + ret = write(arg0, p, arg2); + unlock_user(p, arg1, 0); + } + break; + + case TARGET_SYS_close: + /* int close(int fd) */ + /* don't close stdin/stdout/stderr */ + if (arg0 > 2) { + ret = close(arg0); + } else { + ret = 0; + } + break; + + case TARGET_SYS_lseek: + /* off_t lseek(int fd, off_t offset, int whence */ + ret = lseek(arg0, arg1, arg2); + break; + + case TARGET_SYS_stat: + /* int stat(const char *path, struct stat *buf) */ + p = lock_user_string(arg0); + if (!p) { + ret = -1; + } else { + ret = stat(p, &s); + unlock_user(p, arg0, 0); + if (translate_stat(env, arg1, &s) == 0) { + ret = -1; + } + } + break; + + case TARGET_SYS_fstat: + /* int stat(int fd, struct stat *buf) */ + ret = fstat(arg0, &s); + if (ret == 0) { + if (translate_stat(env, arg1, &s) == 0) { + ret = -1; + } + } + break; + + default: + /* unhandled */ + return false; + } + + env->regs[R_R1] = ret; + return true; +} diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 0ae495c586..8855d5039d 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -24,14 +24,17 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" +#include "tcg-be-ldst.h" -#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN) -# define TCG_NEED_BSWAP 0 +#ifdef HOST_WORDS_BIGENDIAN +# define MIPS_BE 1 #else -# define TCG_NEED_BSWAP 1 +# define MIPS_BE 0 #endif +#define LO_OFF (MIPS_BE * 4) +#define HI_OFF (4 - LO_OFF) + #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", @@ -64,13 +67,17 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "k1", "gp", "sp", - "fp", + "s8", "ra", }; #endif +#define TCG_TMP0 TCG_REG_AT +#define TCG_TMP1 TCG_REG_T9 + /* check if we really need so many registers :P */ static const TCGReg tcg_target_reg_alloc_order[] = { + /* Call saved registers. */ TCG_REG_S0, TCG_REG_S1, TCG_REG_S2, @@ -79,6 +86,10 @@ static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, + TCG_REG_S8, + + /* Call clobbered registers. */ + TCG_REG_T0, TCG_REG_T1, TCG_REG_T2, TCG_REG_T3, @@ -88,12 +99,14 @@ static const TCGReg tcg_target_reg_alloc_order[] = { TCG_REG_T7, TCG_REG_T8, TCG_REG_T9, - TCG_REG_A0, - TCG_REG_A1, - TCG_REG_A2, - TCG_REG_A3, + TCG_REG_V1, TCG_REG_V0, - TCG_REG_V1 + + /* Argument registers, opposite order of allocation. */ + TCG_REG_A3, + TCG_REG_A2, + TCG_REG_A1, + TCG_REG_A0, }; static const TCGReg tcg_target_call_iarg_regs[4] = { @@ -142,6 +155,17 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, reloc_pc16(code_ptr, (tcg_insn_unit *)value); } +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */ +#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */ +#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */ +#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */ + +static inline bool is_p2m1(tcg_target_long val) +{ + return val && ((val + 1) & val) == 0; +} + /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -161,11 +185,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'l': /* qemu_ld input arg constraint */ ct->ct |= TCG_CT_REG; tcg_regset_set(ct->u.regs, 0xffffffff); -#if defined(CONFIG_SOFTMMU) tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); -# if (TARGET_LONG_BITS == 64) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); -# endif +#if defined(CONFIG_SOFTMMU) + if (TARGET_LONG_BITS == 64) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + } #endif break; case 'S': /* qemu_st constraint */ @@ -173,13 +197,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_set(ct->u.regs, 0xffffffff); tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); #if defined(CONFIG_SOFTMMU) -# if (TARGET_LONG_BITS == 32) - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); -# endif - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); -# if TARGET_LONG_BITS == 64 - tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); -# endif + if (TARGET_LONG_BITS == 32) { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); + } else { + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); + } #endif break; case 'I': @@ -188,6 +211,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 'J': ct->ct |= TCG_CT_CONST_S16; break; + case 'K': + ct->ct |= TCG_CT_CONST_P2M1; + break; + case 'N': + ct->ct |= TCG_CT_CONST_N16; + break; case 'Z': /* We are cheating a bit here, using the fact that the register ZERO is also the register number 0. Hence there is no need @@ -208,20 +237,27 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, { int ct; ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; - else if ((ct & TCG_CT_CONST_ZERO) && val == 0) + } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { return 1; - else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) + } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) + } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { return 1; - else - return 0; + } else if ((ct & TCG_CT_CONST_P2M1) + && use_mips32r2_instructions && is_p2m1(val)) { + return 1; + } + return 0; } /* instruction opcodes */ -enum { +typedef enum { + OPC_J = 0x02 << 26, + OPC_JAL = 0x03 << 26, OPC_BEQ = 0x04 << 26, OPC_BNE = 0x05 << 26, OPC_BLEZ = 0x06 << 26, @@ -279,16 +315,17 @@ enum { OPC_MUL = OPC_SPECIAL2 | 0x002, OPC_SPECIAL3 = 0x1f << 26, + OPC_EXT = OPC_SPECIAL3 | 0x000, OPC_INS = OPC_SPECIAL3 | 0x004, OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, -}; +} MIPSInsn; /* * Type reg */ -static inline void tcg_out_opc_reg(TCGContext *s, int opc, +static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, TCGReg rd, TCGReg rs, TCGReg rt) { int32_t inst; @@ -303,7 +340,7 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, /* * Type immediate */ -static inline void tcg_out_opc_imm(TCGContext *s, int opc, +static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs, TCGArg imm) { int32_t inst; @@ -316,9 +353,25 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, } /* + * Type bitfield + */ +static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, + TCGReg rs, int msb, int lsb) +{ + int32_t inst; + + inst = opc; + inst |= (rs & 0x1F) << 21; + inst |= (rt & 0x1F) << 16; + inst |= (msb & 0x1F) << 11; + inst |= (lsb & 0x1F) << 6; + tcg_out32(s, inst); +} + +/* * Type branch */ -static inline void tcg_out_opc_br(TCGContext *s, int opc, +static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs) { /* We pay attention here to not modify the branch target by reading @@ -332,7 +385,7 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, /* * Type sa */ -static inline void tcg_out_opc_sa(TCGContext *s, int opc, +static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, TCGReg rd, TCGReg rt, TCGArg sa) { int32_t inst; @@ -345,6 +398,29 @@ static inline void tcg_out_opc_sa(TCGContext *s, int opc, } +/* + * Type jump. + * Returns true if the branch was in range and the insn was emitted. + */ +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) +{ + uintptr_t dest = (uintptr_t)target; + uintptr_t from = (uintptr_t)s->code_ptr + 4; + int32_t inst; + + /* The pc-region branch happens within the 256MB region of + the delay slot (thus the +4). */ + if ((from ^ dest) & -(1 << 28)) { + return false; + } + assert((dest & 3) == 0); + + inst = opc; + inst |= (dest >> 2) & 0x3ffffff; + tcg_out32(s, inst); + return true; +} + static inline void tcg_out_nop(TCGContext *s) { tcg_out32(s, 0); @@ -367,8 +443,10 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } else if (arg == (uint16_t)arg) { tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); } else { - tcg_out_opc_imm(s, OPC_LUI, reg, 0, arg >> 16); - tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); + if (arg & 0xffff) { + tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); + } } } @@ -378,14 +456,14 @@ static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); } else { /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -396,14 +474,14 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); } else { /* ret and arg can't be register at */ - if (ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -414,22 +492,22 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); } else { /* ret and arg must be different and can't be register at */ - if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) { + if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { tcg_abort(); } tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 24); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, arg, 0xff00); - tcg_out_opc_sa(s, OPC_SLL, TCG_REG_AT, TCG_REG_AT, 8); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); } } @@ -453,16 +531,18 @@ static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) } } -static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg, - TCGReg arg1, TCGArg arg2) +static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, + TCGReg addr, intptr_t ofs) { - if (arg2 == (int16_t) arg2) { - tcg_out_opc_imm(s, opc, arg, arg1, arg2); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, arg2); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, TCG_REG_AT, arg1); - tcg_out_opc_imm(s, opc, arg, TCG_REG_AT, 0); + int16_t lo = ofs; + if (ofs != lo) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr); + } + addr = TCG_TMP0; } + tcg_out_opc_imm(s, opc, data, addr, lo); } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, @@ -482,1051 +562,1008 @@ static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) if (val == (int16_t)val) { tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, val); - tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_REG_AT); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); + tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); } } -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * arg_num is where we want to put this argument, and is updated to be ready - * for the next call. arg is the argument itself. Note that arg_num 0..3 is - * real registers, 4+ on stack. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ -#define DEFINE_TCG_OUT_CALL_IARG(NAME, ARGPARAM) \ - static inline void NAME(TCGContext *s, int *arg_num, ARGPARAM) \ - { \ - if (*arg_num < 4) { \ - DEFINE_TCG_OUT_CALL_IARG_GET_ARG(tcg_target_call_iarg_regs[*arg_num]); \ - } else { \ - DEFINE_TCG_OUT_CALL_IARG_GET_ARG(TCG_REG_AT); \ - tcg_out_st(s, TCG_TYPE_I32, TCG_REG_AT, TCG_REG_SP, 4 * (*arg_num)); \ - } \ - (*arg_num)++; \ -} -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_opc_imm(s, OPC_ANDI, A, arg, 0xff); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg8, TCGReg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_opc_imm(s, OPC_ANDI, A, arg, 0xffff); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG -#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \ - tcg_out_movi(s, TCG_TYPE_I32, A, arg); -DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg) -#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG - -/* We don't use the macro for this one to avoid an unnecessary reg-reg - move when storing to the stack. */ -static inline void tcg_out_call_iarg_reg32(TCGContext *s, int *arg_num, - TCGReg arg) -{ - if (*arg_num < 4) { - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[*arg_num], arg); - } else { - tcg_out_st(s, TCG_TYPE_I32, arg, TCG_REG_SP, 4 * (*arg_num)); - } - (*arg_num)++; -} - -static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num, - TCGReg arg_low, TCGReg arg_high) -{ - (*arg_num) = (*arg_num + 1) & ~1; - -#if defined(HOST_WORDS_BIGENDIAN) - tcg_out_call_iarg_reg32(s, arg_num, arg_high); - tcg_out_call_iarg_reg32(s, arg_num, arg_low); -#else - tcg_out_call_iarg_reg32(s, arg_num, arg_low); - tcg_out_call_iarg_reg32(s, arg_num, arg_high); -#endif -} +/* Bit 0 set if inversion required; bit 1 set if swapping required. */ +#define MIPS_CMP_INV 1 +#define MIPS_CMP_SWAP 2 + +static const uint8_t mips_cmp_map[16] = { + [TCG_COND_LT] = 0, + [TCG_COND_LTU] = 0, + [TCG_COND_GE] = MIPS_CMP_INV, + [TCG_COND_GEU] = MIPS_CMP_INV, + [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, + [TCG_COND_GT] = MIPS_CMP_SWAP, + [TCG_COND_GTU] = MIPS_CMP_SWAP, +}; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, int label_index) +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) { - TCGLabel *l = &s->labels[label_index]; + MIPSInsn s_opc = OPC_SLTU; + int cmp_map; switch (cond) { case TCG_COND_EQ: - tcg_out_opc_br(s, OPC_BEQ, arg1, arg2); + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; + } + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); break; + case TCG_COND_NE: - tcg_out_opc_br(s, OPC_BNE, arg1, arg2); - break; - case TCG_COND_LT: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BLTZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); + if (arg2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + arg1 = ret; } + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); - break; + + case TCG_COND_LT: case TCG_COND_GE: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BGEZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - } - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - break; case TCG_COND_LE: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BLEZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - } - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO); - break; case TCG_COND_GT: - if (arg2 == 0) { - tcg_out_opc_br(s, OPC_BGTZ, 0, arg1); - } else { - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); - } - break; + s_opc = OPC_SLT; + /* FALLTHRU */ + + case TCG_COND_LTU: + case TCG_COND_GEU: + case TCG_COND_LEU: case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1); - tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO); - break; - default: - tcg_abort(); + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); + if (cmp_map & MIPS_CMP_INV) { + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + } break; - } - if (l->has_value) { - reloc_pc16(s->code_ptr - 1, l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, label_index, 0); - } - tcg_out_nop(s); + + default: + tcg_abort(); + break; + } } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1, - TCGArg arg2, TCGArg arg3, TCGArg arg4, - int label_index) +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, int label_index) { - tcg_insn_unit *label_ptr; + static const MIPSInsn b_zero[16] = { + [TCG_COND_LT] = OPC_BLTZ, + [TCG_COND_GT] = OPC_BGTZ, + [TCG_COND_LE] = OPC_BLEZ, + [TCG_COND_GE] = OPC_BGEZ, + }; + + TCGLabel *l; + MIPSInsn s_opc = OPC_SLTU; + MIPSInsn b_opc; + int cmp_map; - switch(cond) { - case TCG_COND_NE: - tcg_out_brcond(s, TCG_COND_NE, arg2, arg4, label_index); - tcg_out_brcond(s, TCG_COND_NE, arg1, arg3, label_index); - return; + switch (cond) { case TCG_COND_EQ: + b_opc = OPC_BEQ; break; - case TCG_COND_LT: - case TCG_COND_LE: - tcg_out_brcond(s, TCG_COND_LT, arg2, arg4, label_index); + case TCG_COND_NE: + b_opc = OPC_BNE; break; + + case TCG_COND_LT: case TCG_COND_GT: + case TCG_COND_LE: case TCG_COND_GE: - tcg_out_brcond(s, TCG_COND_GT, arg2, arg4, label_index); - break; - case TCG_COND_LTU: - case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LTU, arg2, arg4, label_index); - break; - case TCG_COND_GTU: - case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GTU, arg2, arg4, label_index); - break; - default: - tcg_abort(); - } - - label_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, arg2, arg4); - tcg_out_nop(s); + if (arg2 == 0) { + b_opc = b_zero[cond]; + arg2 = arg1; + arg1 = 0; + break; + } + s_opc = OPC_SLT; + /* FALLTHRU */ - switch(cond) { - case TCG_COND_EQ: - tcg_out_brcond(s, TCG_COND_EQ, arg1, arg3, label_index); - break; - case TCG_COND_LT: case TCG_COND_LTU: - tcg_out_brcond(s, TCG_COND_LTU, arg1, arg3, label_index); - break; - case TCG_COND_LE: - case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LEU, arg1, arg3, label_index); - break; - case TCG_COND_GT: case TCG_COND_GTU: - tcg_out_brcond(s, TCG_COND_GTU, arg1, arg3, label_index); - break; - case TCG_COND_GE: + case TCG_COND_LEU: case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GEU, arg1, arg3, label_index); + cmp_map = mips_cmp_map[cond]; + if (cmp_map & MIPS_CMP_SWAP) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); + arg1 = TCG_TMP0; + arg2 = TCG_REG_ZERO; break; + default: tcg_abort(); + break; } - reloc_pc16(label_ptr, s->code_ptr); + tcg_out_opc_br(s, b_opc, arg1, arg2); + l = &s->labels[label_index]; + if (l->has_value) { + reloc_pc16(s->code_ptr - 1, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, label_index, 0); + } + tcg_out_nop(s); } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg c1, TCGArg c2, TCGArg v) +static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, + TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh) { - switch (cond) { - case TCG_COND_EQ: - if (c1 == 0) { - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2); - } else if (c2 == 0) { - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1); + /* Merge highpart comparison into AH. */ + if (bh != 0) { + if (ah != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); + ah = tmp0; } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); + ah = bh; } - break; - case TCG_COND_NE: - if (c1 == 0) { - tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2); - } else if (c2 == 0) { - tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1); + } + /* Merge lowpart comparison into AL. */ + if (bl != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); + al = tmp1; } else { - tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); + al = bl; + } + } + /* Merge high and low part comparisons into AL. */ + if (ah != 0) { + if (al != 0) { + tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); + al = tmp0; + } else { + al = ah; } - break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1); - tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT); - break; - default: - tcg_abort(); - break; } + return al; } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg arg1, TCGArg arg2) +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) { + TCGReg tmp0 = TCG_TMP0; + TCGReg tmp1 = ret; + + assert(ret != TCG_TMP0); + if (ret == ah || ret == bh) { + assert(ret != TCG_TMP1); + tmp1 = TCG_TMP1; + } + switch (cond) { case TCG_COND_EQ: - if (arg1 == 0) { - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg2, 1); - } else if (arg2 == 0) { - tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); - } else { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); - } - break; case TCG_COND_NE: - if (arg1 == 0) { - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg2); - } else if (arg2 == 0) { - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); - } else { - tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); - } - break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); + tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + + default: + tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); + tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); + tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); + tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); + tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, int label_index) +{ + TCGCond b_cond = TCG_COND_NE; + TCGReg tmp = TCG_TMP1; + + /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. + With setcond, we emit between 3 and 10 insns and only 1 branch, + which ought to get better branch prediction. */ + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + b_cond = cond; + tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); break; + default: - tcg_abort(); + /* Minimize code size by prefering a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + b_cond = TCG_COND_EQ; + } + tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); break; } + + tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, label_index); } -/* XXX: we implement it at the target level to avoid having to - handle cross basic blocks temporaries */ -static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, - TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4) +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, TCGReg c2, TCGReg v) { + MIPSInsn m_opc = OPC_MOVN; + switch (cond) { case TCG_COND_EQ: - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_AT, arg2, arg4); - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, arg1, arg3); - tcg_out_opc_reg(s, OPC_AND, ret, TCG_REG_AT, TCG_REG_T0); - return; + m_opc = OPC_MOVZ; + /* FALLTHRU */ case TCG_COND_NE: - tcg_out_setcond(s, TCG_COND_NE, TCG_REG_AT, arg2, arg4); - tcg_out_setcond(s, TCG_COND_NE, TCG_REG_T0, arg1, arg3); - tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_AT, TCG_REG_T0); - return; - case TCG_COND_LT: - case TCG_COND_LE: - tcg_out_setcond(s, TCG_COND_LT, TCG_REG_AT, arg2, arg4); - break; - case TCG_COND_GT: - case TCG_COND_GE: - tcg_out_setcond(s, TCG_COND_GT, TCG_REG_AT, arg2, arg4); - break; - case TCG_COND_LTU: - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LTU, TCG_REG_AT, arg2, arg4); - break; - case TCG_COND_GTU: - case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GTU, TCG_REG_AT, arg2, arg4); + if (c2 != 0) { + tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; + } break; + default: - tcg_abort(); + /* Minimize code size by prefering a compare not requiring INV. */ + if (mips_cmp_map[cond] & MIPS_CMP_INV) { + cond = tcg_invert_cond(cond); + m_opc = OPC_MOVZ; + } + tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); + c1 = TCG_TMP0; break; } - tcg_out_setcond(s, TCG_COND_EQ, TCG_REG_T0, arg2, arg4); + tcg_out_opc_reg(s, m_opc, ret, v, c1); +} - switch(cond) { - case TCG_COND_LT: - case TCG_COND_LTU: - tcg_out_setcond(s, TCG_COND_LTU, ret, arg1, arg3); - break; - case TCG_COND_LE: - case TCG_COND_LEU: - tcg_out_setcond(s, TCG_COND_LEU, ret, arg1, arg3); - break; - case TCG_COND_GT: - case TCG_COND_GTU: - tcg_out_setcond(s, TCG_COND_GTU, ret, arg1, arg3); - break; - case TCG_COND_GE: - case TCG_COND_GEU: - tcg_out_setcond(s, TCG_COND_GEU, ret, arg1, arg3); - break; - default: - tcg_abort(); +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + /* Note that the ABI requires the called function's address to be + loaded into T9, even if a direct branch is in range. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + + /* But do try a direct branch, allowing the cpu better insn prefetch. */ + if (tail) { + if (!tcg_out_opc_jmp(s, OPC_J, arg)) { + tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); + } + } else { + if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { + tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); + } } +} - tcg_out_opc_reg(s, OPC_AND, ret, ret, TCG_REG_T0); - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT); +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); + tcg_out_nop(s); } #if defined(CONFIG_SOFTMMU) -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ -static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEQ] = helper_be_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ -static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, }; -#endif -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - int opc) +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. + */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) { - TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label1_ptr, *label2_ptr; - int arg_num; - int mem_index, s_bits; - int addr_meml; -# if TARGET_LONG_BITS == 64 - tcg_insn_unit *label3_ptr; - TCGReg addr_regh; - int addr_memh; -# endif -#endif - data_regl = *args++; - if (opc == 3) - data_regh = *args++; - else - data_regh = 0; - addr_regl = *args++; -#if defined(CONFIG_SOFTMMU) -# if TARGET_LONG_BITS == 64 - addr_regh = *args++; -# if defined(HOST_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -# else - addr_meml = 0; -# endif - mem_index = *args; - s_bits = opc & 3; -#endif + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); + } else { + tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); + } + return i + 1; +} - if (opc == 3) { -#if defined(HOST_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; -#else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ + TCGReg tmp = TCG_TMP0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ + TCGReg tmp = TCG_TMP0; + if (arg == 0) { + tmp = TCG_REG_ZERO; } else { - data_reg1 = data_regl; - data_reg2 = 0; + if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { + tmp = tcg_target_call_iarg_regs[i]; + } + tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); } -#if defined(CONFIG_SOFTMMU) - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) +{ + i = (i + 1) & ~1; + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); + i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); + return i; +} + +/* Perform the tlb comparison operation. The complete host address is + placed in BASE. Clobbers AT, T0, A0. */ +static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, + TCGReg addrh, int mem_index, TCGMemOp s_bits, + tcg_insn_unit *label_ptr[2], bool is_load) +{ + int cmp_off + = (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + + tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_meml); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl); - -# if TARGET_LONG_BITS == 64 - label3_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_memh); + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT); - tcg_out_nop(s); + /* Load the tlb comparator. */ + tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + LO_OFF); + if (TARGET_LONG_BITS == 64) { + tcg_out_opc_imm(s, OPC_LW, base, TCG_REG_A0, cmp_off + HI_OFF); + } - reloc_pc16(label3_ptr, s->code_ptr); -# else - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); -# endif - - /* slow path */ - arg_num = 0; - tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0); -# if TARGET_LONG_BITS == 64 - tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh); -# else - tcg_out_call_iarg_reg32(s, &arg_num, addr_regl); -# endif - tcg_out_call_iarg_imm32(s, &arg_num, mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); + /* Mask the page bits, keeping the alignment bits to compare against. + In between, load the tlb addend for the fast path. */ + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_ANDI, data_reg1, TCG_REG_V0, 0xff); - break; - case 0 | 4: - tcg_out_ext8s(s, data_reg1, TCG_REG_V0); - break; - case 1: - tcg_out_opc_imm(s, OPC_ANDI, data_reg1, TCG_REG_V0, 0xffff); - break; - case 1 | 4: - tcg_out_ext16s(s, data_reg1, TCG_REG_V0); - break; - case 2: - tcg_out_mov(s, TCG_TYPE_I32, data_reg1, TCG_REG_V0); - break; - case 3: - tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_V1); - tcg_out_mov(s, TCG_TYPE_I32, data_reg1, TCG_REG_V0); - break; - default: - tcg_abort(); + label_ptr[0] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + + if (TARGET_LONG_BITS == 64) { + /* delay slot */ + tcg_out_nop(s); + + label_ptr[1] = s->code_ptr; + tcg_out_opc_br(s, OPC_BNE, addrh, base); } - label2_ptr = s->code_ptr; + /* delay slot */ + tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + int mem_index, void *raddr, + tcg_insn_unit *label_ptr[2]) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; + if (TARGET_LONG_BITS == 64) { + label->label_ptr[1] = label_ptr[1]; + } +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + TCGReg v0; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } + + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); + } else { + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + i = tcg_out_call_iarg_imm(s, i, l->mem_index); + i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_call_int(s, qemu_ld_helpers[opc], false); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + + v0 = l->datalo_reg; + if ((opc & MO_SIZE) == MO_64) { + /* We eliminated V0 from the possible output registers, so it + cannot be clobbered here. So we must move V1 first. */ + if (MIPS_BE) { + tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); + v0 = l->datahi_reg; + } else { + tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); + } + } + + reloc_pc16(s->code_ptr, l->raddr); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); +} - /* label1: fast path */ - reloc_pc16(label1_ptr, s->code_ptr); +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOp opc = l->opc; + TCGMemOp s_bits = opc & MO_SIZE; + int i; + + /* resolve label address */ + reloc_pc16(l->label_ptr[0], s->code_ptr); + if (TARGET_LONG_BITS == 64) { + reloc_pc16(l->label_ptr[1], s->code_ptr); + } - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_A0, addr_regl); -#else - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_V0, addr_regl, GUEST_BASE); + i = 1; + if (TARGET_LONG_BITS == 64) { + i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, GUEST_BASE); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_V0, addr_regl); + i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); + } + switch (s_bits) { + case MO_8: + i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); + break; + case MO_16: + i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); + break; + case MO_32: + i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); + break; + case MO_64: + i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); + break; + default: + tcg_abort(); } + i = tcg_out_call_iarg_imm(s, i, l->mem_index); + + /* Tail call to the store helper. Thus force the return address + computation to take place in the return address register. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); + i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); + tcg_out_call_int(s, qemu_st_helpers[opc], true); + /* delay slot */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +} #endif - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_LBU, data_reg1, TCG_REG_V0, 0); +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); break; - case 0 | 4: - tcg_out_opc_imm(s, OPC_LB, data_reg1, TCG_REG_V0, 0); + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); break; - case 1: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap16(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LHU, data_reg1, TCG_REG_V0, 0); - } + case MO_UW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16(s, datalo, TCG_TMP1); break; - case 1 | 4: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LHU, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap16s(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LH, data_reg1, TCG_REG_V0, 0); - } + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); break; - case 2: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap32(s, data_reg1, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LW, data_reg1, TCG_REG_V0, 0); - } + case MO_SW | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); + tcg_out_bswap16s(s, datalo, TCG_TMP1); break; - case 3: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 4); - tcg_out_bswap32(s, data_reg1, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_T0, TCG_REG_V0, 0); - tcg_out_bswap32(s, data_reg2, TCG_REG_T0); - } else { - tcg_out_opc_imm(s, OPC_LW, data_reg1, TCG_REG_V0, 0); - tcg_out_opc_imm(s, OPC_LW, data_reg2, TCG_REG_V0, 4); - } + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); + break; + case MO_UL | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); + tcg_out_bswap32(s, datalo, TCG_TMP1); + break; + case MO_UL: + tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); + break; + case MO_Q | MO_BSWAP: + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, datalo, TCG_TMP1); + tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); + tcg_out_bswap32(s, datahi, TCG_TMP1); + break; + case MO_Q: + tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); break; default: tcg_abort(); } - -#if defined(CONFIG_SOFTMMU) - reloc_pc16(label2_ptr, s->code_ptr); -#endif } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) { - TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2; + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label1_ptr, *label2_ptr; - int arg_num; - int mem_index, s_bits; - int addr_meml; -#endif -#if TARGET_LONG_BITS == 64 -# if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label3_ptr; - TCGReg addr_regh; - int addr_memh; -# endif + tcg_insn_unit *label_ptr[2]; + int mem_index; + TCGMemOp s_bits; #endif + /* Note that we've eliminated V0 from the output registers, + so we won't overwrite the base register during loading. */ + TCGReg base = TCG_REG_V0; + data_regl = *args++; - if (opc == 3) { - data_regh = *args++; - } else { - data_regh = 0; - } + data_regh = (is_64 ? *args++ : 0); addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; + #if defined(CONFIG_SOFTMMU) -# if TARGET_LONG_BITS == 64 - addr_regh = *args++; -# if defined(HOST_WORDS_BIGENDIAN) - addr_memh = 0; - addr_meml = 4; -# else - addr_memh = 4; - addr_meml = 0; -# endif -# else - addr_meml = 0; -# endif mem_index = *args; - s_bits = opc; -#endif + s_bits = opc & MO_SIZE; - if (opc == 3) { -#if defined(HOST_WORDS_BIGENDIAN) - data_reg1 = data_regh; - data_reg2 = data_regl; + tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, + s_bits, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 1, opc, data_regl, data_regh, addr_regl, addr_regh, + mem_index, s->code_ptr, label_ptr); #else - data_reg1 = data_regl; - data_reg2 = data_regh; -#endif + if (GUEST_BASE == 0 && data_regl != addr_regl) { + base = addr_regl; + } else if (GUEST_BASE == (int16_t)GUEST_BASE) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); } else { - data_reg1 = data_regl; - data_reg2 = 0; + tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +#endif +} -#if defined(CONFIG_SOFTMMU) - tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_meml); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl); - -# if TARGET_LONG_BITS == 64 - label3_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BNE, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); - - tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_memh); - - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT); - tcg_out_nop(s); - - reloc_pc16(label3_ptr, s->code_ptr); -# else - label1_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT); - tcg_out_nop(s); -# endif +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, + TCGReg base, TCGMemOp opc) +{ + switch (opc) { + case MO_8: + tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); + break; - /* slow path */ - arg_num = 0; - tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0); -# if TARGET_LONG_BITS == 64 - tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh); -# else - tcg_out_call_iarg_reg32(s, &arg_num, addr_regl); -# endif - switch(opc) { - case 0: - tcg_out_call_iarg_reg8(s, &arg_num, data_regl); + case MO_16 | MO_BSWAP: + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); + tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_16: + tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); break; - case 1: - tcg_out_call_iarg_reg16(s, &arg_num, data_regl); + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + datalo = TCG_TMP1; + /* FALLTHRU */ + case MO_32: + tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); break; - case 2: - tcg_out_call_iarg_reg32(s, &arg_num, data_regl); + + case MO_64 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP1, datalo); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); + tcg_out_bswap32(s, TCG_TMP1, datahi); + tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); break; - case 3: - tcg_out_call_iarg_reg64(s, &arg_num, data_regl, data_regh); + case MO_64: + tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); + tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); break; + default: tcg_abort(); } - tcg_out_call_iarg_imm32(s, &arg_num, mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); - - label2_ptr = s->code_ptr; - tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); - tcg_out_nop(s); +} - /* label1: fast path */ - reloc_pc16(label1_ptr, s->code_ptr); +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, + TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, + bool cbh, bool is_sub) +{ + TCGReg th = TCG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. */ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } - tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl); -#else - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, addr_regl, GUEST_BASE); + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, GUEST_BASE); - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl); + th = ah; } -#endif - - switch(opc) { - case 0: - tcg_out_opc_imm(s, OPC_SB, data_reg1, TCG_REG_A0, 0); - break; - case 1: - if (TCG_NEED_BSWAP) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff); - tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0); - tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0); + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); } else { - tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); + tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); } - break; - case 2: - if (TCG_NEED_BSWAP) { - tcg_out_bswap32(s, TCG_REG_T0, data_reg1); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 0); - } else { - tcg_out_opc_imm(s, OPC_SW, data_reg1, TCG_REG_A0, 0); - } - break; - case 3: - if (TCG_NEED_BSWAP) { - tcg_out_bswap32(s, TCG_REG_T0, data_reg2); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 0); - tcg_out_bswap32(s, TCG_REG_T0, data_reg1); - tcg_out_opc_imm(s, OPC_SW, TCG_REG_T0, TCG_REG_A0, 4); + tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); } else { - tcg_out_opc_imm(s, OPC_SW, data_reg1, TCG_REG_A0, 0); - tcg_out_opc_imm(s, OPC_SW, data_reg2, TCG_REG_A0, 4); + tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); } - break; - default: - tcg_abort(); + tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); } +} +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh, base; + TCGMemOp opc; #if defined(CONFIG_SOFTMMU) - reloc_pc16(label2_ptr, s->code_ptr); + tcg_insn_unit *label_ptr[2]; + int mem_index; + TCGMemOp s_bits; #endif -} -static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) -{ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (intptr_t)target); - tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); - tcg_out_nop(s); + data_regl = *args++; + data_regh = (is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); + opc = *args++; + +#if defined(CONFIG_SOFTMMU) + mem_index = *args; + s_bits = opc & 3; + + /* Note that we eliminated the helper's address argument, + so we can reuse that for the base. */ + base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2); + tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, + s_bits, label_ptr, 1); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, opc, data_regl, data_regh, addr_regl, addr_regh, + mem_index, s->code_ptr, label_ptr); +#else + if (GUEST_BASE == 0) { + base = addr_regl; + } else { + base = TCG_REG_A0; + if (GUEST_BASE == (int16_t)GUEST_BASE) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); + } + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +#endif } static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { - switch(opc) { + MIPSInsn i1, i2; + TCGArg a0, a1, a2; + int c2; + + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_V0, args[0]); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); - tcg_out_nop(s); + { + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, + (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); + } break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ - tcg_abort(); + s->tb_jmp_offset[a0] = tcg_current_code_size(s); + /* Avoid clobbering the address during retranslation. */ + tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); } else { /* indirect jump method */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, - (uintptr_t)(s->tb_next + args[0])); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_AT, TCG_REG_AT, 0); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, + (uintptr_t)(s->tb_next + a0)); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } tcg_out_nop(s); - s->tb_next_offset[args[0]] = tcg_current_code_size(s); + s->tb_next_offset[a0] = tcg_current_code_size(s); break; case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]); + tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, a0); break; case INDEX_op_ld8u_i32: - tcg_out_ldst(s, OPC_LBU, args[0], args[1], args[2]); - break; + i1 = OPC_LBU; + goto do_ldst; case INDEX_op_ld8s_i32: - tcg_out_ldst(s, OPC_LB, args[0], args[1], args[2]); - break; + i1 = OPC_LB; + goto do_ldst; case INDEX_op_ld16u_i32: - tcg_out_ldst(s, OPC_LHU, args[0], args[1], args[2]); - break; + i1 = OPC_LHU; + goto do_ldst; case INDEX_op_ld16s_i32: - tcg_out_ldst(s, OPC_LH, args[0], args[1], args[2]); - break; + i1 = OPC_LH; + goto do_ldst; case INDEX_op_ld_i32: - tcg_out_ldst(s, OPC_LW, args[0], args[1], args[2]); - break; + i1 = OPC_LW; + goto do_ldst; case INDEX_op_st8_i32: - tcg_out_ldst(s, OPC_SB, args[0], args[1], args[2]); - break; + i1 = OPC_SB; + goto do_ldst; case INDEX_op_st16_i32: - tcg_out_ldst(s, OPC_SH, args[0], args[1], args[2]); - break; + i1 = OPC_SH; + goto do_ldst; case INDEX_op_st_i32: - tcg_out_ldst(s, OPC_SW, args[0], args[1], args[2]); + i1 = OPC_SW; + do_ldst: + tcg_out_ldst(s, i1, a0, a1, a2); break; case INDEX_op_add_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, args[0], args[1], args[2]); - } - break; - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_AT, args[2], args[4]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_AT, args[2], args[4]); - } - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_T0, TCG_REG_AT, args[2]); - if (const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], args[5]); - } else { - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[3], args[5]); + i1 = OPC_ADDU, i2 = OPC_ADDIU; + goto do_binary; + case INDEX_op_or_i32: + i1 = OPC_OR, i2 = OPC_ORI; + goto do_binary; + case INDEX_op_xor_i32: + i1 = OPC_XOR, i2 = OPC_XORI; + do_binary: + if (c2) { + tcg_out_opc_imm(s, i2, a0, a1, a2); + break; } - tcg_out_opc_reg(s, OPC_ADDU, args[1], args[1], TCG_REG_T0); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); + do_binaryv: + tcg_out_opc_reg(s, i1, a0, a1, a2); break; + case INDEX_op_sub_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[0], args[1], -args[2]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); + break; } - break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_AT, args[2], -args[4]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, args[2], args[4]); - } - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_T0, args[2], TCG_REG_AT); - if (const_args[5]) { - tcg_out_opc_imm(s, OPC_ADDIU, args[1], args[3], -args[5]); - } else { - tcg_out_opc_reg(s, OPC_SUBU, args[1], args[3], args[5]); + i1 = OPC_SUBU; + goto do_binary; + case INDEX_op_and_i32: + if (c2 && a2 != (uint16_t)a2) { + int msb = ctz32(~a2) - 1; + assert(use_mips32r2_instructions); + assert(is_p2m1(a2)); + tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); + break; } - tcg_out_opc_reg(s, OPC_SUBU, args[1], args[1], TCG_REG_T0); - tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); - break; + i1 = OPC_AND, i2 = OPC_ANDI; + goto do_binary; + case INDEX_op_nor_i32: + i1 = OPC_NOR; + goto do_binaryv; + case INDEX_op_mul_i32: if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + break; } - break; - case INDEX_op_muls2_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, args[2], args[3]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); - break; - case INDEX_op_mulu2_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); - break; + i1 = OPC_MULT, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_mulsh_i32: - tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); - break; + i1 = OPC_MULT, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_muluh_i32: - tcg_out_opc_reg(s, OPC_MULTU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); - break; + i1 = OPC_MULTU, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_div_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - break; + i1 = OPC_DIV, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_divu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); - break; + i1 = OPC_DIVU, i2 = OPC_MFLO; + goto do_hilo1; case INDEX_op_rem_i32: - tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); - break; + i1 = OPC_DIV, i2 = OPC_MFHI; + goto do_hilo1; case INDEX_op_remu_i32: - tcg_out_opc_reg(s, OPC_DIVU, 0, args[1], args[2]); - tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + i1 = OPC_DIVU, i2 = OPC_MFHI; + do_hilo1: + tcg_out_opc_reg(s, i1, 0, a1, a2); + tcg_out_opc_reg(s, i2, a0, 0, 0); break; - case INDEX_op_and_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ANDI, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_AND, args[0], args[1], args[2]); - } - break; - case INDEX_op_or_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_ORI, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_OR, args[0], args[1], args[2]); - } - break; - case INDEX_op_nor_i32: - tcg_out_opc_reg(s, OPC_NOR, args[0], args[1], args[2]); + case INDEX_op_muls2_i32: + i1 = OPC_MULT; + goto do_hilo2; + case INDEX_op_mulu2_i32: + i1 = OPC_MULTU; + do_hilo2: + tcg_out_opc_reg(s, i1, 0, a2, args[3]); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); break; + case INDEX_op_not_i32: - tcg_out_opc_reg(s, OPC_NOR, args[0], TCG_REG_ZERO, args[1]); - break; - case INDEX_op_xor_i32: - if (const_args[2]) { - tcg_out_opc_imm(s, OPC_XORI, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_XOR, args[0], args[1], args[2]); - } + i1 = OPC_NOR; + goto do_unary; + case INDEX_op_bswap16_i32: + i1 = OPC_WSBH; + goto do_unary; + case INDEX_op_ext8s_i32: + i1 = OPC_SEB; + goto do_unary; + case INDEX_op_ext16s_i32: + i1 = OPC_SEH; + do_unary: + tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); break; case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SRA, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_SRAV, args[0], args[2], args[1]); - } - break; + i1 = OPC_SRAV, i2 = OPC_SRA; + goto do_shift; case INDEX_op_shl_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SLL, args[0], args[1], args[2]); - } else { - tcg_out_opc_reg(s, OPC_SLLV, args[0], args[2], args[1]); - } - break; + i1 = OPC_SLLV, i2 = OPC_SLL; + goto do_shift; case INDEX_op_shr_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_SRL, args[0], args[1], args[2]); + i1 = OPC_SRLV, i2 = OPC_SRL; + goto do_shift; + case INDEX_op_rotr_i32: + i1 = OPC_ROTRV, i2 = OPC_ROTR; + do_shift: + if (c2) { + tcg_out_opc_sa(s, i2, a0, a1, a2); } else { - tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, i1, a0, a2, a1); } break; case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]); - } else { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32); - tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]); - tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]); + if (c2) { + tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); } else { - tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]); + tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); + tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); } break; - case INDEX_op_bswap16_i32: - tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); - break; case INDEX_op_bswap32_i32: - tcg_out_opc_reg(s, OPC_WSBH, args[0], 0, args[1]); - tcg_out_opc_sa(s, OPC_ROTR, args[0], args[0], 16); - break; - - case INDEX_op_ext8s_i32: - tcg_out_opc_reg(s, OPC_SEB, args[0], 0, args[1]); - break; - case INDEX_op_ext16s_i32: - tcg_out_opc_reg(s, OPC_SEH, args[0], 0, args[1]); + tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); + tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); break; case INDEX_op_deposit_i32: - tcg_out_opc_imm(s, OPC_INS, args[0], args[2], - ((args[3] + args[4] - 1) << 11) | (args[3] << 6)); + tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); break; case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], args[3]); + tcg_out_brcond(s, a2, a0, a1, args[3]); break; case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]); + tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], args[5]); break; case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]); + tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); break; case INDEX_op_setcond_i32: - tcg_out_setcond(s, args[3], args[0], args[1], args[2]); + tcg_out_setcond(s, args[3], a0, a1, a2); break; case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], args[0], args[1], args[2], args[3], args[4]); + tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, 0); - break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, 0 | 4); - break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, 1); + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args, false); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, 1 | 4); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args, true); break; - case INDEX_op_qemu_ld32: - tcg_out_qemu_ld(s, args, 2); + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args, false); break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, 3); + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args, true); break; - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, 0); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, 1); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, 2); + + case INDEX_op_add2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], false); break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, 3); + case INDEX_op_sub2_i32: + tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], + const_args[4], const_args[5], true); break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ @@ -1561,9 +1598,9 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, - { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, + { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, - { INDEX_op_and_i32, { "r", "rZ", "rI" } }, + { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, { INDEX_op_not_i32, { "r", "rZ" } }, { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, @@ -1588,34 +1625,20 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, - { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, - { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, + { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, #if TARGET_LONG_BITS == 32 - { INDEX_op_qemu_ld8u, { "L", "lZ" } }, - { INDEX_op_qemu_ld8s, { "L", "lZ" } }, - { INDEX_op_qemu_ld16u, { "L", "lZ" } }, - { INDEX_op_qemu_ld16s, { "L", "lZ" } }, - { INDEX_op_qemu_ld32, { "L", "lZ" } }, - { INDEX_op_qemu_ld64, { "L", "L", "lZ" } }, - - { INDEX_op_qemu_st8, { "SZ", "SZ" } }, - { INDEX_op_qemu_st16, { "SZ", "SZ" } }, - { INDEX_op_qemu_st32, { "SZ", "SZ" } }, - { INDEX_op_qemu_st64, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, #else - { INDEX_op_qemu_ld8u, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld8s, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld16u, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld16s, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld32, { "L", "lZ", "lZ" } }, - { INDEX_op_qemu_ld64, { "L", "L", "lZ", "lZ" } }, - - { INDEX_op_qemu_st8, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st16, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st32, { "SZ", "SZ", "SZ" } }, - { INDEX_op_qemu_st64, { "SZ", "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, + { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, + { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, #endif { -1 }, }; @@ -1629,7 +1652,7 @@ static int tcg_target_callee_save_regs[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, - TCG_REG_FP, + TCG_REG_S8, TCG_REG_RA, /* should be last for ABI compliance */ }; @@ -1761,6 +1784,7 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_A1) | (1 << TCG_REG_A2) | (1 << TCG_REG_A3) | + (1 << TCG_REG_T0) | (1 << TCG_REG_T1) | (1 << TCG_REG_T2) | (1 << TCG_REG_T3) | @@ -1775,11 +1799,18 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_AT); /* internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ tcg_add_target_add_op_defs(mips_op_defs); } + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ + uint32_t *ptr = (uint32_t *)jmp_addr; + *ptr = deposit32(*ptr, 0, 26, addr >> 2); + flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c6d2267d77..b5face8b4d 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -60,16 +60,14 @@ typedef enum { TCG_REG_K1, TCG_REG_GP, TCG_REG_SP, - TCG_REG_FP, + TCG_REG_S8, TCG_REG_RA, -} TCGReg; -#define TCG_CT_CONST_ZERO 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S16 0x400 + TCG_REG_CALL_STACK = TCG_REG_SP, + TCG_AREG0 = TCG_REG_S0, +} TCGReg; /* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 8 #define TCG_TARGET_CALL_STACK_OFFSET 16 #define TCG_TARGET_CALL_ALIGN_ARGS 1 @@ -120,15 +118,13 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_new_ldst 0 +#define TCG_TARGET_HAS_new_ldst 1 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */ #define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ #define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ -#define TCG_AREG0 TCG_REG_S0 - #ifdef __OpenBSD__ #include <machine/sysarch.h> #else diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 9b39231c15..375e590d2b 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -544,7 +544,10 @@ static void tcg_out_movi(TCGContext *s, TCGType type, static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) { + uint8_t *old_code_ptr = s->code_ptr; + tcg_out_op_t(s, INDEX_op_call); tcg_out_ri(s, 1, (uintptr_t)arg); + old_code_ptr[1] = s->code_ptr - old_code_ptr; } static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, diff --git a/tests/tcg/lm32/Makefile b/tests/tcg/lm32/Makefile index 8e5d405459..57e7363b2c 100644 --- a/tests/tcg/lm32/Makefile +++ b/tests/tcg/lm32/Makefile @@ -3,7 +3,7 @@ CROSS=lm32-elf- SIM = qemu-system-lm32 -SIMFLAGS = -M lm32-evr -nographic -device lm32-sys -net none -kernel +SIMFLAGS = -M lm32-evr -nographic -semihosting -net none -kernel CC = $(CROSS)gcc AS = $(CROSS)as @@ -18,6 +18,7 @@ LDFLAGS = -T$(TSRC_PATH)/linker.ld ASFLAGS += -Wa,-I,$(TSRC_PATH)/ CRT = crt.o +HELPER = helper.o TESTCASES += test_add.tst TESTCASES += test_addi.tst TESTCASES += test_and.tst @@ -91,15 +92,15 @@ all: build %.o: $(TSRC_PATH)/%.S $(AS) $(ASFLAGS) -c $< -o $@ -%.tst: %.o $(TSRC_PATH)/macros.inc $(CRT) - $(LD) $(LDFLAGS) $(NOSTDFLAGS) $(CRT) $< -o $@ +%.tst: %.o $(TSRC_PATH)/macros.inc $(CRT) $(HELPER) + $(LD) $(LDFLAGS) $(NOSTDFLAGS) $(CRT) $(HELPER) $< -o $@ -build: $(CRT) $(TESTCASES) +build: $(TESTCASES) check: $(TESTCASES:test_%.tst=check_%) -check_%: test_%.tst $(CRT) $(SYS) - $(SIM) $(SIMFLAGS) $< +check_%: test_%.tst + @$(SIM) $(SIMFLAGS) $< clean: - $(RM) -fr $(TESTCASES) $(CRT) + $(RM) -fr $(TESTCASES) $(CRT) $(HELPER) diff --git a/tests/tcg/lm32/crt.S b/tests/tcg/lm32/crt.S index 5f9cfd95d3..fc437a3de1 100644 --- a/tests/tcg/lm32/crt.S +++ b/tests/tcg/lm32/crt.S @@ -8,9 +8,9 @@ _reset_handler: ori r1, r1, lo(_start) wcsr eba, r1 wcsr deba, r1 + mvhi sp, hi(_fstack) + ori sp, sp, lo(_fstack) bi _main - nop - nop _breakpoint_handler: ori r25, r25, 1 diff --git a/tests/tcg/lm32/helper.S b/tests/tcg/lm32/helper.S new file mode 100644 index 0000000000..3351d41e84 --- /dev/null +++ b/tests/tcg/lm32/helper.S @@ -0,0 +1,65 @@ +.text +.global _start, _write, _exit +.global _tc_fail, _tc_pass + +_write: + addi sp, sp, -4 + sw (sp+4), r8 + mvi r8, 5 + scall + lw r8, (sp+4) + addi sp, sp, 4 + ret + +_exit: + mvi r8, 1 + scall +1: + bi 1b + +_tc_pass: +.data +1: + .ascii "OK\n" +2: +.text + addi sp, sp, -16 + sw (sp+4), ra + sw (sp+8), r1 + sw (sp+12), r2 + sw (sp+16), r3 + mvi r1, 1 + mvhi r2, hi(1b) + ori r2, r2, lo(1b) + mvi r3, (2b - 1b) + calli _write + lw r3, (sp+16) + lw r2, (sp+12) + lw r1, (sp+8) + lw ra, (sp+4) + addi sp, sp, 16 + ret + +_tc_fail: +.data +1: + .ascii "FAILED\n" +2: +.text + addi sp, sp, -16 + sw (sp+4), ra + sw (sp+8), r1 + sw (sp+12), r2 + sw (sp+16), r3 + sw (sp+4), ra + mvi r1, 1 + mvhi r2, hi(1b) + ori r2, r2, lo(1b) + mvi r3, (2b - 1b) + calli _write + lw r3, (sp+16) + lw r2, (sp+12) + lw r1, (sp+8) + lw ra, (sp+4) + addi sp, sp, 16 + ret diff --git a/tests/tcg/lm32/macros.inc b/tests/tcg/lm32/macros.inc index 367c7c50d8..360ad53c9f 100644 --- a/tests/tcg/lm32/macros.inc +++ b/tests/tcg/lm32/macros.inc @@ -1,12 +1,26 @@ +.equ MAX_TESTNAME_LEN, 32 .macro test_name name .data tn_\name: - .asciz "\name" + .ascii "\name" + .space MAX_TESTNAME_LEN - (. - tn_\name), ' ' .text - mvhi r13, hi(tn_\name) - ori r13, r13, lo(tn_\name) - sw (r12+8), r13 + .global \name +\name: + addi sp, sp, -12 + sw (sp+4), r1 + sw (sp+8), r2 + sw (sp+12), r3 + mvi r1, 1 + mvhi r2, hi(tn_\name) + ori r2, r2, lo(tn_\name) + mvi r3, MAX_TESTNAME_LEN + calli _write + lw r3, (sp+12) + lw r2, (sp+8) + lw r1, (sp+4) + addi sp, sp, 12 .endm .macro load reg val @@ -15,13 +29,12 @@ tn_\name: .endm .macro tc_pass - mvi r13, 0 - sw (r12+4), r13 + calli _tc_pass .endm .macro tc_fail - mvi r13, 1 - sw (r12+4), r13 + addi r12, r12, 1 + calli _tc_fail .endm .macro check_r3 val @@ -63,14 +76,12 @@ tn_\name: .global _main .text _main: - mvhi r12, hi(0xffff0000) # base address of test block - ori r12, r12, lo(0xffff0000) + mvi r12, 0 .endm .macro end - sw (r12+0), r0 -1: - bi 1b + mv r1, r12 + calli _exit .endm # base + diff --git a/tests/tcg/lm32/test_lb.S b/tests/tcg/lm32/test_lb.S index f84d21ead9..d677eea4c4 100644 --- a/tests/tcg/lm32/test_lb.S +++ b/tests/tcg/lm32/test_lb.S @@ -8,10 +8,12 @@ lb r3, (r1+0) check_r3 0x7e test_name LB_2 +load r1 data lb r3, (r1+1) check_r3 0x7f test_name LB_3 +load r1 data lb r3, (r1+-1) check_r3 0x7d @@ -21,10 +23,12 @@ lb r3, (r1+0) check_r3 0xfffffffe test_name LB_5 +load r1 data_msb lb r3, (r1+1) check_r3 0xffffffff test_name LB_6 +load r1 data_msb lb r3, (r1+-1) check_r3 0xfffffffd diff --git a/tests/tcg/lm32/test_lbu.S b/tests/tcg/lm32/test_lbu.S index 4c1786ad71..dc5d5f67d3 100644 --- a/tests/tcg/lm32/test_lbu.S +++ b/tests/tcg/lm32/test_lbu.S @@ -8,10 +8,12 @@ lbu r3, (r1+0) check_r3 0x7e test_name LBU_2 +load r1 data lbu r3, (r1+1) check_r3 0x7f test_name LBU_3 +load r1 data lbu r3, (r1+-1) check_r3 0x7d @@ -21,10 +23,12 @@ lbu r3, (r1+0) check_r3 0xfe test_name LBU_5 +load r1 data_msb lbu r3, (r1+1) check_r3 0xff test_name LBU_6 +load r1 data_msb lbu r3, (r1+-1) check_r3 0xfd diff --git a/tests/tcg/lm32/test_lh.S b/tests/tcg/lm32/test_lh.S index e57d9e35cf..397996bddd 100644 --- a/tests/tcg/lm32/test_lh.S +++ b/tests/tcg/lm32/test_lh.S @@ -8,10 +8,12 @@ lh r3, (r1+0) check_r3 0x7e7f test_name LH_2 +load r1 data lh r3, (r1+2) check_r3 0x7071 test_name LH_3 +load r1 data lh r3, (r1+-2) check_r3 0x7c7d @@ -21,10 +23,12 @@ lh r3, (r1+0) check_r3 0xfffffeff test_name LH_5 +load r1 data_msb lh r3, (r1+2) check_r3 0xfffff0f1 test_name LH_6 +load r1 data_msb lh r3, (r1+-2) check_r3 0xfffffcfd diff --git a/tests/tcg/lm32/test_lhu.S b/tests/tcg/lm32/test_lhu.S index e648775d94..8de7c52560 100644 --- a/tests/tcg/lm32/test_lhu.S +++ b/tests/tcg/lm32/test_lhu.S @@ -8,10 +8,12 @@ lhu r3, (r1+0) check_r3 0x7e7f test_name LHU_2 +load r1 data lhu r3, (r1+2) check_r3 0x7071 test_name LHU_3 +load r1 data lhu r3, (r1+-2) check_r3 0x7c7d @@ -21,10 +23,12 @@ lhu r3, (r1+0) check_r3 0xfeff test_name LHU_5 +load r1 data_msb lhu r3, (r1+2) check_r3 0xf0f1 test_name LHU_6 +load r1 data_msb lhu r3, (r1+-2) check_r3 0xfcfd diff --git a/tests/tcg/lm32/test_lw.S b/tests/tcg/lm32/test_lw.S index f8c919d2b8..996e5f8c88 100644 --- a/tests/tcg/lm32/test_lw.S +++ b/tests/tcg/lm32/test_lw.S @@ -8,10 +8,12 @@ lw r3, (r1+0) check_r3 0x7e7f7071 test_name LW_2 +load r1 data lw r3, (r1+4) check_r3 0x72737475 test_name LW_3 +load r1 data lw r3, (r1+-4) check_r3 0x7a7b7c7d diff --git a/tests/tcg/lm32/test_sb.S b/tests/tcg/lm32/test_sb.S index 89e39d621d..b15a89d342 100644 --- a/tests/tcg/lm32/test_sb.S +++ b/tests/tcg/lm32/test_sb.S @@ -9,11 +9,13 @@ sb (r1+0), r2 check_mem data 0xaa000000 test_name SB_2 +load r1 data load r2 0xf0f1f2bb sb (r1+1), r2 check_mem data 0xaabb0000 test_name SB_3 +load r1 data load r2 0xf0f1f2cc sb (r1+-1), r2 check_mem data0 0x000000cc diff --git a/tests/tcg/lm32/test_scall.S b/tests/tcg/lm32/test_scall.S index b442e32374..46032f841d 100644 --- a/tests/tcg/lm32/test_scall.S +++ b/tests/tcg/lm32/test_scall.S @@ -5,6 +5,10 @@ start test_name SCALL_1 mvi r1, 1 wcsr IE, r1 +# we are running in a semi hosted environment +# therefore we have to set r8 to some unused system +# call +mvi r8, 0 insn: scall check_excp 64 diff --git a/tests/tcg/lm32/test_sh.S b/tests/tcg/lm32/test_sh.S index ea8b3f2067..bba10224f6 100644 --- a/tests/tcg/lm32/test_sh.S +++ b/tests/tcg/lm32/test_sh.S @@ -9,11 +9,13 @@ sh (r1+0), r2 check_mem data 0xaaaa0000 test_name SH_2 +load r1 data load r2 0xf0f1bbbb sh (r1+2), r2 check_mem data 0xaaaabbbb test_name SH_3 +load r1 data load r2 0xf0f1cccc sh (r1+-2), r2 check_mem data0 0x0000cccc diff --git a/tests/tcg/lm32/test_sw.S b/tests/tcg/lm32/test_sw.S index d1fdadce61..2b1c017e7b 100644 --- a/tests/tcg/lm32/test_sw.S +++ b/tests/tcg/lm32/test_sw.S @@ -9,16 +9,19 @@ sw (r1+0), r2 check_mem data 0xaabbccdd test_name SW_2 +load r1 data load r2 0x00112233 sw (r1+4), r2 check_mem data1 0x00112233 test_name SW_3 +load r1 data load r2 0x44556677 sw (r1+-4), r2 check_mem data0 0x44556677 test_name SW_4 +load r1 data sw (r1+0), r1 lw r3, (r1+0) check_r3 data diff --git a/trace-events b/trace-events index b6d289d720..7a415d952a 100644 --- a/trace-events +++ b/trace-events @@ -627,9 +627,6 @@ lm32_uart_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" lm32_uart_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" lm32_uart_irq_state(int level) "irq state %d" -# hw/misc/lm32_sys.c -lm32_sys_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" - # hw/scsi/megasas.c megasas_init_firmware(uint64_t pa) "pa %" PRIx64 " " megasas_init_queue(uint64_t queue_pa, int queue_len, uint64_t head, uint64_t tail, uint32_t flags) "queue at %" PRIx64 " len %d head %" PRIx64 " tail %" PRIx64 " flags %x" diff --git a/translate-all.c b/translate-all.c index 5549a85ed5..6b7b46e761 100644 --- a/translate-all.c +++ b/translate-all.c @@ -475,6 +475,10 @@ static inline PageDesc *page_find(tb_page_addr_t index) #elif defined(__s390x__) /* We have a +- 4GB range on the branches; leave some slop. */ # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) +#elif defined(__mips__) + /* We have a 256MB branch region, but leave room to make sure the + main executable is also within that region. */ +# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) #else # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #endif @@ -509,14 +513,47 @@ static inline size_t size_code_gen_buffer(size_t tb_size) return tb_size; } +#ifdef __mips__ +/* In order to use J and JAL within the code_gen_buffer, we require + that the buffer not cross a 256MB boundary. */ +static inline bool cross_256mb(void *addr, size_t size) +{ + return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & 0xf0000000; +} + +/* We weren't able to allocate a buffer without crossing that boundary, + so make do with the larger portion of the buffer that doesn't cross. + Returns the new base of the buffer, and adjusts code_gen_buffer_size. */ +static inline void *split_cross_256mb(void *buf1, size_t size1) +{ + void *buf2 = (void *)(((uintptr_t)buf1 + size1) & 0xf0000000); + size_t size2 = buf1 + size1 - buf2; + + size1 = buf2 - buf1; + if (size1 < size2) { + size1 = size2; + buf1 = buf2; + } + + tcg_ctx.code_gen_buffer_size = size1; + return buf1; +} +#endif + #ifdef USE_STATIC_CODE_GEN_BUFFER static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned(CODE_GEN_ALIGN))); static inline void *alloc_code_gen_buffer(void) { - map_exec(static_code_gen_buffer, tcg_ctx.code_gen_buffer_size); - return static_code_gen_buffer; + void *buf = static_code_gen_buffer; +#ifdef __mips__ + if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { + buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size); + } +#endif + map_exec(buf, tcg_ctx.code_gen_buffer_size); + return buf; } #elif defined(USE_MMAP) static inline void *alloc_code_gen_buffer(void) @@ -545,20 +582,76 @@ static inline void *alloc_code_gen_buffer(void) start = 0x40000000ul; # elif defined(__s390x__) start = 0x90000000ul; +# elif defined(__mips__) + /* ??? We ought to more explicitly manage layout for softmmu too. */ +# ifdef CONFIG_USER_ONLY + start = 0x68000000ul; +# elif _MIPS_SIM == _ABI64 + start = 0x128000000ul; +# else + start = 0x08000000ul; +# endif # endif buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size, PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0); - return buf == MAP_FAILED ? NULL : buf; + if (buf == MAP_FAILED) { + return NULL; + } + +#ifdef __mips__ + if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { + /* Try again, with the original still mapped, to avoid re-aquiring + that 256mb crossing. This time don't specify an address. */ + size_t size2, size1 = tcg_ctx.code_gen_buffer_size; + void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC, + flags, -1, 0); + if (buf2 != MAP_FAILED) { + if (!cross_256mb(buf2, size1)) { + /* Success! Use the new buffer. */ + munmap(buf, size1); + return buf2; + } + /* Failure. Work with what we had. */ + munmap(buf2, size1); + } + + /* Split the original buffer. Free the smaller half. */ + buf2 = split_cross_256mb(buf, size1); + size2 = tcg_ctx.code_gen_buffer_size; + munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2); + return buf2; + } +#endif + + return buf; } #else static inline void *alloc_code_gen_buffer(void) { void *buf = g_malloc(tcg_ctx.code_gen_buffer_size); - if (buf) { - map_exec(buf, tcg_ctx.code_gen_buffer_size); + if (buf == NULL) { + return NULL; } + +#ifdef __mips__ + if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { + void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size); + if (buf2 != NULL && !cross_256mb(buf2, size1)) { + /* Success! Use the new buffer. */ + free(buf); + buf = buf2; + } else { + /* Failure. Work with what we had. Since this is malloc + and not mmap, we can't free the other half. */ + free(buf2); + buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size); + } + } +#endif + + map_exec(buf, tcg_ctx.code_gen_buffer_size); return buf; } #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */ |