35 files changed, 1765 insertions, 832 deletions
@@ -3609,7 +3609,7 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak
 echo "ARCH=$ARCH" >> $config_host_mak
 
 case "$cpu" in
-  i386|x86_64|ppc)
+  arm|i386|x86_64|ppc)
     # The TCG interpreter currently does not support ld/st optimization.
     if test "$tcg_interpreter" = "no" ; then
         echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_host_mak
@@ -227,6 +227,7 @@ void target_disas(FILE *out, CPUArchState *env, target_ulong code,
         s.info.mach = bfd_mach_ppc;
 #endif
     }
+    s.info.disassembler_options = (char *)"any";
     print_insn = print_insn_ppc;
 #elif defined(TARGET_M68K)
     print_insn = print_insn_m68k;
diff --git a/disas/arm.c b/disas/arm.c
index 4927d8ad7c..76e97a8957 100644
--- a/disas/arm.c
+++ b/disas/arm.c
@@ -819,6 +819,10 @@ static const struct opcode32 arm_opcodes[] =
   {ARM_EXT_V3M, 0x00800090, 0x0fa000f0, "%22?sumull%20's%c\t%12-15r, %16-19r, %0-3r, %8-11r"},
   {ARM_EXT_V3M, 0x00a00090, 0x0fa000f0, "%22?sumlal%20's%c\t%12-15r, %16-19r, %0-3r, %8-11r"},
 
+  /* IDIV instructions. */
+  {ARM_EXT_DIV, 0x0710f010, 0x0ff0f0f0, "sdiv%c\t%16-19r, %0-3r, %8-11r"},
+  {ARM_EXT_DIV, 0x0730f010, 0x0ff0f0f0, "udiv%c\t%16-19r, %0-3r, %8-11r"},
+
   /* V7 instructions. */
   {ARM_EXT_V7, 0xf450f000, 0xfd70f000, "pli\t%P"},
   {ARM_EXT_V7, 0x0320f0f0, 0x0ffffff0, "dbg%c\t#%0-3d"},
diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c
index afcec1f182..2993848889 100644
--- a/hw/char/spapr_vty.c
+++ b/hw/char/spapr_vty.c
@@ -12,16 +12,20 @@ typedef struct VIOsPAPRVTYDevice {
     uint8_t buf[VTERM_BUFSIZE];
 } VIOsPAPRVTYDevice;
 
+#define TYPE_VIO_SPAPR_VTY_DEVICE "spapr-vty"
+#define VIO_SPAPR_VTY_DEVICE(obj) \
+    OBJECT_CHECK(VIOsPAPRVTYDevice, (obj), TYPE_VIO_SPAPR_VTY_DEVICE)
+
 static int vty_can_receive(void *opaque)
 {
-    VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)opaque;
+    VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(opaque);
 
     return (dev->in - dev->out) < VTERM_BUFSIZE;
 }
 
 static void vty_receive(void *opaque, const uint8_t *buf, int size)
 {
-    VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)opaque;
+    VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(opaque);
     int i;
 
     if ((dev->in == dev->out) && size) {
@@ -36,7 +40,7 @@ static void vty_receive(void *opaque, const uint8_t *buf, int size)
 
 static int vty_getchars(VIOsPAPRDevice *sdev, uint8_t *buf, int max)
 {
-    VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev;
+    VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(sdev);
     int n = 0;
 
     while ((n < max) && (dev->out != dev->in)) {
@@ -48,7 +52,7 @@ static int vty_getchars(VIOsPAPRDevice *sdev, uint8_t *buf, int max)
 
 void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len)
 {
-    VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev;
+    VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(sdev);
 
     /* FIXME: should check the qemu_chr_fe_write() return value */
     qemu_chr_fe_write(dev->chardev, buf, len);
@@ -56,7 +60,7 @@ void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len)
 
 static int spapr_vty_init(VIOsPAPRDevice *sdev)
 {
-    VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev;
+    VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(sdev);
 
     if (!dev->chardev) {
         fprintf(stderr, "spapr-vty: Can't create vty without a chardev!\n");
@@ -151,7 +155,7 @@ static void spapr_vty_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo spapr_vty_info = {
-    .name = "spapr-vty",
+    .name = TYPE_VIO_SPAPR_VTY_DEVICE,
     .parent = TYPE_VIO_SPAPR_DEVICE,
     .instance_size = sizeof(VIOsPAPRVTYDevice),
     .class_init = spapr_vty_class_init,
@@ -177,7 +181,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus)
             continue;
         }
 
-        sdev = DO_UPCAST(VIOsPAPRDevice, qdev, iter);
+        sdev = VIO_SPAPR_DEVICE(iter);
 
         /* First VTY we've found, so it is selected for now */
         if (!selected) {
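The spapr-vty hunks above replace raw C pointer casts with QOM cast macros built on OBJECT_CHECK. As a minimal sketch of that pattern (the device name and struct below are hypothetical, invented for illustration, not part of the patch):

#include "qom/object.h"

/* Hypothetical device, for illustration only. */
typedef struct MyDevice {
    DeviceState parent;
    int counter;
} MyDevice;

#define TYPE_MY_DEVICE "my-device"
#define MY_DEVICE(obj) \
    OBJECT_CHECK(MyDevice, (obj), TYPE_MY_DEVICE)

static void my_device_poke(void *opaque)
{
    /* Unlike (MyDevice *)opaque, this aborts with a diagnostic if
     * opaque does not actually point at a "my-device" instance. */
    MyDevice *dev = MY_DEVICE(opaque);

    dev->counter++;
}

The payoff is type safety: a raw cast silently misinterprets memory when the opaque pointer is wrong, while the QOM cast fails loudly at the call site.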
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 34332f2452..3150add3c1 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -73,6 +73,10 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RX_BDS_OFF 16
 #define VLAN_MAX_BUFS ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
 
+#define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan"
+#define VIO_SPAPR_VLAN_DEVICE(obj) \
+    OBJECT_CHECK(VIOsPAPRVLANDevice, (obj), TYPE_VIO_SPAPR_VLAN_DEVICE)
+
 typedef struct VIOsPAPRVLANDevice {
     VIOsPAPRDevice sdev;
     NICConf nicconf;
@@ -93,8 +97,8 @@ static int spapr_vlan_can_receive(NetClientState *nc)
 static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
                                   size_t size)
 {
-    VIOsPAPRDevice *sdev = qemu_get_nic_opaque(nc);
-    VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
+    VIOsPAPRVLANDevice *dev = qemu_get_nic_opaque(nc);
+    VIOsPAPRDevice *sdev = VIO_SPAPR_DEVICE(dev);
     vlan_bd_t rxq_bd = vio_ldq(sdev, dev->buf_list + VLAN_RXQ_BD_OFF);
     vlan_bd_t bd;
     int buf_ptr = dev->use_buf_ptr;
@@ -192,7 +196,7 @@ static NetClientInfo net_spapr_vlan_info = {
 
 static void spapr_vlan_reset(VIOsPAPRDevice *sdev)
 {
-    VIOsPAPRVLANDevice *dev = DO_UPCAST(VIOsPAPRVLANDevice, sdev, sdev);
+    VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev);
 
     dev->buf_list = 0;
     dev->rx_bufs = 0;
@@ -201,7 +205,7 @@ static void spapr_vlan_reset(VIOsPAPRDevice *sdev)
 
 static int spapr_vlan_init(VIOsPAPRDevice *sdev)
 {
-    VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
+    VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev);
 
     qemu_macaddr_default_if_unset(&dev->nicconf.macaddr);
 
@@ -225,7 +229,7 @@ void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd)
 
 static int spapr_vlan_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
 {
-    VIOsPAPRVLANDevice *vdev = (VIOsPAPRVLANDevice *)dev;
+    VIOsPAPRVLANDevice *vdev = VIO_SPAPR_VLAN_DEVICE(dev);
     uint8_t padded_mac[8] = {0, 0};
     int ret;
 
@@ -282,7 +286,7 @@ static target_ulong h_register_logical_lan(PowerPCCPU *cpu,
     target_ulong rec_queue = args[2];
     target_ulong filter_list = args[3];
     VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
-    VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
+    VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev);
     vlan_bd_t filter_list_bd;
 
     if (!dev) {
@@ -341,7 +345,7 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 {
     target_ulong reg = args[0];
     VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
-    VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
+    VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev);
 
     if (!dev) {
         return H_PARAMETER;
@@ -365,7 +369,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
     target_ulong reg = args[0];
     target_ulong buf = args[1];
     VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
-    VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
+    VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev);
     vlan_bd_t bd;
 
     dprintf("H_ADD_LOGICAL_LAN_BUFFER(0x" TARGET_FMT_lx
@@ -413,7 +417,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     target_ulong *bufs = args + 1;
     target_ulong continue_token = args[7];
     VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
-    VIOsPAPRVLANDevice *dev = (VIOsPAPRVLANDevice *)sdev;
+    VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev);
     unsigned total_len;
     uint8_t *lbuf, *p;
     int i, nbufs;
@@ -511,7 +515,7 @@ static void spapr_vlan_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo spapr_vlan_info = {
-    .name = "spapr-vlan",
+    .name = TYPE_VIO_SPAPR_VLAN_DEVICE,
     .parent = TYPE_VIO_SPAPR_DEVICE,
     .instance_size = sizeof(VIOsPAPRVLANDevice),
     .class_init = spapr_vlan_class_init,
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 0cc6cba0e3..1eb05c9075 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -36,6 +36,10 @@ typedef struct sPAPRNVRAM {
     BlockDriverState *drive;
 } sPAPRNVRAM;
 
+#define TYPE_VIO_SPAPR_NVRAM "spapr-nvram"
+#define VIO_SPAPR_NVRAM(obj) \
+    OBJECT_CHECK(sPAPRNVRAM, (obj), TYPE_VIO_SPAPR_NVRAM)
+
 #define MIN_NVRAM_SIZE 8192
 #define DEFAULT_NVRAM_SIZE 65536
 #define MAX_NVRAM_SIZE (UINT16_MAX * 16)
@@ -134,7 +138,7 @@ static void rtas_nvram_store(sPAPREnvironment *spapr,
 
 static int spapr_nvram_init(VIOsPAPRDevice *dev)
 {
-    sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev;
+    sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
 
     if (nvram->drive) {
         nvram->size = bdrv_getlength(nvram->drive);
@@ -157,7 +161,7 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev)
 
 static int spapr_nvram_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
 {
-    sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev;
+    sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev);
 
     return fdt_setprop_cell(fdt, node_off, "#bytes", nvram->size);
 }
@@ -182,7 +186,7 @@ static void spapr_nvram_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo spapr_nvram_type_info = {
-    .name = "spapr-nvram",
+    .name = TYPE_VIO_SPAPR_NVRAM,
    .parent = TYPE_VIO_SPAPR_DEVICE,
     .instance_size = sizeof(sPAPRNVRAM),
     .class_init = spapr_nvram_class_init,
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index b4981d7005..e0996556e5 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -2,6 +2,7 @@
  * QEMU Ultrasparc APB PCI host
  *
  * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2012,2013 Artyom Tarasenko
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -66,7 +67,8 @@ do { printf("APB: " fmt , ## __VA_ARGS__); } while (0)
 #define RESET_WCMASK 0x98000000
 #define RESET_WMASK 0x60000000
 
-#define MAX_IVEC 0x30
+#define MAX_IVEC 0x40
+#define NO_IRQ_REQUEST (MAX_IVEC + 1)
 
 typedef struct APBState {
     SysBusDevice busdev;
@@ -75,17 +77,64 @@ typedef struct APBState {
     MemoryRegion pci_config;
     MemoryRegion pci_mmio;
     MemoryRegion pci_ioport;
+    uint64_t pci_irq_in;
     uint32_t iommu[4];
     uint32_t pci_control[16];
     uint32_t pci_irq_map[8];
     uint32_t obio_irq_map[32];
     qemu_irq *pbm_irqs;
     qemu_irq *ivec_irqs;
+    unsigned int irq_request;
     uint32_t reset_control;
     unsigned int nr_resets;
 } APBState;
 
-static void pci_apb_set_irq(void *opaque, int irq_num, int level);
+static inline void pbm_set_request(APBState *s, unsigned int irq_num)
+{
+    APB_DPRINTF("%s: request irq %d\n", __func__, irq_num);
+
+    s->irq_request = irq_num;
+    qemu_set_irq(s->ivec_irqs[irq_num], 1);
+}
+
+static inline void pbm_check_irqs(APBState *s)
+{
+
+    unsigned int i;
+
+    /* Previous request is not acknowledged, resubmit */
+    if (s->irq_request != NO_IRQ_REQUEST) {
+        pbm_set_request(s, s->irq_request);
+        return;
+    }
+    /* no request pending */
+    if (s->pci_irq_in == 0ULL) {
+        return;
+    }
+    for (i = 0; i < 32; i++) {
+        if (s->pci_irq_in & (1ULL << i)) {
+            if (s->pci_irq_map[i >> 2] & PBM_PCI_IMR_ENABLED) {
+                pbm_set_request(s, i);
+                return;
+            }
+        }
+    }
+    for (i = 32; i < 64; i++) {
+        if (s->pci_irq_in & (1ULL << i)) {
+            if (s->obio_irq_map[i - 32] & PBM_PCI_IMR_ENABLED) {
+                pbm_set_request(s, i);
+                break;
+            }
+        }
+    }
+}
+
+static inline void pbm_clear_request(APBState *s, unsigned int irq_num)
+{
+    APB_DPRINTF("%s: clear request irq %d\n", __func__, irq_num);
+    qemu_set_irq(s->ivec_irqs[irq_num], 0);
+    s->irq_request = NO_IRQ_REQUEST;
+}
 
 static void apb_config_writel (void *opaque, hwaddr addr,
                                uint64_t val, unsigned size)
@@ -105,24 +154,43 @@ static void apb_config_writel (void *opaque, hwaddr addr,
         break;
     case 0xc00 ... 0xc3f: /* PCI interrupt control */
         if (addr & 4) {
-            s->pci_irq_map[(addr & 0x3f) >> 3] &= PBM_PCI_IMR_MASK;
-            s->pci_irq_map[(addr & 0x3f) >> 3] |= val & ~PBM_PCI_IMR_MASK;
+            unsigned int ino = (addr & 0x3f) >> 3;
+            s->pci_irq_map[ino] &= PBM_PCI_IMR_MASK;
+            s->pci_irq_map[ino] |= val & ~PBM_PCI_IMR_MASK;
+            if ((s->irq_request == ino) && !(val & ~PBM_PCI_IMR_MASK)) {
+                pbm_clear_request(s, ino);
+            }
+            pbm_check_irqs(s);
         }
         break;
     case 0x1000 ... 0x1080: /* OBIO interrupt control */
         if (addr & 4) {
-            s->obio_irq_map[(addr & 0xff) >> 3] &= PBM_PCI_IMR_MASK;
-            s->obio_irq_map[(addr & 0xff) >> 3] |= val & ~PBM_PCI_IMR_MASK;
+            unsigned int ino = ((addr & 0xff) >> 3);
+            s->obio_irq_map[ino] &= PBM_PCI_IMR_MASK;
+            s->obio_irq_map[ino] |= val & ~PBM_PCI_IMR_MASK;
+            if ((s->irq_request == (ino | 0x20))
+                && !(val & ~PBM_PCI_IMR_MASK)) {
+                pbm_clear_request(s, ino | 0x20);
+            }
+            pbm_check_irqs(s);
         }
         break;
-    case 0x1400 ... 0x143f: /* PCI interrupt clear */
+    case 0x1400 ... 0x14ff: /* PCI interrupt clear */
         if (addr & 4) {
-            pci_apb_set_irq(s, (addr & 0x3f) >> 3, 0);
+            unsigned int ino = (addr & 0xff) >> 5;
+            if ((s->irq_request / 4) == ino) {
+                pbm_clear_request(s, s->irq_request);
+                pbm_check_irqs(s);
+            }
         }
         break;
     case 0x1800 ... 0x1860: /* OBIO interrupt clear */
         if (addr & 4) {
-            pci_apb_set_irq(s, 0x20 | ((addr & 0xff) >> 3), 0);
+            unsigned int ino = ((addr & 0xff) >> 3) | 0x20;
+            if (s->irq_request == ino) {
+                pbm_clear_request(s, ino);
+                pbm_check_irqs(s);
+            }
         }
         break;
     case 0x2000 ... 0x202f: /* PCI control */
@@ -297,30 +365,35 @@ static int pci_pbm_map_irq(PCIDevice *pci_dev, int irq_num)
         bus_offset = 16;
     else
         bus_offset = 0;
-    return bus_offset + irq_num;
+    return (bus_offset + (PCI_SLOT(pci_dev->devfn) << 2) + irq_num) & 0x1f;
 }
 
 static void pci_apb_set_irq(void *opaque, int irq_num, int level)
 {
     APBState *s = opaque;
 
+    APB_DPRINTF("%s: set irq_in %d level %d\n", __func__, irq_num, level);
     /* PCI IRQ map onto the first 32 INO. */
     if (irq_num < 32) {
-        if (s->pci_irq_map[irq_num >> 2] & PBM_PCI_IMR_ENABLED) {
-            APB_DPRINTF("%s: set irq %d level %d\n", __func__, irq_num, level);
-            qemu_set_irq(s->ivec_irqs[irq_num], level);
+        if (level) {
+            s->pci_irq_in |= 1ULL << irq_num;
+            if (s->pci_irq_map[irq_num >> 2] & PBM_PCI_IMR_ENABLED) {
+                pbm_set_request(s, irq_num);
+            }
         } else {
-            APB_DPRINTF("%s: not enabled: lower irq %d\n", __func__, irq_num);
-            qemu_irq_lower(s->ivec_irqs[irq_num]);
+            s->pci_irq_in &= ~(1ULL << irq_num);
         }
     } else {
-        /* OBIO IRQ map onto the next 16 INO. */
-        if (s->obio_irq_map[irq_num - 32] & PBM_PCI_IMR_ENABLED) {
+        /* OBIO IRQ map onto the next 32 INO. */
+        if (level) {
             APB_DPRINTF("%s: set irq %d level %d\n", __func__, irq_num, level);
-            qemu_set_irq(s->ivec_irqs[irq_num], level);
+            s->pci_irq_in |= 1ULL << irq_num;
+            if ((s->irq_request == NO_IRQ_REQUEST)
+                && (s->obio_irq_map[irq_num - 32] & PBM_PCI_IMR_ENABLED)) {
+                pbm_set_request(s, irq_num);
+            }
         } else {
-            APB_DPRINTF("%s: not enabled: lower irq %d\n", __func__, irq_num);
-            qemu_irq_lower(s->ivec_irqs[irq_num]);
+            s->pci_irq_in &= ~(1ULL << irq_num);
         }
     }
 }
@@ -420,6 +493,9 @@ static void pci_pbm_reset(DeviceState *d)
         s->obio_irq_map[i] &= PBM_PCI_IMR_MASK;
     }
 
+    s->irq_request = NO_IRQ_REQUEST;
+    s->pci_irq_in = 0ULL;
+
     if (s->nr_resets++ == 0) {
         /* Power on reset */
         s->reset_control = POR;
@@ -445,6 +521,8 @@ static int pci_pbm_init_device(SysBusDevice *dev)
         s->obio_irq_map[i] = ((0x1f << 6) | 0x20) + i;
     }
     s->pbm_irqs = qemu_allocate_irqs(pci_apb_set_irq, s, MAX_IVEC);
+    s->irq_request = NO_IRQ_REQUEST;
+    s->pci_irq_in = 0ULL;
 
     /* apb_config */
     memory_region_init_io(&s->apb_config, &apb_config_ops, s, "apb-config",
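A condensed model of the arbitration that pbm_check_irqs() implements above: scan the pending-INO bitmap for the lowest interrupt whose mapping register has its enable bit set, and make it the single outstanding request. This is a simplified sketch (one flat mapping array instead of the split PCI/OBIO maps, and a stand-in enable-bit value), not the exact QEMU code:

#include <stdint.h>

#define NO_REQUEST  65          /* mirrors NO_IRQ_REQUEST above */
#define IMR_ENABLED (1u << 31)  /* stand-in for PBM_PCI_IMR_ENABLED */

/* Returns the INO to submit, or NO_REQUEST when nothing is both
 * pending and enabled. */
static unsigned pick_request(uint64_t pending, const uint32_t imr[64])
{
    for (unsigned i = 0; i < 64; i++) {
        if ((pending & (1ULL << i)) && (imr[i] & IMR_ENABLED)) {
            return i;
        }
    }
    return NO_REQUEST;
}

The key design point in the patch is the single irq_request slot: only one interrupt vector is in flight at a time, and a new scan happens only after the guest acknowledges the previous one via the interrupt-clear registers.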
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 7292ce1290..c85299588c 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -16,6 +16,7 @@
 #include "sysemu/device_tree.h"
 #include "hw/pci/pci.h"
 #include "hw/ppc/openpic.h"
+#include "kvm_ppc.h"
 
 static void e500plat_fixup_devtree(PPCE500Params *params, void *fdt)
 {
@@ -48,6 +49,12 @@ static void e500plat_init(QEMUMachineInitArgs *args)
         .mpic_version = OPENPIC_MODEL_FSL_MPIC_42,
     };
 
+    /* Older KVM versions don't support EPR which breaks guests when we announce
+       MPIC variants that support EPR. Revert to an older one for those */
+    if (kvm_enabled() && !kvmppc_has_cap_epr()) {
+        params.mpic_version = OPENPIC_MODEL_FSL_MPIC_20;
+    }
+
     ppce500_init(&params);
 }
 
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 4a9b8837e3..ce44e95d53 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -126,6 +126,8 @@ static void ppc_core99_reset(void *opaque)
     PowerPCCPU *cpu = opaque;
 
     cpu_reset(CPU(cpu));
+    /* 970 CPUs want to get their initial IP as part of their boot protocol */
+    cpu->env.nip = PROM_ADDR + 0x100;
 }
 
 /* PowerPC Mac99 hardware initialisation */
diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c
index 585f53b912..e41b036b8e 100644
--- a/hw/ppc/ppc_booke.c
+++ b/hw/ppc/ppc_booke.c
@@ -28,6 +28,7 @@
 #include "hw/timer/m48t59.h"
 #include "qemu/log.h"
 #include "hw/loader.h"
+#include "kvm_ppc.h"
 
 /* Timer Control Register */
 
@@ -211,6 +212,7 @@ void store_booke_tsr(CPUPPCState *env, target_ulong val)
     PowerPCCPU *cpu = ppc_env_get_cpu(env);
 
     env->spr[SPR_BOOKE_TSR] &= ~val;
+    kvmppc_clear_tsr_bits(cpu, val);
     booke_update_irq(cpu);
 }
 
@@ -222,6 +224,7 @@ void store_booke_tcr(CPUPPCState *env, target_ulong val)
     tb_env = env->tb_env;
 
     env->spr[SPR_BOOKE_TCR] = val;
+    kvmppc_set_tcr(cpu);
 
     booke_update_irq(cpu);
 
@@ -234,7 +237,6 @@ void store_booke_tcr(CPUPPCState *env, target_ulong val)
                           booke_get_wdt_target(env, tb_env),
                           &booke_timer->wdt_next,
                           booke_timer->wdt_timer);
-
 }
 
 static void ppc_booke_timer_reset_handle(void *opaque)
@@ -242,16 +244,39 @@ static void ppc_booke_timer_reset_handle(void *opaque)
     PowerPCCPU *cpu = opaque;
     CPUPPCState *env = &cpu->env;
 
-    env->spr[SPR_BOOKE_TSR] = 0;
-    env->spr[SPR_BOOKE_TCR] = 0;
+    store_booke_tcr(env, 0);
+    store_booke_tsr(env, -1);
+}
 
-    booke_update_irq(cpu);
+/*
+ * This function will be called whenever the CPU state changes.
+ * CPU states are defined "typedef enum RunState".
+ * Regarding the timer, when the CPU state changes to running after a
+ * debug halt or a similar pause that takes time, the final watchdog
+ * expiry can happen in between.  That causes an exit to QEMU and the
+ * configured watchdog action is taken.  To avoid this we always clear
+ * the watchdog state when the state changes to running.
+ */
+static void cpu_state_change_handler(void *opaque, int running, RunState state)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+
+    if (!running) {
+        return;
+    }
+
+    /*
+     * Clear watchdog interrupt condition by clearing TSR.
+     */
+    store_booke_tsr(env, TSR_ENW | TSR_WIS | TSR_WRS_MASK);
 }
 
 void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags)
 {
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
+    int ret = 0;
 
     tb_env = g_malloc0(sizeof(ppc_tb_t));
     booke_timer = g_malloc0(sizeof(booke_timer_t));
@@ -269,5 +294,17 @@ void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags)
     booke_timer->wdt_timer =
         qemu_new_timer_ns(vm_clock, &booke_wdt_cb, cpu);
 
+    ret = kvmppc_booke_watchdog_enable(cpu);
+
+    if (ret) {
+        /* TODO: Start the QEMU emulated watchdog if not running on KVM.
+         * Also start the QEMU emulated watchdog if KVM does not support
+         * emulated watchdog or somehow it is not enabled (supported but
+         * not enabled is probably a bug and requires debugging).
+         */
+    }
+
+    qemu_add_vm_change_state_handler(cpu_state_change_handler, cpu);
+
     qemu_register_reset(ppc_booke_timer_reset_handle, cpu);
 }
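For reference, the TSR write path that both the new reset handler and the run-state handler above funnel through is write-one-to-clear: every 1 bit in the stored value clears the corresponding status bit. A minimal standalone model of that idiom (the initial value here is invented for the demo):

#include <stdint.h>
#include <stdio.h>

static uint32_t tsr = 0xc0000000;   /* pretend two status bits are pending */

/* Write-one-to-clear, mirroring env->spr[SPR_BOOKE_TSR] &= ~val above. */
static void tsr_write(uint32_t val)
{
    tsr &= ~val;
}

int main(void)
{
    tsr_write(0xffffffff);          /* the store_booke_tsr(env, -1) case */
    printf("%#x\n", tsr);           /* 0: all status cleared */
    return 0;
}

This explains the reset handler's store_booke_tsr(env, -1): writing all ones is how you clear every status bit, and routing it through the accessor keeps the KVM-side register in sync.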
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7a425011da..c96ac8131f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -308,6 +308,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         CPUState *cpu = CPU(ppc_env_get_cpu(env));
+        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
         int index = cpu->cpu_index;
         uint32_t servers_prop[smp_threads];
         uint32_t gservers_prop[smp_threads * 2];
@@ -333,10 +334,26 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
 
         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
 
         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
-        _FDT((fdt_property_cell(fdt, "dcache-block-size",
+        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
                                 env->dcache_line_size)));
-        _FDT((fdt_property_cell(fdt, "icache-block-size",
+        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
+                                env->dcache_line_size)));
+        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
+                                env->icache_line_size)));
+        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
                                 env->icache_line_size)));
+
+        if (pcc->l1_dcache_size) {
+            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
+        } else {
+            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
+        }
+        if (pcc->l1_icache_size) {
+            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
+        } else {
+            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
+        }
+
         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
@@ -801,8 +818,10 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
         /* Set time-base frequency to 512 MHz */
         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
 
-        /* PAPR always has exception vectors in RAM not ROM */
-        env->hreset_excp_prefix = 0;
+        /* PAPR always has exception vectors in RAM not ROM. To ensure this,
+         * MSR[IP] should never be set.
+         */
+        env->msr_mask &= ~(1 << 6);
 
         /* Tell KVM that we're in PAPR mode */
         if (kvm_enabled()) {
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 4dbc31541b..1405c3202c 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -53,9 +53,29 @@ static Property spapr_vio_props[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static char *spapr_vio_get_dev_name(DeviceState *qdev)
+{
+    VIOsPAPRDevice *dev = VIO_SPAPR_DEVICE(qdev);
+    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    char *name;
+
+    /* Device tree style name device@reg */
+    name = g_strdup_printf("%s@%x", pc->dt_name, dev->reg);
+
+    return name;
+}
+
+static void spapr_vio_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *k = BUS_CLASS(klass);
+
+    k->get_dev_path = spapr_vio_get_dev_name;
+}
+
 static const TypeInfo spapr_vio_bus_info = {
     .name = TYPE_SPAPR_VIO_BUS,
     .parent = TYPE_BUS,
+    .class_init = spapr_vio_bus_class_init,
     .instance_size = sizeof(VIOsPAPRBus),
 };
 
@@ -74,17 +94,6 @@ VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg)
     return NULL;
 }
 
-static char *vio_format_dev_name(VIOsPAPRDevice *dev)
-{
-    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    char *name;
-
-    /* Device tree style name device@reg */
-    name = g_strdup_printf("%s@%x", pc->dt_name, dev->reg);
-
-    return name;
-}
-
 #ifdef CONFIG_FDT
 static int vio_make_devnode(VIOsPAPRDevice *dev,
                             void *fdt)
@@ -98,7 +107,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
         return vdevice_off;
     }
 
-    dt_name = vio_format_dev_name(dev);
+    dt_name = spapr_vio_get_dev_name(DEVICE(dev));
     node_off = fdt_add_subnode(fdt, vdevice_off, dt_name);
     g_free(dt_name);
     if (node_off < 0) {
@@ -379,7 +388,7 @@ static VIOsPAPRDevice *reg_conflict(VIOsPAPRDevice *dev)
      * the given dev might already be in the list.
      */
     QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
-        other = DO_UPCAST(VIOsPAPRDevice, qdev, kid->child);
+        other = VIO_SPAPR_DEVICE(kid->child);
 
         if (other != dev && other->reg == dev->reg) {
             return other;
@@ -391,7 +400,7 @@ static VIOsPAPRDevice *reg_conflict(VIOsPAPRDevice *dev)
 
 static void spapr_vio_busdev_reset(DeviceState *qdev)
 {
-    VIOsPAPRDevice *dev = DO_UPCAST(VIOsPAPRDevice, qdev, qdev);
+    VIOsPAPRDevice *dev = VIO_SPAPR_DEVICE(qdev);
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
 
     /* Shut down the request queue and TCEs if necessary */
@@ -437,7 +446,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 
     /* Don't overwrite ids assigned on the command line */
     if (!dev->qdev.id) {
-        id = vio_format_dev_name(dev);
+        id = spapr_vio_get_dev_name(DEVICE(dev));
         dev->qdev.id = id;
     }
 
@@ -636,7 +645,7 @@ int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus)
         return offset;
     }
 
-    name = vio_format_dev_name(dev);
+    name = spapr_vio_get_dev_name(DEVICE(dev));
     path = g_strdup_printf("/vdevice/%s", name);
 
     ret = fdt_setprop_string(fdt, offset, "linux,stdout-path", path);
diff --git a/hw/ppc/xics.c b/hw/ppc/xics.c
index 8e1e85edfd..1b25075d14 100644
--- a/hw/ppc/xics.c
+++ b/hw/ppc/xics.c
@@ -101,6 +101,7 @@ static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr)
         if (XISR(ss) && (cppr <= ss->pending_priority)) {
             old_xisr = XISR(ss);
             ss->xirr &= ~XISR_MASK; /* Clear XISR */
+            ss->pending_priority = 0xff;
             qemu_irq_lower(ss->output);
             ics_reject(icp->ics, old_xisr);
         }
@@ -127,6 +128,7 @@ static uint32_t icp_accept(struct icp_server_state *ss)
 
     qemu_irq_lower(ss->output);
     ss->xirr = ss->pending_priority << 24;
+    ss->pending_priority = 0xff;
 
     trace_xics_icp_accept(xirr, ss->xirr);
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index 999a463a84..3d322d5d1e 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -91,6 +91,9 @@ typedef struct vscsi_req {
     int total_desc;
 } vscsi_req;
 
+#define TYPE_VIO_SPAPR_VSCSI_DEVICE "spapr-vscsi"
+#define VIO_SPAPR_VSCSI_DEVICE(obj) \
+    OBJECT_CHECK(VSCSIState, (obj), TYPE_VIO_SPAPR_VSCSI_DEVICE)
 
 typedef struct {
     VIOsPAPRDevice vdev;
@@ -461,7 +464,7 @@ static int vscsi_preprocess_desc(vscsi_req *req)
 /* Callback to indicate that the SCSI layer has completed a transfer. */
 static void vscsi_transfer_data(SCSIRequest *sreq, uint32_t len)
 {
-    VSCSIState *s = DO_UPCAST(VSCSIState, vdev.qdev, sreq->bus->qbus.parent);
+    VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(sreq->bus->qbus.parent);
     vscsi_req *req = sreq->hba_private;
     uint8_t *buf;
     int rc = 0;
@@ -492,7 +495,7 @@ static void vscsi_transfer_data(SCSIRequest *sreq, uint32_t len)
 /* Callback to indicate that the SCSI layer has completed a transfer. */
 static void vscsi_command_complete(SCSIRequest *sreq, uint32_t status, size_t resid)
 {
-    VSCSIState *s = DO_UPCAST(VSCSIState, vdev.qdev, sreq->bus->qbus.parent);
+    VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(sreq->bus->qbus.parent);
     vscsi_req *req = sreq->hba_private;
     int32_t res_in = 0, res_out = 0;
 
@@ -827,7 +830,7 @@ static void vscsi_got_payload(VSCSIState *s, vscsi_crq *crq)
 
 static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data)
 {
-    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
+    VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(dev);
     vscsi_crq crq;
 
     memcpy(crq.raw, crq_data, 16);
@@ -897,7 +900,7 @@ static const struct SCSIBusInfo vscsi_scsi_info = {
 
 static void spapr_vscsi_reset(VIOsPAPRDevice *dev)
 {
-    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
+    VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(dev);
     int i;
 
     memset(s->reqs, 0, sizeof(s->reqs));
@@ -908,7 +911,7 @@ static void spapr_vscsi_reset(VIOsPAPRDevice *dev)
 
 static int spapr_vscsi_init(VIOsPAPRDevice *dev)
 {
-    VSCSIState *s = DO_UPCAST(VSCSIState, vdev, dev);
+    VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(dev);
 
     dev->crq.SendFunc = vscsi_do_crq;
 
@@ -968,7 +971,7 @@ static void spapr_vscsi_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo spapr_vscsi_info = {
-    .name = "spapr-vscsi",
+    .name = TYPE_VIO_SPAPR_VSCSI_DEVICE,
     .parent = TYPE_VIO_SPAPR_DEVICE,
     .instance_size = sizeof(VSCSIState),
     .class_init = spapr_vscsi_class_init,
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 0d29620094..5c2bbd44f5 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -81,7 +81,7 @@
 #define FW_CFG_SPARC64_HEIGHT (FW_CFG_ARCH_LOCAL + 0x01)
 #define FW_CFG_SPARC64_DEPTH (FW_CFG_ARCH_LOCAL + 0x02)
 
-#define IVEC_MAX 0x30
+#define IVEC_MAX 0x40
 
 #define TICK_MAX 0x7fffffffffffffffULL
 
diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
index 094284d664..deb6d4703b 100644
--- a/hw/usb/ccid-card-emulated.c
+++ b/hw/usb/ccid-card-emulated.c
@@ -503,9 +503,15 @@ static int emulated_initfn(CCIDCardState *base)
     if (init_pipe_signaling(card) < 0) {
         return -1;
     }
-    card->backend = parse_enumeration(card->backend_str, backend_enum_table, 0);
+
+    card->backend = 0;
+    if (card->backend_str) {
+        card->backend = parse_enumeration(card->backend_str,
+                                          backend_enum_table, 0);
+    }
+
     if (card->backend == 0) {
-        printf("unknown backend, must be one of:\n");
+        printf("backend must be one of:\n");
         for (ptable = backend_enum_table; ptable->name != NULL; ++ptable) {
             printf("%s\n", ptable->name);
         }
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e856191e40..6362074e9c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -338,6 +338,23 @@ extern uintptr_t tci_tb_ptr;
 # elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
 #  define GETRA() ((uintptr_t)__builtin_return_address(0))
 #  define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1))
+# elif defined(__arm__)
+/* We define two insns between the return address and the branch back to
+   straight-line.  Find and decode that branch insn. */
+# define GETRA() ((uintptr_t)__builtin_return_address(0))
+# define GETPC_LDST() tcg_getpc_ldst(GETRA())
+static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
+{
+    int32_t b;
+    ra += 8;                 /* skip the two insns */
+    b = *(int32_t *)ra;      /* load the branch insn */
+    b = (b << 8) >> (8 - 2); /* extract the displacement */
+    ra += 8;                 /* branches are relative to pc+8 */
+    ra += b;                 /* apply the displacement */
+    ra -= 4;                 /* return a pointer into the current opcode,
+                                not the start of the next opcode */
+    return ra;
+}
 # else
 #  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
 # endif
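To see what the sign-extend-and-scale line in tcg_getpc_ldst() computes, here is the same arithmetic run on a concrete ARM branch encoding. Like the QEMU code itself, it assumes the compiler implements right shift of a negative signed integer as an arithmetic shift:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* "b ." encodes a signed 24-bit word offset of -2 in bits [23:0]. */
    int32_t insn = (int32_t)0xeafffffe;

    /* (b << 8) >> (8 - 2): shift the 24-bit field to the top of the
     * word, then arithmetic-shift back down by 6, which sign-extends
     * and multiplies by 4 (bytes per word) in a single step. */
    int32_t disp = (int32_t)((uint32_t)insn << 8) >> (8 - 2);

    printf("%d\n", disp);   /* -8: minus two words, expressed in bytes */
    return 0;
}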
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 16064d00ad..ef072b1a6e 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -417,4 +417,9 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
 #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
 
+/* Timer Status Register OR/CLEAR interface */
+#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
+#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
+#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
+#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index caca979c49..eb02d8a837 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -449,12 +449,15 @@ enum {
 	kvm_ioeventfd_flag_nr_datamatch,
 	kvm_ioeventfd_flag_nr_pio,
 	kvm_ioeventfd_flag_nr_deassign,
+	kvm_ioeventfd_flag_nr_virtio_ccw_notify,
 	kvm_ioeventfd_flag_nr_max,
 };
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
 #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
 #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
 
 #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1)
 
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index c27cef7e32..eb03a00799 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -63,6 +63,7 @@ typedef struct PowerPCCPUClass {
     powerpc_input_t bus_model;
     uint32_t flags;
     int bfd_mach;
+    uint32_t l1_dcache_size, l1_icache_size;
 #if defined(TARGET_PPC64)
     const struct ppc_segment_page_sizes *sps;
 #endif
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 42c36e2829..7cacb56bc5 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1026,7 +1026,6 @@ struct CPUPPCState {
     /* Exception vectors */
     target_ulong excp_vectors[POWERPC_EXCP_NB];
     target_ulong excp_prefix;
-    target_ulong hreset_excp_prefix;
     target_ulong ivor_mask;
     target_ulong ivpr_mask;
     target_ulong hreset_vector;
@@ -1446,6 +1445,7 @@ static inline void cpu_clone_regs(CPUPPCState *env, target_ulong newsp)
 #define SPR_PERF2 (0x302)
 #define SPR_RCPU_MI_RBA2 (0x302)
 #define SPR_MPC_MI_AP (0x302)
+#define SPR_MMCRA (0x302)
 #define SPR_PERF3 (0x303)
 #define SPR_RCPU_MI_RBA3 (0x303)
 #define SPR_MPC_MI_EPN (0x303)
@@ -1870,8 +1870,10 @@ enum {
     PPC2_PRCNTL = 0x0000000000000008ULL,
     /* Byte-reversed, indexed, double-word load and store */
     PPC2_DBRX = 0x0000000000000010ULL,
+    /* Book I 2.05 PowerPC specification */
+    PPC2_ISA205 = 0x0000000000000020ULL,
 
-#define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_DBRX)
+#define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_DBRX | PPC2_ISA205)
 };
 
/*****************************************************************************/
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 9e779eace6..4f6021835f 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -430,20 +430,17 @@ void helper_fpscr_setbit(CPUPPCState *env, uint32_t bit)
 
 void helper_store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask)
 {
-    /*
-     * We use only the 32 LSB of the incoming fpr
-     */
-    uint32_t prev, new;
+    target_ulong prev, new;
     int i;
 
     prev = env->fpscr;
-    new = (uint32_t)arg;
-    new &= ~0x60000000;
-    new |= prev & 0x60000000;
-    for (i = 0; i < 8; i++) {
+    new = (target_ulong)arg;
+    new &= ~0x60000000LL;
+    new |= prev & 0x60000000LL;
+    for (i = 0; i < sizeof(target_ulong) * 2; i++) {
         if (mask & (1 << i)) {
-            env->fpscr &= ~(0xF << (4 * i));
-            env->fpscr |= new & (0xF << (4 * i));
+            env->fpscr &= ~(0xFLL << (4 * i));
+            env->fpscr |= new & (0xFLL << (4 * i));
         }
     }
     /* Update VX and FEX */
@@ -470,6 +467,18 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask)
 
 void helper_float_check_status(CPUPPCState *env)
 {
+    int status = get_float_exception_flags(&env->fp_status);
+
+    if (status & float_flag_divbyzero) {
+        float_zero_divide_excp(env);
+    } else if (status & float_flag_overflow) {
+        float_overflow_excp(env);
+    } else if (status & float_flag_underflow) {
+        float_underflow_excp(env);
+    } else if (status & float_flag_inexact) {
+        float_inexact_excp(env);
+    }
+
     if (env->exception_index == POWERPC_EXCP_PROGRAM &&
         (env->error_code & POWERPC_EXCP_FP)) {
         /* Deferred floating-point exception after target FPR update */
@@ -477,17 +486,6 @@ void helper_float_check_status(CPUPPCState *env)
             helper_raise_exception_err(env, env->exception_index,
                                        env->error_code);
         }
-    } else {
-        int status = get_float_exception_flags(&env->fp_status);
-        if (status & float_flag_divbyzero) {
-            float_zero_divide_excp(env);
-        } else if (status & float_flag_overflow) {
-            float_overflow_excp(env);
-        } else if (status & float_flag_underflow) {
-            float_underflow_excp(env);
-        } else if (status & float_flag_inexact) {
-            float_inexact_excp(env);
-        }
     }
 }
 
@@ -595,37 +593,6 @@ uint64_t helper_fdiv(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     return farg1.ll;
 }
 
-/* fabs */
-uint64_t helper_fabs(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.ll = arg;
-    farg.d = float64_abs(farg.d);
-    return farg.ll;
-}
-
-/* fnabs */
-uint64_t helper_fnabs(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.ll = arg;
-    farg.d = float64_abs(farg.d);
-    farg.d = float64_chs(farg.d);
-    return farg.ll;
-}
-
-/* fneg */
-uint64_t helper_fneg(CPUPPCState *env, uint64_t arg)
-{
-    CPU_DoubleU farg;
-
-    farg.ll = arg;
-    farg.d = float64_chs(farg.d);
-    return farg.ll;
-}
-
 /* fctiw - fctiw. */
 uint64_t helper_fctiw(CPUPPCState *env, uint64_t arg)
 {
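The widened loop in helper_store_fpscr() above walks 4-bit FPSCR fields selected by individual mask bits, field i covering bits [4i+3:4i]. A standalone run of the same update expression with concrete values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t fpscr = 0xa5;      /* field 1 currently holds 0xa */
    uint64_t val   = 0xffff;    /* candidate new FPSCR image */
    uint32_t mask  = 0x2;       /* select field 1 (bits 7:4) only */

    for (int i = 0; i < 16; i++) {
        if (mask & (1u << i)) {
            fpscr &= ~(0xFULL << (4 * i));
            fpscr |= val & (0xFULL << (4 * i));
        }
    }
    printf("%#llx\n", (unsigned long long)fpscr);   /* 0xf5 */
    return 0;
}

Only the selected nibble changes; field 0 keeps its old 0x5, which is exactly why mtfsf can update part of the FPSCR without touching the rest.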
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index d33ee66b53..56814b501f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -36,6 +36,7 @@ DEF_HELPER_3(mulldo, i64, env, i64, i64)
 DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
@@ -80,9 +81,6 @@ DEF_HELPER_4(fmadd, i64, env, i64, i64, i64)
 DEF_HELPER_4(fmsub, i64, env, i64, i64, i64)
 DEF_HELPER_4(fnmadd, i64, env, i64, i64, i64)
 DEF_HELPER_4(fnmsub, i64, env, i64, i64, i64)
-DEF_HELPER_2(fabs, i64, env, i64)
-DEF_HELPER_2(fnabs, i64, env, i64)
-DEF_HELPER_2(fneg, i64, env, i64)
 DEF_HELPER_2(fsqrt, i64, env, i64)
 DEF_HELPER_2(fre, i64, env, i64)
 DEF_HELPER_2(fres, i64, env, i64)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 54eca9bbee..e50bdd20ec 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -53,6 +53,21 @@ target_ulong helper_cntlzd(target_ulong t)
 }
 #endif
 
+target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
+{
+    target_ulong mask = 0xff;
+    target_ulong ra = 0;
+    int i;
+
+    for (i = 0; i < sizeof(target_ulong); i++) {
+        if ((rs & mask) == (rb & mask)) {
+            ra |= mask;
+        }
+        mask <<= 8;
+    }
+    return ra;
+}
+
 /* shift right arithmetic helper */
 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                          target_ulong shift)
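helper_cmpb() above produces a byte-granular match mask. Running the same loop over fixed 64-bit operands makes the semantics concrete:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t rs = 0x1122334455667788ULL;
    uint64_t rb = 0x11FF3344FF6677FFULL;
    uint64_t mask = 0xff, ra = 0;

    /* Each byte of ra becomes 0xff where rs and rb agree. */
    for (int i = 0; i < 8; i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    printf("%#llx\n", (unsigned long long)ra);  /* 0xff00ffff00ffff00 */
    return 0;
}

Bytes 7, 5, 4, 2, and 1 match, so exactly those bytes of the result are all ones; this is the building block the kernel and glibc use for fast string scanning on POWER6 and later.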
Expect the "
             "wrath of the apocalypse!\n");
@@ -652,6 +660,103 @@ static int kvm_get_fp(CPUState *cs)
     return 0;
 }
 
+#if defined(TARGET_PPC64)
+static int kvm_get_vpa(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int ret;
+
+    reg.id = KVM_REG_PPC_VPA_ADDR;
+    reg.addr = (uintptr_t)&env->vpa_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret < 0) {
+        dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
+        return ret;
+    }
+
+    assert((uintptr_t)&env->slb_shadow_size
+           == ((uintptr_t)&env->slb_shadow_addr + 8));
+    reg.id = KVM_REG_PPC_VPA_SLB;
+    reg.addr = (uintptr_t)&env->slb_shadow_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret < 0) {
+        dprintf("Unable to get SLB shadow state from KVM: %s\n",
+                strerror(errno));
+        return ret;
+    }
+
+    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
+    reg.id = KVM_REG_PPC_VPA_DTL;
+    reg.addr = (uintptr_t)&env->dtl_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret < 0) {
+        dprintf("Unable to get dispatch trace log state from KVM: %s\n",
+                strerror(errno));
+        return ret;
+    }
+
+    return 0;
+}
+
+static int kvm_put_vpa(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    int ret;
+
+    /* SLB shadow or DTL can't be registered unless a master VPA is
+     * registered.  That means when restoring state, if a VPA *is*
+     * registered, we need to set that up first.  If not, we need to
+     * deregister the others before deregistering the master VPA */
+    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
+
+    if (env->vpa_addr) {
+        reg.id = KVM_REG_PPC_VPA_ADDR;
+        reg.addr = (uintptr_t)&env->vpa_addr;
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret < 0) {
+            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
+            return ret;
+        }
+    }
+
+    assert((uintptr_t)&env->slb_shadow_size
+           == ((uintptr_t)&env->slb_shadow_addr + 8));
+    reg.id = KVM_REG_PPC_VPA_SLB;
+    reg.addr = (uintptr_t)&env->slb_shadow_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret < 0) {
+        dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
+        return ret;
+    }
+
+    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
+    reg.id = KVM_REG_PPC_VPA_DTL;
+    reg.addr = (uintptr_t)&env->dtl_addr;
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret < 0) {
+        dprintf("Unable to set dispatch trace log state to KVM: %s\n",
+                strerror(errno));
+        return ret;
+    }
+
+    if (!env->vpa_addr) {
+        reg.id = KVM_REG_PPC_VPA_ADDR;
+        reg.addr = (uintptr_t)&env->vpa_addr;
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret < 0) {
+            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
+            return ret;
+        }
+    }
+
+    return 0;
+}
+#endif /* TARGET_PPC64 */
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -752,6 +857,14 @@ int kvm_arch_put_registers(CPUState *cs, int level)
                 kvm_put_one_spr(cs, id, i);
             }
         }
+
+#ifdef TARGET_PPC64
+        if (cap_papr) {
+            if (kvm_put_vpa(cs) < 0) {
+                dprintf("Warning: Unable to set VPA information to KVM\n");
+            }
+        }
+#endif /* TARGET_PPC64 */
     }
 
     return ret;
@@ -953,6 +1066,14 @@ int kvm_arch_get_registers(CPUState *cs)
                 kvm_get_one_spr(cs, id, i);
             }
         }
+
+#ifdef TARGET_PPC64
+        if (cap_papr) {
+            if (kvm_get_vpa(cs) < 0) {
+                dprintf("Warning: Unable to get VPA information from KVM\n");
+            }
+        }
+#endif
     }
 
     return 0;
@@ -1092,6 +1213,12 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         run->epr.epr = ldl_phys(env->mpic_iack);
         ret = 0;
         break;
+    case KVM_EXIT_WATCHDOG:
+        dprintf("handle watchdog expiry\n");
+        watchdog_perform_action();
+        ret = 0;
+        break;
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
@@ -1101,6 +1228,71 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
     return ret;
 }
 
+int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
+{
+    CPUState *cs = CPU(cpu);
+    uint32_t bits = tsr_bits;
+    struct kvm_one_reg reg = {
+        .id = KVM_REG_PPC_OR_TSR,
+        .addr = (uintptr_t) &bits,
+    };
+
+    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+}
+
+int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
+{
+
+    CPUState *cs = CPU(cpu);
+    uint32_t bits = tsr_bits;
+    struct kvm_one_reg reg = {
+        .id = KVM_REG_PPC_CLEAR_TSR,
+        .addr = (uintptr_t) &bits,
+    };
+
+    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+}
+
+int kvmppc_set_tcr(PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
+
+    struct kvm_one_reg reg = {
+        .id = KVM_REG_PPC_TCR,
+        .addr = (uintptr_t) &tcr,
+    };
+
+    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+}
+
+int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    struct kvm_enable_cap encap = {};
+    int ret;
+
+    if (!kvm_enabled()) {
+        return -1;
+    }
+
+    if (!cap_ppc_watchdog) {
+        printf("warning: KVM does not support watchdog");
+        return -1;
+    }
+
+    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
+    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
+    if (ret < 0) {
+        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
+                __func__, strerror(-ret));
+        return ret;
+    }
+
+    return ret;
+}
+
 static int read_cpuinfo(const char *field, char *value, int len)
 {
     FILE *f;
@@ -1301,6 +1493,10 @@ void kvmppc_set_papr(PowerPCCPU *cpu)
     if (ret) {
         cpu_abort(env, "This KVM version does not support PAPR\n");
     }
+
+    /* Update the capability flag so we sync the right information
+     * with kvm */
+    cap_papr = 1;
 }
 
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
@@ -1370,11 +1566,35 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
 {
+    struct kvm_ppc_smmu_info info;
+    long rampagesize, best_page_shift;
+    int i;
+
     if (cap_ppc_rma >= 2) {
         return current_size;
     }
+
+    /* Find the largest hardware supported page size that's less than
+     * or equal to the (logical) backing page size of guest RAM */
+    kvm_get_smmu_info(ppc_env_get_cpu(first_cpu), &info);
+    rampagesize = getrampagesize();
+    best_page_shift = 0;
+
+    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
+        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
+
+        if (!sps->page_shift) {
+            continue;
+        }
+
+        if ((sps->page_shift > best_page_shift)
+            && ((1UL << sps->page_shift) <= rampagesize)) {
+            best_page_shift = sps->page_shift;
+        }
+    }
+
     return MIN(current_size,
-               getrampagesize() << (hash_shift - 7));
+               1ULL << (best_page_shift + hash_shift - 7));
 }
 #endif
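A worked instance of the clamp computed by kvmppc_rma_size() above. The particular numbers are assumptions chosen for the example (a 16 MiB hash table, so hash_shift = 24, and 64 KiB backing pages), not values from the patch:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    unsigned hash_shift = 24;           /* 16 MiB HTAB, for illustration */
    long best_page_shift = 16;          /* 64 KiB backing pages */
    uint64_t current_size = 1ULL << 28; /* 256 MiB requested RMA */

    uint64_t clamp = 1ULL << (best_page_shift + hash_shift - 7);
    printf("%#llx\n", (unsigned long long)MIN(current_size, clamp));
    /* prints 0x10000000: the 8 GiB clamp leaves the request untouched */
    return 0;
}

The point of the change is that the clamp now scales with the largest hardware page size usable for the backing memory rather than with the raw host page size, so hugepage-backed guests are not needlessly restricted.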
@@ -1503,6 +1723,8 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
     uint32_t vmx = kvmppc_get_vmx();
     uint32_t dfp = kvmppc_get_dfp();
+    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
+    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
 
     /* Now fix up the class with information we can query from the host */
 
@@ -1515,6 +1737,14 @@
         /* Only override when we know what the host supports */
         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
     }
+
+    if (dcache_size != -1) {
+        pcc->l1_dcache_size = dcache_size;
+    }
+
+    if (icache_size != -1) {
+        pcc->l1_icache_size = icache_size;
+    }
 }
 
 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
@@ -1530,6 +1760,11 @@
     return 0;
 }
 
+bool kvmppc_has_cap_epr(void)
+{
+    return cap_epr;
+}
+
 static int kvm_ppc_register_host_cpu_type(void)
 {
     TypeInfo type_info = {
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 6bcc5cc36c..771cfbe82b 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -25,6 +25,10 @@ int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
 int kvmppc_smt_threads(void);
+int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits);
+int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits);
+int kvmppc_set_tcr(PowerPCCPU *cpu);
+int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
 #ifndef CONFIG_USER_ONLY
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
@@ -33,6 +37,7 @@ int kvmppc_reset_htab(int shift_hint);
 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 #endif /* !CONFIG_USER_ONLY */
 int kvmppc_fixup_cpu(PowerPCCPU *cpu);
+bool kvmppc_has_cap_epr(void);
 
 #else
 
@@ -89,6 +94,26 @@ static inline int kvmppc_smt_threads(void)
     return 1;
 }
 
+static inline int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
+{
+    return 0;
+}
+
+static inline int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
+{
+    return 0;
+}
+
+static inline int kvmppc_set_tcr(PowerPCCPU *cpu)
+{
+    return 0;
+}
+
+static inline int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
+{
+    return -1;
+}
+
 #ifndef CONFIG_USER_ONLY
 static inline off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 {
@@ -129,6 +154,11 @@ static inline int kvmppc_fixup_cpu(PowerPCCPU *cpu)
 {
     return -1;
 }
+
+static inline bool kvmppc_has_cap_epr(void)
+{
+    return false;
+}
 #endif
 
 #ifndef CONFIG_KVM
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index 235b0d5f49..2d10adb60d 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -78,7 +78,6 @@ void cpu_save(QEMUFile *f, void *opaque)
     for (i = 0; i < POWERPC_EXCP_NB; i++)
         qemu_put_betls(f, &env->excp_vectors[i]);
     qemu_put_betls(f, &env->excp_prefix);
-    qemu_put_betls(f, &env->hreset_excp_prefix);
     qemu_put_betls(f, &env->ivor_mask);
     qemu_put_betls(f, &env->ivpr_mask);
     qemu_put_betls(f, &env->hreset_vector);
@@ -167,7 +166,6 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
     for (i = 0; i < POWERPC_EXCP_NB; i++)
         qemu_get_betls(f, &env->excp_vectors[i]);
     qemu_get_betls(f, &env->excp_prefix);
-    qemu_get_betls(f, &env->hreset_excp_prefix);
     qemu_get_betls(f, &env->ivor_mask);
     qemu_get_betls(f, &env->ivpr_mask);
     qemu_get_betls(f, &env->hreset_vector);
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 9783e52b0c..d8e63ca7d2 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -140,7 +140,7 @@ void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t is_dcbzl)
 {
     int dcbz_size = env->dcache_line_size;
 
-#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+#if defined(TARGET_PPC64)
     if (!is_dcbzl &&
         (env->excp_model == POWERPC_EXCP_970) &&
         ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 5e741d1ab4..1a84653983 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -202,6 +202,8 @@ typedef struct DisasContext {
     int spe_enabled;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
+    uint64_t insns_flags;
+    uint64_t insns_flags2;
 } DisasContext;
 
 /* True when active word size < size of target_long. */
@@ -423,9 +425,14 @@ EXTRACT_HELPER(ME, 1, 5);
 EXTRACT_HELPER(TO, 21, 5);
 
 EXTRACT_HELPER(CRM, 12, 8);
-EXTRACT_HELPER(FM, 17, 8);
 EXTRACT_HELPER(SR, 16, 4);
+
+/* mtfsf/mtfsfi */
+EXTRACT_HELPER(FPBF, 19, 3);
 EXTRACT_HELPER(FPIMM, 12, 4);
+EXTRACT_HELPER(FPL, 21, 1);
+EXTRACT_HELPER(FPFLM, 17, 8);
+EXTRACT_HELPER(FPW, 16, 1);
 
 /*** Jump target decoding ***/
 /* Displacement */
@@ -739,6 +746,13 @@ static void gen_isel(DisasContext *ctx)
     tcg_temp_free_i32(t0);
 }
 
+/* cmpb: PowerPC 2.05 specification */
+static void gen_cmpb(DisasContext *ctx)
+{
+    gen_helper_cmpb(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)],
+                    cpu_gpr[rB(ctx->opcode)]);
+}
+
 /*** Integer arithmetic ***/
 
 static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
@@ -746,7 +760,7 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0,
 {
     TCGv t0 = tcg_temp_new();
 
-    tcg_gen_xor_tl(cpu_ov, arg0, arg1);
+    tcg_gen_xor_tl(cpu_ov, arg0, arg2);
     tcg_gen_xor_tl(t0, arg1, arg2);
     if (sub) {
         tcg_gen_and_tl(cpu_ov, cpu_ov, t0);
@@ -768,22 +782,25 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
 {
     TCGv t0 = ret;
 
-    if (((compute_ca && add_ca) || compute_ov)
-        && (TCGV_EQUAL(ret, arg1) || TCGV_EQUAL(ret, arg2))) {
+    if (compute_ca || compute_ov) {
         t0 = tcg_temp_new();
     }
 
     if (compute_ca) {
         if (NARROW_MODE(ctx)) {
+            /* Caution: a non-obvious corner case of the spec is that we
+               must produce the *entire* 64-bit addition, but produce the
+               carry into bit 32. */
             TCGv t1 = tcg_temp_new();
-            tcg_gen_ext32u_tl(t1, arg2);
-            tcg_gen_ext32u_tl(t0, arg1);
-            tcg_gen_add_tl(t0, t0, t1);
-            tcg_temp_free(t1);
+            tcg_gen_xor_tl(t1, arg1, arg2);      /* add without carry */
+            tcg_gen_add_tl(t0, arg1, arg2);
             if (add_ca) {
                 tcg_gen_add_tl(t0, t0, cpu_ca);
             }
-            tcg_gen_shri_tl(cpu_ca, t0, 32);
+            tcg_gen_xor_tl(cpu_ca, t0, t1);      /* bits changed w/ carry */
+            tcg_temp_free(t1);
+            tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */
+            tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
         } else {
             TCGv zero = tcg_const_tl(0);
             if (add_ca) {
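The carry computation in the NARROW_MODE branch above relies on a classic identity: xor of the operands gives the bitwise sum without carries, so xoring that with the real sum leaves exactly the carry-in at every bit position, and bit 32 is then the carry out of the low word. Verified on plain integers:

#include <stdint.h>
#include <stdio.h>

/* Carry out of the low 32 bits of a full 64-bit addition, extracted
 * the same way as the TCG sequence above. */
static unsigned carry32(uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;
    return (unsigned)(((sum ^ a ^ b) >> 32) & 1);
}

int main(void)
{
    printf("%u\n", carry32(0xffffffffULL, 1));  /* 1: wraps the low word */
    printf("%u\n", carry32(0x7fffffffULL, 1));  /* 0: no carry out */
    return 0;
}

This is why the old code's zero-extend-and-add approach could be dropped: the full 64-bit sum is needed anyway, and the xor trick recovers the 32-bit carry from it directly.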
@@ -1122,24 +1139,30 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1,
 {
     TCGv t0 = ret;
 
-    if (compute_ov && (TCGV_EQUAL(ret, arg1) || TCGV_EQUAL(ret, arg2))) {
+    if (compute_ca || compute_ov) {
         t0 = tcg_temp_new();
     }
 
     if (compute_ca) {
         /* dest = ~arg1 + arg2 [+ ca]. */
         if (NARROW_MODE(ctx)) {
+            /* Caution: a non-obvious corner case of the spec is that we
+               must produce the *entire* 64-bit addition, but produce the
+               carry into bit 32. */
             TCGv inv1 = tcg_temp_new();
+            TCGv t1 = tcg_temp_new();
             tcg_gen_not_tl(inv1, arg1);
-            tcg_gen_ext32u_tl(t0, arg2);
-            tcg_gen_ext32u_tl(inv1, inv1);
             if (add_ca) {
-                tcg_gen_add_tl(t0, t0, cpu_ca);
+                tcg_gen_add_tl(t0, arg2, cpu_ca);
             } else {
-                tcg_gen_addi_tl(t0, t0, 1);
+                tcg_gen_addi_tl(t0, arg2, 1);
             }
+            tcg_gen_xor_tl(t1, arg2, inv1);      /* add without carry */
             tcg_gen_add_tl(t0, t0, inv1);
-            tcg_gen_shri_tl(cpu_ca, t0, 32);
+            tcg_gen_xor_tl(cpu_ca, t0, t1);      /* bits changed w/ carry */
+            tcg_temp_free(t1);
+            tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */
+            tcg_gen_andi_tl(cpu_ca, cpu_ca, 1);
         } else if (add_ca) {
             TCGv zero, inv1 = tcg_temp_new();
             tcg_gen_not_tl(inv1, arg1);
@@ -1451,6 +1474,38 @@ static void gen_popcntd(DisasContext *ctx)
 }
 #endif
 
+/* prtyw: PowerPC 2.05 specification */
+static void gen_prtyw(DisasContext *ctx)
+{
+    TCGv ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv rs = cpu_gpr[rS(ctx->opcode)];
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_shri_tl(t0, rs, 16);
+    tcg_gen_xor_tl(ra, rs, t0);
+    tcg_gen_shri_tl(t0, ra, 8);
+    tcg_gen_xor_tl(ra, ra, t0);
+    tcg_gen_andi_tl(ra, ra, (target_ulong)0x100000001ULL);
+    tcg_temp_free(t0);
+}
+
+#if defined(TARGET_PPC64)
+/* prtyd: PowerPC 2.05 specification */
+static void gen_prtyd(DisasContext *ctx)
+{
+    TCGv ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv rs = cpu_gpr[rS(ctx->opcode)];
+    TCGv t0 = tcg_temp_new();
+    tcg_gen_shri_tl(t0, rs, 32);
+    tcg_gen_xor_tl(ra, rs, t0);
+    tcg_gen_shri_tl(t0, ra, 16);
+    tcg_gen_xor_tl(ra, ra, t0);
+    tcg_gen_shri_tl(t0, ra, 8);
+    tcg_gen_xor_tl(ra, ra, t0);
+    tcg_gen_andi_tl(ra, ra, 1);
+    tcg_temp_free(t0);
+}
+#endif
+
 #if defined(TARGET_PPC64)
 /* extsw & extsw. */
 GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B);
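gen_prtyw() above folds each 32-bit word onto the low bit of its low byte with two xor-shifts, then masks bits 0 and 32; each word's result is the xor of bit 0 of its four bytes. The same folding on a plain integer:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* High word 0x00000001: byte lsbs 1,0,0,0 -> parity 1.
     * Low word 0x00010100: byte lsbs 0,1,1,0 -> parity 0. */
    uint64_t rs = 0x0000000100010100ULL;

    uint64_t t = rs ^ (rs >> 16);
    t ^= t >> 8;
    t &= 0x100000001ULL;

    printf("%#llx\n", (unsigned long long)t);   /* 0x100000000 */
    return 0;
}

Only bits 0 and 32 are kept, so any cross-word mixing introduced by the 64-bit shifts is masked away; bit 32 depends only on bits 32, 40, 48, and 56, and bit 0 only on bits 0, 8, 16, and 24.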
@@ -2161,7 +2216,16 @@ static void gen_fcmpu(DisasContext *ctx)
 /*** Floating-point move ***/
 /* fabs */
 /* XXX: beware that fabs never checks for NaNs nor update FPSCR */
-GEN_FLOAT_B(abs, 0x08, 0x08, 0, PPC_FLOAT);
+static void gen_fabs(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_andi_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
+                     ~(1ULL << 63));
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+}
 
 /* fmr - fmr. */
 /* XXX: beware that fmr never checks for NaNs nor update FPSCR */
@@ -2177,10 +2241,42 @@ static void gen_fmr(DisasContext *ctx)
 
 /* fnabs */
 /* XXX: beware that fnabs never checks for NaNs nor update FPSCR */
-GEN_FLOAT_B(nabs, 0x08, 0x04, 0, PPC_FLOAT);
+static void gen_fnabs(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_ori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
+                    1ULL << 63);
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+}
 
 /* fneg */
 /* XXX: beware that fneg never checks for NaNs nor update FPSCR */
-GEN_FLOAT_B(neg, 0x08, 0x01, 0, PPC_FLOAT);
+static void gen_fneg(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_xori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
+                     1ULL << 63);
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+}
+
+/* fcpsgn: PowerPC 2.05 specification */
+/* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */
+static void gen_fcpsgn(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+                        cpu_fpr[rB(ctx->opcode)], 0, 63);
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+}
 
 /*** Floating-Point status & ctrl register ***/
@@ -2271,19 +2367,27 @@ static void gen_mtfsb1(DisasContext *ctx)
 static void gen_mtfsf(DisasContext *ctx)
 {
     TCGv_i32 t0;
-    int L = ctx->opcode & 0x02000000;
+    int flm, l, w;
 
     if (unlikely(!ctx->fpu_enabled)) {
         gen_exception(ctx, POWERPC_EXCP_FPU);
         return;
     }
+    flm = FPFLM(ctx->opcode);
+    l = FPL(ctx->opcode);
+    w = FPW(ctx->opcode);
+    if (unlikely(w & !(ctx->insns_flags2 & PPC2_ISA205))) {
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+        return;
+    }
     /* NIP cannot be restored if the memory exception comes from an helper */
     gen_update_nip(ctx, ctx->nip - 4);
     gen_reset_fpstatus();
-    if (L)
-        t0 = tcg_const_i32(0xff);
-    else
-        t0 = tcg_const_i32(FM(ctx->opcode));
+    if (l) {
+        t0 = tcg_const_i32((ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff);
+    } else {
+        t0 = tcg_const_i32(flm << (w * 8));
+    }
     gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx->opcode)], t0);
     tcg_temp_free_i32(t0);
     if (unlikely(Rc(ctx->opcode) != 0)) {
@@ -2297,7 +2401,7 @@ static void gen_mtfsf(DisasContext *ctx)
 /* mtfsfi */
 static void gen_mtfsfi(DisasContext *ctx)
 {
-    int bf, sh;
+    int bf, sh, w;
     TCGv_i64 t0;
     TCGv_i32 t1;
 
@@ -2305,12 +2409,17 @@ static void gen_mtfsfi(DisasContext *ctx)
         gen_exception(ctx, POWERPC_EXCP_FPU);
         return;
     }
-    bf = crbD(ctx->opcode) >> 2;
-    sh = 7 - bf;
+    w = FPW(ctx->opcode);
+    bf = FPBF(ctx->opcode);
+    if (unlikely(w & !(ctx->insns_flags2 & PPC2_ISA205))) {
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+        return;
+    }
+    sh = (8 * w) + 7 - bf;
     /* NIP cannot be restored if the memory exception comes from an helper */
     gen_update_nip(ctx, ctx->nip - 4);
     gen_reset_fpstatus();
-    t0 = tcg_const_i64(FPIMM(ctx->opcode) << (4 * sh));
+    t0 = tcg_const_i64(((uint64_t)FPIMM(ctx->opcode)) << (4 * sh));
     t1 = tcg_const_i32(1 << sh);
     gen_helper_store_fpscr(cpu_env, t0, t1);
     tcg_temp_free_i64(t0);
@@ -2442,7 +2551,6 @@ static inline void gen_qemu_ld32u(DisasContext *ctx, TCGv arg1, TCGv arg2)
     }
 }
 
-#if defined(TARGET_PPC64)
 static inline void gen_qemu_ld32s(DisasContext *ctx, TCGv arg1, TCGv arg2)
 {
     if (unlikely(ctx->le_mode)) {
@@ -2452,7 +2560,6 @@ static inline void gen_qemu_ld32s(DisasContext *ctx, TCGv arg1, TCGv arg2)
     } else
         tcg_gen_qemu_ld32s(arg1, arg2, ctx->mem_idx);
 }
-#endif
 
 static inline void gen_qemu_ld64(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
 {
@@ -3211,6 +3318,71 @@ GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT);
 /* lfs lfsu lfsux lfsx */
 GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT);
 
+/* lfdp */
+static void gen_lfdp(DisasContext *ctx)
+{
+    TCGv EA;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_set_access_type(ctx, ACCESS_FLOAT);
+    EA = tcg_temp_new();
+    gen_addr_imm_index(ctx, EA, 0); \
+    if (unlikely(ctx->le_mode)) {
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA);
+        tcg_gen_addi_tl(EA, EA, 8);
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA);
+    } else {
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA);
+        tcg_gen_addi_tl(EA, EA, 8);
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA);
+    }
+    tcg_temp_free(EA);
+}
+
+/* lfdpx */
+static void gen_lfdpx(DisasContext *ctx)
+{
+    TCGv EA;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_set_access_type(ctx, ACCESS_FLOAT);
+    EA = tcg_temp_new();
+    gen_addr_reg_index(ctx, EA);
+    if (unlikely(ctx->le_mode)) {
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA);
+        tcg_gen_addi_tl(EA, EA, 8);
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA);
+    } else {
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA);
+        tcg_gen_addi_tl(EA, EA, 8);
+        gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA);
+    }
+    tcg_temp_free(EA);
+}
+
+/* lfiwax */
+static void gen_lfiwax(DisasContext *ctx)
+{
+    TCGv EA;
+    TCGv t0;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_set_access_type(ctx, ACCESS_FLOAT);
+    EA = tcg_temp_new();
+    t0 = tcg_temp_new();
+    gen_addr_reg_index(ctx, EA);
+    gen_qemu_ld32s(ctx, t0, EA);
+    tcg_gen_ext_tl_i64(cpu_fpr[rD(ctx->opcode)], t0);
+    tcg_temp_free(EA);
+    tcg_temp_free(t0);
+}
+
 /*** Floating-point store ***/
 #define GEN_STF(name, stop, opc, type) \
 static void glue(gen_, name)(DisasContext *ctx) \
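The new lfiwax implementation above loads a signed 32-bit word and extends it into the 64-bit FPR. The equivalent scalar arithmetic, to make the register image explicit:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int32_t word = -2;      /* 0xfffffffe as stored in memory */

    /* What the tcg_gen_ext_tl_i64 step produces for the FPR: the
     * word sign-extended to 64 bits. */
    uint64_t fpr = (uint64_t)(int64_t)word;

    printf("%#llx\n", (unsigned long long)fpr); /* 0xfffffffffffffffe */
    return 0;
}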
+/* stfdp */ +static void gen_stfdp(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_imm_index(ctx, EA, 0); + if (unlikely(ctx->le_mode)) { + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + } else { + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + } + tcg_temp_free(EA); +} + +/* stfdpx */ +static void gen_stfdpx(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + if (unlikely(ctx->le_mode)) { + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + } else { + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + } + tcg_temp_free(EA); +} + /* Optional: */ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) { @@ -8426,6 +8644,7 @@ GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), +GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), @@ -8453,9 +8672,11 @@ GEN_HANDLER(xori, 0x1A, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(xoris, 0x1B, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(popcntb, 0x1F, 0x03, 0x03, 0x0000F801, PPC_POPCNTB), GEN_HANDLER(popcntw, 0x1F, 0x1A, 0x0b, 0x0000F801, PPC_POPCNTWD), +GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205), #if defined(TARGET_PPC64) GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD), GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B), +GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205), #endif GEN_HANDLER(rlwimi, 0x14, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(rlwinm, 0x15, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), @@ -8476,13 +8697,17 @@ GEN_HANDLER(fsqrt, 0x3F, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT), GEN_HANDLER(fsqrts, 0x3B, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT), GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x00600001, PPC_FLOAT), GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x00600001, PPC_FLOAT), +GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F0000, PPC_FLOAT), GEN_HANDLER(fmr, 0x3F, 0x08, 0x02, 0x001F0000, PPC_FLOAT), +GEN_HANDLER(fnabs, 0x3F, 0x08, 0x04, 0x001F0000, PPC_FLOAT), +GEN_HANDLER(fneg, 0x3F, 0x08, 0x01, 0x001F0000, PPC_FLOAT), +GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205), GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT), GEN_HANDLER(mffs, 0x3F, 0x07, 0x12, 0x001FF800, PPC_FLOAT), GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT), GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT), -GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x00010000, PPC_FLOAT), -GEN_HANDLER(mtfsfi, 0x3F, 0x06, 0x04, 0x006f0800, PPC_FLOAT),
+GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x00000000, PPC_FLOAT), +GEN_HANDLER(mtfsfi, 0x3F, 0x06, 0x04, 0x006e0800, PPC_FLOAT), #if defined(TARGET_PPC64) GEN_HANDLER(ld, 0x3A, 0xFF, 0xFF, 0x00000000, PPC_64B), GEN_HANDLER(lq, 0x38, 0xFF, 0xFF, 0x00000000, PPC_64BX), @@ -8833,9 +9058,6 @@ GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT), GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT), GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT), GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(abs, 0x08, 0x08, 0, PPC_FLOAT), -GEN_FLOAT_B(nabs, 0x08, 0x04, 0, PPC_FLOAT), -GEN_FLOAT_B(neg, 0x08, 0x01, 0, PPC_FLOAT), #undef GEN_LD #undef GEN_LDU @@ -8921,6 +9143,9 @@ GEN_LDXF(name, ldop, 0x17, op | 0x00, type) GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT) GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT) +GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x00000001, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(lfdp, 0x39, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(lfdpx, 0x1F, 0x17, 0x18, 0x00200001, PPC_NONE, PPC2_ISA205), #undef GEN_STF #undef GEN_STUF @@ -8944,6 +9169,8 @@ GEN_STXF(name, stop, 0x17, op | 0x00, type) GEN_STFS(stfd, st64, 0x16, PPC_FLOAT) GEN_STFS(stfs, st32fs, 0x14, PPC_FLOAT) GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) +GEN_HANDLER_E(stfdp, 0x3D, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x00200001, PPC_NONE, PPC2_ISA205), #undef GEN_CRLOGIC #define GEN_CRLOGIC(name, tcg_op, opc) \ @@ -9518,6 +9745,8 @@ static inline void gen_intermediate_code_internal(CPUPPCState *env, ctx.exception = POWERPC_EXCP_NONE; ctx.spr_cb = env->spr_cb; ctx.mem_idx = env->mmu_idx; + ctx.insns_flags = env->insns_flags; + ctx.insns_flags2 = env->insns_flags2; ctx.access_type = -1; ctx.le_mode = env->hflags & (1 << MSR_LE) ? 
1 : 0; #if defined(TARGET_PPC64) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 781170fb05..6feb62abcd 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -1149,6 +1149,13 @@ static void gen_spr_603 (CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); + /* Breakpoints */ + /* XXX : not implemented */ + spr_register(env, SPR_IABR, "IABR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + } /* SPR specific to PowerPC G2 implementation */ @@ -2593,7 +2600,6 @@ static void init_excp_4xx_real (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_FIT] = 0x00001010; env->excp_vectors[POWERPC_EXCP_WDT] = 0x00001020; env->excp_vectors[POWERPC_EXCP_DEBUG] = 0x00002000; - env->hreset_excp_prefix = 0x00000000UL; env->ivor_mask = 0x0000FFF0UL; env->ivpr_mask = 0xFFFF0000UL; /* Hardware reset vector */ @@ -2618,7 +2624,6 @@ static void init_excp_4xx_softmmu (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_DTLB] = 0x00001100; env->excp_vectors[POWERPC_EXCP_ITLB] = 0x00001200; env->excp_vectors[POWERPC_EXCP_DEBUG] = 0x00002000; - env->hreset_excp_prefix = 0x00000000UL; env->ivor_mask = 0x0000FFF0UL; env->ivpr_mask = 0xFFFF0000UL; /* Hardware reset vector */ @@ -2644,11 +2649,10 @@ static void init_excp_MPC5xx (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001C00; env->excp_vectors[POWERPC_EXCP_MEXTBR] = 0x00001E00; env->excp_vectors[POWERPC_EXCP_NMEXTBR] = 0x00001F00; - env->hreset_excp_prefix = 0x00000000UL; env->ivor_mask = 0x0000FFF0UL; env->ivpr_mask = 0xFFFF0000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2676,11 +2680,10 @@ static void init_excp_MPC8xx (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001C00; env->excp_vectors[POWERPC_EXCP_MEXTBR] = 0x00001E00; env->excp_vectors[POWERPC_EXCP_NMEXTBR] = 0x00001F00; - env->hreset_excp_prefix = 0x00000000UL; env->ivor_mask = 0x0000FFF0UL; env->ivpr_mask = 0xFFFF0000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2704,9 +2707,8 @@ static void init_excp_G2 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_DSTLB] = 0x00001200; env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2733,7 +2735,6 @@ static void init_excp_e200(CPUPPCState *env, target_ulong ivpr_mask) env->excp_vectors[POWERPC_EXCP_SPEU] = 0x00000000; env->excp_vectors[POWERPC_EXCP_EFPDI] = 0x00000000; env->excp_vectors[POWERPC_EXCP_EFPRI] = 0x00000000; - env->hreset_excp_prefix = 0x00000000UL; env->ivor_mask = 0x0000FFF7UL; env->ivpr_mask = ivpr_mask; /* Hardware reset vector */ @@ -2760,7 +2761,6 @@ static void init_excp_BookE (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_DTLB] = 0x00000000; env->excp_vectors[POWERPC_EXCP_ITLB] = 0x00000000; env->excp_vectors[POWERPC_EXCP_DEBUG] = 0x00000000; - env->hreset_excp_prefix = 0x00000000UL; env->ivor_mask = 0x0000FFE0UL; env->ivpr_mask = 0xFFFF0000UL; /* Hardware reset vector */ @@ -2783,7 +2783,6 @@ static void init_excp_601 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_IO] = 0x00000A00; env->excp_vectors[POWERPC_EXCP_SYSCALL] = 0x00000C00; env->excp_vectors[POWERPC_EXCP_RUNM] = 0x00002000; - env->hreset_excp_prefix = 0xFFF00000UL; /* 
Hardware reset vector */ env->hreset_vector = 0x00000100UL; #endif @@ -2811,9 +2810,8 @@ static void init_excp_602 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; env->excp_vectors[POWERPC_EXCP_WDT] = 0x00001500; env->excp_vectors[POWERPC_EXCP_EMUL] = 0x00001600; - env->hreset_excp_prefix = 0xFFF00000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2836,9 +2834,8 @@ static void init_excp_603 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_DSTLB] = 0x00001200; env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2859,7 +2856,6 @@ static void init_excp_604 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_PERFM] = 0x00000F00; env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; - env->hreset_excp_prefix = 0xFFF00000UL; /* Hardware reset vector */ env->hreset_vector = 0x00000100UL; #endif @@ -2883,9 +2879,8 @@ static void init_excp_7x0 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; env->excp_vectors[POWERPC_EXCP_THERM] = 0x00001700; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2906,9 +2901,8 @@ static void init_excp_750cl (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_PERFM] = 0x00000F00; env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2929,9 +2923,8 @@ static void init_excp_750cx (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_PERFM] = 0x00000F00; env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_THERM] = 0x00001700; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2957,9 +2950,8 @@ static void init_excp_7x5 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; env->excp_vectors[POWERPC_EXCP_THERM] = 0x00001700; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -2983,9 +2975,8 @@ static void init_excp_7400 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; env->excp_vectors[POWERPC_EXCP_VPUA] = 0x00001600; env->excp_vectors[POWERPC_EXCP_THERM] = 0x00001700; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -3011,9 +3002,8 @@ static void init_excp_7450 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_IABR] = 0x00001300; env->excp_vectors[POWERPC_EXCP_SMI] = 0x00001400; env->excp_vectors[POWERPC_EXCP_VPUA] = 0x00001600; - env->hreset_excp_prefix = 0x00000000UL; /* Hardware reset vector */ - env->hreset_vector = 0xFFFFFFFCUL; + env->hreset_vector = 0x00000100UL; #endif } @@ -3041,7 +3031,6 @@ static void init_excp_970 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_MAINT] = 0x00001600; env->excp_vectors[POWERPC_EXCP_VPUA] = 
0x00001700; env->excp_vectors[POWERPC_EXCP_THERM] = 0x00001800; - env->hreset_excp_prefix = 0x00000000FFF00000ULL; /* Hardware reset vector */ env->hreset_vector = 0x0000000000000100ULL; #endif @@ -3070,7 +3059,6 @@ static void init_excp_POWER7 (CPUPPCState *env) env->excp_vectors[POWERPC_EXCP_MAINT] = 0x00001600; env->excp_vectors[POWERPC_EXCP_VPUA] = 0x00001700; env->excp_vectors[POWERPC_EXCP_THERM] = 0x00001800; - env->hreset_excp_prefix = 0; /* Hardware reset vector */ env->hreset_vector = 0x0000000000000100ULL; #endif @@ -4157,6 +4145,33 @@ static void init_proc_G2LE (CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); + /* Breakpoints */ + /* XXX : not implemented */ + spr_register(env, SPR_DABR, "DABR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* XXX : not implemented */ + spr_register(env, SPR_DABR2, "DABR2", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* XXX : not implemented */ + spr_register(env, SPR_IABR2, "IABR2", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* XXX : not implemented */ + spr_register(env, SPR_IBCR, "IBCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* XXX : not implemented */ + spr_register(env, SPR_DBCR, "DBCR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x00000000); + /* Memory management */ gen_low_BATs(env); gen_high_BATs(env); @@ -6962,6 +6977,18 @@ static void init_proc_POWER7 (CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, KVM_REG_PPC_DSCR, 0x00000000); + spr_register_kvm(env, SPR_MMCRA, "SPR_MMCRA", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_MMCRA, 0x00000000); + spr_register_kvm(env, SPR_PMC5, "SPR_PMC5", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_PMC5, 0x00000000); + spr_register_kvm(env, SPR_PMC6, "SPR_PMC6", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_PMC6, 0x00000000); #endif /* !CONFIG_USER_ONLY */ /* Memory management */ /* XXX : not implemented */ @@ -6989,6 +7016,7 @@ static void init_proc_POWER7 (CPUPPCState *env) init_excp_POWER7(env); env->dcache_line_size = 128; env->icache_line_size = 128; + /* Allocate hardware IRQ controller */ ppcPOWER7_irq_init(env); /* Can't find information on what this should be on reset. 
This @@ -7004,7 +7032,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) dc->desc = "POWER7"; pcc->init_proc = init_proc_POWER7; pcc->check_pow = check_pow_nocheck; - pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | + pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -7014,7 +7042,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) PPC_64B | PPC_ALTIVEC | PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD; - pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX; + pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205; pcc->msr_mask = 0x800000000204FF36ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) @@ -7026,6 +7054,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR; + pcc->l1_dcache_size = 0x8000; + pcc->l1_icache_size = 0x8000; } #endif /* defined (TARGET_PPC64) */ @@ -7043,7 +7073,6 @@ static void init_ppc_proc(PowerPCCPU *cpu) /* Set all exception vectors to an invalid address */ for (i = 0; i < POWERPC_EXCP_NB; i++) env->excp_vectors[i] = (target_ulong)(-1ULL); - env->hreset_excp_prefix = 0x00000000; env->ivor_mask = 0x00000000; env->ivpr_mask = 0x00000000; /* Default MMU definitions */ @@ -7080,9 +7109,7 @@ static void init_ppc_proc(PowerPCCPU *cpu) } /* PowerPC implementation specific initialisations (SPRs, timers, ...) */ (*pcc->init_proc)(env); -#if !defined(CONFIG_USER_ONLY) - env->excp_prefix = env->hreset_excp_prefix; -#endif + /* MSR bits & flags consistency checks */ if (env->msr_mask & (1 << 25)) { switch (env->flags & (POWERPC_FLAG_SPE | POWERPC_FLAG_VRE)) { @@ -8182,19 +8209,23 @@ static void ppc_cpu_reset(CPUState *s) msr |= (target_ulong)1 << MSR_VR; /* Allow altivec usage */ msr |= (target_ulong)1 << MSR_SPE; /* Allow SPE usage */ msr |= (target_ulong)1 << MSR_PR; -#else - env->excp_prefix = env->hreset_excp_prefix; - env->nip = env->hreset_vector | env->excp_prefix; - if (env->mmu_model != POWERPC_MMU_REAL) { - ppc_tlb_invalidate_all(env); - } #endif - env->msr = msr & env->msr_mask; + #if defined(TARGET_PPC64) if (env->mmu_model & POWERPC_MMU_64) { env->msr |= (1ULL << MSR_SF); } #endif + + hreg_store_msr(env, msr, 1); + +#if !defined(CONFIG_USER_ONLY) + env->nip = env->hreset_vector | env->excp_prefix; + if (env->mmu_model != POWERPC_MMU_REAL) { + ppc_tlb_invalidate_all(env); + } +#endif + hreg_compute_hflags(env); env->reserve_addr = (target_ulong)-1ULL; /* Be sure no exception or interrupt is pending */ diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 94c6ca43aa..7216f7a63d 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -113,6 +113,8 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_R0, TCG_REG_R1 }; +#define TCG_REG_TMP TCG_REG_R12 + static inline void reloc_abs32(void *code_ptr, tcg_target_long target) { *(uint32_t *) code_ptr = target; @@ -145,6 +147,11 @@ static void patch_reloc(uint8_t *code_ptr, int type, } } +#define TCG_CT_CONST_ARM 0x100 +#define TCG_CT_CONST_INV 0x200 +#define TCG_CT_CONST_NEG 0x400 +#define TCG_CT_CONST_ZERO 0x800 + /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { @@ -153,8 +160,17 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct_str = *pct_str; switch (ct_str[0]) { case 'I': - 
ct->ct |= TCG_CT_CONST_ARM; - break; + ct->ct |= TCG_CT_CONST_ARM; + break; + case 'K': + ct->ct |= TCG_CT_CONST_INV; + break; + case 'N': /* The gcc constraint letter is L, already used here. */ + ct->ct |= TCG_CT_CONST_NEG; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_ZERO; + break; case 'r': ct->ct |= TCG_CT_REG; @@ -166,18 +182,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); #ifdef CONFIG_SOFTMMU - /* r0 and r1 will be overwritten when reading the tlb entry, + /* r0-r2 will be overwritten when reading the tlb entry, so don't use these. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#if TARGET_LONG_BITS == 64 - /* If we're passing env to the helper as r0 and need a regpair - * for the address then r2 will be overwritten as we're setting - * up the args to the helper. - */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); #endif -#endif break; case 'L': ct->ct |= TCG_CT_REG; @@ -193,29 +203,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) case 's': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0 and r1 will be overwritten when reading the tlb entry - (softmmu only) and doing the byte swapping, so don't - use these. */ + /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) + and r0-r1 doing the byte swapping, so don't use these. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#if defined(CONFIG_SOFTMMU) && (TARGET_LONG_BITS == 64) +#if defined(CONFIG_SOFTMMU) /* Avoid clashes with registers being used for helper args */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); -#endif - break; - /* qemu_st64 data_reg2 */ - case 'S': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0 and r1 will be overwritten when reading the tlb entry - (softmmu only) and doing the byte swapping, so don't - use these. */ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -#ifdef CONFIG_SOFTMMU - /* r2 is still needed to load data_reg, so don't use it. 
*/ - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); #if TARGET_LONG_BITS == 64 /* Avoid clashes with registers being used for helper args */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); @@ -275,38 +269,65 @@ static inline int check_fit_imm(uint32_t imm) * add, sub, eor...: ditto */ static inline int tcg_target_const_match(tcg_target_long val, - const TCGArgConstraint *arg_ct) + const TCGArgConstraint *arg_ct) { int ct; ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + if (ct & TCG_CT_CONST) { + return 1; + } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { + return 1; + } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { + return 1; + } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { return 1; - else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) + } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; - else + } else { return 0; + } } -enum arm_data_opc_e { - ARITH_AND = 0x0, - ARITH_EOR = 0x1, - ARITH_SUB = 0x2, - ARITH_RSB = 0x3, - ARITH_ADD = 0x4, - ARITH_ADC = 0x5, - ARITH_SBC = 0x6, - ARITH_RSC = 0x7, - ARITH_TST = 0x8, - ARITH_CMP = 0xa, - ARITH_CMN = 0xb, - ARITH_ORR = 0xc, - ARITH_MOV = 0xd, - ARITH_BIC = 0xe, - ARITH_MVN = 0xf, -}; - -#define TO_CPSR(opc) \ - ((opc == ARITH_CMP || opc == ARITH_CMN || opc == ARITH_TST) << 20) +#define TO_CPSR (1 << 20) + +typedef enum { + ARITH_AND = 0x0 << 21, + ARITH_EOR = 0x1 << 21, + ARITH_SUB = 0x2 << 21, + ARITH_RSB = 0x3 << 21, + ARITH_ADD = 0x4 << 21, + ARITH_ADC = 0x5 << 21, + ARITH_SBC = 0x6 << 21, + ARITH_RSC = 0x7 << 21, + ARITH_TST = 0x8 << 21 | TO_CPSR, + ARITH_CMP = 0xa << 21 | TO_CPSR, + ARITH_CMN = 0xb << 21 | TO_CPSR, + ARITH_ORR = 0xc << 21, + ARITH_MOV = 0xd << 21, + ARITH_BIC = 0xe << 21, + ARITH_MVN = 0xf << 21, + + INSN_LDR_IMM = 0x04100000, + INSN_LDR_REG = 0x06100000, + INSN_STR_IMM = 0x04000000, + INSN_STR_REG = 0x06000000, + + INSN_LDRH_IMM = 0x005000b0, + INSN_LDRH_REG = 0x001000b0, + INSN_LDRSH_IMM = 0x005000f0, + INSN_LDRSH_REG = 0x001000f0, + INSN_STRH_IMM = 0x004000b0, + INSN_STRH_REG = 0x000000b0, + + INSN_LDRB_IMM = 0x04500000, + INSN_LDRB_REG = 0x06500000, + INSN_LDRSB_IMM = 0x005000d0, + INSN_LDRSB_REG = 0x001000d0, + INSN_STRB_IMM = 0x04400000, + INSN_STRB_REG = 0x06400000, + + INSN_LDRD_IMM = 0x004000d0, +} ARMInsn; #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20) @@ -394,53 +415,64 @@ static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) static inline void tcg_out_dat_reg(TCGContext *s, int cond, int opc, int rd, int rn, int rm, int shift) { - tcg_out32(s, (cond << 28) | (0 << 25) | (opc << 21) | TO_CPSR(opc) | + tcg_out32(s, (cond << 28) | (0 << 25) | opc | (rn << 16) | (rd << 12) | shift | rm); } -static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) +static inline void tcg_out_nop(TCGContext *s) { - /* Simple reg-reg move, optimising out the 'do nothing' case */ - if (rd != rm) { - tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); + if (use_armv7_instructions) { + /* Architected nop introduced in v6k. */ + /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this + also Just So Happened to do nothing on pre-v6k so that we + don't need to conditionalize it? */ + tcg_out32(s, 0xe320f000); + } else { + /* Prior to that the assembler uses mov r0, r0. 
*/ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); } } -static inline void tcg_out_dat_reg2(TCGContext *s, - int cond, int opc0, int opc1, int rd0, int rd1, - int rn0, int rn1, int rm0, int rm1, int shift) +static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) { - if (rd0 == rn1 || rd0 == rm1) { - tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) | - (rn0 << 16) | (8 << 12) | shift | rm0); - tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) | - (rn1 << 16) | (rd1 << 12) | shift | rm1); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd0, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); - } else { - tcg_out32(s, (cond << 28) | (0 << 25) | (opc0 << 21) | (1 << 20) | - (rn0 << 16) | (rd0 << 12) | shift | rm0); - tcg_out32(s, (cond << 28) | (0 << 25) | (opc1 << 21) | - (rn1 << 16) | (rd1 << 12) | shift | rm1); + /* Simple reg-reg move, optimising out the 'do nothing' case */ + if (rd != rm) { + tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); } } static inline void tcg_out_dat_imm(TCGContext *s, int cond, int opc, int rd, int rn, int im) { - tcg_out32(s, (cond << 28) | (1 << 25) | (opc << 21) | TO_CPSR(opc) | + tcg_out32(s, (cond << 28) | (1 << 25) | opc | (rn << 16) | (rd << 12) | im); } -static inline void tcg_out_movi32(TCGContext *s, - int cond, int rd, uint32_t arg) +static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) { - /* TODO: This is very suboptimal, we can easily have a constant - * pool somewhere after all the instructions. */ - if ((int)arg < 0 && (int)arg >= -0x100) { - tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); - } else if (use_armv7_instructions) { - /* use movw/movt */ + int rot, opc, rn; + + /* For armv7, make sure not to use movw+movt when mov/mvn would do. + Speed things up by only checking when movt would be required. + Prior to armv7, have one go at fully rotated immediates before + doing the decomposition thing below. */ + if (!use_armv7_instructions || (arg & 0xffff0000)) { + rot = encode_imm(arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, + rotl(arg, rot) | (rot << 7)); + return; + } + rot = encode_imm(~arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, + rotl(~arg, rot) | (rot << 7)); + return; + } + } + + /* Use movw + movt. */ + if (use_armv7_instructions) { /* movw */ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); @@ -449,22 +481,27 @@ static inline void tcg_out_movi32(TCGContext *s, tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); } - } else { - int opc = ARITH_MOV; - int rn = 0; - - do { - int i, rot; - - i = ctz32(arg) & ~1; - rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); - arg &= ~(0xff << i); + return; + } - opc = ARITH_ORR; - rn = rd; - } while (arg); + /* TODO: This is very suboptimal, we can easily have a constant + pool somewhere after all the instructions. */ + opc = ARITH_MOV; + rn = 0; + /* If we have lots of leading 1's, we can shorten the sequence by + beginning with mvn and then clearing higher bits with eor. 
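+       For example, arg = 0xffff100f takes four mov/eor steps this way,
+       while ~arg = 0x0000eff0 splits into just two 8-bit chunks, so
+       starting with mvn needs only two instructions in total.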
*/ + if (clz32(~arg) > clz32(arg)) { + opc = ARITH_MVN, arg = ~arg; } + do { + int i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_EOR; + rn = rd; + } while (arg); } static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, @@ -482,55 +519,112 @@ static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, } } -static inline void tcg_out_mul32(TCGContext *s, - int cond, int rd, int rs, int rm) +static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, + TCGReg dst, TCGReg lhs, TCGArg rhs, + bool rhs_is_const) { - if (rd != rm) - tcg_out32(s, (cond << 28) | (rd << 16) | (0 << 12) | - (rs << 8) | 0x90 | rm); - else if (rd != rs) - tcg_out32(s, (cond << 28) | (rd << 16) | (0 << 12) | - (rm << 8) | 0x90 | rs); - else { - tcg_out32(s, (cond << 28) | ( 8 << 16) | (0 << 12) | - (rs << 8) | 0x90 | rm); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIK" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = ~rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opinv; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } } -static inline void tcg_out_umull32(TCGContext *s, - int cond, int rd0, int rd1, int rs, int rm) +static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, + TCGArg dst, TCGArg lhs, TCGArg rhs, + bool rhs_is_const) { - if (rd0 != rm && rd1 != rm) - tcg_out32(s, (cond << 28) | 0x800090 | - (rd1 << 16) | (rd0 << 12) | (rs << 8) | rm); - else if (rd0 != rs && rd1 != rs) - tcg_out32(s, (cond << 28) | 0x800090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rs); - else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rm, SHIFT_IMM_LSL(0)); - tcg_out32(s, (cond << 28) | 0x800098 | - (rd1 << 16) | (rd0 << 12) | (rs << 8)); + /* Emit either the reg,imm or reg,reg form of a data-processing insn. + * rhs must satisfy the "rIN" constraint. + */ + if (rhs_is_const) { + int rot = encode_imm(rhs); + if (rot < 0) { + rhs = -rhs; + rot = encode_imm(rhs); + assert(rot >= 0); + opc = opneg; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } } -static inline void tcg_out_smull32(TCGContext *s, - int cond, int rd0, int rd1, int rs, int rm) +static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, + TCGReg rn, TCGReg rm) { - if (rd0 != rm && rd1 != rm) - tcg_out32(s, (cond << 28) | 0xc00090 | - (rd1 << 16) | (rd0 << 12) | (rs << 8) | rm); - else if (rd0 != rs && rd1 != rs) - tcg_out32(s, (cond << 28) | 0xc00090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rs); - else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rm, SHIFT_IMM_LSL(0)); - tcg_out32(s, (cond << 28) | 0xc00098 | - (rd1 << 16) | (rd0 << 12) | (rs << 8)); + /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ + if (!use_armv6_instructions && rd == rn) { + if (rd == rm) { + /* rd == rn == rm; copy an input to tmp first. 
*/ + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rm = rn = TCG_REG_TMP; + } else { + rn = rm; + rm = rd; + } } + /* mul */ + tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); +} + +static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* umull */ + tcg_out32(s, (cond << 28) | 0x00800090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, + TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ + if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { + if (rd0 == rm || rd1 == rm) { + tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); + rn = TCG_REG_TMP; + } else { + TCGReg t = rn; + rn = rm; + rm = t; + } + } + /* smull */ + tcg_out32(s, (cond << 28) | 0x00c00090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ + tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); } static inline void tcg_out_ext8s(TCGContext *s, int cond, @@ -588,11 +682,11 @@ static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_ASR(16)); + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16)); tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); } } @@ -603,11 +697,11 @@ static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24)); tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_LSR(16)); + TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16)); tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8)); } } @@ -620,10 +714,10 @@ static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_R8, 0, rn, SHIFT_IMM_LSR(8)); - tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_R8, TCG_REG_R8, 0xff); + TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); + tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_R8, rn, SHIFT_IMM_LSL(8)); + rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); } } @@ -634,205 +728,186 @@ static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); } else { tcg_out_dat_reg(s, cond, ARITH_EOR, - TCG_REG_R8, rn, rn, SHIFT_IMM_ROR(16)); + TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); tcg_out_dat_imm(s, cond, 
ARITH_BIC, - TCG_REG_R8, TCG_REG_R8, 0xff | 0x800); + TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rn, SHIFT_IMM_ROR(8)); tcg_out_dat_reg(s, cond, ARITH_EOR, - rd, rd, TCG_REG_R8, SHIFT_IMM_LSR(8)); + rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); } } -static inline void tcg_out_ld32_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +bool tcg_target_deposit_valid(int ofs, int len) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05900000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05100000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + /* ??? Without bfi, we could improve over generic code by combining + the right-shift from a non-zero ofs with the orr. We do run into + problems when rd == rs, and the mask generated from ofs+len doesn't + fit into an immediate. We would have to be careful not to pessimize + wrt the optimizations performed on the expanded code. */ + return use_armv7_instructions; } -/* Offset pre-increment with base writeback. */ -static inline void tcg_out_ld32_12wb(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, + TCGArg a1, int ofs, int len, bool const_a1) { - /* ldr with writeback and both register equals is UNPREDICTABLE */ - assert(rd != rn); + if (const_a1) { + /* bfi becomes bfc with rn == 15. */ + a1 = 15; + } + /* bfi/bfc */ + tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 + | (ofs << 7) | ((ofs + len - 1) << 16)); +} - if (im >= 0) { - tcg_out32(s, (cond << 28) | 0x05b00000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - } else { - tcg_out32(s, (cond << 28) | 0x05300000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); +/* Note that this routine is used for both LDR and LDRH formats, so we do + not wish to include an immediate shift at this point. 
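+   (The halfword and signed-byte register forms use addressing mode 3,
+   which has no shift field for the register offset, so a shift amount is
+   only encodable in the word and unsigned-byte forms.)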
*/ +static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, TCGReg rm, bool u, bool p, bool w) +{ + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) + | (w << 21) | (rn << 16) | (rt << 12) | rm); +} + +static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm8, bool p, bool w) +{ + bool u = 1; + if (imm8 < 0) { + imm8 = -imm8; + u = 0; + } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); +} + +static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, + TCGReg rn, int imm12, bool p, bool w) +{ + bool u = 1; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; } + tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | + (rn << 16) | (rt << 12) | imm12); +} + +static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) +{ + tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_st32_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05800000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05000000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_ld32_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07900000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_st32_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07800000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); } /* Register pre-increment with base writeback. 
*/ -static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07b00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); } -static inline void tcg_out_st32_rwb(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07a00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); } -static inline void tcg_out_ld16u_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01d000b0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x015000b0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_st16_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01c000b0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x014000b0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_ld16u_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x019000b0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_st16_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x018000b0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_ld16s_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01d000f0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x015000f0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_ld16s_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x019000f0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_ld8_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05d00000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05500000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + 
tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_st8_12(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm12) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x05c00000 | - (rn << 16) | (rd << 12) | (im & 0xfff)); - else - tcg_out32(s, (cond << 28) | 0x05400000 | - (rn << 16) | (rd << 12) | ((-im) & 0xfff)); + tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); } -static inline void tcg_out_ld8_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07d00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_st8_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x07c00000 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); } -static inline void tcg_out_ld8s_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) +static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, int imm8) { - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01d000d0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x015000d0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); + tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); } -static inline void tcg_out_ld8s_r(TCGContext *s, int cond, - int rd, int rn, int rm) +static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, + TCGReg rn, TCGReg rm) { - tcg_out32(s, (cond << 28) | 0x019000d0 | - (rn << 16) | (rd << 12) | rm); + tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); } static inline void tcg_out_ld32u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld32_12(s, cond, rd, rn, offset); } @@ -841,8 +916,8 @@ static inline void tcg_out_st32(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st32_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_st32_12(s, cond, rd, rn, offset); } @@ -851,8 +926,8 @@ static inline void tcg_out_ld16u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld16u_8(s, cond, rd, rn, offset); } @@ -861,8 +936,8 @@ static inline void tcg_out_ld16s(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld16s_8(s, cond, rd, rn, offset); } 
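For reference, a self-contained sketch of what the new tcg_out_memop_12() helper assembles (an editor's illustration, not part of the patch; the COND_AL value and the expected word below follow the standard ARM encoding):

#include <stdint.h>
#include <stdio.h>

#define COND_AL       0xeu          /* "always" condition code */
#define INSN_LDRB_IMM 0x04500000u   /* value from the ARMInsn enum above */

/* Mirrors tcg_out_memop_12(): compose the imm12 load/store word, setting
   U (add vs. subtract offset), P (pre-index) and W (writeback) explicitly. */
static uint32_t memop_12(uint32_t cond, uint32_t opc, uint32_t rt,
                         uint32_t rn, int imm12, uint32_t p, uint32_t w)
{
    uint32_t u = 1;
    if (imm12 < 0) {                /* negative offsets flip the U bit */
        imm12 = -imm12;
        u = 0;
    }
    return (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21)
           | (rn << 16) | (rt << 12) | (uint32_t)imm12;
}

int main(void)
{
    /* ldrb r0, [r1, #8]: prints 0xe5d10008, the architected encoding. */
    printf("0x%08x\n", memop_12(COND_AL, INSN_LDRB_IMM, 0, 1, 8, 1, 0));
    return 0;
}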
@@ -871,8 +946,8 @@ static inline void tcg_out_st16(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st16_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_st16_8(s, cond, rd, rn, offset); } @@ -881,8 +956,8 @@ static inline void tcg_out_ld8u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld8_12(s, cond, rd, rn, offset); } @@ -891,8 +966,8 @@ static inline void tcg_out_ld8s(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_ld8s_8(s, cond, rd, rn, offset); } @@ -901,8 +976,8 @@ static inline void tcg_out_st8(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st8_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_movi32(s, cond, TCG_REG_TMP, offset); + tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); } else tcg_out_st8_12(s, cond, rd, rn, offset); } @@ -930,10 +1005,10 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); tcg_out32(s, addr); } else { - tcg_out_movi32(s, cond, TCG_REG_R8, val - 8); + tcg_out_movi32(s, cond, TCG_REG_TMP, val - 8); tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_PC, TCG_REG_PC, - TCG_REG_R8, SHIFT_IMM_LSL(0)); + TCG_REG_TMP, SHIFT_IMM_LSL(0)); } } } @@ -955,6 +1030,9 @@ static inline void tcg_out_call(TCGContext *s, uint32_t addr) } else { tcg_out_bl(s, COND_AL, val); } + } else if (use_armv7_instructions) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr); + tcg_out_blx(s, COND_AL, TCG_REG_TMP); } else { tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); @@ -977,14 +1055,9 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) { TCGLabel *l = &s->labels[label_index]; - if (l->has_value) + if (l->has_value) { tcg_out_goto(s, cond, l->u.value); - else if (cond == COND_AL) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out_reloc(s, s->code_ptr, R_ARM_ABS32, label_index, 31337); - s->code_ptr += 4; } else { - /* Probably this should be preferred even for COND_AL... */ tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337); tcg_out_b_noaddr(s, cond); } @@ -1017,64 +1090,35 @@ static const void * const qemu_st_helpers[4] = { * argreg is where we want to put this argument, arg is the argument itself. * Return value is the updated argreg ready for the next call. * Note that argreg 0..3 is real registers, 4+ on stack. - * When we reach the first stacked argument, we allocate space for it - * and the following stacked arguments using "str r8, [sp, #-0x10]!". - * Following arguments are filled in with "str r8, [sp, #0xNN]". - * For more than 4 stacked arguments we'd need to know how much - * space to allocate when we pushed the first stacked argument. 
- * We don't need this, so don't implement it (and will assert if you try it.) * * We provide routines for arguments which are: immediate, 32 bit * value in register, 16 and 8 bit values in register (which must be zero * extended before use) and 64 bit value in a lo:hi register pair. */ -#define DEFINE_TCG_OUT_ARG(NAME, ARGPARAM) \ - static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGPARAM) \ - { \ - if (argreg < 4) { \ - TCG_OUT_ARG_GET_ARG(argreg); \ - } else if (argreg == 4) { \ - TCG_OUT_ARG_GET_ARG(TCG_REG_R8); \ - tcg_out32(s, (COND_AL << 28) | 0x052d8010); \ - } else { \ - assert(argreg < 8); \ - TCG_OUT_ARG_GET_ARG(TCG_REG_R8); \ - tcg_out32(s, (COND_AL << 28) | 0x058d8000 | (argreg - 4) * 4); \ - } \ - return argreg + 1; \ - } - -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_dat_imm(s, COND_AL, ARITH_MOV, A, 0, arg) -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t arg) -#undef TCG_OUT_ARG_GET_ARG -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext8u(s, COND_AL, A, arg) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg arg) -#undef TCG_OUT_ARG_GET_ARG -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext16u(s, COND_AL, A, arg) -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg arg) -#undef TCG_OUT_ARG_GET_ARG - -/* We don't use the macro for this one to avoid an unnecessary reg-reg - * move when storing to the stack. - */ -static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg) -{ - if (argreg < 4) { - tcg_out_mov_reg(s, COND_AL, argreg, arg); - } else if (argreg == 4) { - /* str arg, [sp, #-0x10]! */ - tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (arg << 12)); - } else { - assert(argreg < 8); - /* str arg, [sp, #0xNN] */ - tcg_out32(s, (COND_AL << 28) | 0x058d0000 | - (arg << 12) | (argreg - 4) * 4); - } - return argreg + 1; -} - -static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, - TCGReg arglo, TCGReg arghi) +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \ +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \ +{ \ + if (argreg < 4) { \ + MOV_ARG(s, COND_AL, argreg, arg); \ + } else { \ + int ofs = (argreg - 4) * 4; \ + EXT_ARG; \ + assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \ + tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \ + } \ + return argreg + 1; \ +} + +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, + (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, + (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, + (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) + +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, + TCGReg arglo, TCGReg arghi) { /* 64 bit arguments must go in even/odd register pairs * and in 8-aligned stack slots. @@ -1087,180 +1131,302 @@ static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, return argreg; } -static inline void tcg_out_arg_stacktidy(TCGContext *s, TCGReg argreg) +#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + +/* Load and compare a TLB entry, leaving the flags set. Leaves R2 pointing + to the tlb entry. Clobbers R1 and TMP. 
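+   On exit the EQ condition holds exactly when the comparator matches (and,
+   for s_bits != 0, the address is suitably aligned); callers branch to the
+   slow path on NE.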
*/ + +static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + int s_bits, int tlb_offset) { - /* Output any necessary post-call cleanup of the stack */ - if (argreg > 4) { - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10); + TCGReg base = TCG_AREG0; + + /* Should generate something like the following: + * pre-v7: + * shr tmp, addr_reg, #TARGET_PAGE_BITS (1) + * add r2, env, #off & 0xff00 + * and r0, tmp, #(CPU_TLB_SIZE - 1) (2) + * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3) + * ldr r0, [r2, #off & 0xff]! (4) + * tst addr_reg, #s_mask + * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS (5) + * + * v7 (not implemented yet): + * ubfx r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS (1) + * movw tmp, #~TARGET_PAGE_MASK & ~s_mask + * movw r0, #off + * add r2, env, r2, lsl #CPU_TLB_ENTRY_BITS (2) + * bic tmp, addr_reg, tmp + * ldr r0, [r2, r0]! (3) + * cmp r0, tmp (4) + */ +# if CPU_TLB_BITS > 8 +# error +# endif + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, + 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + + /* We assume that the offset is contained within 16 bits. */ + assert((tlb_offset & ~0xffff) == 0); + if (tlb_offset > 0xff) { + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + (24 << 7) | (tlb_offset >> 8)); + tlb_offset &= 0xff; + base = TCG_REG_R2; + } + + tcg_out_dat_imm(s, COND_AL, ARITH_AND, + TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, + TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + + /* Load the tlb comparator. Use ldrd if needed and available, + but due to how the pointer needs setting up, ldm isn't useful. + Base arm5 doesn't have ldrd, but armv5te does. */ + if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0, + TCG_REG_R2, tlb_offset, 1, 1); + } else { + tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, + TCG_REG_R2, tlb_offset, 1, 1); + if (TARGET_LONG_BITS == 64) { + tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, + TCG_REG_R2, 4, 1, 0); + } + } + + /* Check alignment. */ + if (s_bits) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, + 0, addrlo, (1 << s_bits) - 1); + } + + tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, + TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + + if (TARGET_LONG_BITS == 64) { + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); } } -#endif +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. 
*/ +static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc, + int data_reg, int data_reg2, int addrlo_reg, + int addrhi_reg, int mem_index, + uint8_t *raddr, uint8_t *label_ptr) +{ + int idx; + TCGLabelQemuLdst *label; -#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { + tcg_abort(); + } -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) + idx = s->nb_qemu_ldst_labels++; + label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->datahi_reg = data_reg2; + label->addrlo_reg = addrlo_reg; + label->addrhi_reg = addrhi_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - int addr_reg, data_reg, data_reg2, bswap; + TCGReg argreg, data_reg, data_reg2; + uint8_t *start; + + reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index); + tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[lb->opc & 3]); + + data_reg = lb->datalo_reg; + data_reg2 = lb->datahi_reg; + + start = s->code_ptr; + switch (lb->opc) { + case 0 | 4: + tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 1 | 4: + tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 0: + case 1: + case 2: + default: + tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 3: + tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1); + break; + } + + /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between + the call and the branch back to straight-line code. Note that the + moves above could be elided by register allocation, nor do we know + which code alternative we chose for extension. */ + switch (s->code_ptr - start) { + case 0: + tcg_out_nop(s); + /* FALLTHRU */ + case 4: + tcg_out_nop(s); + /* FALLTHRU */ + case 8: + break; + default: + abort(); + } + + tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, data_reg, data_reg2; + + reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + argreg = TCG_REG_R0; + argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + + data_reg = lb->datalo_reg; + data_reg2 = lb->datahi_reg; + switch (lb->opc) { + case 0: + argreg = tcg_out_arg_reg8(s, argreg, data_reg); + break; + case 1: + argreg = tcg_out_arg_reg16(s, argreg, data_reg); + break; + case 2: + argreg = tcg_out_arg_reg32(s, argreg, data_reg); + break; + case 3: + argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2); + break; + } + + argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index); + tcg_out_call(s, (tcg_target_long) qemu_st_helpers[lb->opc & 3]); + + /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between + the call and the branch back to straight-line code. 
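+       The store helpers return no value, so unlike the load path there are
+       no result moves to account for; both slots are simply nops.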
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+{
+    TCGReg addr_reg, data_reg, data_reg2;
+    bool bswap;
 #ifdef CONFIG_SOFTMMU
-    int mem_index, s_bits, tlb_offset;
-    TCGReg argreg;
-# if TARGET_LONG_BITS == 64
-    int addr_reg2;
-# endif
-    uint32_t *label_ptr;
+    int mem_index, s_bits;
+    TCGReg addr_reg2;
+    uint8_t *label_ptr;
 #endif
-
 #ifdef TARGET_WORDS_BIGENDIAN
     bswap = 1;
 #else
     bswap = 0;
 #endif
+
     data_reg = *args++;
-    if (opc == 3)
-        data_reg2 = *args++;
-    else
-        data_reg2 = 0; /* suppress warning */
+    data_reg2 = (opc == 3 ? *args++ : 0);
     addr_reg = *args++;
 #ifdef CONFIG_SOFTMMU
-# if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-# endif
+    addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
     mem_index = *args;
     s_bits = opc & 3;

-    /* Should generate something like the following:
-     *  shr r8, addr_reg, #TARGET_PAGE_BITS
-     *  and r0, r8, #(CPU_TLB_SIZE - 1)   @ Assumption: CPU_TLB_BITS <= 8
-     *  add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
-     */
-# if CPU_TLB_BITS > 8
-# error
-# endif
-    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8,
-                    0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
-    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
-                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
-    /* We assume that the offset is contained within 20 bits.  */
-    tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
-    assert((tlb_offset & ~0xfffff) == 0);
-    if (tlb_offset > 0xfff) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
-                        0xa00 | (tlb_offset >> 12));
-        tlb_offset &= 0xfff;
-    }
-    tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
-    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
-                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
-    /* Check alignment.  */
-    if (s_bits)
-        tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
-                        0, addr_reg, (1 << s_bits) - 1);
-# if TARGET_LONG_BITS == 64
-    /* XXX: possibly we could use a block data load in the first access.  */
-    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
-    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
-# endif
-    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
+    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
+
+    label_ptr = s->code_ptr;
+    tcg_out_b_noaddr(s, COND_NE);
+
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
                     offsetof(CPUTLBEntry, addend)
                     - offsetof(CPUTLBEntry, addr_read));

     switch (opc) {
     case 0:
-        tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         break;
     case 0 | 4:
-        tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         break;
     case 1:
-        tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         if (bswap) {
-            tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
+            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
         }
         break;
     case 1 | 4:
         if (bswap) {
-            tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
-            tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
+            tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
         } else {
-            tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         }
         break;
     case 2:
     default:
-        tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         if (bswap) {
-            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
+            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
         }
         break;
     case 3:
         if (bswap) {
-            tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
-            tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
-            tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
-            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
+            tcg_out_ld32_rwb(s, COND_AL, data_reg2, TCG_REG_R1, addr_reg);
+            tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4);
+            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
+            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
         } else {
-            tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
-            tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
+            tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
+            tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
         }
         break;
     }

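The COND_NE branch emitted by tcg_out_b_noaddr above is left without a target; once the slow path is generated at the end of the TB, reloc_pc24() back-patches its 24-bit displacement. The patch arithmetic is roughly the following (sketch; the function name is illustrative, but this is what an ARM pc24 relocation has to do):

    /* Back-patch a B<cond> so it branches to `target'.  The imm24 field
       holds a word offset relative to the branch address + 8 (pipeline
       bias); the top byte (condition + opcode) is preserved.  */
    static void patch_pc24(uint32_t *insn, uintptr_t target)
    {
        ptrdiff_t disp = (target - ((uintptr_t)insn + 8)) >> 2;
        *insn = (*insn & 0xff000000) | (disp & 0x00ffffff);
    }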
-    label_ptr = (void *) s->code_ptr;
-    tcg_out_b_noaddr(s, COND_EQ);
-
-    /* TODO: move this code to where the constants pool will be */
-    /* Note that this code relies on the constraints we set in arm_op_defs[]
-     * to ensure that later arguments are not passed to us in registers we
-     * trash by moving the earlier arguments into them.
-     */
-    argreg = TCG_REG_R0;
-    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#if TARGET_LONG_BITS == 64
-    argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
-#else
-    argreg = tcg_out_arg_reg32(s, argreg, addr_reg);
-#endif
-    argreg = tcg_out_arg_imm32(s, argreg, mem_index);
-    tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
-    tcg_out_arg_stacktidy(s, argreg);
-
-    switch (opc) {
-    case 0 | 4:
-        tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
-        break;
-    case 1 | 4:
-        tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
-        break;
-    case 0:
-    case 1:
-    case 2:
-    default:
-        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
-        break;
-    case 3:
-        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
-        tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
-        break;
-    }
-
-    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
+    add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2,
+                        mem_index, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         uint32_t offset = GUEST_BASE;
-        int i;
-        int rot;
+        int i, rot;

         while (offset) {
             i = ctz32(offset) & ~1;
             rot = ((32 - i) << 7) & 0xf00;
-            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg,
+            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addr_reg,
                             ((offset >> i) & 0xff) | rot);
-            addr_reg = TCG_REG_R8;
+            addr_reg = TCG_REG_TMP;
             offset &= ~(0xff << i);
         }
     }
@@ -1311,142 +1477,76 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }

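In the non-softmmu case above, GUEST_BASE is folded into the address one ADD at a time, because an ARM data-processing immediate is an 8-bit value rotated right by an even amount. The decomposition the loop performs can be written in plain C as follows (sketch; arm_imm_chunks() is illustrative):

    #include <stdint.h>

    /* Split a 32-bit constant into "imm8 ror even" chunks, as the
       GUEST_BASE loop does; each chunk becomes one ADD.  At most four
       iterations, since each step consumes at least eight bits.  */
    static int arm_imm_chunks(uint32_t offset, uint32_t chunks[4])
    {
        int n = 0;
        while (offset) {
            int i = __builtin_ctz(offset) & ~1;    /* even rotations only */
            chunks[n++] = offset & (0xffu << i);
            offset &= ~(0xffu << i);
        }
        return n;
    }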
-static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
-    int addr_reg, data_reg, data_reg2, bswap;
+    TCGReg addr_reg, data_reg, data_reg2;
+    bool bswap;
 #ifdef CONFIG_SOFTMMU
-    int mem_index, s_bits, tlb_offset;
-    TCGReg argreg;
-# if TARGET_LONG_BITS == 64
-    int addr_reg2;
-# endif
-    uint32_t *label_ptr;
+    int mem_index, s_bits;
+    TCGReg addr_reg2;
+    uint8_t *label_ptr;
 #endif
-
 #ifdef TARGET_WORDS_BIGENDIAN
     bswap = 1;
 #else
     bswap = 0;
 #endif
+
     data_reg = *args++;
-    if (opc == 3)
-        data_reg2 = *args++;
-    else
-        data_reg2 = 0; /* suppress warning */
+    data_reg2 = (opc == 3 ? *args++ : 0);
     addr_reg = *args++;
 #ifdef CONFIG_SOFTMMU
-# if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-# endif
+    addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
     mem_index = *args;
     s_bits = opc & 3;

-    /* Should generate something like the following:
-     *  shr r8, addr_reg, #TARGET_PAGE_BITS
-     *  and r0, r8, #(CPU_TLB_SIZE - 1)   @ Assumption: CPU_TLB_BITS <= 8
-     *  add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
-     */
-    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                    TCG_REG_R8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
-    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
-                    TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
-    /* We assume that the offset is contained within 20 bits.  */
-    tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    assert((tlb_offset & ~0xfffff) == 0);
-    if (tlb_offset > 0xfff) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
-                        0xa00 | (tlb_offset >> 12));
-        tlb_offset &= 0xfff;
-    }
-    tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
-    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
-                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
-    /* Check alignment.  */
-    if (s_bits)
-        tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
-                        0, addr_reg, (1 << s_bits) - 1);
-# if TARGET_LONG_BITS == 64
-    /* XXX: possibly we could use a block data load in the first access.  */
-    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
-    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
-# endif
-    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
+    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
+                     offsetof(CPUArchState,
+                              tlb_table[mem_index][0].addr_write));
+
+    label_ptr = s->code_ptr;
+    tcg_out_b_noaddr(s, COND_NE);
+
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
                     offsetof(CPUTLBEntry, addend)
                     - offsetof(CPUTLBEntry, addr_write));

     switch (opc) {
     case 0:
-        tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        tcg_out_st8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         break;
     case 1:
         if (bswap) {
-            tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
-            tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
+            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
+            tcg_out_st16_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1);
         } else {
-            tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_st16_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         }
         break;
     case 2:
     default:
         if (bswap) {
-            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
-            tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
+            tcg_out_st32_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1);
         } else {
-            tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_st32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
         }
         break;
     case 3:
         if (bswap) {
-            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
-            tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg);
-            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
-            tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
+            tcg_out_st32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R1, addr_reg);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 4);
         } else {
-            tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
-            tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
+            tcg_out_st32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg);
+            tcg_out_st32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4);
         }
         break;
     }

-    label_ptr = (void *) s->code_ptr;
-    tcg_out_b_noaddr(s, COND_EQ);
-
-    /* TODO: move this code to where the constants pool will be */
-    /* Note that this code relies on the constraints we set in arm_op_defs[]
-     * to ensure that later arguments are not passed to us in registers we
-     * trash by moving the earlier arguments into them.
-     */
-    argreg = TCG_REG_R0;
-    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#if TARGET_LONG_BITS == 64
-    argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
-#else
-    argreg = tcg_out_arg_reg32(s, argreg, addr_reg);
-#endif
-
-    switch (opc) {
-    case 0:
-        argreg = tcg_out_arg_reg8(s, argreg, data_reg);
-        break;
-    case 1:
-        argreg = tcg_out_arg_reg16(s, argreg, data_reg);
-        break;
-    case 2:
-        argreg = tcg_out_arg_reg32(s, argreg, data_reg);
-        break;
-    case 3:
-        argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
-        break;
-    }
-
-    argreg = tcg_out_arg_imm32(s, argreg, mem_index);
-    tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
-    tcg_out_arg_stacktidy(s, argreg);
-
-    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
+    add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2,
+                        mem_index, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         uint32_t offset = GUEST_BASE;
@@ -1506,6 +1606,7 @@ static uint8_t *tb_ret_addr;
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
+    TCGArg a0, a1, a2, a3, a4, a5;
     int c;

     switch (opc) {
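The hunks that follow replace plain rI constant operands with the new rIN and rIK variants: a constant is also acceptable if its negation (N) or bitwise inverse (K) is encodable, in which case the emitter switches to the paired opposite instruction (CMP/CMN, ADD/SUB, MOV/MVN, AND/BIC, ADC/SBC). A sketch of the acceptance test -- encodes_imm8() stands in for the backend's rotated-immediate check, and the ARM_CT_* flags are illustrative:

    #include <stdbool.h>
    #include <stdint.h>

    /* True if v fits ARM's "8-bit value rotated right by an even amount". */
    static bool encodes_imm8(uint32_t v)
    {
        int r;
        for (r = 0; r < 32; r += 2) {
            uint32_t rot = r ? (v << r) | (v >> (32 - r)) : v;
            if (rot <= 0xff) {
                return true;
            }
        }
        return false;
    }

    enum { ARM_CT_N = 1, ARM_CT_K = 2 };       /* illustrative flags */

    static bool ct_const_ok(uint32_t val, int ct)
    {
        return encodes_imm8(val)
            || ((ct & ARM_CT_N) && encodes_imm8(-val))   /* negated twin */
            || ((ct & ARM_CT_K) && encodes_imm8(~val));  /* inverted twin */
    }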
@@ -1598,23 +1699,36 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         /* Constraints mean that v2 is always in the same register as dest,
          * so we only need to do "if condition passed, move v1 to dest".
          */
-        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
-                       args[1], args[2], const_args[2]);
-        tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]],
-                       ARITH_MOV, args[0], 0, args[3], const_args[3]);
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[2], const_args[2]);
+        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
+                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
         break;
     case INDEX_op_add_i32:
-        c = ARITH_ADD;
-        goto gen_arith;
+        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_sub_i32:
-        c = ARITH_SUB;
-        goto gen_arith;
+        if (const_args[1]) {
+            if (const_args[2]) {
+                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
+            } else {
+                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
+                               args[0], args[2], args[1], 1);
+            }
+        } else {
+            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
+                            args[0], args[1], args[2], const_args[2]);
+        }
+        break;
     case INDEX_op_and_i32:
-        c = ARITH_AND;
-        goto gen_arith;
+        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_andc_i32:
-        c = ARITH_BIC;
-        goto gen_arith;
+        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
+                        args[0], args[1], args[2], const_args[2]);
+        break;
     case INDEX_op_or_i32:
         c = ARITH_ORR;
         goto gen_arith;
@@ -1625,14 +1739,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_add2_i32:
-        tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
-                         args[0], args[1], args[2], args[3],
-                         args[4], args[5], SHIFT_IMM_LSL(0));
+        a0 = args[0], a1 = args[1], a2 = args[2];
+        a3 = args[3], a4 = args[4], a5 = args[5];
+        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
+            a0 = TCG_REG_TMP;
+        }
+        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
+                        a0, a2, a4, const_args[4]);
+        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
+                        a1, a3, a5, const_args[5]);
+        tcg_out_mov_reg(s, COND_AL, args[0], a0);
         break;
     case INDEX_op_sub2_i32:
-        tcg_out_dat_reg2(s, COND_AL, ARITH_SUB, ARITH_SBC,
-                         args[0], args[1], args[2], args[3],
-                         args[4], args[5], SHIFT_IMM_LSL(0));
+        a0 = args[0], a1 = args[1], a2 = args[2];
+        a3 = args[3], a4 = args[4], a5 = args[5];
+        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
+            a0 = TCG_REG_TMP;
+        }
+        if (const_args[2]) {
+            if (const_args[4]) {
+                tcg_out_movi32(s, COND_AL, a0, a4);
+                a4 = a0;
+            }
+            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
+        } else {
+            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
+                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
+        }
+        if (const_args[3]) {
+            if (const_args[5]) {
+                tcg_out_movi32(s, COND_AL, a1, a5);
+                a5 = a1;
+            }
+            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
+        } else {
+            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
+                            a1, a3, a5, const_args[5]);
+        }
+        tcg_out_mov_reg(s, COND_AL, args[0], a0);
         break;
     case INDEX_op_neg_i32:
         tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
@@ -1678,14 +1822,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                             SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
                             SHIFT_IMM_LSL(0));
         } else {
-            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_R8, args[1], 0x20);
+            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[1], 0x20);
             tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
-                            SHIFT_REG_ROR(TCG_REG_R8));
+                            SHIFT_REG_ROR(TCG_REG_TMP));
         }
         break;

     case INDEX_op_brcond_i32:
-        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                        args[0], args[1], const_args[1]);
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
         break;
@@ -1698,15 +1842,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
      * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
      * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 > a3,
      */
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                        args[1], args[3], SHIFT_IMM_LSL(0));
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        args[0], args[2], SHIFT_IMM_LSL(0));
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[3], const_args[3]);
+        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
+                        args[0], args[2], const_args[2]);
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
         break;
     case INDEX_op_setcond_i32:
-        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
-                       args[1], args[2], const_args[2]);
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[2], const_args[2]);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
                         ARITH_MOV, args[0], 0, 1);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
@@ -1714,10 +1858,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_setcond2_i32:
         /* See brcond2_i32 comment */
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                        args[2], args[4], SHIFT_IMM_LSL(0));
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        args[1], args[3], SHIFT_IMM_LSL(0));
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+                        args[2], args[4], const_args[4]);
+        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
+                        args[1], args[3], const_args[3]);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
                         ARITH_MOV, args[0], 0, 1);
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
@@ -1773,11 +1917,51 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_ext16u(s, COND_AL, args[0], args[1]);
         break;

+    case INDEX_op_deposit_i32:
+        tcg_out_deposit(s, COND_AL, args[0], args[2],
+                        args[3], args[4], const_args[2]);
+        break;
+
+    case INDEX_op_div_i32:
+        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_divu_i32:
+        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_rem_i32:
+        tcg_out_sdiv(s, COND_AL, TCG_REG_TMP, args[1], args[2]);
+        tcg_out_mul32(s, COND_AL, TCG_REG_TMP, TCG_REG_TMP, args[2]);
+        tcg_out_dat_reg(s, COND_AL, ARITH_SUB, args[0], args[1], TCG_REG_TMP,
+                        SHIFT_IMM_LSL(0));
+        break;
+    case INDEX_op_remu_i32:
+        tcg_out_udiv(s, COND_AL, TCG_REG_TMP, args[1], args[2]);
+        tcg_out_mul32(s, COND_AL, TCG_REG_TMP, TCG_REG_TMP, args[2]);
+        tcg_out_dat_reg(s, COND_AL, ARITH_SUB, args[0], args[1], TCG_REG_TMP,
+                        SHIFT_IMM_LSL(0));
+        break;
+
     default:
         tcg_abort();
     }
 }

+#ifdef CONFIG_SOFTMMU
+/* Generate TB finalization at the end of block.  */
+void tcg_out_tb_finalize(TCGContext *s)
+{
+    int i;
+    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
+        TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
+        if (label->is_ld) {
+            tcg_out_qemu_ld_slow_path(s, label);
+        } else {
+            tcg_out_qemu_st_slow_path(s, label);
+        }
+    }
+}
+#endif /* SOFTMMU */
+
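Even on cores with the IDIV extension there is no hardware remainder, so the rem_i32/remu_i32 cases above expand to divide, multiply, subtract, relying on the identity a % b == a - (a / b) * b. In C terms:

    #include <stdint.h>

    /* The expansion used for rem_i32: sdiv + mul + sub.  C's truncating
       division matches ARM sdiv, so this also holds for negative inputs. */
    static int32_t rem_via_div(int32_t a, int32_t b)
    {
        int32_t q = a / b;      /* sdiv tmp, a, b */
        return a - q * b;       /* mul tmp, tmp, b ; sub d, a, tmp */
    }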
 static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
@@ -1797,13 +1981,13 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_st_i32, { "r", "r" } },

     /* TODO: "r", "r", "ri" */
-    { INDEX_op_add_i32, { "r", "r", "rI" } },
-    { INDEX_op_sub_i32, { "r", "r", "rI" } },
+    { INDEX_op_add_i32, { "r", "r", "rIN" } },
+    { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
     { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
-    { INDEX_op_and_i32, { "r", "r", "rI" } },
-    { INDEX_op_andc_i32, { "r", "r", "rI" } },
+    { INDEX_op_and_i32, { "r", "r", "rIK" } },
+    { INDEX_op_andc_i32, { "r", "r", "rIK" } },
     { INDEX_op_or_i32, { "r", "r", "rI" } },
     { INDEX_op_xor_i32, { "r", "r", "rI" } },
     { INDEX_op_neg_i32, { "r", "r" } },
@@ -1815,15 +1999,14 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_rotl_i32, { "r", "r", "ri" } },
     { INDEX_op_rotr_i32, { "r", "r", "ri" } },

-    { INDEX_op_brcond_i32, { "r", "rI" } },
-    { INDEX_op_setcond_i32, { "r", "r", "rI" } },
-    { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } },
+    { INDEX_op_brcond_i32, { "r", "rIN" } },
+    { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
+    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },

-    /* TODO: "r", "r", "r", "r", "ri", "ri" */
-    { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
-    { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } },
-    { INDEX_op_brcond2_i32, { "r", "r", "r", "r" } },
-    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r" } },
+    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
+    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },

 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "l" } },
@@ -1836,7 +2019,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_qemu_st8, { "s", "s" } },
     { INDEX_op_qemu_st16, { "s", "s" } },
     { INDEX_op_qemu_st32, { "s", "s" } },
-    { INDEX_op_qemu_st64, { "S", "S", "s" } },
+    { INDEX_op_qemu_st64, { "s", "s", "s" } },
 #else
     { INDEX_op_qemu_ld8u, { "r", "l", "l" } },
     { INDEX_op_qemu_ld8s, { "r", "l", "l" } },
@@ -1848,7 +2031,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_qemu_st8, { "s", "s", "s" } },
     { INDEX_op_qemu_st16, { "s", "s", "s" } },
     { INDEX_op_qemu_st32, { "s", "s", "s" } },
-    { INDEX_op_qemu_st64, { "S", "S", "s", "s" } },
+    { INDEX_op_qemu_st64, { "s", "s", "s", "s" } },
 #endif

     { INDEX_op_bswap16_i32, { "r", "r" } },
@@ -1858,6 +2041,15 @@ static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_ext16s_i32, { "r", "r" } },
     { INDEX_op_ext16u_i32, { "r", "r" } },

+    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
+#if TCG_TARGET_HAS_div_i32
+    { INDEX_op_div_i32, { "r", "r", "r" } },
+    { INDEX_op_rem_i32, { "r", "r", "r" } },
+    { INDEX_op_divu_i32, { "r", "r", "r" } },
+    { INDEX_op_remu_i32, { "r", "r", "r" } },
+#endif
+
     { -1 },
 };

@@ -1880,12 +2072,10 @@ static void tcg_target_init(TCGContext *s)

     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R8);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);

     tcg_add_target_add_op_defs(arm_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }

 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
@@ -1914,18 +2104,33 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,

 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    /* Calling convention requires us to save r4-r11 and lr;
-     * save also r12 to maintain stack 8-alignment.
-     */
+    int frame_size;
+
+    /* Calling convention requires us to save r4-r11 and lr.  */
+    /* stmdb sp!, { r4 - r11, lr } */
+    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);

-    /* stmdb sp!, { r4 - r12, lr } */
-    tcg_out32(s, (COND_AL << 28) | 0x092d5ff0);
+    /* Allocate the local stack frame.  */
+    frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
+    /* We saved an odd number of registers above; keep the stack
+       8-byte aligned.  */
+    frame_size = ((frame_size + TCG_TARGET_STACK_ALIGN - 1)
+                  & -TCG_TARGET_STACK_ALIGN) + 4;
+
+    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
+                   TCG_REG_CALL_STACK, frame_size, 1);
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));

     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

     tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
     tb_ret_addr = s->code_ptr;

-    /* ldmia sp!, { r4 - r12, pc } */
-    tcg_out32(s, (COND_AL << 28) | 0x08bd9ff0);
+    /* Epilogue.  We branch here via tb_ret_addr.  */
+    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
+                   TCG_REG_CALL_STACK, frame_size, 1);
+
+    /* ldmia sp!, { r4 - r11, pc } */
+    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
 }
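The prologue now pushes nine registers (r4-r11, lr), i.e. 36 bytes, leaving sp misaligned by 4; rounding the locals up to TCG_TARGET_STACK_ALIGN and then adding 4 restores 8-byte alignment after the SUB, as the AAPCS requires at call boundaries. Worked through with plausible values (the call-args and temp-buffer sizes here are examples, not the real constants):

    #include <stdio.h>

    int main(void)
    {
        int align = 8;                 /* TCG_TARGET_STACK_ALIGN */
        int frame_size = 16 + 128;     /* static call args + temp buf, e.g. */

        frame_size = ((frame_size + align - 1) & -align) + 4;
        /* 36 bytes of saved registers + frame_size is a multiple of 8. */
        printf("frame = %d, total = %d\n", frame_size, 36 + frame_size);
        return 0;
    }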
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index b6eed1f3f4..3be41cce3c 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -49,8 +49,6 @@ typedef enum {

 #define TCG_TARGET_NB_REGS 16

-#define TCG_CT_CONST_ARM 0x100
-
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_R13
 #define TCG_TARGET_STACK_ALIGN 8
@@ -58,7 +56,6 @@ typedef enum {
 #define TCG_TARGET_CALL_STACK_OFFSET 0

 /* optional instructions */
-#define TCG_TARGET_HAS_div_i32 0
 #define TCG_TARGET_HAS_ext8s_i32 1
 #define TCG_TARGET_HAS_ext16s_i32 1
 #define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
@@ -73,10 +70,19 @@ typedef enum {
 #define TCG_TARGET_HAS_eqv_i32 0
 #define TCG_TARGET_HAS_nand_i32 0
 #define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
 #define TCG_TARGET_HAS_movcond_i32 1
 #define TCG_TARGET_HAS_muls2_i32 1

+#ifdef __ARM_ARCH_EXT_IDIV__
+#define TCG_TARGET_HAS_div_i32 1
+#else
+#define TCG_TARGET_HAS_div_i32 0
+#endif
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
+
 enum {
     TCG_AREG0 = TCG_REG_R6,
 };
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index d70b2eba33..94f6043786 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2188,9 +2188,9 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,

 #if TCG_TARGET_REG_BITS == 32
     if (ofs >= 32) {
-        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
         tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
                             TCGV_LOW(arg2), ofs - 32, len);
+        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
         return;
     }
     if (ofs + len <= 32) {
diff --git a/tcg/tcg.c b/tcg/tcg.c
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -268,6 +268,16 @@ void tcg_prologue_init(TCGContext *s)
     tcg_target_qemu_prologue(s);
     flush_icache_range((tcg_target_ulong)s->code_buf,
                        (tcg_target_ulong)s->code_ptr);
+
+#ifdef DEBUG_DISAS
+    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+        size_t size = s->code_ptr - s->code_buf;
+        qemu_log("PROLOGUE: [size=%zu]\n", size);
+        log_disas(s->code_buf, size);
+        qemu_log("\n");
+        qemu_log_flush();
+    }
+#endif
 }

 void tcg_set_frame(TCGContext *s, int reg,
diff --git a/translate-all.c b/translate-all.c
index d04a1162cd..da93608f03 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -55,6 +55,7 @@
 #else
 #include "exec/address-spaces.h"
 #endif
+#include "qemu/timer.h"
 #include "exec/cputlb.h"
 #include "translate-all.h"
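Finally, note why the tcg-op.h hunk reorders the ofs >= 32 case of tcg_gen_deposit_i64 on 32-bit hosts: if ret aliases arg2, copying the low half of arg1 into ret first destroys the field value before the deposit reads it. The hazard in miniature (plain C sketch, using a 16-bit deposit at ofs 32):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* ret aliases arg2: ret_lo and arg2_lo are the same storage. */
        uint32_t ret_lo = 5;                   /* arg2 value to deposit */
        uint32_t ret_hi;
        uint32_t arg1_lo = 7, arg1_hi = 0x90000;

        /* Old order -- mov first -- would do ret_lo = arg1_lo; and lose
           the 5 before the deposit below could read it.  New order: */
        ret_hi = (arg1_hi & ~0xffffu) | (ret_lo & 0xffffu);
        ret_lo = arg1_lo;

        printf("%#x:%u\n", ret_hi, ret_lo);    /* 0x90005:7 */
        return 0;
    }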