aboutsummaryrefslogtreecommitdiff
path: root/tcg/tci
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/tci')
-rw-r--r--tcg/tci/README20
-rw-r--r--tcg/tci/tcg-target.c.inc377
-rw-r--r--tcg/tci/tcg-target.h3
3 files changed, 176 insertions, 224 deletions
diff --git a/tcg/tci/README b/tcg/tci/README
index 9bb7d7a5d3..f72a40a395 100644
--- a/tcg/tci/README
+++ b/tcg/tci/README
@@ -23,10 +23,12 @@ This is what TCI (Tiny Code Interpreter) does.
Like each TCG host frontend, TCI implements the code generator in
tcg-target.c.inc, tcg-target.h. Both files are in directory tcg/tci.
-The additional file tcg/tci.c adds the interpreter.
+The additional file tcg/tci.c adds the interpreter and disassembler.
-The bytecode consists of opcodes (same numeric values as those used by
-TCG), command length and arguments of variable size and number.
+The bytecode consists of opcodes (with only a few exceptions, with
+the same same numeric values and semantics as used by TCG), and up
+to six arguments packed into a 32-bit integer. See comments in tci.c
+for details on the encoding.
3) Usage
@@ -39,11 +41,6 @@ suggest using this option. Setting it automatically would need
additional code in configure which must be fixed when new native TCG
implementations are added.
-System emulation should work on any 32 or 64 bit host.
-User mode emulation might work. Maybe a new linker script (*.ld)
-is needed. Byte order might be wrong (on big endian hosts)
-and need fixes in configure.
-
For hosts with native TCG, the interpreter TCI can be enabled by
configure --enable-tcg-interpreter
@@ -118,13 +115,6 @@ u1 = linux-user-test works
in the interpreter. These opcodes raise a runtime exception, so it is
possible to see where code must be added.
-* The pseudo code is not optimized and still ugly. For hosts with special
- alignment requirements, it needs some fixes (maybe aligned bytecode
- would also improve speed for hosts which support byte alignment).
-
-* A better disassembler for the pseudo code would be nice (a very primitive
- disassembler is included in tcg-target.c.inc).
-
* It might be useful to have a runtime option which selects the native TCG
or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
is a configure option, so you need two compilations of QEMU.
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index a1b5177abb..f74328dcbd 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -22,20 +22,7 @@
* THE SOFTWARE.
*/
-/* TODO list:
- * - See TODO comments in code.
- */
-
-/* Marker for missing code. */
-#define TODO() \
- do { \
- fprintf(stderr, "TODO %s:%u: %s()\n", \
- __FILE__, __LINE__, __func__); \
- tcg_abort(); \
- } while (0)
-
-/* Bitfield n...m (in 32 bit value). */
-#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
+#include "../tcg-pool.c.inc"
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
@@ -226,52 +213,16 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
- /* tcg_out_reloc always uses the same type, addend. */
- tcg_debug_assert(type == sizeof(tcg_target_long));
- tcg_debug_assert(addend == 0);
- tcg_debug_assert(value != 0);
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_patch32(code_ptr, value);
- } else {
- tcg_patch64(code_ptr, value);
- }
- return true;
-}
+ intptr_t diff = value - (intptr_t)(code_ptr + 1);
-/* Write value (native size). */
-static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
-{
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_out32(s, v);
- } else {
- tcg_out64(s, v);
- }
-}
-
-/* Write opcode. */
-static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
-{
- tcg_out8(s, op);
- tcg_out8(s, 0);
-}
-
-/* Write register. */
-static void tcg_out_r(TCGContext *s, TCGArg t0)
-{
- tcg_debug_assert(t0 < TCG_TARGET_NB_REGS);
- tcg_out8(s, t0);
-}
+ tcg_debug_assert(addend == 0);
+ tcg_debug_assert(type == 20);
-/* Write label. */
-static void tci_out_label(TCGContext *s, TCGLabel *label)
-{
- if (label->has_value) {
- tcg_out_i(s, label->u.value);
- tcg_debug_assert(label->u.value);
- } else {
- tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0);
- s->code_ptr += sizeof(tcg_target_ulong);
+ if (diff == sextract32(diff, 0, type)) {
+ tcg_patch32(code_ptr, deposit32(*code_ptr, 32 - type, type, diff));
+ return true;
}
+ return false;
}
static void stack_bounds_check(TCGReg base, target_long offset)
@@ -285,239 +236,236 @@ static void stack_bounds_check(TCGReg base, target_long offset)
static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tci_out_label(s, l0);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_out_reloc(s, s->code_ptr, 20, l0, 0);
+ insn = deposit32(insn, 0, 8, op);
+ tcg_out32(s, insn);
}
static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
+ intptr_t diff;
- tcg_out_op_t(s, op);
- tcg_out_i(s, (uintptr_t)p0);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ /* Special case for exit_tb: map null -> 0. */
+ if (p0 == NULL) {
+ diff = 0;
+ } else {
+ diff = p0 - (void *)(s->code_ptr + 1);
+ tcg_debug_assert(diff != 0);
+ if (diff != sextract32(diff, 0, 20)) {
+ tcg_raise_tb_overflow(s);
+ }
+ }
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 12, 20, diff);
+ tcg_out32(s, insn);
}
static void tcg_out_op_v(TCGContext *s, TCGOpcode op)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_out32(s, (uint8_t)op);
}
static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out32(s, i1);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_op_rI(TCGContext *s, TCGOpcode op,
- TCGReg r0, uint64_t i1)
-{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out64(s, i1);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_debug_assert(i1 == sextract32(i1, 0, 20));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 20, i1);
+ tcg_out32(s, insn);
}
-#endif
static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tci_out_label(s, l1);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_out_reloc(s, s->code_ptr, 20, l1, 0);
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGArg m2)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out32(s, m2);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_debug_assert(m2 == extract32(m2, 0, 12));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 20, 12, m2);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, intptr_t i2)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_debug_assert(i2 == (int32_t)i2);
- tcg_out32(s, i2);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_debug_assert(i2 == sextract32(i2, 0, 16));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 16, i2);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out8(s, c3);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, c3);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out32(s, m3);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_debug_assert(m3 == extract32(m3, 0, 12));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 12, m3);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out8(s, b3);
- tcg_out8(s, b4);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ tcg_debug_assert(b3 == extract32(b3, 0, 6));
+ tcg_debug_assert(b4 == extract32(b4, 0, 6));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 6, b3);
+ insn = deposit32(insn, 26, 6, b4);
+ tcg_out32(s, insn);
}
-static void tcg_out_op_rrrrm(TCGContext *s, TCGOpcode op, TCGReg r0,
- TCGReg r1, TCGReg r2, TCGReg r3, TCGArg m4)
+static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
+ TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out_r(s, r3);
- tcg_out32(s, m4);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ insn = deposit32(insn, 24, 4, r4);
+ tcg_out32(s, insn);
}
#if TCG_TARGET_REG_BITS == 32
static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out_r(s, r3);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg r3, TCGReg r4, TCGCond c5)
{
- uint8_t *old_code_ptr = s->code_ptr;
-
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out_r(s, r3);
- tcg_out_r(s, r4);
- tcg_out8(s, c5);
+ tcg_insn_unit insn = 0;
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ insn = deposit32(insn, 24, 4, r4);
+ insn = deposit32(insn, 28, 4, c5);
+ tcg_out32(s, insn);
}
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg r3, TCGReg r4, TCGReg r5)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
- tcg_out_op_t(s, op);
- tcg_out_r(s, r0);
- tcg_out_r(s, r1);
- tcg_out_r(s, r2);
- tcg_out_r(s, r3);
- tcg_out_r(s, r4);
- tcg_out_r(s, r5);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ insn = deposit32(insn, 24, 4, r4);
+ insn = deposit32(insn, 28, 4, r5);
+ tcg_out32(s, insn);
}
#endif
+static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
+ TCGReg base, intptr_t offset)
+{
+ stack_bounds_check(base, offset);
+ if (offset != sextract32(offset, 0, 16)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
+ tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32
+ ? INDEX_op_add_i32 : INDEX_op_add_i64),
+ TCG_REG_TMP, TCG_REG_TMP, base);
+ base = TCG_REG_TMP;
+ offset = 0;
+ }
+ tcg_out_op_rrs(s, op, val, base, offset);
+}
+
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base,
intptr_t offset)
{
- stack_bounds_check(base, offset);
switch (type) {
case TCG_TYPE_I32:
- tcg_out_op_rrs(s, INDEX_op_ld_i32, val, base, offset);
+ tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset);
break;
#if TCG_TARGET_REG_BITS == 64
case TCG_TYPE_I64:
- tcg_out_op_rrs(s, INDEX_op_ld_i64, val, base, offset);
+ tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset);
break;
#endif
default:
@@ -547,22 +495,32 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
{
switch (type) {
case TCG_TYPE_I32:
- tcg_out_op_ri(s, INDEX_op_tci_movi_i32, ret, arg);
- break;
#if TCG_TARGET_REG_BITS == 64
+ arg = (int32_t)arg;
+ /* fall through */
case TCG_TYPE_I64:
- tcg_out_op_rI(s, INDEX_op_tci_movi_i64, ret, arg);
- break;
#endif
+ break;
default:
g_assert_not_reached();
}
+
+ if (arg == sextract32(arg, 0, 20)) {
+ tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg);
+ } else {
+ tcg_insn_unit insn = 0;
+
+ new_pool_label(s, arg, 20, s->code_ptr, 0);
+ insn = deposit32(insn, 0, 8, INDEX_op_tci_movl);
+ insn = deposit32(insn, 8, 4, ret);
+ tcg_out32(s, insn);
+ }
}
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
ffi_cif *cif)
{
- uint8_t *old_code_ptr = s->code_ptr;
+ tcg_insn_unit insn = 0;
uint8_t which;
if (cif->rtype == &ffi_type_void) {
@@ -573,12 +531,10 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
tcg_debug_assert(cif->rtype->size == 8);
which = 2;
}
- tcg_out_op_t(s, INDEX_op_call);
- tcg_out8(s, which);
- tcg_out_i(s, (uintptr_t)func);
- tcg_out_i(s, (uintptr_t)cif);
-
- old_code_ptr[1] = s->code_ptr - old_code_ptr;
+ new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif);
+ insn = deposit32(insn, 0, 8, INDEX_op_call);
+ insn = deposit32(insn, 8, 4, which);
+ tcg_out32(s, insn);
}
#if TCG_TARGET_REG_BITS == 64
@@ -637,8 +593,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_i32:
CASE_64(st32)
CASE_64(st)
- stack_bounds_check(args[1], args[2]);
- tcg_out_op_rrs(s, opc, args[0], args[1], args[2]);
+ tcg_out_ldst(s, opc, args[0], args[1], args[2]);
break;
CASE_32_64(add)
@@ -731,8 +686,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
} else {
- tcg_out_op_rrrrm(s, opc, args[0], args[1],
- args[2], args[3], args[4]);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
+ tcg_out_op_rrrrr(s, opc, args[0], args[1],
+ args[2], args[3], TCG_REG_TMP);
}
break;
@@ -778,6 +734,11 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
return ct & TCG_CT_CONST;
}
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ memset(p, 0, sizeof(*p) * count);
+}
+
static void tcg_target_init(TCGContext *s)
{
#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 80cafb7d18..c9cbe505a7 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -41,7 +41,7 @@
#define TCG_TARGET_H
#define TCG_TARGET_INTERPRETER 1
-#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
@@ -166,6 +166,7 @@ typedef enum {
#define TCG_TARGET_STACK_ALIGN 8
#define HAVE_TCG_QEMU_TB_EXEC
+#define TCG_TARGET_NEED_POOL_LABELS
/* We could notice __i386__ or __s390x__ and reduce the barriers depending
on the host. But if you want performance, you use the normal backend.