target/xtensa: sort FLIX instruction opcodes

Opcodes in different slots may read and write same resources (registers, states). In the absence of resource dependency loops it must be possible to sort opcodes to avoid interference. Record resources used by each opcode in the bundle. Build opcode dependency graph and use topological sort to order its nodes. In case of success translate opcodes in sort order. In case of failure report and raise invalid opcode exception. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
author: Max Filippov <jcmvbkbc@gmail.com> 2019-01-29 19:21:10 -0800
committer: Max Filippov <jcmvbkbc@gmail.com> 2019-02-28 04:43:15 -0800
commit: 20e9fd0fc0634de24ee2763d769e3780e369afdd (patch)
tree: a9d58f7b053dde736db607776c4fa9c771a1821b /target
parent: fa6bc73c8b46782bf07dadcac908405cf19b8ab8 (diff)
2 files changed, 221 insertions, 8 deletions
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index db8ee70a03..c59f79e563 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -373,6 +373,8 @@ enum {
     XTENSA_OP_EXIT_TB_0 = 0x800,
 
     XTENSA_OP_NAME_ARRAY = 0x8000,
+
+    XTENSA_OP_CONTROL_FLOW = 0x10000,
 };
 
 typedef struct XtensaOpcodeOps {
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index e8dddd32b3..bda4e9469b 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -846,6 +846,157 @@ static inline unsigned xtensa_op0_insn_len(DisasContext *dc, uint8_t op0)
     return xtensa_isa_length_from_chars(dc->config->isa, &op0);
 }
 
+struct opcode_arg_info {
+    uint32_t resource;
+    int index;
+};
+
+struct slot_prop {
+    XtensaOpcodeOps *ops;
+    uint32_t arg[MAX_OPCODE_ARGS];
+    uint32_t raw_arg[MAX_OPCODE_ARGS];
+    struct opcode_arg_info in[MAX_OPCODE_ARGS];
+    struct opcode_arg_info out[MAX_OPCODE_ARGS];
+    unsigned n_in;
+    unsigned n_out;
+    uint32_t op_flags;
+};
+
+enum resource_type {
+    RES_REGFILE,
+    RES_STATE,
+    RES_MAX,
+};
+
+static uint32_t encode_resource(enum resource_type r, unsigned g, unsigned n)
+{
+    assert(r < RES_MAX && g < 256 && n < 65536);
+    return (r << 24) | (g << 16) | n;
+}
+
+/*
+ * a depends on b if b must be executed before a,
+ * because a's side effects will destroy b's inputs.
+ */
+static bool op_depends_on(const struct slot_prop *a,
+                          const struct slot_prop *b)
+{
+    unsigned i = 0;
+    unsigned j = 0;
+
+    if (a->op_flags & XTENSA_OP_CONTROL_FLOW) {
+        return true;
+    }
+    while (i < a->n_out && j < b->n_in) {
+        if (a->out[i].resource < b->in[j].resource) {
+            ++i;
+        } else if (a->out[i].resource > b->in[j].resource) {
+            ++j;
+        } else {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Calculate evaluation order for slot opcodes.
+ * Build opcode order graph and output its nodes in topological sort order.
+ * An edge a -> b in the graph means that opcode a must be followed by
+ * opcode b.
+ */
+static bool tsort(struct slot_prop *slot,
+                  struct slot_prop *sorted[],
+                  unsigned n)
+{
+    struct tsnode {
+        unsigned n_in_edge;
+        unsigned n_out_edge;
+        unsigned out_edge[MAX_INSN_SLOTS];
+    } node[MAX_INSN_SLOTS];
+
+    unsigned in[MAX_INSN_SLOTS];
+    unsigned i, j;
+    unsigned n_in = 0;
+    unsigned n_out = 0;
+    unsigned n_edge = 0;
+    unsigned in_idx;
+
+    for (i = 0; i < n; ++i) {
+        node[i].n_in_edge = 0;
+        node[i].n_out_edge = 0;
+    }
+
+    for (i = 0; i < n; ++i) {
+        unsigned n_out_edge = 0;
+
+        for (j = 0; j < n; ++j) {
+            if (i != j && op_depends_on(slot + j, slot + i)) {
+                node[i].out_edge[n_out_edge] = j;
+                ++node[j].n_in_edge;
+                ++n_out_edge;
+                ++n_edge;
+            }
+        }
+        node[i].n_out_edge = n_out_edge;
+    }
+
+    for (i = 0; i < n; ++i) {
+        if (!node[i].n_in_edge) {
+            in[n_in] = i;
+            ++n_in;
+        }
+    }
+
+    for (in_idx = 0; in_idx < n_in; ++in_idx) {
+        i = in[in_idx];
+        sorted[n_out] = slot + i;
+        ++n_out;
+        for (j = 0; j < node[i].n_out_edge; ++j) {
+            --n_edge;
+            if (--node[node[i].out_edge[j]].n_in_edge == 0) {
+                in[n_in] = node[i].out_edge[j];
+                ++n_in;
+            }
+        }
+    }
+    return n_edge == 0;
+}
+
+static void opcode_add_resource(struct slot_prop *op,
+                                uint32_t resource, char direction,
+                                int index)
+{
+    switch (direction) {
+    case 'm':
+    case 'i':
+        assert(op->n_in < ARRAY_SIZE(op->in));
+        op->in[op->n_in].resource = resource;
+        op->in[op->n_in].index = index;
+        ++op->n_in;
+        /* fall through */
+    case 'o':
+        if (direction == 'm' || direction == 'o') {
+            assert(op->n_out < ARRAY_SIZE(op->out));
+            op->out[op->n_out].resource = resource;
+            op->out[op->n_out].index = index;
+            ++op->n_out;
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static int resource_compare(const void *a, const void *b)
+{
+    const struct opcode_arg_info *pa = a;
+    const struct opcode_arg_info *pb = b;
+
+    return pa->resource < pb->resource ?
+        -1 : (pa->resource > pb->resource ? 1 : 0);
+}
+
 static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
 {
     xtensa_isa isa = dc->config->isa;
@@ -855,11 +1006,8 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
     int slot, slots;
     unsigned i;
     uint32_t op_flags = 0;
-    struct {
-        XtensaOpcodeOps *ops;
-        uint32_t arg[MAX_OPCODE_ARGS];
-        uint32_t raw_arg[MAX_OPCODE_ARGS];
-    } slot_prop[MAX_INSN_SLOTS];
+    struct slot_prop slot_prop[MAX_INSN_SLOTS];
+    struct slot_prop *ordered[MAX_INSN_SLOTS];
     uint32_t debug_cause = 0;
     uint32_t windowed_register = 0;
     uint32_t coprocessor = 0;
@@ -954,6 +1102,68 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
             windowed_register |= ops->test_overflow(dc, arg, ops->par);
         }
         coprocessor |= ops->coprocessor;
+
+        if (slots > 1) {
+            slot_prop[slot].n_in = 0;
+            slot_prop[slot].n_out = 0;
+            slot_prop[slot].op_flags = 0;
+
+            opnds = xtensa_opcode_num_operands(isa, opc);
+
+            for (opnd = vopnd = 0; opnd < opnds; ++opnd) {
+                bool visible = xtensa_operand_is_visible(isa, opc, opnd);
+
+                if (xtensa_operand_is_register(isa, opc, opnd)) {
+                    xtensa_regfile rf = xtensa_operand_regfile(isa, opc, opnd);
+                    uint32_t v = 0;
+
+                    xtensa_operand_get_field(isa, opc, opnd, fmt, slot,
+                                             dc->slotbuf, &v);
+                    xtensa_operand_decode(isa, opc, opnd, &v);
+                    opcode_add_resource(slot_prop + slot,
+                                        encode_resource(RES_REGFILE, rf, v),
+                                        xtensa_operand_inout(isa, opc, opnd),
+                                        visible ? vopnd : -1);
+                }
+                if (visible) {
+                    ++vopnd;
+                }
+            }
+
+            opnds = xtensa_opcode_num_stateOperands(isa, opc);
+
+            for (opnd = 0; opnd < opnds; ++opnd) {
+                xtensa_state state = xtensa_stateOperand_state(isa, opc, opnd);
+
+                opcode_add_resource(slot_prop + slot,
+                                    encode_resource(RES_STATE, 0, state),
+                                    xtensa_stateOperand_inout(isa, opc, opnd),
+                                    -1);
+            }
+            if (xtensa_opcode_is_branch(isa, opc) ||
+                xtensa_opcode_is_jump(isa, opc) ||
+                xtensa_opcode_is_loop(isa, opc) ||
+                xtensa_opcode_is_call(isa, opc)) {
+                slot_prop[slot].op_flags |= XTENSA_OP_CONTROL_FLOW;
+            }
+
+            qsort(slot_prop[slot].in, slot_prop[slot].n_in,
+                  sizeof(slot_prop[slot].in[0]), resource_compare);
+            qsort(slot_prop[slot].out, slot_prop[slot].n_out,
+                  sizeof(slot_prop[slot].out[0]), resource_compare);
+        }
+    }
+
+    if (slots > 1) {
+        if (!tsort(slot_prop, ordered, slots)) {
+            qemu_log_mask(LOG_UNIMP,
+                          "Circular resource dependencies (pc = %08x)\n",
+                          dc->pc);
+            gen_exception_cause(dc, ILLEGAL_INSTRUCTION_CAUSE);
+            return;
+        }
+    } else {
+        ordered[0] = slot_prop + 0;
     }
 
     if ((op_flags & XTENSA_OP_PRIVILEGED) &&
@@ -1002,10 +1212,11 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc)
     }
 
     for (slot = 0; slot < slots; ++slot) {
-        XtensaOpcodeOps *ops = slot_prop[slot].ops;
+        struct slot_prop *pslot = ordered[slot];
+        XtensaOpcodeOps *ops = pslot->ops;
 
-        dc->raw_arg = slot_prop[slot].raw_arg;
-        ops->translate(dc, slot_prop[slot].arg, ops->par);
+        dc->raw_arg = pslot->raw_arg;
+        ops->translate(dc, pslot->arg, ops->par);
     }
 
     if (dc->base.is_jmp == DISAS_NEXT) {
author	Max Filippov <jcmvbkbc@gmail.com>	2019-01-29 19:21:10 -0800
committer	Max Filippov <jcmvbkbc@gmail.com>	2019-02-28 04:43:15 -0800
commit	20e9fd0fc0634de24ee2763d769e3780e369afdd (patch)
tree	a9d58f7b053dde736db607776c4fa9c771a1821b /target
parent	fa6bc73c8b46782bf07dadcac908405cf19b8ab8 (diff)