about summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r--tests/tcg/hexagon/hvx_histogram.c88
-rw-r--r--tests/tcg/hexagon/hvx_histogram_input.h717
-rw-r--r--tests/tcg/hexagon/hvx_histogram_row.S294
-rw-r--r--tests/tcg/hexagon/hvx_histogram_row.h24
-rw-r--r--tests/tcg/hexagon/hvx_misc.c469
-rw-r--r--tests/tcg/hexagon/scatter_gather.c1011
-rw-r--r--tests/tcg/hexagon/vector_add_int.c61
7 files changed, 2664 insertions, 0 deletions
diff --git a/tests/tcg/hexagon/hvx_histogram.c b/tests/tcg/hexagon/hvx_histogram.c
new file mode 100644
index 0000000000..43377a9abb
--- /dev/null
+++ b/tests/tcg/hexagon/hvx_histogram.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include "hvx_histogram_row.h"
+
+const int vector_len = 128; /* HVX vector width in bytes (matches the aligned(128) below) */
+const int width = 275; /* pixels per image row */
+const int height = 20; /* number of image rows */
+const int stride = (width + vector_len - 1) & -vector_len; /* width rounded up to a multiple of vector_len */
+
+int err; /* global mismatch counter; nonzero means the test failed */
+
+static uint8_t input[height][stride] __attribute__((aligned(128))) = { /* test image, rows padded out to stride */
+#include "hvx_histogram_input.h"
+};
+
+static int result[256] __attribute__((aligned(128))); /* 256-bin histogram produced by the HVX path */
+static int expect[256] __attribute__((aligned(128))); /* 256-bin histogram produced by the scalar reference */
+
+static void check(void) /* compare result[] against expect[], bin by bin */
+{
+ for (int i = 0; i < 256; i++) {
+ int res = result[i];
+ int exp = expect[i];
+ if (res != exp) {
+ printf("ERROR at %3d: 0x%04x != 0x%04x\n",
+ i, res, exp);
+ err++; /* accumulated mismatches drive FAIL/exit status in main() */
+ }
+ }
+}
+
+static void ref_histogram(uint8_t *src, int stride, int width, int height, /* params shadow the file-scope globals of the same names */
+ int *hist) /* scalar reference: 256-bin byte histogram of a width x height image */
+{
+ for (int i = 0; i < 256; i++) {
+ hist[i] = 0; /* start from empty bins */
+ }
+
+ for (int i = 0; i < height; i++) {
+ for (int j = 0; j < width; j++) {
+ hist[src[i * stride + j]]++; /* stride (not width) steps over the row padding */
+ }
+ }
+}
+
+static void hvx_histogram(uint8_t *src, int stride, int width, int height,
+ int *hist) /* HVX path: builds the histogram in chunks via the hvx_histogram_row() asm helper */
+{
+ int n = 8192 / width; /* rows per helper call; NOTE(review): presumably bounded by a fixed-size work area in the asm — confirm against hvx_histogram_row.S */
+
+ for (int i = 0; i < 256; i++) {
+ hist[i] = 0; /* zeroed once here, so the helper must accumulate into hist */
+ }
+
+ for (int i = 0; i < height; i += n) {
+ int k = height - i > n ? n : height - i; /* rows remaining in the (possibly short) last chunk */
+ hvx_histogram_row(src, stride, width, k, hist);
+ src += n * stride; /* advance n full rows; overshoot after the final chunk is harmless since the loop exits */
+ }
+}
+
+int main()
+{
+ ref_histogram(&input[0][0], stride, width, height, expect); /* golden result from the scalar loop */
+ hvx_histogram(&input[0][0], stride, width, height, result); /* result under test from the HVX asm */
+ check(); /* diffs the two histograms into err */
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0; /* nonzero exit on any bin mismatch */
+}
diff --git a/tests/tcg/hexagon/hvx_histogram_input.h b/tests/tcg/hexagon/hvx_histogram_input.h
new file mode 100644
index 0000000000..2f9109255e
--- /dev/null
+++ b/tests/tcg/hexagon/hvx_histogram_input.h
@@ -0,0 +1,717 @@
+/*
+ * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+ { 0x26, 0x32, 0x2e, 0x2e, 0x2d, 0x2c, 0x2d, 0x2d,
+ 0x2c, 0x2e, 0x31, 0x33, 0x36, 0x39, 0x3b, 0x3f,
+ 0x42, 0x46, 0x4a, 0x4c, 0x51, 0x53, 0x53, 0x54,
+ 0x56, 0x57, 0x58, 0x57, 0x56, 0x52, 0x51, 0x4f,
+ 0x4c, 0x49, 0x47, 0x42, 0x3e, 0x3b, 0x38, 0x35,
+ 0x33, 0x30, 0x2e, 0x2c, 0x2b, 0x2a, 0x2a, 0x28,
+ 0x28, 0x27, 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2e,
+ 0x2f, 0x33, 0x36, 0x38, 0x3c, 0x3d, 0x40, 0x42,
+ 0x43, 0x42, 0x43, 0x44, 0x43, 0x41, 0x40, 0x3b,
+ 0x3b, 0x3a, 0x38, 0x35, 0x32, 0x2f, 0x2c, 0x29,
+ 0x27, 0x26, 0x23, 0x21, 0x1e, 0x1c, 0x1a, 0x19,
+ 0x17, 0x15, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+ 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
+ 0x0c, 0x0d, 0x0e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x0d, 0x0c, 0x0f, 0x0e, 0x0f, 0x0f,
+ 0x0f, 0x10, 0x11, 0x12, 0x14, 0x16, 0x17, 0x19,
+ 0x1c, 0x1d, 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2f,
+ 0x31, 0x33, 0x36, 0x38, 0x39, 0x3a, 0x3b, 0x3c,
+ 0x3c, 0x3d, 0x3e, 0x3e, 0x3c, 0x3b, 0x3a, 0x39,
+ 0x39, 0x3a, 0x3a, 0x3a, 0x3a, 0x3c, 0x3e, 0x43,
+ 0x47, 0x4a, 0x4d, 0x51, 0x51, 0x54, 0x56, 0x56,
+ 0x57, 0x56, 0x53, 0x4f, 0x4b, 0x47, 0x43, 0x41,
+ 0x3e, 0x3c, 0x3a, 0x37, 0x36, 0x33, 0x32, 0x34,
+ 0x34, 0x34, 0x34, 0x35, 0x36, 0x39, 0x3d, 0x3d,
+ 0x3f, 0x40, 0x40, 0x40, 0x40, 0x3e, 0x40, 0x40,
+ 0x42, 0x44, 0x47, 0x48, 0x4b, 0x4e, 0x56, 0x5c,
+ 0x62, 0x68, 0x6f, 0x73, 0x76, 0x79, 0x7a, 0x7c,
+ 0x7e, 0x7c, 0x78, 0x72, 0x6e, 0x69, 0x65, 0x60,
+ 0x5b, 0x56, 0x52, 0x4d, 0x4a, 0x48, 0x47, 0x46,
+ 0x44, 0x43, 0x42, 0x41, 0x41, 0x41, 0x40, 0x40,
+ 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3b, 0x38, 0x37,
+ 0x36, 0x35, 0x36, 0x35, 0x36, 0x37, 0x38, 0x3c,
+ 0x3d, 0x3f, 0x42, 0x44, 0x46, 0x48, 0x4b, 0x4c,
+ 0x4e, 0x4e, 0x4d, 0x4c, 0x4a, 0x48, 0x49, 0x49,
+ 0x4b, 0x4d, 0x4e, },
+ { 0x23, 0x2d, 0x29, 0x29, 0x28, 0x28, 0x29, 0x29,
+ 0x28, 0x2b, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a,
+ 0x3d, 0x41, 0x44, 0x47, 0x4a, 0x4c, 0x4e, 0x4e,
+ 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48,
+ 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25,
+ 0x25, 0x24, 0x24, 0x24, 0x26, 0x28, 0x28, 0x2a,
+ 0x2b, 0x2e, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c,
+ 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3c, 0x3b, 0x38,
+ 0x37, 0x35, 0x33, 0x30, 0x2e, 0x2b, 0x27, 0x25,
+ 0x24, 0x21, 0x20, 0x1d, 0x1b, 0x1a, 0x18, 0x16,
+ 0x15, 0x14, 0x13, 0x12, 0x10, 0x11, 0x10, 0x0e,
+ 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b,
+ 0x0b, 0x0b, 0x0c, 0x0b, 0x0b, 0x09, 0x0a, 0x0b,
+ 0x0b, 0x0a, 0x0a, 0x0c, 0x0c, 0x0c, 0x0d, 0x0e,
+ 0x0e, 0x0f, 0x0f, 0x11, 0x12, 0x15, 0x15, 0x17,
+ 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2a,
+ 0x2d, 0x30, 0x33, 0x34, 0x35, 0x35, 0x37, 0x37,
+ 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x36, 0x37,
+ 0x35, 0x36, 0x35, 0x35, 0x36, 0x37, 0x3a, 0x3e,
+ 0x40, 0x43, 0x48, 0x49, 0x4b, 0x4c, 0x4d, 0x4e,
+ 0x4f, 0x4f, 0x4c, 0x48, 0x45, 0x41, 0x3e, 0x3b,
+ 0x3a, 0x37, 0x36, 0x33, 0x32, 0x31, 0x30, 0x31,
+ 0x32, 0x31, 0x31, 0x31, 0x31, 0x34, 0x37, 0x38,
+ 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, 0x3b, 0x3d, 0x3e,
+ 0x3f, 0x40, 0x43, 0x44, 0x47, 0x4b, 0x4f, 0x56,
+ 0x5a, 0x60, 0x66, 0x69, 0x6a, 0x6e, 0x71, 0x72,
+ 0x73, 0x72, 0x6d, 0x69, 0x66, 0x60, 0x5c, 0x59,
+ 0x54, 0x50, 0x4d, 0x48, 0x46, 0x44, 0x44, 0x43,
+ 0x42, 0x41, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d,
+ 0x3d, 0x3d, 0x3c, 0x3a, 0x39, 0x38, 0x35, 0x35,
+ 0x34, 0x34, 0x35, 0x34, 0x35, 0x36, 0x39, 0x3c,
+ 0x3d, 0x3e, 0x41, 0x43, 0x44, 0x46, 0x48, 0x49,
+ 0x4a, 0x49, 0x48, 0x47, 0x45, 0x43, 0x43, 0x44,
+ 0x45, 0x47, 0x48, },
+ { 0x23, 0x2d, 0x2a, 0x2a, 0x29, 0x29, 0x2a, 0x2a,
+ 0x29, 0x2c, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a,
+ 0x3d, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e,
+ 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48,
+ 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25,
+ 0x25, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a,
+ 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c,
+ 0x3d, 0x3e, 0x3e, 0x3d, 0x3e, 0x3c, 0x3c, 0x3a,
+ 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26,
+ 0x24, 0x21, 0x20, 0x1e, 0x1c, 0x1b, 0x18, 0x17,
+ 0x16, 0x14, 0x13, 0x12, 0x10, 0x10, 0x0f, 0x0e,
+ 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c,
+ 0x0b, 0x0b, 0x0c, 0x0c, 0x0c, 0x0b, 0x0b, 0x0c,
+ 0x0c, 0x0b, 0x0b, 0x0c, 0x0d, 0x0c, 0x0e, 0x0e,
+ 0x0e, 0x0f, 0x11, 0x11, 0x13, 0x14, 0x16, 0x18,
+ 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2b,
+ 0x2d, 0x31, 0x33, 0x34, 0x36, 0x37, 0x38, 0x38,
+ 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x37, 0x37,
+ 0x35, 0x36, 0x35, 0x36, 0x35, 0x38, 0x3a, 0x3e,
+ 0x40, 0x41, 0x45, 0x47, 0x49, 0x4a, 0x4c, 0x4d,
+ 0x4e, 0x4d, 0x4a, 0x47, 0x44, 0x40, 0x3d, 0x3b,
+ 0x39, 0x37, 0x34, 0x34, 0x32, 0x31, 0x31, 0x33,
+ 0x32, 0x31, 0x32, 0x33, 0x32, 0x36, 0x38, 0x39,
+ 0x3b, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3e, 0x3e,
+ 0x41, 0x42, 0x43, 0x45, 0x48, 0x4c, 0x50, 0x56,
+ 0x5b, 0x5f, 0x62, 0x67, 0x69, 0x6c, 0x6e, 0x6e,
+ 0x70, 0x6f, 0x6b, 0x67, 0x63, 0x5e, 0x5b, 0x58,
+ 0x54, 0x51, 0x4e, 0x4a, 0x48, 0x46, 0x46, 0x46,
+ 0x45, 0x46, 0x44, 0x43, 0x44, 0x43, 0x42, 0x42,
+ 0x41, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, 0x3a, 0x39,
+ 0x39, 0x39, 0x38, 0x37, 0x37, 0x3a, 0x3e, 0x40,
+ 0x42, 0x43, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4c,
+ 0x4c, 0x4b, 0x4a, 0x48, 0x46, 0x44, 0x43, 0x45,
+ 0x45, 0x46, 0x47, },
+ { 0x21, 0x2b, 0x28, 0x28, 0x28, 0x28, 0x29, 0x29,
+ 0x28, 0x2a, 0x2d, 0x30, 0x32, 0x34, 0x37, 0x3a,
+ 0x3c, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e,
+ 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48,
+ 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2b, 0x2a, 0x28, 0x27, 0x26, 0x25,
+ 0x24, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c,
+ 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3d, 0x3c, 0x3a,
+ 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26,
+ 0x25, 0x21, 0x20, 0x1e, 0x1c, 0x19, 0x19, 0x18,
+ 0x17, 0x15, 0x15, 0x12, 0x11, 0x11, 0x11, 0x0f,
+ 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c,
+ 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0e, 0x0e, 0x0f, 0x0f,
+ 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x16, 0x18,
+ 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x28, 0x29, 0x2d,
+ 0x2f, 0x32, 0x34, 0x35, 0x36, 0x37, 0x38, 0x38,
+ 0x39, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36,
+ 0x35, 0x35, 0x37, 0x35, 0x36, 0x37, 0x3a, 0x3d,
+ 0x3e, 0x41, 0x43, 0x46, 0x46, 0x47, 0x48, 0x49,
+ 0x4a, 0x49, 0x47, 0x45, 0x42, 0x3f, 0x3d, 0x3b,
+ 0x3a, 0x38, 0x36, 0x34, 0x32, 0x32, 0x32, 0x32,
+ 0x32, 0x31, 0x33, 0x32, 0x34, 0x37, 0x38, 0x38,
+ 0x3a, 0x3b, 0x3d, 0x3d, 0x3d, 0x3e, 0x3f, 0x41,
+ 0x42, 0x44, 0x44, 0x46, 0x49, 0x4d, 0x50, 0x54,
+ 0x58, 0x5c, 0x61, 0x63, 0x65, 0x69, 0x6a, 0x6c,
+ 0x6d, 0x6c, 0x68, 0x64, 0x61, 0x5c, 0x59, 0x57,
+ 0x53, 0x51, 0x4f, 0x4c, 0x4a, 0x48, 0x48, 0x49,
+ 0x49, 0x48, 0x48, 0x48, 0x47, 0x47, 0x46, 0x46,
+ 0x45, 0x44, 0x42, 0x41, 0x3f, 0x3e, 0x3c, 0x3c,
+ 0x3c, 0x3d, 0x3c, 0x3c, 0x3c, 0x3e, 0x41, 0x43,
+ 0x46, 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4e,
+ 0x4e, 0x4d, 0x4b, 0x49, 0x47, 0x44, 0x44, 0x45,
+ 0x45, 0x45, 0x46, },
+ { 0x22, 0x2b, 0x27, 0x27, 0x27, 0x27, 0x28, 0x28,
+ 0x28, 0x2a, 0x2c, 0x2f, 0x30, 0x34, 0x37, 0x3b,
+ 0x3d, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e,
+ 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x47,
+ 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2b, 0x2a, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3b,
+ 0x3d, 0x3e, 0x3e, 0x3f, 0x3f, 0x3d, 0x3c, 0x3a,
+ 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26,
+ 0x25, 0x22, 0x20, 0x1e, 0x1c, 0x1a, 0x19, 0x18,
+ 0x16, 0x15, 0x14, 0x12, 0x10, 0x11, 0x11, 0x0f,
+ 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0c, 0x0d, 0x0c,
+ 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0c, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0f, 0x0f,
+ 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x15, 0x18,
+ 0x19, 0x1d, 0x1f, 0x21, 0x24, 0x27, 0x2a, 0x2c,
+ 0x30, 0x33, 0x35, 0x36, 0x37, 0x38, 0x39, 0x39,
+ 0x3a, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x39, 0x3a,
+ 0x3d, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x46, 0x46,
+ 0x47, 0x46, 0x44, 0x42, 0x40, 0x3d, 0x3a, 0x39,
+ 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x32, 0x32,
+ 0x32, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+ 0x39, 0x3c, 0x3c, 0x3e, 0x3e, 0x3e, 0x41, 0x43,
+ 0x44, 0x45, 0x46, 0x48, 0x49, 0x4c, 0x51, 0x54,
+ 0x56, 0x5a, 0x5f, 0x61, 0x63, 0x65, 0x67, 0x69,
+ 0x6a, 0x69, 0x67, 0x61, 0x5f, 0x5b, 0x58, 0x56,
+ 0x54, 0x51, 0x50, 0x4e, 0x4c, 0x4a, 0x4b, 0x4c,
+ 0x4c, 0x4b, 0x4b, 0x4b, 0x4b, 0x49, 0x4a, 0x49,
+ 0x49, 0x48, 0x46, 0x44, 0x42, 0x41, 0x40, 0x3f,
+ 0x3f, 0x40, 0x40, 0x40, 0x40, 0x42, 0x46, 0x49,
+ 0x4b, 0x4c, 0x4f, 0x4f, 0x50, 0x52, 0x51, 0x51,
+ 0x50, 0x4f, 0x4c, 0x4a, 0x48, 0x46, 0x45, 0x44,
+ 0x44, 0x45, 0x46, },
+ { 0x21, 0x2a, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27,
+ 0x27, 0x29, 0x2d, 0x2f, 0x31, 0x34, 0x37, 0x3b,
+ 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e,
+ 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48,
+ 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x2f,
+ 0x2f, 0x2d, 0x2a, 0x2a, 0x27, 0x26, 0x25, 0x24,
+ 0x22, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2f, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3c,
+ 0x3d, 0x3e, 0x3f, 0x40, 0x3f, 0x3d, 0x3d, 0x3a,
+ 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26,
+ 0x25, 0x22, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x18,
+ 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f,
+ 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0c,
+ 0x0c, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f,
+ 0x0f, 0x10, 0x13, 0x13, 0x14, 0x15, 0x17, 0x19,
+ 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x27, 0x2a, 0x2e,
+ 0x31, 0x33, 0x35, 0x38, 0x39, 0x3a, 0x3b, 0x3b,
+ 0x3c, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x38, 0x37,
+ 0x36, 0x36, 0x37, 0x36, 0x37, 0x38, 0x38, 0x3a,
+ 0x3b, 0x3e, 0x40, 0x40, 0x41, 0x42, 0x43, 0x42,
+ 0x43, 0x42, 0x40, 0x40, 0x3f, 0x3c, 0x3b, 0x39,
+ 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x33,
+ 0x32, 0x32, 0x34, 0x35, 0x35, 0x36, 0x39, 0x39,
+ 0x3a, 0x3c, 0x3c, 0x3f, 0x40, 0x41, 0x43, 0x45,
+ 0x45, 0x47, 0x48, 0x4a, 0x4b, 0x4d, 0x50, 0x53,
+ 0x56, 0x59, 0x5c, 0x5f, 0x60, 0x65, 0x64, 0x66,
+ 0x68, 0x66, 0x64, 0x61, 0x5e, 0x5a, 0x59, 0x56,
+ 0x54, 0x52, 0x51, 0x50, 0x4e, 0x4c, 0x4d, 0x4f,
+ 0x4f, 0x4f, 0x50, 0x50, 0x4f, 0x4f, 0x4e, 0x4d,
+ 0x4c, 0x4b, 0x49, 0x47, 0x45, 0x44, 0x43, 0x43,
+ 0x42, 0x43, 0x44, 0x44, 0x46, 0x47, 0x49, 0x4d,
+ 0x4f, 0x51, 0x53, 0x54, 0x53, 0x54, 0x54, 0x53,
+ 0x53, 0x51, 0x4e, 0x4b, 0x4a, 0x47, 0x45, 0x44,
+ 0x44, 0x45, 0x46, },
+ { 0x20, 0x28, 0x26, 0x26, 0x25, 0x24, 0x27, 0x27,
+ 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x3b,
+ 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e,
+ 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4a, 0x49,
+ 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c,
+ 0x3d, 0x3e, 0x3f, 0x40, 0x3e, 0x3d, 0x3d, 0x3a,
+ 0x38, 0x36, 0x34, 0x31, 0x2f, 0x2c, 0x29, 0x27,
+ 0x25, 0x21, 0x21, 0x1f, 0x1c, 0x1d, 0x19, 0x18,
+ 0x16, 0x15, 0x15, 0x13, 0x12, 0x11, 0x11, 0x0f,
+ 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x10,
+ 0x10, 0x10, 0x12, 0x13, 0x15, 0x16, 0x18, 0x1a,
+ 0x1c, 0x1d, 0x20, 0x22, 0x25, 0x27, 0x2a, 0x2e,
+ 0x30, 0x34, 0x38, 0x39, 0x3a, 0x3b, 0x3b, 0x3b,
+ 0x3c, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37,
+ 0x36, 0x36, 0x38, 0x37, 0x37, 0x37, 0x38, 0x3a,
+ 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x40, 0x40,
+ 0x42, 0x40, 0x3f, 0x3e, 0x3d, 0x3b, 0x3a, 0x39,
+ 0x37, 0x36, 0x36, 0x35, 0x34, 0x34, 0x33, 0x33,
+ 0x33, 0x34, 0x35, 0x35, 0x35, 0x36, 0x38, 0x39,
+ 0x3a, 0x3b, 0x3d, 0x3f, 0x42, 0x43, 0x45, 0x45,
+ 0x46, 0x48, 0x49, 0x4b, 0x4b, 0x4d, 0x50, 0x53,
+ 0x56, 0x57, 0x5a, 0x5c, 0x5e, 0x61, 0x63, 0x65,
+ 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x59, 0x58, 0x56,
+ 0x55, 0x54, 0x52, 0x51, 0x50, 0x51, 0x51, 0x52,
+ 0x52, 0x52, 0x52, 0x52, 0x51, 0x51, 0x51, 0x50,
+ 0x4f, 0x4e, 0x4c, 0x4a, 0x47, 0x46, 0x45, 0x45,
+ 0x45, 0x46, 0x46, 0x46, 0x4a, 0x4c, 0x4d, 0x52,
+ 0x54, 0x56, 0x58, 0x58, 0x56, 0x57, 0x57, 0x56,
+ 0x55, 0x53, 0x50, 0x4d, 0x49, 0x45, 0x44, 0x44,
+ 0x43, 0x44, 0x45, },
+ { 0x1f, 0x27, 0x24, 0x23, 0x25, 0x24, 0x25, 0x26,
+ 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a,
+ 0x3d, 0x41, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4e,
+ 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x49,
+ 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30,
+ 0x2f, 0x2d, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x25, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c,
+ 0x3e, 0x3f, 0x3f, 0x40, 0x3e, 0x3d, 0x3c, 0x3a,
+ 0x38, 0x36, 0x34, 0x31, 0x30, 0x2c, 0x29, 0x28,
+ 0x25, 0x23, 0x22, 0x1f, 0x1c, 0x1c, 0x18, 0x18,
+ 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f,
+ 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
+ 0x0c, 0x0c, 0x0b, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0d, 0x0e, 0x0e, 0x0f, 0x0d, 0x0f, 0x10, 0x10,
+ 0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x19, 0x1a,
+ 0x1c, 0x1f, 0x20, 0x23, 0x26, 0x28, 0x2a, 0x2e,
+ 0x31, 0x35, 0x38, 0x39, 0x3a, 0x3c, 0x3d, 0x3d,
+ 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, 0x3a, 0x39, 0x39,
+ 0x38, 0x37, 0x38, 0x38, 0x37, 0x38, 0x39, 0x3a,
+ 0x3c, 0x3c, 0x3d, 0x3e, 0x3f, 0x3f, 0x40, 0x3f,
+ 0x41, 0x40, 0x3e, 0x3e, 0x3d, 0x3b, 0x3b, 0x39,
+ 0x37, 0x37, 0x35, 0x36, 0x34, 0x34, 0x34, 0x35,
+ 0x35, 0x34, 0x34, 0x35, 0x35, 0x37, 0x38, 0x39,
+ 0x3a, 0x3c, 0x3f, 0x3f, 0x43, 0x43, 0x45, 0x47,
+ 0x48, 0x48, 0x4a, 0x4b, 0x4e, 0x4d, 0x51, 0x53,
+ 0x56, 0x58, 0x59, 0x5b, 0x5d, 0x60, 0x62, 0x63,
+ 0x64, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x57, 0x56,
+ 0x55, 0x54, 0x53, 0x52, 0x51, 0x51, 0x52, 0x52,
+ 0x54, 0x54, 0x55, 0x55, 0x55, 0x54, 0x54, 0x53,
+ 0x52, 0x50, 0x4e, 0x4d, 0x4b, 0x4a, 0x48, 0x48,
+ 0x48, 0x48, 0x4a, 0x4b, 0x4d, 0x4f, 0x52, 0x55,
+ 0x58, 0x5a, 0x5b, 0x5b, 0x5b, 0x5b, 0x5a, 0x59,
+ 0x58, 0x55, 0x51, 0x4e, 0x4a, 0x46, 0x45, 0x44,
+ 0x44, 0x44, 0x44, },
+ { 0x1e, 0x26, 0x23, 0x23, 0x25, 0x24, 0x25, 0x26,
+ 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a,
+ 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f,
+ 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x48,
+ 0x46, 0x44, 0x3f, 0x3b, 0x39, 0x36, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3d,
+ 0x3e, 0x3f, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3b,
+ 0x38, 0x37, 0x34, 0x32, 0x30, 0x2c, 0x2a, 0x27,
+ 0x26, 0x23, 0x22, 0x20, 0x1d, 0x1b, 0x1a, 0x19,
+ 0x17, 0x15, 0x15, 0x13, 0x12, 0x12, 0x11, 0x0f,
+ 0x11, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0e, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x11,
+ 0x11, 0x13, 0x16, 0x15, 0x15, 0x18, 0x1a, 0x1b,
+ 0x1d, 0x20, 0x22, 0x24, 0x27, 0x29, 0x2c, 0x30,
+ 0x33, 0x37, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3e,
+ 0x40, 0x40, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3a,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3b, 0x3d,
+ 0x3d, 0x3f, 0x40, 0x40, 0x3f, 0x41, 0x41, 0x41,
+ 0x41, 0x41, 0x40, 0x40, 0x3f, 0x3e, 0x3c, 0x3b,
+ 0x3a, 0x39, 0x37, 0x36, 0x36, 0x35, 0x35, 0x36,
+ 0x36, 0x35, 0x35, 0x36, 0x36, 0x38, 0x39, 0x39,
+ 0x3b, 0x3c, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x47,
+ 0x48, 0x48, 0x4b, 0x4c, 0x4d, 0x4f, 0x51, 0x53,
+ 0x56, 0x56, 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x62,
+ 0x63, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x59, 0x57,
+ 0x56, 0x54, 0x54, 0x53, 0x52, 0x53, 0x53, 0x55,
+ 0x56, 0x56, 0x57, 0x57, 0x57, 0x57, 0x56, 0x56,
+ 0x55, 0x53, 0x51, 0x4f, 0x4d, 0x4b, 0x49, 0x4b,
+ 0x4b, 0x4c, 0x4d, 0x4e, 0x51, 0x53, 0x55, 0x58,
+ 0x5b, 0x5c, 0x60, 0x60, 0x5f, 0x5e, 0x5d, 0x5c,
+ 0x5a, 0x57, 0x53, 0x4f, 0x4b, 0x46, 0x45, 0x44,
+ 0x44, 0x44, 0x44, },
+ { 0x1d, 0x25, 0x22, 0x22, 0x23, 0x23, 0x24, 0x25,
+ 0x25, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a,
+ 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f,
+ 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x47,
+ 0x45, 0x43, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2b, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3c, 0x3d,
+ 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3e, 0x3d, 0x3b,
+ 0x39, 0x36, 0x34, 0x32, 0x30, 0x2d, 0x2a, 0x26,
+ 0x26, 0x24, 0x22, 0x1f, 0x1d, 0x1c, 0x1a, 0x19,
+ 0x18, 0x16, 0x15, 0x14, 0x12, 0x12, 0x12, 0x10,
+ 0x10, 0x0f, 0x0e, 0x10, 0x0e, 0x0e, 0x0d, 0x0c,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e,
+ 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x11, 0x12,
+ 0x13, 0x14, 0x16, 0x16, 0x18, 0x1a, 0x1b, 0x1c,
+ 0x1e, 0x21, 0x23, 0x25, 0x28, 0x2a, 0x2e, 0x32,
+ 0x34, 0x38, 0x3a, 0x3c, 0x3d, 0x3f, 0x40, 0x42,
+ 0x43, 0x43, 0x43, 0x42, 0x40, 0x3e, 0x3e, 0x3c,
+ 0x3b, 0x3b, 0x3c, 0x3a, 0x3b, 0x3b, 0x3e, 0x3e,
+ 0x40, 0x3f, 0x41, 0x41, 0x41, 0x42, 0x42, 0x43,
+ 0x42, 0x41, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3c,
+ 0x3b, 0x3a, 0x39, 0x37, 0x36, 0x35, 0x36, 0x37,
+ 0x35, 0x36, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
+ 0x3b, 0x3d, 0x3e, 0x40, 0x41, 0x41, 0x44, 0x46,
+ 0x48, 0x48, 0x4a, 0x4c, 0x4d, 0x4f, 0x51, 0x53,
+ 0x55, 0x57, 0x59, 0x5a, 0x5b, 0x5e, 0x5f, 0x61,
+ 0x62, 0x61, 0x60, 0x5e, 0x5c, 0x5a, 0x59, 0x58,
+ 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, 0x54, 0x55,
+ 0x57, 0x57, 0x58, 0x59, 0x5a, 0x58, 0x59, 0x58,
+ 0x57, 0x55, 0x53, 0x52, 0x4f, 0x4e, 0x4d, 0x4d,
+ 0x4d, 0x4f, 0x51, 0x50, 0x54, 0x56, 0x59, 0x5c,
+ 0x5f, 0x61, 0x64, 0x64, 0x63, 0x61, 0x5e, 0x5e,
+ 0x5c, 0x59, 0x54, 0x50, 0x4c, 0x46, 0x45, 0x44,
+ 0x44, 0x44, 0x44, },
+ { 0x1c, 0x24, 0x21, 0x21, 0x21, 0x22, 0x23, 0x23,
+ 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b,
+ 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4c, 0x50, 0x4f,
+ 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4a, 0x49,
+ 0x45, 0x42, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2b, 0x2f, 0x32, 0x34, 0x38, 0x39, 0x3c, 0x3d,
+ 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3e, 0x3c, 0x3a,
+ 0x39, 0x37, 0x35, 0x33, 0x30, 0x2d, 0x2b, 0x28,
+ 0x26, 0x23, 0x23, 0x20, 0x1e, 0x1b, 0x19, 0x19,
+ 0x17, 0x16, 0x15, 0x14, 0x12, 0x12, 0x11, 0x10,
+ 0x0f, 0x0e, 0x0e, 0x10, 0x0e, 0x0d, 0x0c, 0x0c,
+ 0x0c, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e,
+ 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x12, 0x14,
+ 0x14, 0x14, 0x16, 0x18, 0x19, 0x1b, 0x1c, 0x1e,
+ 0x20, 0x23, 0x26, 0x27, 0x29, 0x2c, 0x2f, 0x33,
+ 0x36, 0x38, 0x3b, 0x3e, 0x3e, 0x42, 0x43, 0x46,
+ 0x46, 0x46, 0x46, 0x44, 0x42, 0x41, 0x3f, 0x3e,
+ 0x3d, 0x3d, 0x3e, 0x3d, 0x3d, 0x3e, 0x3e, 0x40,
+ 0x40, 0x40, 0x43, 0x43, 0x42, 0x43, 0x45, 0x43,
+ 0x43, 0x43, 0x42, 0x42, 0x41, 0x40, 0x40, 0x3e,
+ 0x3c, 0x3a, 0x3a, 0x38, 0x36, 0x36, 0x36, 0x36,
+ 0x37, 0x37, 0x36, 0x38, 0x38, 0x39, 0x3b, 0x3b,
+ 0x3e, 0x3e, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x46,
+ 0x46, 0x49, 0x4c, 0x4c, 0x4d, 0x4f, 0x51, 0x54,
+ 0x56, 0x57, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x60,
+ 0x61, 0x61, 0x60, 0x5f, 0x5c, 0x5a, 0x59, 0x58,
+ 0x57, 0x57, 0x55, 0x54, 0x53, 0x55, 0x55, 0x58,
+ 0x58, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, 0x5b, 0x5b,
+ 0x5a, 0x59, 0x56, 0x54, 0x53, 0x4e, 0x4e, 0x50,
+ 0x50, 0x51, 0x52, 0x52, 0x57, 0x59, 0x5d, 0x60,
+ 0x63, 0x63, 0x66, 0x66, 0x66, 0x64, 0x63, 0x61,
+ 0x60, 0x5b, 0x55, 0x51, 0x4d, 0x48, 0x45, 0x44,
+ 0x43, 0x43, 0x43, },
+ { 0x1b, 0x23, 0x20, 0x21, 0x22, 0x22, 0x23, 0x24,
+ 0x26, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b,
+ 0x3d, 0x42, 0x46, 0x49, 0x4a, 0x4c, 0x4f, 0x4f,
+ 0x50, 0x50, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x49,
+ 0x45, 0x42, 0x3e, 0x3c, 0x38, 0x35, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3a, 0x3c, 0x3d,
+ 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3f, 0x3d, 0x3b,
+ 0x3a, 0x38, 0x36, 0x33, 0x30, 0x2d, 0x2b, 0x29,
+ 0x27, 0x24, 0x24, 0x21, 0x1e, 0x1c, 0x1b, 0x1a,
+ 0x18, 0x17, 0x16, 0x15, 0x13, 0x12, 0x10, 0x0f,
+ 0x10, 0x0f, 0x0e, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x0e, 0x0f,
+ 0x10, 0x11, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15,
+ 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x1d, 0x1e, 0x20,
+ 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2d, 0x31, 0x35,
+ 0x37, 0x39, 0x3c, 0x3f, 0x40, 0x43, 0x46, 0x47,
+ 0x4a, 0x49, 0x48, 0x46, 0x45, 0x43, 0x42, 0x41,
+ 0x3f, 0x40, 0x3f, 0x3f, 0x40, 0x3f, 0x41, 0x43,
+ 0x43, 0x43, 0x44, 0x45, 0x45, 0x45, 0x45, 0x45,
+ 0x45, 0x45, 0x44, 0x43, 0x43, 0x42, 0x42, 0x40,
+ 0x3e, 0x3d, 0x3c, 0x39, 0x38, 0x38, 0x38, 0x38,
+ 0x38, 0x36, 0x38, 0x39, 0x39, 0x3a, 0x3c, 0x3d,
+ 0x3e, 0x3e, 0x3f, 0x41, 0x42, 0x42, 0x43, 0x45,
+ 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x50, 0x53, 0x54,
+ 0x57, 0x58, 0x5a, 0x5c, 0x5b, 0x5e, 0x60, 0x61,
+ 0x60, 0x60, 0x5f, 0x5f, 0x5d, 0x5b, 0x5b, 0x59,
+ 0x58, 0x57, 0x56, 0x55, 0x55, 0x55, 0x57, 0x59,
+ 0x5b, 0x5b, 0x5d, 0x5c, 0x5c, 0x5e, 0x5e, 0x5e,
+ 0x5d, 0x5b, 0x59, 0x56, 0x54, 0x51, 0x51, 0x51,
+ 0x52, 0x55, 0x56, 0x56, 0x5a, 0x5d, 0x5f, 0x63,
+ 0x66, 0x68, 0x6b, 0x6b, 0x68, 0x67, 0x66, 0x64,
+ 0x61, 0x5d, 0x57, 0x52, 0x4f, 0x49, 0x46, 0x45,
+ 0x43, 0x43, 0x43, },
+ { 0x1a, 0x22, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24,
+ 0x26, 0x27, 0x2a, 0x2d, 0x31, 0x33, 0x37, 0x3b,
+ 0x3d, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4f,
+ 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x48,
+ 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x33, 0x30,
+ 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a,
+ 0x2d, 0x2f, 0x32, 0x35, 0x39, 0x3a, 0x3c, 0x3d,
+ 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c,
+ 0x3a, 0x38, 0x36, 0x33, 0x31, 0x2d, 0x2c, 0x29,
+ 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a,
+ 0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10,
+ 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e,
+ 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10,
+ 0x11, 0x12, 0x12, 0x13, 0x15, 0x15, 0x16, 0x16,
+ 0x17, 0x18, 0x1a, 0x1b, 0x1c, 0x1e, 0x1f, 0x21,
+ 0x22, 0x25, 0x27, 0x2a, 0x2c, 0x2e, 0x33, 0x36,
+ 0x39, 0x3a, 0x3d, 0x40, 0x41, 0x45, 0x47, 0x4a,
+ 0x4c, 0x4d, 0x4c, 0x4a, 0x48, 0x45, 0x44, 0x41,
+ 0x42, 0x42, 0x42, 0x42, 0x42, 0x43, 0x43, 0x44,
+ 0x45, 0x47, 0x47, 0x48, 0x47, 0x48, 0x47, 0x47,
+ 0x48, 0x48, 0x46, 0x46, 0x46, 0x43, 0x43, 0x41,
+ 0x3f, 0x3e, 0x3b, 0x39, 0x38, 0x37, 0x37, 0x37,
+ 0x38, 0x38, 0x37, 0x39, 0x39, 0x3a, 0x3c, 0x3e,
+ 0x3e, 0x3f, 0x3f, 0x3f, 0x42, 0x43, 0x43, 0x45,
+ 0x47, 0x48, 0x4b, 0x4c, 0x4e, 0x50, 0x51, 0x54,
+ 0x56, 0x58, 0x5a, 0x5c, 0x5c, 0x5f, 0x5f, 0x5f,
+ 0x61, 0x60, 0x5f, 0x5f, 0x5e, 0x5b, 0x5c, 0x5b,
+ 0x59, 0x59, 0x57, 0x56, 0x55, 0x56, 0x57, 0x59,
+ 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5e, 0x5d,
+ 0x5e, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x51, 0x52,
+ 0x53, 0x55, 0x57, 0x58, 0x5c, 0x5e, 0x61, 0x65,
+ 0x69, 0x6b, 0x6c, 0x6b, 0x6a, 0x69, 0x67, 0x64,
+ 0x61, 0x5d, 0x59, 0x53, 0x4d, 0x48, 0x46, 0x45,
+ 0x44, 0x44, 0x43, },
+ { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24,
+ 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b,
+ 0x3e, 0x41, 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x4e,
+ 0x50, 0x51, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48,
+ 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x30,
+ 0x2f, 0x2d, 0x29, 0x27, 0x27, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x26, 0x27, 0x29, 0x2a,
+ 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3b, 0x3c, 0x3e,
+ 0x3f, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c,
+ 0x3a, 0x39, 0x36, 0x34, 0x31, 0x2d, 0x2c, 0x29,
+ 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a,
+ 0x19, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10,
+ 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e,
+ 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10,
+ 0x11, 0x13, 0x14, 0x14, 0x15, 0x16, 0x17, 0x19,
+ 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x22, 0x24,
+ 0x25, 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x35, 0x38,
+ 0x3a, 0x3d, 0x41, 0x42, 0x45, 0x48, 0x4c, 0x4e,
+ 0x4f, 0x4f, 0x4f, 0x4d, 0x4b, 0x49, 0x47, 0x47,
+ 0x46, 0x45, 0x45, 0x45, 0x44, 0x44, 0x46, 0x47,
+ 0x48, 0x49, 0x4b, 0x4b, 0x4a, 0x4b, 0x4b, 0x4a,
+ 0x4b, 0x4a, 0x49, 0x49, 0x48, 0x46, 0x46, 0x44,
+ 0x42, 0x41, 0x3d, 0x3b, 0x3a, 0x38, 0x38, 0x38,
+ 0x37, 0x37, 0x39, 0x38, 0x3a, 0x3a, 0x3c, 0x3c,
+ 0x3e, 0x40, 0x40, 0x41, 0x43, 0x43, 0x45, 0x46,
+ 0x48, 0x49, 0x4b, 0x4e, 0x4f, 0x50, 0x53, 0x55,
+ 0x57, 0x59, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+ 0x60, 0x60, 0x5f, 0x5f, 0x5e, 0x5c, 0x5b, 0x5a,
+ 0x59, 0x58, 0x57, 0x57, 0x56, 0x56, 0x57, 0x58,
+ 0x59, 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5d,
+ 0x5c, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x52, 0x53,
+ 0x54, 0x57, 0x58, 0x58, 0x5b, 0x5e, 0x62, 0x65,
+ 0x69, 0x6b, 0x6d, 0x6c, 0x6a, 0x69, 0x67, 0x64,
+ 0x62, 0x5e, 0x59, 0x54, 0x4d, 0x48, 0x47, 0x46,
+ 0x45, 0x45, 0x44, },
+ { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24,
+ 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3b,
+ 0x3e, 0x42, 0x47, 0x49, 0x4b, 0x4d, 0x4f, 0x4f,
+ 0x50, 0x51, 0x51, 0x50, 0x50, 0x4c, 0x4a, 0x47,
+ 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x31,
+ 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x25, 0x25, 0x26, 0x27, 0x29, 0x2b,
+ 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x40, 0x40, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d,
+ 0x3b, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a,
+ 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1d, 0x1c, 0x1b,
+ 0x19, 0x17, 0x17, 0x16, 0x15, 0x14, 0x12, 0x11,
+ 0x11, 0x11, 0x10, 0x10, 0x0f, 0x0f, 0x0f, 0x0f,
+ 0x0f, 0x0f, 0x10, 0x11, 0x10, 0x11, 0x11, 0x12,
+ 0x11, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1b,
+ 0x1c, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x23, 0x25,
+ 0x27, 0x2a, 0x2c, 0x2f, 0x31, 0x35, 0x38, 0x3b,
+ 0x3d, 0x40, 0x44, 0x47, 0x49, 0x4c, 0x4f, 0x51,
+ 0x53, 0x53, 0x53, 0x51, 0x50, 0x4e, 0x4c, 0x4b,
+ 0x4a, 0x49, 0x49, 0x49, 0x49, 0x4a, 0x4a, 0x4d,
+ 0x4e, 0x4e, 0x4f, 0x50, 0x4f, 0x50, 0x51, 0x50,
+ 0x50, 0x4e, 0x4d, 0x4c, 0x4b, 0x48, 0x48, 0x47,
+ 0x44, 0x42, 0x3f, 0x3d, 0x3b, 0x3a, 0x39, 0x39,
+ 0x39, 0x38, 0x39, 0x3b, 0x3a, 0x3c, 0x3e, 0x3d,
+ 0x40, 0x40, 0x40, 0x42, 0x42, 0x42, 0x45, 0x46,
+ 0x47, 0x49, 0x4c, 0x4e, 0x50, 0x50, 0x53, 0x56,
+ 0x58, 0x59, 0x5d, 0x5d, 0x5e, 0x60, 0x61, 0x61,
+ 0x62, 0x61, 0x60, 0x60, 0x5e, 0x5d, 0x5d, 0x5b,
+ 0x57, 0x58, 0x56, 0x55, 0x55, 0x56, 0x56, 0x59,
+ 0x59, 0x58, 0x5a, 0x5a, 0x5a, 0x5c, 0x5c, 0x5c,
+ 0x5b, 0x5b, 0x58, 0x57, 0x54, 0x53, 0x52, 0x53,
+ 0x54, 0x57, 0x58, 0x59, 0x5c, 0x5f, 0x63, 0x67,
+ 0x6b, 0x6d, 0x6e, 0x6e, 0x6b, 0x6a, 0x68, 0x64,
+ 0x62, 0x5e, 0x58, 0x53, 0x4f, 0x49, 0x47, 0x46,
+ 0x45, 0x45, 0x44, },
+ { 0x19, 0x20, 0x1e, 0x1e, 0x1f, 0x20, 0x22, 0x23,
+ 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3a,
+ 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4e,
+ 0x50, 0x51, 0x51, 0x4f, 0x4f, 0x4d, 0x49, 0x47,
+ 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x36, 0x32, 0x31,
+ 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x25, 0x25, 0x26, 0x28, 0x29, 0x2b,
+ 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d,
+ 0x3c, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a,
+ 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1e, 0x1d, 0x1b,
+ 0x1a, 0x17, 0x17, 0x17, 0x14, 0x14, 0x12, 0x11,
+ 0x11, 0x12, 0x11, 0x11, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x11, 0x11, 0x11, 0x12, 0x13, 0x14,
+ 0x14, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1c, 0x1e,
+ 0x1e, 0x1f, 0x22, 0x23, 0x23, 0x24, 0x25, 0x27,
+ 0x2a, 0x2d, 0x2f, 0x31, 0x35, 0x38, 0x3a, 0x3e,
+ 0x41, 0x44, 0x48, 0x4b, 0x4d, 0x51, 0x53, 0x55,
+ 0x57, 0x57, 0x56, 0x55, 0x54, 0x52, 0x52, 0x50,
+ 0x4e, 0x50, 0x4e, 0x4d, 0x4d, 0x4d, 0x4f, 0x51,
+ 0x51, 0x52, 0x54, 0x55, 0x55, 0x55, 0x57, 0x55,
+ 0x54, 0x53, 0x52, 0x4e, 0x4d, 0x4b, 0x4a, 0x49,
+ 0x46, 0x44, 0x41, 0x3f, 0x3d, 0x3b, 0x3a, 0x3a,
+ 0x39, 0x39, 0x39, 0x39, 0x3a, 0x3b, 0x3d, 0x3e,
+ 0x3f, 0x40, 0x41, 0x42, 0x44, 0x44, 0x45, 0x47,
+ 0x49, 0x49, 0x4a, 0x4d, 0x50, 0x51, 0x53, 0x57,
+ 0x5a, 0x5b, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x62,
+ 0x63, 0x62, 0x60, 0x60, 0x5e, 0x5c, 0x5c, 0x59,
+ 0x58, 0x56, 0x55, 0x55, 0x55, 0x55, 0x55, 0x54,
+ 0x56, 0x56, 0x57, 0x58, 0x58, 0x59, 0x5a, 0x59,
+ 0x58, 0x57, 0x56, 0x55, 0x54, 0x52, 0x53, 0x53,
+ 0x53, 0x56, 0x57, 0x59, 0x5b, 0x5e, 0x62, 0x66,
+ 0x6a, 0x6c, 0x6d, 0x6e, 0x6b, 0x69, 0x67, 0x64,
+ 0x61, 0x5d, 0x58, 0x54, 0x50, 0x4a, 0x47, 0x46,
+ 0x45, 0x45, 0x44, },
+ { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23,
+ 0x25, 0x27, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3b,
+ 0x3d, 0x42, 0x45, 0x49, 0x4a, 0x4d, 0x4e, 0x4e,
+ 0x51, 0x52, 0x50, 0x4f, 0x4f, 0x4c, 0x49, 0x48,
+ 0x45, 0x42, 0x3e, 0x3b, 0x39, 0x36, 0x32, 0x32,
+ 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x25, 0x28, 0x29, 0x2b,
+ 0x2d, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3e, 0x3c,
+ 0x3c, 0x3a, 0x37, 0x33, 0x32, 0x2f, 0x2d, 0x2b,
+ 0x28, 0x26, 0x25, 0x22, 0x20, 0x1e, 0x1d, 0x1b,
+ 0x1a, 0x17, 0x17, 0x16, 0x14, 0x14, 0x12, 0x11,
+ 0x12, 0x11, 0x11, 0x11, 0x11, 0x10, 0x10, 0x10,
+ 0x10, 0x11, 0x12, 0x12, 0x12, 0x13, 0x14, 0x14,
+ 0x16, 0x18, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f,
+ 0x21, 0x22, 0x23, 0x25, 0x26, 0x26, 0x28, 0x2a,
+ 0x2c, 0x2e, 0x32, 0x34, 0x39, 0x39, 0x3d, 0x41,
+ 0x45, 0x47, 0x4c, 0x4e, 0x51, 0x54, 0x56, 0x58,
+ 0x5b, 0x5c, 0x5a, 0x59, 0x58, 0x56, 0x55, 0x53,
+ 0x53, 0x52, 0x52, 0x51, 0x52, 0x52, 0x53, 0x55,
+ 0x57, 0x58, 0x5a, 0x5a, 0x59, 0x5b, 0x59, 0x59,
+ 0x58, 0x57, 0x55, 0x53, 0x51, 0x4e, 0x4c, 0x4a,
+ 0x48, 0x46, 0x43, 0x40, 0x3e, 0x3c, 0x3b, 0x3b,
+ 0x38, 0x39, 0x38, 0x39, 0x3a, 0x3d, 0x3d, 0x3e,
+ 0x3f, 0x40, 0x41, 0x43, 0x44, 0x45, 0x46, 0x48,
+ 0x4a, 0x4b, 0x4d, 0x4e, 0x50, 0x52, 0x54, 0x56,
+ 0x59, 0x5c, 0x5e, 0x5f, 0x60, 0x62, 0x62, 0x63,
+ 0x63, 0x63, 0x61, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b,
+ 0x59, 0x56, 0x56, 0x55, 0x54, 0x53, 0x53, 0x54,
+ 0x55, 0x54, 0x55, 0x55, 0x55, 0x57, 0x58, 0x57,
+ 0x57, 0x56, 0x55, 0x54, 0x54, 0x52, 0x52, 0x53,
+ 0x54, 0x55, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x65,
+ 0x69, 0x6b, 0x6d, 0x6e, 0x6a, 0x69, 0x67, 0x63,
+ 0x61, 0x5d, 0x58, 0x54, 0x4f, 0x4b, 0x48, 0x47,
+ 0x46, 0x45, 0x45, },
+ { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23,
+ 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b,
+ 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4e, 0x4f,
+ 0x51, 0x52, 0x50, 0x50, 0x4f, 0x4c, 0x4a, 0x48,
+ 0x45, 0x42, 0x3f, 0x3b, 0x39, 0x36, 0x32, 0x31,
+ 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b,
+ 0x2d, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f,
+ 0x3f, 0x40, 0x42, 0x43, 0x42, 0x40, 0x3e, 0x3c,
+ 0x3c, 0x3a, 0x37, 0x34, 0x32, 0x2f, 0x2d, 0x2c,
+ 0x2a, 0x27, 0x26, 0x23, 0x20, 0x1e, 0x1d, 0x1c,
+ 0x1a, 0x18, 0x18, 0x17, 0x15, 0x16, 0x14, 0x12,
+ 0x12, 0x12, 0x12, 0x12, 0x12, 0x11, 0x11, 0x12,
+ 0x12, 0x12, 0x13, 0x14, 0x14, 0x14, 0x15, 0x16,
+ 0x17, 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x20, 0x22,
+ 0x24, 0x25, 0x26, 0x27, 0x28, 0x2a, 0x2c, 0x2c,
+ 0x2f, 0x32, 0x35, 0x37, 0x3b, 0x3c, 0x41, 0x45,
+ 0x48, 0x4c, 0x50, 0x52, 0x54, 0x57, 0x5a, 0x5c,
+ 0x5f, 0x5f, 0x5f, 0x5d, 0x5c, 0x5b, 0x5a, 0x58,
+ 0x57, 0x57, 0x57, 0x56, 0x56, 0x57, 0x57, 0x5a,
+ 0x5c, 0x5e, 0x5f, 0x61, 0x5f, 0x5f, 0x5f, 0x5e,
+ 0x5d, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x4f, 0x4e,
+ 0x4a, 0x47, 0x46, 0x42, 0x41, 0x3e, 0x3d, 0x3c,
+ 0x3b, 0x3a, 0x39, 0x39, 0x3b, 0x3c, 0x3d, 0x3f,
+ 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x49, 0x49,
+ 0x4b, 0x4c, 0x4e, 0x4f, 0x51, 0x54, 0x57, 0x58,
+ 0x5b, 0x5d, 0x61, 0x61, 0x61, 0x63, 0x65, 0x65,
+ 0x64, 0x64, 0x62, 0x61, 0x60, 0x5e, 0x5d, 0x5c,
+ 0x59, 0x58, 0x56, 0x54, 0x53, 0x53, 0x53, 0x54,
+ 0x54, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, 0x55,
+ 0x56, 0x55, 0x54, 0x53, 0x53, 0x52, 0x52, 0x53,
+ 0x55, 0x56, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x66,
+ 0x69, 0x6b, 0x6d, 0x6d, 0x6b, 0x69, 0x67, 0x64,
+ 0x61, 0x5d, 0x58, 0x55, 0x50, 0x4b, 0x48, 0x47,
+ 0x46, 0x46, 0x46, },
+ { 0x1a, 0x20, 0x1e, 0x1f, 0x1f, 0x21, 0x22, 0x23,
+ 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b,
+ 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f,
+ 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48,
+ 0x45, 0x42, 0x3f, 0x3b, 0x38, 0x36, 0x32, 0x31,
+ 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b,
+ 0x2e, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f,
+ 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c,
+ 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b,
+ 0x29, 0x26, 0x24, 0x24, 0x20, 0x1f, 0x1d, 0x1d,
+ 0x1a, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14,
+ 0x13, 0x12, 0x13, 0x13, 0x13, 0x12, 0x12, 0x13,
+ 0x13, 0x14, 0x15, 0x15, 0x14, 0x15, 0x16, 0x18,
+ 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x24,
+ 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2c, 0x2d, 0x2f,
+ 0x32, 0x35, 0x37, 0x3a, 0x3c, 0x3e, 0x44, 0x48,
+ 0x4c, 0x50, 0x54, 0x56, 0x58, 0x5b, 0x5e, 0x60,
+ 0x61, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5e,
+ 0x5c, 0x5c, 0x5b, 0x5a, 0x5a, 0x5b, 0x5c, 0x5e,
+ 0x60, 0x63, 0x64, 0x65, 0x63, 0x62, 0x63, 0x63,
+ 0x61, 0x60, 0x5e, 0x5b, 0x58, 0x55, 0x51, 0x4f,
+ 0x4c, 0x4a, 0x47, 0x44, 0x42, 0x41, 0x3e, 0x3c,
+ 0x3b, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3e, 0x3f,
+ 0x40, 0x42, 0x43, 0x45, 0x46, 0x47, 0x49, 0x4a,
+ 0x4c, 0x4c, 0x4f, 0x51, 0x52, 0x55, 0x58, 0x5b,
+ 0x5c, 0x5f, 0x61, 0x62, 0x63, 0x64, 0x64, 0x65,
+ 0x66, 0x65, 0x63, 0x62, 0x5f, 0x5e, 0x5e, 0x5c,
+ 0x5b, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x53,
+ 0x52, 0x52, 0x52, 0x52, 0x52, 0x53, 0x55, 0x55,
+ 0x55, 0x53, 0x53, 0x53, 0x52, 0x51, 0x52, 0x52,
+ 0x55, 0x55, 0x58, 0x58, 0x5b, 0x5d, 0x61, 0x65,
+ 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64,
+ 0x61, 0x5e, 0x58, 0x54, 0x4f, 0x4b, 0x49, 0x48,
+ 0x47, 0x46, 0x45, },
+ { 0x19, 0x20, 0x1d, 0x1f, 0x1f, 0x20, 0x23, 0x23,
+ 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b,
+ 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f,
+ 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48,
+ 0x44, 0x42, 0x3f, 0x3a, 0x38, 0x36, 0x32, 0x30,
+ 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24,
+ 0x23, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2b,
+ 0x2e, 0x30, 0x34, 0x36, 0x39, 0x3b, 0x3d, 0x3f,
+ 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c,
+ 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b,
+ 0x29, 0x27, 0x25, 0x24, 0x21, 0x1f, 0x1e, 0x1c,
+ 0x1b, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14,
+ 0x13, 0x12, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13,
+ 0x13, 0x14, 0x15, 0x14, 0x14, 0x14, 0x17, 0x19,
+ 0x1a, 0x1c, 0x1e, 0x20, 0x21, 0x23, 0x24, 0x26,
+ 0x29, 0x29, 0x2b, 0x2c, 0x2d, 0x2e, 0x30, 0x31,
+ 0x34, 0x38, 0x3b, 0x3c, 0x3f, 0x42, 0x47, 0x4c,
+ 0x50, 0x54, 0x57, 0x5b, 0x5c, 0x5e, 0x62, 0x63,
+ 0x66, 0x66, 0x66, 0x65, 0x64, 0x63, 0x61, 0x62,
+ 0x60, 0x60, 0x5f, 0x5e, 0x5e, 0x5f, 0x60, 0x62,
+ 0x65, 0x67, 0x69, 0x6a, 0x69, 0x68, 0x69, 0x67,
+ 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x58, 0x54, 0x51,
+ 0x4e, 0x4b, 0x49, 0x45, 0x43, 0x41, 0x40, 0x3e,
+ 0x3c, 0x3a, 0x3b, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x41, 0x42, 0x44, 0x46, 0x46, 0x48, 0x49, 0x4b,
+ 0x4d, 0x50, 0x51, 0x53, 0x55, 0x57, 0x58, 0x5c,
+ 0x5f, 0x60, 0x63, 0x64, 0x64, 0x65, 0x66, 0x66,
+ 0x66, 0x65, 0x65, 0x63, 0x61, 0x5f, 0x5e, 0x5c,
+ 0x5a, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x52,
+ 0x53, 0x52, 0x52, 0x52, 0x52, 0x53, 0x53, 0x53,
+ 0x54, 0x53, 0x53, 0x52, 0x53, 0x51, 0x53, 0x53,
+ 0x55, 0x57, 0x58, 0x59, 0x5b, 0x5d, 0x62, 0x64,
+ 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64,
+ 0x61, 0x5d, 0x57, 0x54, 0x50, 0x4a, 0x48, 0x47,
+ 0x46, 0x45, 0x45, },
diff --git a/tests/tcg/hexagon/hvx_histogram_row.S b/tests/tcg/hexagon/hvx_histogram_row.S
new file mode 100644
index 0000000000..5e42c33145
--- /dev/null
+++ b/tests/tcg/hexagon/hvx_histogram_row.S
@@ -0,0 +1,294 @@
+/*
+ * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
/*
 * void hvx_histogram_row(uint8_t *src,    => r0
 *                        int stride,      => r1
 *                        int width,       => r2
 *                        int height,      => r3
 *                        int *hist        => r4)
 *
 * Accumulates a 256-bin byte histogram of a height x width sub-image
 * into hist[256] using the HVX vhist instruction.
 *
 * NOTE(review): register names appear in mixed case below (V2/v2, R0/r0,
 * .W/.w) — the assembler accepts both, but the case is inconsistent.
 */
    .text
    .p2align 2
    .global hvx_histogram_row
    .type hvx_histogram_row, @function
hvx_histogram_row:
    /*
     * Packet semantics: both slots read the value of r2 from before the
     * packet, so r2 becomes width / VLEN and r5 becomes width % VLEN
     * (VLEN == 128).
     */
    { r2 = lsr(r2, #7)                  /* size / VLEN */
      r5 = and(r2, #127)                /* size % VLEN */
      v1 = #0
      v0 = #0
    }
    /*
     * Step 1: Clean the whole vector register file
     * (vhist accumulates counts across all 32 vector registers, so every
     * one of them must start at zero).
     */
    { v3:2 = v1:0
      v5:4 = v1:0
      p0 = cmp.gt(r2, #0)               /* P0 = (width / VLEN > 0) */
      p1 = cmp.eq(r5, #0)               /* P1 = (width % VLEN == 0) */
    }
    { q0 = vsetq(r5)                    /* lane mask for the partial vector */
      v7:6 = v1:0
    }
    { v9:8 = v1:0
      v11:10 = v1:0
    }
    { v13:12 = v1:0
      v15:14 = v1:0
    }
    { v17:16 = v1:0
      v19:18 = v1:0
    }
    { v21:20 = v1:0
      v23:22 = v1:0
    }
    { v25:24 = v1:0
      v27:26 = v1:0
    }
    { v29:28 = v1:0
      v31:30 = v1:0
      r10 = add(r0, r1)                 /* R10 = &src[1 * stride] */
      loop1(.outerloop, r3)             /* one outer iteration per row */
    }

    /*
     * Step 2: vhist
     * Inner loop consumes one full vector per iteration; the partial
     * trailing vector (if any) is handled under the q0 lane mask.
     */
    .falign
.outerloop:
    { if (!p0) jump .loopend
      loop0(.innerloop, r2)
    }

    .falign
.innerloop:
    { v12.tmp = vmem(R0++#1)            /* .tmp: load feeds vhist only */
      vhist
    }:endloop0

    .falign
.loopend:
    if (p1) jump .skip    /* if (width % VLEN == 0) done with current row */
    { v13.tmp = vmem(r0 + #0)
      vhist(q0)                         /* histogram only the masked lanes */
    }

    .falign
.skip:
    { r0 = r10                          /* R0 = &src[(i + 1) * stride] */
      r10 = add(r10, r1)                /* R10 = &src[(i + 2) * stride] */
    }:endloop1


    /*
     * Step 3: Sum up the data
     * vhist left per-bin partial counts spread across the register file
     * (see the HVX Programmer's Reference). Widen the halfword counts to
     * words with vdmpy against coefficients (+1, +1), then reduce with a
     * shuffle/add tree, and finally accumulate the 256 word totals into
     * hist[0..255] in eight 32-word chunks.
     */
    { v0.h = vshuff(v0.h)
      r10 = ##0x00010001                /* vdmpy coefficients: +1, +1 */
    }
    v1.h = vshuff(v1.h)
    { V2.h = vshuff(v2.h)
      v0.w = vdmpy(v0.h, r10.h):sat
    }
    { v3.h = vshuff(v3.h)
      v1.w = vdmpy(v1.h, r10.h):sat
    }
    { v4.h = vshuff(V4.h)
      v2.w = vdmpy(v2.h, r10.h):sat
    }
    { v5.h = vshuff(v5.h)
      v3.w = vdmpy(v3.h, r10.h):sat
    }
    { v6.h = vshuff(v6.h)
      v4.w = vdmpy(v4.h, r10.h):sat
    }
    { v7.h = vshuff(v7.h)
      v5.w = vdmpy(v5.h, r10.h):sat
    }
    { v8.h = vshuff(V8.h)
      v6.w = vdmpy(v6.h, r10.h):sat
    }
    { v9.h = vshuff(V9.h)
      v7.w = vdmpy(v7.h, r10.h):sat
    }
    { v10.h = vshuff(v10.h)
      v8.w = vdmpy(v8.h, r10.h):sat
    }
    { v11.h = vshuff(v11.h)
      v9.w = vdmpy(v9.h, r10.h):sat
    }
    { v12.h = vshuff(v12.h)
      v10.w = vdmpy(v10.h, r10.h):sat
    }
    { v13.h = vshuff(V13.h)
      v11.w = vdmpy(v11.h, r10.h):sat
    }
    { v14.h = vshuff(v14.h)
      v12.w = vdmpy(v12.h, r10.h):sat
    }
    { v15.h = vshuff(v15.h)
      v13.w = vdmpy(v13.h, r10.h):sat
    }
    { v16.h = vshuff(v16.h)
      v14.w = vdmpy(v14.h, r10.h):sat
    }
    { v17.h = vshuff(v17.h)
      v15.w = vdmpy(v15.h, r10.h):sat
    }
    { v18.h = vshuff(v18.h)
      v16.w = vdmpy(v16.h, r10.h):sat
    }
    { v19.h = vshuff(v19.h)
      v17.w = vdmpy(v17.h, r10.h):sat
    }
    { v20.h = vshuff(v20.h)
      v18.W = vdmpy(v18.h, r10.h):sat
    }
    { v21.h = vshuff(v21.h)
      v19.w = vdmpy(v19.h, r10.h):sat
    }
    { v22.h = vshuff(v22.h)
      v20.w = vdmpy(v20.h, r10.h):sat
    }
    { v23.h = vshuff(v23.h)
      v21.w = vdmpy(v21.h, r10.h):sat
    }
    { v24.h = vshuff(v24.h)
      v22.w = vdmpy(v22.h, r10.h):sat
    }
    { v25.h = vshuff(v25.h)
      v23.w = vdmpy(v23.h, r10.h):sat
    }
    { v26.h = vshuff(v26.h)
      v24.w = vdmpy(v24.h, r10.h):sat
    }
    { v27.h = vshuff(V27.h)
      v25.w = vdmpy(v25.h, r10.h):sat
    }
    { v28.h = vshuff(v28.h)
      v26.w = vdmpy(v26.h, r10.h):sat
    }
    { v29.h = vshuff(v29.h)
      v27.w = vdmpy(v27.h, r10.h):sat
    }
    { v30.h = vshuff(v30.h)
      v28.w = vdmpy(v28.h, r10.h):sat
    }
    { v31.h = vshuff(v31.h)
      v29.w = vdmpy(v29.h, r10.h):sat
      r28 = #32                         /* vshuff element-swap distance */
    }
    { vshuff(v1, v0, r28)
      v30.w = vdmpy(v30.h, r10.h):sat
    }
    { vshuff(v3, v2, r28)
      v31.w = vdmpy(v31.h, r10.h):sat
    }
    { vshuff(v5, v4, r28)
      v0.w = vadd(v1.w, v0.w)
      v2.w = vadd(v3.w, v2.w)
    }
    { vshuff(v7, v6, r28)
      r7 = #64                          /* second-level swap distance */
    }
    { vshuff(v9, v8, r28)
      v4.w = vadd(v5.w, v4.w)
      v6.w = vadd(v7.w, v6.w)
    }
    vshuff(v11, v10, r28)
    { vshuff(v13, v12, r28)
      v8.w = vadd(v9.w, v8.w)
      v10.w = vadd(v11.w, v10.w)
    }
    vshuff(v15, v14, r28)
    { vshuff(v17, v16, r28)
      v12.w = vadd(v13.w, v12.w)
      v14.w = vadd(v15.w, v14.w)
    }
    vshuff(v19, v18, r28)
    { vshuff(v21, v20, r28)
      v16.w = vadd(v17.w, v16.w)
      v18.w = vadd(v19.w, v18.w)
    }
    vshuff(v23, v22, r28)
    { vshuff(v25, v24, r28)
      v20.w = vadd(v21.w, v20.w)
      v22.w = vadd(v23.w, v22.w)
    }
    vshuff(v27, v26, r28)
    { vshuff(v29, v28, r28)
      v24.w = vadd(v25.w, v24.w)
      v26.w = vadd(v27.w, v26.w)
    }
    vshuff(v31, v30, r28)
    { v28.w = vadd(v29.w, v28.w)
      vshuff(v2, v0, r7)
    }
    { v30.w = vadd(v31.w, v30.w)
      vshuff(v6, v4, r7)
      v0.w = vadd(v0.w, v2.w)
    }
    { vshuff(v10, v8, r7)
      v1.tmp = vmem(r4 + #0)            /* update hist[0-31] */
      v0.w = vadd(v0.w, v1.w)
      vmem(r4++#1) = v0.new
    }
    { vshuff(v14, v12, r7)
      v4.w = vadd(v4.w, v6.w)
      v8.w = vadd(v8.w, v10.w)
    }
    { vshuff(v18, v16, r7)
      v1.tmp = vmem(r4 + #0)            /* update hist[32-63] */
      v4.w = vadd(v4.w, v1.w)
      vmem(r4++#1) = v4.new
    }
    { vshuff(v22, v20, r7)
      v12.w = vadd(v12.w, v14.w)
      V16.w = vadd(v16.w, v18.w)
    }
    { vshuff(v26, v24, r7)
      v1.tmp = vmem(r4 + #0)            /* update hist[64-95] */
      v8.w = vadd(v8.w, v1.w)
      vmem(r4++#1) = v8.new
    }
    { vshuff(v30, v28, r7)
      v1.tmp = vmem(r4 + #0)            /* update hist[96-127] */
      v12.w = vadd(v12.w, v1.w)
      vmem(r4++#1) = v12.new
    }

    { v20.w = vadd(v20.w, v22.w)
      v1.tmp = vmem(r4 + #0)            /* update hist[128-159] */
      v16.w = vadd(v16.w, v1.w)
      vmem(r4++#1) = v16.new
    }
    { v24.w = vadd(v24.w, v26.w)
      v1.tmp = vmem(r4 + #0)            /* update hist[160-191] */
      v20.w = vadd(v20.w, v1.w)
      vmem(r4++#1) = v20.new
    }
    { v28.w = vadd(v28.w, v30.w)
      v1.tmp = vmem(r4 + #0)            /* update hist[192-223] */
      v24.w = vadd(v24.w, v1.w)
      vmem(r4++#1) = v24.new
    }
    { v1.tmp = vmem(r4 + #0)            /* update hist[224-255] */
      v28.w = vadd(v28.w, v1.w)
      vmem(r4++#1) = v28.new
    }
    jumpr r31
    .size hvx_histogram_row, .-hvx_histogram_row
diff --git a/tests/tcg/hexagon/hvx_histogram_row.h b/tests/tcg/hexagon/hvx_histogram_row.h
new file mode 100644
index 0000000000..6a4531a92d
--- /dev/null
+++ b/tests/tcg/hexagon/hvx_histogram_row.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef HVX_HISTOGRAM_ROW_H
#define HVX_HISTOGRAM_ROW_H

#include <stdint.h>     /* uint8_t — keep this header self-contained */

/*
 * Accumulate a 256-bin byte histogram of a height x width image
 * (rows spaced stride bytes apart, starting at src) into hist[256].
 * Implemented in hvx_histogram_row.S using the HVX vhist instruction.
 */
void hvx_histogram_row(uint8_t *src, int stride, int width, int height,
                       int *hist);

#endif
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
new file mode 100644
index 0000000000..312bb98b41
--- /dev/null
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
/* Global error counter; incremented on every failed comparison. */
int err;

/*
 * Report a mismatch between a computed value and its expected value.
 * line is the caller's source line; i/j identify the vector and element
 * that differ. Increments the global err counter on mismatch.
 *
 * NOTE(review): the name __check uses an identifier reserved for the
 * implementation (leading double underscore); kept for compatibility with
 * the CHECK_OUTPUT_FUNC users in this file.
 */
static void __check(int line, int i, int j, uint64_t result, uint64_t expect)
{
    if (result != expect) {
        /* Cast explicitly: %llx expects unsigned long long, not uint64_t */
        printf("ERROR at line %d: [%d][%d] 0x%016llx != 0x%016llx\n",
               line, i, j, (unsigned long long)result,
               (unsigned long long)expect);
        err++;
    }
}

/*
 * Scalar convenience wrapper: the original expansion passed only three
 * arguments to the five-parameter __check(), which would not compile if
 * the macro were ever used. Supply dummy 0/0 indices instead.
 */
#define check(RES, EXP) __check(__LINE__, 0, 0, (RES), (EXP))
+
#define MAX_VEC_SIZE_BYTES 128

/*
 * One 128-byte HVX vector, overlaid so the tests can view it at any
 * element width (bytes through 64-bit doublewords, signed or unsigned).
 */
typedef union {
    uint64_t ud[MAX_VEC_SIZE_BYTES / 8];
    int64_t d[MAX_VEC_SIZE_BYTES / 8];
    uint32_t uw[MAX_VEC_SIZE_BYTES / 4];
    int32_t w[MAX_VEC_SIZE_BYTES / 4];
    uint16_t uh[MAX_VEC_SIZE_BYTES / 2];
    int16_t h[MAX_VEC_SIZE_BYTES / 2];
    uint8_t ub[MAX_VEC_SIZE_BYTES / 1];
    int8_t b[MAX_VEC_SIZE_BYTES / 1];
} MMVector;

#define BUFSIZE 16      /* vectors per input buffer */
#define OUTSIZE 16      /* vectors in output[] and expect[] */
#define MASKMOD 3       /* period of the zero pattern in mask[] */

/* Vector-aligned working set shared by all of the tests below. */
MMVector buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector mask[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector output[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector expect[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));

/*
 * Emit check_output_<FIELD>(): compare the first num_vectors entries of
 * output[] against expect[] element-by-element at the given field width,
 * reporting any mismatch via __check().
 */
#define CHECK_OUTPUT_FUNC(FIELD, FIELDSZ) \
static void check_output_##FIELD(int line, size_t num_vectors) \
{ \
    for (int i = 0; i < num_vectors; i++) { \
        for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
            __check(line, i, j, output[i].FIELD[j], expect[i].FIELD[j]); \
        } \
    } \
}

CHECK_OUTPUT_FUNC(d, 8)
CHECK_OUTPUT_FUNC(w, 4)
CHECK_OUTPUT_FUNC(h, 2)
CHECK_OUTPUT_FUNC(b, 1)
+
+static void init_buffers(void)
+{
+ int counter0 = 0;
+ int counter1 = 17;
+ for (int i = 0; i < BUFSIZE; i++) {
+ for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) {
+ buffer0[i].b[j] = counter0++;
+ buffer1[i].b[j] = counter1++;
+ }
+ for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+ mask[i].w[j] = (i + j % MASKMOD == 0) ? 0 : 1;
+ }
+ }
+}
+
/*
 * Verify .tmp semantics on vector loads: a .tmp destination is visible to
 * consumers inside the same packet only; afterwards the register reverts
 * to its pre-packet contents.
 */
static void test_load_tmp(void)
{
    void *p0 = buffer0;
    void *p1 = buffer1;
    void *pout = output;

    for (int i = 0; i < BUFSIZE; i++) {
        /*
         * Load into v12 as .tmp, then use it in the next packet
         * Should get the new value within the same packet and
         * the old value in the next packet
         */
        asm("v3 = vmem(%0 + #0)\n\t"
            "r1 = #1\n\t"
            "v12 = vsplat(r1)\n\t"          /* pre-packet v12: each word == 1 */
            "{\n\t"
            " v12.tmp = vmem(%1 + #0)\n\t"  /* in-packet v12 == buffer1[i] */
            " v4.w = vadd(v12.w, v3.w)\n\t" /* buffer1[i] + buffer0[i] */
            "}\n\t"
            "v4.w = vadd(v4.w, v12.w)\n\t"  /* + old v12 (the splat of 1) */
            "vmem(%2 + #0) = v4\n\t"
            : : "r"(p0), "r"(p1), "r"(pout)
            /* NOTE(review): v6 is listed as clobbered but never written */
            : "r1", "v12", "v3", "v4", "v6", "memory");
        p0 += sizeof(MMVector);
        p1 += sizeof(MMVector);
        pout += sizeof(MMVector);

        /* buffer0 + buffer1 from the packet, +1 from the reverted v12 */
        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
            expect[i].w[j] = buffer0[i].w[j] + buffer1[i].w[j] + 1;
        }
    }

    check_output_w(__LINE__, BUFSIZE);
}
+
+static void test_load_cur(void)
+{
+ void *p0 = buffer0;
+ void *pout = output;
+
+ for (int i = 0; i < BUFSIZE; i++) {
+ asm("{\n\t"
+ " v2.cur = vmem(%0 + #0)\n\t"
+ " vmem(%1 + #0) = v2\n\t"
+ "}\n\t"
+ : : "r"(p0), "r"(pout) : "v2", "memory");
+ p0 += sizeof(MMVector);
+ pout += sizeof(MMVector);
+
+ for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+ expect[i].uw[j] = buffer0[i].uw[j];
+ }
+ }
+
+ check_output_w(__LINE__, BUFSIZE);
+}
+
+static void test_load_aligned(void)
+{
+ /* Aligned loads ignore the low bits of the address */
+ void *p0 = buffer0;
+ void *pout = output;
+ const size_t offset = 13;
+
+ p0 += offset; /* Create an unaligned address */
+ asm("v2 = vmem(%0 + #0)\n\t"
+ "vmem(%1 + #0) = v2\n\t"
+ : : "r"(p0), "r"(pout) : "v2", "memory");
+
+ expect[0] = buffer0[0];
+
+ check_output_w(__LINE__, 1);
+}
+
+static void test_load_unaligned(void)
+{
+ void *p0 = buffer0;
+ void *pout = output;
+ const size_t offset = 12;
+
+ p0 += offset; /* Create an unaligned address */
+ asm("v2 = vmemu(%0 + #0)\n\t"
+ "vmem(%1 + #0) = v2\n\t"
+ : : "r"(p0), "r"(pout) : "v2", "memory");
+
+ memcpy(expect, &buffer0[0].ub[offset], sizeof(MMVector));
+
+ check_output_w(__LINE__, 1);
+}
+
+static void test_store_aligned(void)
+{
+ /* Aligned stores ignore the low bits of the address */
+ void *p0 = buffer0;
+ void *pout = output;
+ const size_t offset = 13;
+
+ pout += offset; /* Create an unaligned address */
+ asm("v2 = vmem(%0 + #0)\n\t"
+ "vmem(%1 + #0) = v2\n\t"
+ : : "r"(p0), "r"(pout) : "v2", "memory");
+
+ expect[0] = buffer0[0];
+
+ check_output_w(__LINE__, 1);
+}
+
+static void test_store_unaligned(void)
+{
+ void *p0 = buffer0;
+ void *pout = output;
+ const size_t offset = 12;
+
+ pout += offset; /* Create an unaligned address */
+ asm("v2 = vmem(%0 + #0)\n\t"
+ "vmemu(%1 + #0) = v2\n\t"
+ : : "r"(p0), "r"(pout) : "v2", "memory");
+
+ memcpy(expect, buffer0, 2 * sizeof(MMVector));
+ memcpy(&expect[0].ub[offset], buffer0, sizeof(MMVector));
+
+ check_output_w(__LINE__, 2);
+}
+
/*
 * Verify conditional (masked) vector stores. q0 is true in lanes whose
 * mask word is zero; "if (q0) vmem" writes only those lanes, and the
 * inverted variant writes the complement. Lanes skipped by the store must
 * keep the 0xff fill they were primed with.
 */
static void test_masked_store(bool invert)
{
    void *p0 = buffer0;
    void *pmask = mask;
    void *pout = output;

    /* Prime output and expect so untouched lanes compare as 0xff */
    memset(expect, 0xff, sizeof(expect));
    memset(output, 0xff, sizeof(expect));

    for (int i = 0; i < BUFSIZE; i++) {
        if (invert) {
            asm("r4 = #0\n\t"
                "v4 = vsplat(r4)\n\t"
                "v5 = vmem(%0 + #0)\n\t"
                "q0 = vcmp.eq(v4.w, v5.w)\n\t"  /* q0: lanes with mask == 0 */
                "v5 = vmem(%1)\n\t"
                "if (!q0) vmem(%2) = v5\n\t" /* Inverted test */
                : : "r"(pmask), "r"(p0), "r"(pout)
                : "r4", "v4", "v5", "q0", "memory");
        } else {
            asm("r4 = #0\n\t"
                "v4 = vsplat(r4)\n\t"
                "v5 = vmem(%0 + #0)\n\t"
                "q0 = vcmp.eq(v4.w, v5.w)\n\t"  /* q0: lanes with mask == 0 */
                "v5 = vmem(%1)\n\t"
                "if (q0) vmem(%2) = v5\n\t" /* Non-inverted test */
                : : "r"(pmask), "r"(p0), "r"(pout)
                : "r4", "v4", "v5", "q0", "memory");
        }
        p0 += sizeof(MMVector);
        pmask += sizeof(MMVector);
        pout += sizeof(MMVector);

        /*
         * i + j % MASKMOD parses as i + (j % MASKMOD) — intentionally the
         * same expression init_buffers() used to build mask[].
         */
        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
            if (invert) {
                if (i + j % MASKMOD != 0) {
                    expect[i].w[j] = buffer0[i].w[j];
                }
            } else {
                if (i + j % MASKMOD == 0) {
                    expect[i].w[j] = buffer0[i].w[j];
                }
            }
        }
    }

    check_output_w(__LINE__, BUFSIZE);
}
+
+static void test_new_value_store(void)
+{
+ void *p0 = buffer0;
+ void *pout = output;
+
+ asm("{\n\t"
+ " v2 = vmem(%0 + #0)\n\t"
+ " vmem(%1 + #0) = v2.new\n\t"
+ "}\n\t"
+ : : "r"(p0), "r"(pout) : "v2", "memory");
+
+ expect[0] = buffer0[0];
+
+ check_output_w(__LINE__, 1);
+}
+
+static void test_max_temps()
+{
+ void *p0 = buffer0;
+ void *pout = output;
+
+ asm("v0 = vmem(%0 + #0)\n\t"
+ "v1 = vmem(%0 + #1)\n\t"
+ "v2 = vmem(%0 + #2)\n\t"
+ "v3 = vmem(%0 + #3)\n\t"
+ "v4 = vmem(%0 + #4)\n\t"
+ "{\n\t"
+ " v1:0.w = vadd(v3:2.w, v1:0.w)\n\t"
+ " v2.b = vshuffe(v3.b, v2.b)\n\t"
+ " v3.w = vadd(v1.w, v4.w)\n\t"
+ " v4.tmp = vmem(%0 + #5)\n\t"
+ "}\n\t"
+ "vmem(%1 + #0) = v0\n\t"
+ "vmem(%1 + #1) = v1\n\t"
+ "vmem(%1 + #2) = v2\n\t"
+ "vmem(%1 + #3) = v3\n\t"
+ "vmem(%1 + #4) = v4\n\t"
+ : : "r"(p0), "r"(pout) : "memory");
+
+ /* The first two vectors come from the vadd-pair instruction */
+ for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
+ expect[0].w[i] = buffer0[0].w[i] + buffer0[2].w[i];
+ expect[1].w[i] = buffer0[1].w[i] + buffer0[3].w[i];
+ }
+ /* The third vector comes from the vshuffe instruction */
+ for (int i = 0; i < MAX_VEC_SIZE_BYTES / 2; i++) {
+ expect[2].uh[i] = (buffer0[2].uh[i] & 0xff) |
+ (buffer0[3].uh[i] & 0xff) << 8;
+ }
+ /* The fourth vector comes from the vadd-single instruction */
+ for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
+ expect[3].w[i] = buffer0[1].w[i] + buffer0[5].w[i];
+ }
+ /*
+ * The fifth vector comes from the load to v4
+ * make sure the .tmp is dropped
+ */
+ expect[4] = buffer0[4];
+
+ check_output_b(__LINE__, 5);
+}
+
/*
 * Wrap a unary HVX instruction: load v2 from IN, apply ASM to it, store
 * the result to OUT. EL is the element-size suffix (".w", ".h", ".b", or
 * empty for whole-vector ops) spliced into both the operand and opcode.
 */
#define VEC_OP1(ASM, EL, IN, OUT) \
    asm("v2 = vmem(%0 + #0)\n\t" \
        "v2" #EL " = " #ASM "(v2" #EL ")\n\t" \
        "vmem(%1 + #0) = v2\n\t" \
        : : "r"(IN), "r"(OUT) : "v2", "memory")

/* Same as VEC_OP1 but for binary ops: OUT = ASM(IN0, IN1). */
#define VEC_OP2(ASM, EL, IN0, IN1, OUT) \
    asm("v2 = vmem(%0 + #0)\n\t" \
        "v3 = vmem(%1 + #0)\n\t" \
        "v2" #EL " = " #ASM "(v2" #EL ", v3" #EL ")\n\t" \
        "vmem(%2 + #0) = v2\n\t" \
        : : "r"(IN0), "r"(IN1), "r"(OUT) : "v2", "v3", "memory")

/*
 * Emit test_<NAME>(): run the unary vector op over all of buffer0 and
 * compare against the scalar C operator OP applied per element.
 */
#define TEST_VEC_OP1(NAME, ASM, EL, FIELD, FIELDSZ, OP) \
static void test_##NAME(void) \
{ \
    void *pin = buffer0; \
    void *pout = output; \
    for (int i = 0; i < BUFSIZE; i++) { \
        VEC_OP1(ASM, EL, pin, pout); \
        pin += sizeof(MMVector); \
        pout += sizeof(MMVector); \
    } \
    for (int i = 0; i < BUFSIZE; i++) { \
        for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
            expect[i].FIELD[j] = OP buffer0[i].FIELD[j]; \
        } \
    } \
    check_output_##FIELD(__LINE__, BUFSIZE); \
}

/*
 * Emit test_<NAME>(): run the binary vector op over buffer0/buffer1 and
 * compare against the scalar C operator OP applied per element.
 */
#define TEST_VEC_OP2(NAME, ASM, EL, FIELD, FIELDSZ, OP) \
static void test_##NAME(void) \
{ \
    void *p0 = buffer0; \
    void *p1 = buffer1; \
    void *pout = output; \
    for (int i = 0; i < BUFSIZE; i++) { \
        VEC_OP2(ASM, EL, p0, p1, pout); \
        p0 += sizeof(MMVector); \
        p1 += sizeof(MMVector); \
        pout += sizeof(MMVector); \
    } \
    for (int i = 0; i < BUFSIZE; i++) { \
        for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
            expect[i].FIELD[j] = buffer0[i].FIELD[j] OP buffer1[i].FIELD[j]; \
        } \
    } \
    check_output_##FIELD(__LINE__, BUFSIZE); \
}

/* Bytes greater than THRESHOLD form the "true" lanes of each predicate. */
#define THRESHOLD 31

/*
 * Build predicates q0/q1 from buffer bytes (> THRESHOLD), combine them
 * with ASM (INV is "" or "!" to complement q1), then store 0xff into the
 * lanes of OUT where the combined predicate is true; other lanes are
 * left untouched.
 */
#define PRED_OP2(ASM, IN0, IN1, OUT, INV) \
    asm("r4 = #%3\n\t" \
        "v1.b = vsplat(r4)\n\t" \
        "v2 = vmem(%0 + #0)\n\t" \
        "q0 = vcmp.gt(v2.b, v1.b)\n\t" \
        "v3 = vmem(%1 + #0)\n\t" \
        "q1 = vcmp.gt(v3.b, v1.b)\n\t" \
        "q2 = " #ASM "(q0, " INV "q1)\n\t" \
        "r4 = #0xff\n\t" \
        "v1.b = vsplat(r4)\n\t" \
        "if (q2) vmem(%2 + #0) = v1\n\t" \
        : : "r"(IN0), "r"(IN1), "r"(OUT), "i"(THRESHOLD) \
        : "r4", "v1", "v2", "v3", "q0", "q1", "q2", "memory")

/*
 * Emit test_<NAME>(invert): run the predicate combination over all
 * buffers and compare against the scalar boolean operator OP, using !p1
 * when invert mirrors INV == "!". Output is zeroed first so lanes the
 * store skips compare as 0x00.
 */
#define TEST_PRED_OP2(NAME, ASM, OP, INV) \
static void test_##NAME(bool invert) \
{ \
    void *p0 = buffer0; \
    void *p1 = buffer1; \
    void *pout = output; \
    memset(output, 0, sizeof(expect)); \
    for (int i = 0; i < BUFSIZE; i++) { \
        PRED_OP2(ASM, p0, p1, pout, INV); \
        p0 += sizeof(MMVector); \
        p1 += sizeof(MMVector); \
        pout += sizeof(MMVector); \
    } \
    for (int i = 0; i < BUFSIZE; i++) { \
        for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { \
            bool p0 = (buffer0[i].b[j] > THRESHOLD); \
            bool p1 = (buffer1[i].b[j] > THRESHOLD); \
            if (invert) { \
                expect[i].b[j] = (p0 OP !p1) ? 0xff : 0x00; \
            } else { \
                expect[i].b[j] = (p0 OP p1) ? 0xff : 0x00; \
            } \
        } \
    } \
    check_output_b(__LINE__, BUFSIZE); \
}

/* Element-wise ALU ops at word/halfword/byte widths */
TEST_VEC_OP2(vadd_w, vadd, .w, w, 4, +)
TEST_VEC_OP2(vadd_h, vadd, .h, h, 2, +)
TEST_VEC_OP2(vadd_b, vadd, .b, b, 1, +)
TEST_VEC_OP2(vsub_w, vsub, .w, w, 4, -)
TEST_VEC_OP2(vsub_h, vsub, .h, h, 2, -)
TEST_VEC_OP2(vsub_b, vsub, .b, b, 1, -)
/* Whole-vector logical ops (64-bit element view for the scalar check) */
TEST_VEC_OP2(vxor, vxor, , d, 8, ^)
TEST_VEC_OP2(vand, vand, , d, 8, &)
TEST_VEC_OP2(vor, vor, , d, 8, |)
TEST_VEC_OP1(vnot, vnot, , d, 8, ~)

/* Predicate combinations; the *_n variants complement the second input */
TEST_PRED_OP2(pred_or, or, |, "")
TEST_PRED_OP2(pred_or_n, or, |, "!")
TEST_PRED_OP2(pred_and, and, &, "")
TEST_PRED_OP2(pred_and_n, and, &, "!")
TEST_PRED_OP2(pred_xor, xor, ^, "")
+
+int main()
+{
+ init_buffers();
+
+ test_load_tmp();
+ test_load_cur();
+ test_load_aligned();
+ test_load_unaligned();
+ test_store_aligned();
+ test_store_unaligned();
+ test_masked_store(false);
+ test_masked_store(true);
+ test_new_value_store();
+ test_max_temps();
+
+ test_vadd_w();
+ test_vadd_h();
+ test_vadd_b();
+ test_vsub_w();
+ test_vsub_h();
+ test_vsub_b();
+ test_vxor();
+ test_vand();
+ test_vor();
+ test_vnot();
+
+ test_pred_or(false);
+ test_pred_or_n(true);
+ test_pred_and(false);
+ test_pred_and_n(true);
+ test_pred_xor(false);
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/scatter_gather.c b/tests/tcg/hexagon/scatter_gather.c
new file mode 100644
index 0000000000..b93eb18133
--- /dev/null
+++ b/tests/tcg/hexagon/scatter_gather.c
@@ -0,0 +1,1011 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This example tests the HVX scatter/gather instructions
+ *
+ * See section 5.13 of the V68 HVX Programmer's Reference
+ *
+ * There are 3 main classes operations
+ * _16 16-bit elements and 16-bit offsets
+ * _32 32-bit elements and 32-bit offsets
+ * _16_32 16-bit elements and 32-bit offsets
+ *
+ * There are also masked and accumulate versions
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+typedef long HVX_Vector __attribute__((__vector_size__(128)))
+ __attribute__((aligned(128)));
+typedef long HVX_VectorPair __attribute__((__vector_size__(256)))
+ __attribute__((aligned(128)));
+typedef long HVX_VectorPred __attribute__((__vector_size__(128)))
+ __attribute__((aligned(128)));
+
+/*
+ * Wrappers for the HVX scatter intrinsics.  BASE is cast to the 32-bit
+ * address the builtins expect; RGN bounds the region, OFF holds the byte
+ * offsets, VALS the elements.  _MASKED forms take a predicate, _ACC forms
+ * accumulate into memory.
+ */
+#define VSCATTER_16(BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS)
+#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS)
+#define VSCATTER_32(BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS)
+#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS)
+#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, RGN, OFF, VALS)
+#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS)
+#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS)
+#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS)
+#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \
+    __builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS)
+
+/* Gather wrappers: the result is written to the VTCM location DSTADDR */
+#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \
+    __builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF)
+#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
+    __builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
+#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \
+    __builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF)
+#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
+    __builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
+#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \
+    __builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF)
+#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \
+    __builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF)
+
+/* helpers: halfword shuffle/deal, halfword splat, vector -> predicate */
+#define VSHUFF_H(V) \
+    __builtin_HEXAGON_V6_vshuffh_128B(V)
+#define VSPLAT_H(X) \
+    __builtin_HEXAGON_V6_lvsplath_128B(X)
+#define VAND_VAL(PRED, VAL) \
+    __builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL)
+#define VDEAL_H(V) \
+    __builtin_HEXAGON_V6_vdealh_128B(V)
+
+int err;
+
+/* define the number of rows/cols in a square matrix */
+#define MATRIX_SIZE 64
+
+/* define the size of the scatter buffer */
+#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE)
+
+/* fake vtcm - put buffers together and force alignment */
+static struct {
+ unsigned short vscatter16[SCATTER_BUFFER_SIZE];
+ unsigned short vgather16[MATRIX_SIZE];
+ unsigned int vscatter32[SCATTER_BUFFER_SIZE];
+ unsigned int vgather32[MATRIX_SIZE];
+ unsigned short vscatter16_32[SCATTER_BUFFER_SIZE];
+ unsigned short vgather16_32[MATRIX_SIZE];
+} vtcm __attribute__((aligned(0x10000)));
+
+/* declare the arrays of reference values */
+unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE];
+unsigned short vgather16_ref[MATRIX_SIZE];
+unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE];
+unsigned int vgather32_ref[MATRIX_SIZE];
+unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE];
+unsigned short vgather16_32_ref[MATRIX_SIZE];
+
+/* declare the arrays of offsets */
+unsigned short half_offsets[MATRIX_SIZE];
+unsigned int word_offsets[MATRIX_SIZE];
+
+/* declare the arrays of values */
+unsigned short half_values[MATRIX_SIZE];
+unsigned short half_values_acc[MATRIX_SIZE];
+unsigned short half_values_masked[MATRIX_SIZE];
+unsigned int word_values[MATRIX_SIZE];
+unsigned int word_values_acc[MATRIX_SIZE];
+unsigned int word_values_masked[MATRIX_SIZE];
+
+/* declare the arrays of predicates */
+unsigned short half_predicates[MATRIX_SIZE];
+unsigned int word_predicates[MATRIX_SIZE];
+
+/* make this big enough for all the intrinsics */
+const size_t region_len = sizeof(vtcm);
+
+/* optionally add sync instructions */
+#define SYNC_VECTOR 1
+
+/* Make a preceding scatter visible before scalar code reads the region */
+static void sync_scatter(void *addr)
+{
+#if SYNC_VECTOR
+    /*
+     * Do the scatter release followed by a dummy load to complete the
+     * synchronization. Normally the dummy load would be deferred as
+     * long as possible to minimize stalls.
+     */
+    asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(addr));
+    /* use volatile to force the load */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy;
+#endif
+}
+
+/* Complete a pending gather by reading back its VTCM destination */
+static void sync_gather(void *addr)
+{
+#if SYNC_VECTOR
+    /* use volatile to force the load */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy;
+#endif
+}
+
+/* optionally print the results */
+#define PRINT_DATA 0
+
+#define FILL_CHAR '.'
+
+/* fill the fake VTCM area with the fill character ('.') */
+void prefill_vtcm_scratch(void)
+{
+    unsigned char *scratch = (unsigned char *)&vtcm;
+
+    for (size_t i = 0; i < sizeof(vtcm); i++) {
+        scratch[i] = FILL_CHAR;
+    }
+}
+
+/* create byte offsets to be a diagonal of the matrix with 16 bit elements */
+void create_offsets_values_preds_16(void)
+{
+    char letter = 'A';
+    const char letter_masked = '@';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        /* step down the diagonal: one row of bytes plus one element */
+        half_offsets[i] = i * (2 * MATRIX_SIZE + 2);
+
+        /* replicate the letter into both bytes of the halfword */
+        half_values[i] = (unsigned char)letter | ((unsigned char)letter << 8);
+        half_values_acc[i] = ((i % 10) << 8) + (i % 10);
+        half_values_masked[i] =
+            (unsigned char)letter_masked | ((unsigned char)letter_masked << 8);
+
+        /* cycle the letter through 'A' .. 'L' */
+        if (++letter == 'M') {
+            letter = 'A';
+        }
+
+        half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0;
+    }
+}
+
+/* create byte offsets to be a diagonal of the matrix with 32 bit elements */
+void create_offsets_values_preds_32(void)
+{
+    char letter = 'A';
+    const char letter_masked = '&';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        /* step down the diagonal: one row of bytes plus one element */
+        word_offsets[i] = i * (4 * MATRIX_SIZE + 4);
+
+        /* replicate the letter into all four bytes of the word */
+        word_values[i] = 0;
+        word_values_masked[i] = 0;
+        for (int byte = 0; byte < 4; byte++) {
+            word_values[i] |= (unsigned int)(unsigned char)letter << (byte * 8);
+            word_values_masked[i] |=
+                (unsigned int)(unsigned char)letter_masked << (byte * 8);
+        }
+        word_values_acc[i] = ((i % 10) << 8) + (i % 10);
+
+        /* cycle the letter through 'A' .. 'L' */
+        if (++letter == 'M') {
+            letter = 'A';
+        }
+
+        word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ? ~0 : 0;
+    }
+}
+
+/*
+ * create byte offsets to be a diagonal of the matrix with 16 bit elements
+ * and 32 bit offsets
+ */
+void create_offsets_values_preds_16_32(void)
+{
+    char letter = 'D';
+    const char letter_masked = '$';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        /* halfword elements, so the diagonal stride is in 2-byte units */
+        word_offsets[i] = i * (2 * MATRIX_SIZE + 2);
+
+        /* replicate the letter into both bytes of the halfword */
+        half_values[i] = (unsigned char)letter | ((unsigned char)letter << 8);
+        half_values_acc[i] = ((i % 10) << 8) + (i % 10);
+        half_values_masked[i] =
+            (unsigned char)letter_masked | ((unsigned char)letter_masked << 8);
+
+        /* cycle the letter through 'D' .. 'O' */
+        if (++letter == 'P') {
+            letter = 'D';
+        }
+
+        half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? ~0 : 0;
+    }
+}
+
+/* scatter the 16 bit elements using intrinsics */
+void vector_scatter_16(void)
+{
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector values = *(HVX_Vector *)half_values;
+
+    VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values);
+
+    /* make sure the scatter has landed before the scalar check reads vtcm */
+    sync_scatter(vtcm.vscatter16);
+}
+
+/* scatter-accumulate the 16 bit elements using intrinsics */
+void vector_scatter_16_acc(void)
+{
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector values = *(HVX_Vector *)half_values_acc;
+
+    /* adds to whatever vector_scatter_16() already stored at the offsets */
+    VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values);
+
+    sync_scatter(vtcm.vscatter16);
+}
+
+/* masked scatter of the 16 bit elements using intrinsics */
+void vector_scatter_16_masked(void)
+{
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector values = *(HVX_Vector *)half_values_masked;
+    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
+    /* convert the 0/~0 predicate words into an HVX predicate register */
+    HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
+
+    VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values);
+
+    sync_scatter(vtcm.vscatter16);
+}
+
+/* scatter the 32 bit elements using intrinsics */
+void vector_scatter_32(void)
+{
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_values;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2];
+
+    /* 64 words exceed one 128-byte vector, so scatter in lo/hi halves */
+    VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo);
+    VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi);
+
+    sync_scatter(vtcm.vscatter32);
+}
+
+/* scatter-acc the 32 bit elements using intrinsics */
+void vector_scatter_32_acc(void)
+{
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_values_acc;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2];
+
+    /* adds to whatever vector_scatter_32() already stored at the offsets */
+    VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo);
+    VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi);
+
+    sync_scatter(vtcm.vscatter32);
+}
+
+/* masked scatter of the 32 bit elements using intrinsics */
+void vector_scatter_32_masked(void)
+{
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_values_masked;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2];
+    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
+    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+    HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0);
+    HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0);
+
+    VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo,
+                       valueslo);
+    VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi,
+                       valueshi);
+
+    /*
+     * Synchronize against the buffer we actually scattered to: this was
+     * vtcm.vscatter16 (copy-paste from the 16-bit variant), which released
+     * the wrong region before check_scatter_32_masked() reads vscatter32.
+     */
+    sync_scatter(vtcm.vscatter32);
+}
+
+/* scatter the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_16_32(void)
+{
+    HVX_VectorPair offsets;
+    HVX_Vector values;
+
+    /* get the word offsets in a vector pair */
+    offsets = *(HVX_VectorPair *)word_offsets;
+
+    /*
+     * these values need to be shuffled for the scatter
+     * (halfword lanes must match the vector-pair offset ordering)
+     */
+    values = *(HVX_Vector *)half_values;
+    values = VSHUFF_H(values);
+
+    VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values);
+
+    sync_scatter(vtcm.vscatter16_32);
+}
+
+/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_16_32_acc(void)
+{
+    HVX_VectorPair offsets;
+    HVX_Vector values;
+
+    /* get the word offsets in a vector pair */
+    offsets = *(HVX_VectorPair *)word_offsets;
+
+    /* these values need to be shuffled for the scatter */
+    values = *(HVX_Vector *)half_values_acc;
+    values = VSHUFF_H(values);
+
+    /* adds to whatever vector_scatter_16_32() already stored */
+    VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values);
+
+    sync_scatter(vtcm.vscatter16_32);
+}
+
+/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_16_32_masked(void)
+{
+    HVX_VectorPair offsets;
+    HVX_Vector values;
+    HVX_Vector pred_reg;
+
+    /* get the word offsets in a vector pair */
+    offsets = *(HVX_VectorPair *)word_offsets;
+
+    /* these values need to be shuffled for the scatter */
+    values = *(HVX_Vector *)half_values_masked;
+    values = VSHUFF_H(values);
+
+    /* the predicates must be shuffled the same way as the values */
+    pred_reg = *(HVX_Vector *)half_predicates;
+    pred_reg = VSHUFF_H(pred_reg);
+    HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
+
+    VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets,
+                          values);
+
+    sync_scatter(vtcm.vscatter16_32);
+}
+
+/* gather the elements from the scatter16 buffer */
+void vector_gather_16(void)
+{
+    /* the gather result is written directly into vtcm.vgather16 */
+    HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16;
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+
+    VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets);
+
+    sync_gather(vgather);
+}
+
+/* halfword pattern ('?' in both bytes) pre-filled before a masked gather */
+static unsigned short gather_16_masked_init(void)
+{
+    const unsigned char fill = '?';
+    return (unsigned short)((fill << 8) | fill);
+}
+
+/* masked gather from the scatter16 buffer */
+void vector_gather_16_masked(void)
+{
+    HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16;
+    HVX_Vector offsets = *(HVX_Vector *)half_offsets;
+    HVX_Vector pred_reg = *(HVX_Vector *)half_predicates;
+    HVX_VectorPred preds = VAND_VAL(pred_reg, ~0);
+
+    /* pre-fill with '?' so lanes skipped by the mask are recognizable */
+    *vgather = VSPLAT_H(gather_16_masked_init());
+    VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets);
+
+    sync_gather(vgather);
+}
+
+/* gather the elements from the scatter32 buffer */
+void vector_gather_32(void)
+{
+    HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32;
+    /* hi half starts MATRIX_SIZE/2 words (MATRIX_SIZE * 2 bytes) in */
+    HVX_Vector *vgatherhi =
+        (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2));
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+
+    VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo);
+    VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi);
+
+    /*
+     * NOTE(review): only the hi destination is synced here, while the
+     * masked variant below syncs both halves -- confirm a single dummy
+     * load is sufficient for both gathers.
+     */
+    sync_gather(vgatherhi);
+}
+
+/* word pattern ('?' in all four bytes) pre-filled before a masked gather */
+static unsigned int gather_32_masked_init(void)
+{
+    const unsigned int byte = '?';
+    return byte | (byte << 8) | (byte << 16) | (byte << 24);
+}
+
+/* masked gather from the scatter32 buffer */
+void vector_gather_32_masked(void)
+{
+    HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32;
+    HVX_Vector *vgatherhi =
+        (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2));
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
+    HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0);
+    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+    HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0);
+
+    /*
+     * pre-fill the destinations with '?'; VSPLAT_H splats a halfword,
+     * which works here because all four bytes of gather_32_masked_init()
+     * are identical
+     */
+    *vgatherlo = VSPLAT_H(gather_32_masked_init());
+    *vgatherhi = VSPLAT_H(gather_32_masked_init());
+    VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len,
+                      offsetslo);
+    VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len,
+                      offsetshi);
+
+    sync_gather(vgatherlo);
+    sync_gather(vgatherhi);
+}
+
+/* gather the elements from the scatter16_32 buffer */
+void vector_gather_16_32(void)
+{
+    HVX_Vector *vgather;
+    HVX_VectorPair offsets;
+    HVX_Vector values;
+
+    /* get the vtcm address to gather from */
+    vgather = (HVX_Vector *)&vtcm.vgather16_32;
+
+    /* get the word offsets in a vector pair */
+    offsets = *(HVX_VectorPair *)word_offsets;
+
+    VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets);
+
+    /*
+     * deal the elements to get the order back
+     * (the read below presumably also serves as the gather sync)
+     */
+    values = *(HVX_Vector *)vgather;
+    values = VDEAL_H(values);
+
+    /* write it back to vtcm address */
+    *(HVX_Vector *)vgather = values;
+}
+
+/* masked gather from the scatter16_32 buffer */
+void vector_gather_16_32_masked(void)
+{
+    HVX_Vector *vgather;
+    HVX_VectorPair offsets;
+    HVX_Vector pred_reg;
+    HVX_VectorPred preds;
+    HVX_Vector values;
+
+    /* get the vtcm address to gather from */
+    vgather = (HVX_Vector *)&vtcm.vgather16_32;
+
+    /* get the word offsets in a vector pair */
+    offsets = *(HVX_VectorPair *)word_offsets;
+    /* the predicates must be shuffled to match the gathered lane order */
+    pred_reg = *(HVX_Vector *)half_predicates;
+    pred_reg = VSHUFF_H(pred_reg);
+    preds = VAND_VAL(pred_reg, ~0);
+
+    /* pre-fill with '?' so lanes skipped by the mask are recognizable */
+    *vgather = VSPLAT_H(gather_16_masked_init());
+    VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len,
+                         offsets);
+
+    /* deal the elements to get the order back */
+    values = *(HVX_Vector *)vgather;
+    values = VDEAL_H(values);
+
+    /* write it back to vtcm address */
+    *(HVX_Vector *)vgather = values;
+}
+
+/* compare a result buffer to its reference byte by byte; bump err on diff */
+static void check_buffer(const char *name, void *c, void *r, size_t size)
+{
+    const char *check = c;
+    const char *ref = r;
+
+    for (int i = 0; i < size; i++) {
+        if (check[i] == ref[i]) {
+            continue;
+        }
+        printf("ERROR %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i,
+               check[i], check[i], ref[i], ref[i]);
+        err++;
+    }
+}
+
+/*
+ * These scalar functions are the C equivalents of the vector functions that
+ * use HVX
+ */
+
+/* C reference: scatter the 16 bit elements */
+void scalar_scatter_16(unsigned short *vscatter16)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        unsigned short *slot = &vscatter16[half_offsets[i] / 2];
+        *slot = half_values[i];
+    }
+}
+
+/* check the HVX scatter against a freshly built scalar reference */
+void check_scatter_16()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned short);
+
+    memset(vscatter16_ref, FILL_CHAR, nbytes);
+    scalar_scatter_16(vscatter16_ref);
+    check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, nbytes);
+}
+
+/* C reference: scatter-accumulate the 16 bit elements */
+void scalar_scatter_16_acc(unsigned short *vscatter16)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        unsigned short *slot = &vscatter16[half_offsets[i] / 2];
+        *slot += half_values_acc[i];
+    }
+}
+
+/* replay scatter then scatter-acc into the reference, then compare */
+void check_scatter_16_acc()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned short);
+
+    memset(vscatter16_ref, FILL_CHAR, nbytes);
+    scalar_scatter_16(vscatter16_ref);
+    scalar_scatter_16_acc(vscatter16_ref);
+    check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, nbytes);
+}
+
+/* C reference: predicated scatter of the 16 bit elements */
+void scalar_scatter_16_masked(unsigned short *vscatter16)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (!half_predicates[i]) {
+            continue;
+        }
+        vscatter16[half_offsets[i] / 2] = half_values_masked[i];
+    }
+}
+
+/* replay the full scatter/acc/masked sequence, then compare */
+void check_scatter_16_masked()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned short);
+
+    memset(vscatter16_ref, FILL_CHAR, nbytes);
+    scalar_scatter_16(vscatter16_ref);
+    scalar_scatter_16_acc(vscatter16_ref);
+    scalar_scatter_16_masked(vscatter16_ref);
+    check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, nbytes);
+}
+
+/* C reference: scatter the 32 bit elements */
+void scalar_scatter_32(unsigned int *vscatter32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        unsigned int *slot = &vscatter32[word_offsets[i] / 4];
+        *slot = word_values[i];
+    }
+}
+
+/* check the HVX scatter against a freshly built scalar reference */
+void check_scatter_32()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned int);
+
+    memset(vscatter32_ref, FILL_CHAR, nbytes);
+    scalar_scatter_32(vscatter32_ref);
+    check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, nbytes);
+}
+
+/* C reference: scatter-accumulate the 32 bit elements */
+void scalar_scatter_32_acc(unsigned int *vscatter32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        unsigned int *slot = &vscatter32[word_offsets[i] / 4];
+        *slot += word_values_acc[i];
+    }
+}
+
+/* replay scatter then scatter-acc into the reference, then compare */
+void check_scatter_32_acc()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned int);
+
+    memset(vscatter32_ref, FILL_CHAR, nbytes);
+    scalar_scatter_32(vscatter32_ref);
+    scalar_scatter_32_acc(vscatter32_ref);
+    check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, nbytes);
+}
+
+/* C reference: predicated scatter of the 32 bit elements */
+void scalar_scatter_32_masked(unsigned int *vscatter32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (!word_predicates[i]) {
+            continue;
+        }
+        vscatter32[word_offsets[i] / 4] = word_values_masked[i];
+    }
+}
+
+/* replay the full scatter/acc/masked sequence, then compare */
+void check_scatter_32_masked()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned int);
+
+    memset(vscatter32_ref, FILL_CHAR, nbytes);
+    scalar_scatter_32(vscatter32_ref);
+    scalar_scatter_32_acc(vscatter32_ref);
+    scalar_scatter_32_masked(vscatter32_ref);
+    check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, nbytes);
+}
+
+/* C reference: scatter 16 bit elements addressed by 32 bit offsets */
+void scalar_scatter_16_32(unsigned short *vscatter16_32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        unsigned short *slot = &vscatter16_32[word_offsets[i] / 2];
+        *slot = half_values[i];
+    }
+}
+
+/* check the HVX scatter against a freshly built scalar reference */
+void check_scatter_16_32()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned short);
+
+    memset(vscatter16_32_ref, FILL_CHAR, nbytes);
+    scalar_scatter_16_32(vscatter16_32_ref);
+    check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, nbytes);
+}
+
+/* C reference: scatter-acc 16 bit elements addressed by 32 bit offsets */
+void scalar_scatter_16_32_acc(unsigned short *vscatter16_32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        unsigned short *slot = &vscatter16_32[word_offsets[i] / 2];
+        *slot += half_values_acc[i];
+    }
+}
+
+/* replay scatter then scatter-acc into the reference, then compare */
+void check_scatter_16_32_acc()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned short);
+
+    memset(vscatter16_32_ref, FILL_CHAR, nbytes);
+    scalar_scatter_16_32(vscatter16_32_ref);
+    scalar_scatter_16_32_acc(vscatter16_32_ref);
+    check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, nbytes);
+}
+
+/* C reference: predicated scatter of 16 bit elements, 32 bit offsets */
+void scalar_scatter_16_32_masked(unsigned short *vscatter16_32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (!half_predicates[i]) {
+            continue;
+        }
+        vscatter16_32[word_offsets[i] / 2] = half_values_masked[i];
+    }
+}
+
+/* replay the full scatter/acc/masked sequence, then compare */
+void check_scatter_16_32_masked()
+{
+    const size_t nbytes = SCATTER_BUFFER_SIZE * sizeof(unsigned short);
+
+    memset(vscatter16_32_ref, FILL_CHAR, nbytes);
+    scalar_scatter_16_32(vscatter16_32_ref);
+    scalar_scatter_16_32_acc(vscatter16_32_ref);
+    scalar_scatter_16_32_masked(vscatter16_32_ref);
+    check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, nbytes);
+}
+
+/* C reference: gather the halfwords back out of the scatter16 region */
+void scalar_gather_16(unsigned short *vgather16)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2];
+    }
+}
+
+/* check the HVX gather against the scalar reference */
+void check_gather_16()
+{
+    const size_t nbytes = MATRIX_SIZE * sizeof(unsigned short);
+
+    memset(vgather16_ref, 0, nbytes);
+    scalar_gather_16(vgather16_ref);
+    check_buffer(__func__, vtcm.vgather16, vgather16_ref, nbytes);
+}
+
+/* C reference: predicated gather of the halfwords */
+void scalar_gather_16_masked(unsigned short *vgather16)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (half_predicates[i]) {
+            vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2];
+        }
+    }
+}
+
+/*
+ * memset uses only the low byte of gather_16_masked_init(), which matches
+ * the vector pre-fill because both bytes of the pattern are '?'
+ */
+void check_gather_16_masked()
+{
+    const size_t nbytes = MATRIX_SIZE * sizeof(unsigned short);
+
+    memset(vgather16_ref, gather_16_masked_init(), nbytes);
+    scalar_gather_16_masked(vgather16_ref);
+    check_buffer(__func__, vtcm.vgather16, vgather16_ref, nbytes);
+}
+
+/* C reference: gather the words back out of the scatter32 region */
+void scalar_gather_32(unsigned int *vgather32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4];
+    }
+}
+
+/* check the HVX gather against the scalar reference */
+void check_gather_32(void)
+{
+    const size_t nbytes = MATRIX_SIZE * sizeof(unsigned int);
+
+    memset(vgather32_ref, 0, nbytes);
+    scalar_gather_32(vgather32_ref);
+    check_buffer(__func__, vtcm.vgather32, vgather32_ref, nbytes);
+}
+
+/* C reference: predicated gather of the words */
+void scalar_gather_32_masked(unsigned int *vgather32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (word_predicates[i]) {
+            vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4];
+        }
+    }
+}
+
+/*
+ * memset uses only the low byte of gather_32_masked_init(), which matches
+ * the vector pre-fill because all four bytes of the pattern are '?'
+ */
+void check_gather_32_masked(void)
+{
+    const size_t nbytes = MATRIX_SIZE * sizeof(unsigned int);
+
+    memset(vgather32_ref, gather_32_masked_init(), nbytes);
+    scalar_gather_32_masked(vgather32_ref);
+    check_buffer(__func__, vtcm.vgather32, vgather32_ref, nbytes);
+}
+
+/* C reference: gather the halfwords addressed by 32 bit offsets */
+void scalar_gather_16_32(unsigned short *vgather16_32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2];
+    }
+}
+
+/* check the HVX gather against the scalar reference */
+void check_gather_16_32(void)
+{
+    const size_t nbytes = MATRIX_SIZE * sizeof(unsigned short);
+
+    memset(vgather16_32_ref, 0, nbytes);
+    scalar_gather_16_32(vgather16_32_ref);
+    check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, nbytes);
+}
+
+/* C reference: predicated gather of halfwords, 32 bit offsets */
+void scalar_gather_16_32_masked(unsigned short *vgather16_32)
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        if (half_predicates[i]) {
+            vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2];
+        }
+    }
+}
+
+/* masked lanes keep the '?' pre-fill pattern; see gather_16_masked_init() */
+void check_gather_16_32_masked(void)
+{
+    const size_t nbytes = MATRIX_SIZE * sizeof(unsigned short);
+
+    memset(vgather16_32_ref, gather_16_masked_init(), nbytes);
+    scalar_gather_16_32_masked(vgather16_32_ref);
+    check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, nbytes);
+}
+
+/* print scatter16 buffer (no-op unless PRINT_DATA is enabled) */
+void print_scatter16_buffer(void)
+{
+    if (PRINT_DATA) {
+        printf("\n\nPrinting the 16 bit scatter buffer");
+
+        for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+            if ((i % MATRIX_SIZE) == 0) {
+                printf("\n");
+            }
+            /* dump each element byte by byte, LSB first */
+            for (int j = 0; j < 2; j++) {
+                printf("%c", (char)((vtcm.vscatter16[i] >> j * 8) & 0xff));
+            }
+            printf(" ");
+        }
+        printf("\n");
+    }
+}
+
+/* print the gather 16 buffer (no-op unless PRINT_DATA is enabled) */
+void print_gather_result_16(void)
+{
+    if (PRINT_DATA) {
+        printf("\n\nPrinting the 16 bit gather result\n");
+
+        for (int i = 0; i < MATRIX_SIZE; i++) {
+            /* dump each element byte by byte, LSB first */
+            for (int j = 0; j < 2; j++) {
+                printf("%c", (char)((vtcm.vgather16[i] >> j * 8) & 0xff));
+            }
+            printf(" ");
+        }
+        printf("\n");
+    }
+}
+
+/* print the scatter32 buffer (no-op unless PRINT_DATA is enabled) */
+void print_scatter32_buffer(void)
+{
+    if (PRINT_DATA) {
+        printf("\n\nPrinting the 32 bit scatter buffer");
+
+        for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+            if ((i % MATRIX_SIZE) == 0) {
+                printf("\n");
+            }
+            /* dump each element byte by byte, LSB first */
+            for (int j = 0; j < 4; j++) {
+                printf("%c", (char)((vtcm.vscatter32[i] >> j * 8) & 0xff));
+            }
+            printf(" ");
+        }
+        printf("\n");
+    }
+}
+
+/* print the gather 32 buffer (no-op unless PRINT_DATA is enabled) */
+void print_gather_result_32(void)
+{
+    if (PRINT_DATA) {
+        printf("\n\nPrinting the 32 bit gather result\n");
+
+        for (int i = 0; i < MATRIX_SIZE; i++) {
+            /* dump each element byte by byte, LSB first */
+            for (int j = 0; j < 4; j++) {
+                printf("%c", (char)((vtcm.vgather32[i] >> j * 8) & 0xff));
+            }
+            printf(" ");
+        }
+        printf("\n");
+    }
+}
+
+/* print the scatter16_32 buffer (no-op unless PRINT_DATA is enabled) */
+void print_scatter16_32_buffer(void)
+{
+    if (PRINT_DATA) {
+        printf("\n\nPrinting the 16_32 bit scatter buffer");
+
+        for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+            if ((i % MATRIX_SIZE) == 0) {
+                printf("\n");
+            }
+            /* dump each element byte by byte, LSB first */
+            for (int j = 0; j < 2; j++) {
+                printf("%c",
+                       (unsigned char)((vtcm.vscatter16_32[i] >> j * 8) & 0xff));
+            }
+            printf(" ");
+        }
+        printf("\n");
+    }
+}
+
+/* print the gather 16_32 buffer (no-op unless PRINT_DATA is enabled) */
+void print_gather_result_16_32(void)
+{
+    if (PRINT_DATA) {
+        printf("\n\nPrinting the 16_32 bit gather result\n");
+
+        for (int i = 0; i < MATRIX_SIZE; i++) {
+            /* dump each element byte by byte, LSB first */
+            for (int j = 0; j < 2; j++) {
+                printf("%c",
+                       (unsigned char)((vtcm.vgather16_32[i] >> j * 8) & 0xff));
+            }
+            printf(" ");
+        }
+        printf("\n");
+    }
+}
+
+int main()
+{
+    /* fill the fake VTCM with '.' so stale bytes are detectable */
+    prefill_vtcm_scratch();
+
+    /* 16 bit elements with 16 bit offsets */
+    create_offsets_values_preds_16();
+
+    vector_scatter_16();
+    print_scatter16_buffer();
+    check_scatter_16();
+
+    vector_gather_16();
+    print_gather_result_16();
+    check_gather_16();
+
+    vector_gather_16_masked();
+    print_gather_result_16();
+    check_gather_16_masked();
+
+    vector_scatter_16_acc();
+    print_scatter16_buffer();
+    check_scatter_16_acc();
+
+    vector_scatter_16_masked();
+    print_scatter16_buffer();
+    check_scatter_16_masked();
+
+    /* 32 bit elements with 32 bit offsets */
+    create_offsets_values_preds_32();
+
+    vector_scatter_32();
+    print_scatter32_buffer();
+    check_scatter_32();
+
+    vector_gather_32();
+    print_gather_result_32();
+    check_gather_32();
+
+    vector_gather_32_masked();
+    print_gather_result_32();
+    check_gather_32_masked();
+
+    vector_scatter_32_acc();
+    print_scatter32_buffer();
+    check_scatter_32_acc();
+
+    vector_scatter_32_masked();
+    print_scatter32_buffer();
+    check_scatter_32_masked();
+
+    /* 16 bit elements with 32 bit offsets */
+    create_offsets_values_preds_16_32();
+
+    vector_scatter_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_16_32();
+
+    vector_gather_16_32();
+    print_gather_result_16_32();
+    check_gather_16_32();
+
+    vector_gather_16_32_masked();
+    print_gather_result_16_32();
+    check_gather_16_32_masked();
+
+    vector_scatter_16_32_acc();
+    print_scatter16_32_buffer();
+    check_scatter_16_32_acc();
+
+    vector_scatter_16_32_masked();
+    print_scatter16_32_buffer();
+    check_scatter_16_32_masked();
+
+    puts(err ? "FAIL" : "PASS");
+    /*
+     * Only the low 8 bits of the exit status reach the parent, so a raw
+     * "return err" could report success for a multiple-of-256 error count;
+     * clamp to 0/1 (matches hvx_misc.c).
+     */
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/vector_add_int.c b/tests/tcg/hexagon/vector_add_int.c
new file mode 100644
index 0000000000..d6010ea14b
--- /dev/null
+++ b/tests/tcg/hexagon/vector_add_int.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+
+int gA[401];
+int gB[401];
+int gC[401];
+
+/* gA[i] = gB[i] + gC[i] over the first 400 elements */
+void vector_add_int()
+{
+    int i = 0;
+
+    while (i < 400) {
+        gA[i] = gB[i] + gC[i];
+        i++;
+    }
+}
+
+int main()
+{
+    int i, error = 0;
+
+    /* seed the inputs; gA[400] is a sentinel to catch overruns */
+    for (i = 0; i < 400; i++) {
+        gB[i] = i * 2;
+        gC[i] = i * 3;
+    }
+    gA[400] = 17;
+
+    vector_add_int();
+
+    /* every element must be i*2 + i*3 == i*5 */
+    for (i = 0; i < 400; i++) {
+        if (gA[i] != i * 5) {
+            error++;
+            printf("ERROR: gB[%d] = %d\t", i, gB[i]);
+            printf("gC[%d] = %d\t", i, gC[i]);
+            printf("gA[%d] = %d\n", i, gA[i]);
+        }
+    }
+    if (gA[400] != 17) {
+        error++;
+        printf("ERROR: Overran the buffer\n");
+    }
+
+    if (error) {
+        printf("FAIL\n");
+        return 1;
+    }
+    printf("PASS\n");
+    return 0;
+}