diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/tcg/hexagon/hvx_histogram.c | 88 | ||||
-rw-r--r-- | tests/tcg/hexagon/hvx_histogram_input.h | 717 | ||||
-rw-r--r-- | tests/tcg/hexagon/hvx_histogram_row.S | 294 | ||||
-rw-r--r-- | tests/tcg/hexagon/hvx_histogram_row.h | 24 | ||||
-rw-r--r-- | tests/tcg/hexagon/hvx_misc.c | 469 | ||||
-rw-r--r-- | tests/tcg/hexagon/scatter_gather.c | 1011 | ||||
-rw-r--r-- | tests/tcg/hexagon/vector_add_int.c | 61 |
7 files changed, 2664 insertions, 0 deletions
diff --git a/tests/tcg/hexagon/hvx_histogram.c b/tests/tcg/hexagon/hvx_histogram.c new file mode 100644 index 0000000000..43377a9abb --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram.c @@ -0,0 +1,88 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include "hvx_histogram_row.h" + +const int vector_len = 128; +const int width = 275; +const int height = 20; +const int stride = (width + vector_len - 1) & -vector_len; + +int err; + +static uint8_t input[height][stride] __attribute__((aligned(128))) = { +#include "hvx_histogram_input.h" +}; + +static int result[256] __attribute__((aligned(128))); +static int expect[256] __attribute__((aligned(128))); + +static void check(void) +{ + for (int i = 0; i < 256; i++) { + int res = result[i]; + int exp = expect[i]; + if (res != exp) { + printf("ERROR at %3d: 0x%04x != 0x%04x\n", + i, res, exp); + err++; + } + } +} + +static void ref_histogram(uint8_t *src, int stride, int width, int height, + int *hist) +{ + for (int i = 0; i < 256; i++) { + hist[i] = 0; + } + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + hist[src[i * stride + j]]++; + } + } +} + +static void hvx_histogram(uint8_t *src, int stride, int width, int height, + int *hist) +{ + int n = 8192 / width; + + for (int i = 0; i < 256; i++) { + hist[i] = 0; + } + + for (int i = 0; i < height; i += n) { + int k = height - i > n ? n : height - i; + hvx_histogram_row(src, stride, width, k, hist); + src += n * stride; + } +} + +int main() +{ + ref_histogram(&input[0][0], stride, width, height, expect); + hvx_histogram(&input[0][0], stride, width, height, result); + check(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/hvx_histogram_input.h b/tests/tcg/hexagon/hvx_histogram_input.h new file mode 100644 index 0000000000..2f9109255e --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_input.h @@ -0,0 +1,717 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + { 0x26, 0x32, 0x2e, 0x2e, 0x2d, 0x2c, 0x2d, 0x2d, + 0x2c, 0x2e, 0x31, 0x33, 0x36, 0x39, 0x3b, 0x3f, + 0x42, 0x46, 0x4a, 0x4c, 0x51, 0x53, 0x53, 0x54, + 0x56, 0x57, 0x58, 0x57, 0x56, 0x52, 0x51, 0x4f, + 0x4c, 0x49, 0x47, 0x42, 0x3e, 0x3b, 0x38, 0x35, + 0x33, 0x30, 0x2e, 0x2c, 0x2b, 0x2a, 0x2a, 0x28, + 0x28, 0x27, 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2e, + 0x2f, 0x33, 0x36, 0x38, 0x3c, 0x3d, 0x40, 0x42, + 0x43, 0x42, 0x43, 0x44, 0x43, 0x41, 0x40, 0x3b, + 0x3b, 0x3a, 0x38, 0x35, 0x32, 0x2f, 0x2c, 0x29, + 0x27, 0x26, 0x23, 0x21, 0x1e, 0x1c, 0x1a, 0x19, + 0x17, 0x15, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0d, 0x0e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0d, 0x0c, 0x0f, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x12, 0x14, 0x16, 0x17, 0x19, + 0x1c, 0x1d, 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2f, + 0x31, 0x33, 0x36, 0x38, 0x39, 0x3a, 0x3b, 0x3c, + 0x3c, 0x3d, 0x3e, 0x3e, 0x3c, 0x3b, 0x3a, 0x39, + 0x39, 0x3a, 0x3a, 0x3a, 0x3a, 0x3c, 0x3e, 0x43, + 0x47, 0x4a, 0x4d, 0x51, 0x51, 0x54, 0x56, 0x56, + 0x57, 0x56, 0x53, 0x4f, 0x4b, 0x47, 0x43, 0x41, + 0x3e, 0x3c, 0x3a, 0x37, 0x36, 0x33, 0x32, 0x34, + 0x34, 0x34, 0x34, 0x35, 0x36, 0x39, 0x3d, 0x3d, + 0x3f, 0x40, 0x40, 0x40, 0x40, 0x3e, 0x40, 0x40, + 0x42, 0x44, 0x47, 0x48, 0x4b, 0x4e, 0x56, 0x5c, + 0x62, 0x68, 0x6f, 0x73, 0x76, 0x79, 0x7a, 0x7c, + 0x7e, 0x7c, 0x78, 0x72, 0x6e, 0x69, 0x65, 0x60, + 0x5b, 0x56, 0x52, 0x4d, 0x4a, 0x48, 0x47, 0x46, + 0x44, 0x43, 0x42, 0x41, 0x41, 0x41, 0x40, 0x40, + 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3b, 0x38, 0x37, + 0x36, 0x35, 0x36, 0x35, 0x36, 0x37, 0x38, 0x3c, + 0x3d, 0x3f, 0x42, 0x44, 0x46, 0x48, 0x4b, 0x4c, + 0x4e, 0x4e, 0x4d, 0x4c, 0x4a, 0x48, 0x49, 0x49, + 0x4b, 0x4d, 0x4e, }, + { 0x23, 0x2d, 0x29, 0x29, 0x28, 0x28, 0x29, 0x29, + 0x28, 0x2b, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a, + 0x3d, 0x41, 0x44, 0x47, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48, + 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30, + 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, + 0x25, 0x24, 0x24, 0x24, 0x26, 0x28, 0x28, 0x2a, + 0x2b, 0x2e, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3c, 0x3b, 0x38, + 0x37, 0x35, 0x33, 0x30, 0x2e, 0x2b, 0x27, 0x25, + 0x24, 0x21, 0x20, 0x1d, 0x1b, 0x1a, 0x18, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x10, 0x11, 0x10, 0x0e, + 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b, + 0x0b, 0x0b, 0x0c, 0x0b, 0x0b, 0x09, 0x0a, 0x0b, + 0x0b, 0x0a, 0x0a, 0x0c, 0x0c, 0x0c, 0x0d, 0x0e, + 0x0e, 0x0f, 0x0f, 0x11, 0x12, 0x15, 0x15, 0x17, + 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2a, + 0x2d, 0x30, 0x33, 0x34, 0x35, 0x35, 0x37, 0x37, + 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x36, 0x37, + 0x35, 0x36, 0x35, 0x35, 0x36, 0x37, 0x3a, 0x3e, + 0x40, 0x43, 0x48, 0x49, 0x4b, 0x4c, 0x4d, 0x4e, + 0x4f, 0x4f, 0x4c, 0x48, 0x45, 0x41, 0x3e, 0x3b, + 0x3a, 0x37, 0x36, 0x33, 0x32, 0x31, 0x30, 0x31, + 0x32, 0x31, 0x31, 0x31, 0x31, 0x34, 0x37, 0x38, + 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, 0x3b, 0x3d, 0x3e, + 0x3f, 0x40, 0x43, 0x44, 0x47, 0x4b, 0x4f, 0x56, + 0x5a, 0x60, 0x66, 0x69, 0x6a, 0x6e, 0x71, 0x72, + 0x73, 0x72, 0x6d, 0x69, 0x66, 0x60, 0x5c, 0x59, + 0x54, 0x50, 0x4d, 0x48, 0x46, 0x44, 0x44, 0x43, + 0x42, 0x41, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d, + 0x3d, 0x3d, 0x3c, 0x3a, 0x39, 0x38, 0x35, 0x35, + 0x34, 0x34, 0x35, 0x34, 0x35, 0x36, 0x39, 0x3c, + 0x3d, 0x3e, 0x41, 0x43, 0x44, 0x46, 0x48, 0x49, + 0x4a, 0x49, 0x48, 0x47, 0x45, 0x43, 0x43, 0x44, + 0x45, 0x47, 0x48, }, + { 0x23, 0x2d, 0x2a, 0x2a, 0x29, 0x29, 0x2a, 0x2a, + 0x29, 0x2c, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a, + 0x3d, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48, + 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30, + 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, + 0x25, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, + 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3e, 0x3d, 0x3e, 0x3c, 0x3c, 0x3a, + 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26, + 0x24, 0x21, 0x20, 0x1e, 0x1c, 0x1b, 0x18, 0x17, + 0x16, 0x14, 0x13, 0x12, 0x10, 0x10, 0x0f, 0x0e, + 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0b, 0x0b, 0x0c, 0x0c, 0x0c, 0x0b, 0x0b, 0x0c, + 0x0c, 0x0b, 0x0b, 0x0c, 0x0d, 0x0c, 0x0e, 0x0e, + 0x0e, 0x0f, 0x11, 0x11, 0x13, 0x14, 0x16, 0x18, + 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2b, + 0x2d, 0x31, 0x33, 0x34, 0x36, 0x37, 0x38, 0x38, + 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x37, 0x37, + 0x35, 0x36, 0x35, 0x36, 0x35, 0x38, 0x3a, 0x3e, + 0x40, 0x41, 0x45, 0x47, 0x49, 0x4a, 0x4c, 0x4d, + 0x4e, 0x4d, 0x4a, 0x47, 0x44, 0x40, 0x3d, 0x3b, + 0x39, 0x37, 0x34, 0x34, 0x32, 0x31, 0x31, 0x33, + 0x32, 0x31, 0x32, 0x33, 0x32, 0x36, 0x38, 0x39, + 0x3b, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3e, 0x3e, + 0x41, 0x42, 0x43, 0x45, 0x48, 0x4c, 0x50, 0x56, + 0x5b, 0x5f, 0x62, 0x67, 0x69, 0x6c, 0x6e, 0x6e, + 0x70, 0x6f, 0x6b, 0x67, 0x63, 0x5e, 0x5b, 0x58, + 0x54, 0x51, 0x4e, 0x4a, 0x48, 0x46, 0x46, 0x46, + 0x45, 0x46, 0x44, 0x43, 0x44, 0x43, 0x42, 0x42, + 0x41, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, 0x3a, 0x39, + 0x39, 0x39, 0x38, 0x37, 0x37, 0x3a, 0x3e, 0x40, + 0x42, 0x43, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4c, + 0x4c, 0x4b, 0x4a, 0x48, 0x46, 0x44, 0x43, 0x45, + 0x45, 0x46, 0x47, }, + { 0x21, 0x2b, 0x28, 0x28, 0x28, 0x28, 0x29, 0x29, + 0x28, 0x2a, 0x2d, 0x30, 0x32, 0x34, 0x37, 0x3a, + 0x3c, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2b, 0x2a, 0x28, 0x27, 0x26, 0x25, + 0x24, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, + 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26, + 0x25, 0x21, 0x20, 0x1e, 0x1c, 0x19, 0x19, 0x18, + 0x17, 0x15, 0x15, 0x12, 0x11, 0x11, 0x11, 0x0f, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0e, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x16, 0x18, + 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x28, 0x29, 0x2d, + 0x2f, 0x32, 0x34, 0x35, 0x36, 0x37, 0x38, 0x38, + 0x39, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36, + 0x35, 0x35, 0x37, 0x35, 0x36, 0x37, 0x3a, 0x3d, + 0x3e, 0x41, 0x43, 0x46, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x49, 0x47, 0x45, 0x42, 0x3f, 0x3d, 0x3b, + 0x3a, 0x38, 0x36, 0x34, 0x32, 0x32, 0x32, 0x32, + 0x32, 0x31, 0x33, 0x32, 0x34, 0x37, 0x38, 0x38, + 0x3a, 0x3b, 0x3d, 0x3d, 0x3d, 0x3e, 0x3f, 0x41, + 0x42, 0x44, 0x44, 0x46, 0x49, 0x4d, 0x50, 0x54, + 0x58, 0x5c, 0x61, 0x63, 0x65, 0x69, 0x6a, 0x6c, + 0x6d, 0x6c, 0x68, 0x64, 0x61, 0x5c, 0x59, 0x57, + 0x53, 0x51, 0x4f, 0x4c, 0x4a, 0x48, 0x48, 0x49, + 0x49, 0x48, 0x48, 0x48, 0x47, 0x47, 0x46, 0x46, + 0x45, 0x44, 0x42, 0x41, 0x3f, 0x3e, 0x3c, 0x3c, + 0x3c, 0x3d, 0x3c, 0x3c, 0x3c, 0x3e, 0x41, 0x43, + 0x46, 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4e, + 0x4e, 0x4d, 0x4b, 0x49, 0x47, 0x44, 0x44, 0x45, + 0x45, 0x45, 0x46, }, + { 0x22, 0x2b, 0x27, 0x27, 0x27, 0x27, 0x28, 0x28, + 0x28, 0x2a, 0x2c, 0x2f, 0x30, 0x34, 0x37, 0x3b, + 0x3d, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x47, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2b, 0x2a, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3b, + 0x3d, 0x3e, 0x3e, 0x3f, 0x3f, 0x3d, 0x3c, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26, + 0x25, 0x22, 0x20, 0x1e, 0x1c, 0x1a, 0x19, 0x18, + 0x16, 0x15, 0x14, 0x12, 0x10, 0x11, 0x11, 0x0f, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0c, 0x0d, 0x0c, + 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x15, 0x18, + 0x19, 0x1d, 0x1f, 0x21, 0x24, 0x27, 0x2a, 0x2c, + 0x30, 0x33, 0x35, 0x36, 0x37, 0x38, 0x39, 0x39, + 0x3a, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x39, 0x3a, + 0x3d, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x46, 0x46, + 0x47, 0x46, 0x44, 0x42, 0x40, 0x3d, 0x3a, 0x39, + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3c, 0x3c, 0x3e, 0x3e, 0x3e, 0x41, 0x43, + 0x44, 0x45, 0x46, 0x48, 0x49, 0x4c, 0x51, 0x54, + 0x56, 0x5a, 0x5f, 0x61, 0x63, 0x65, 0x67, 0x69, + 0x6a, 0x69, 0x67, 0x61, 0x5f, 0x5b, 0x58, 0x56, + 0x54, 0x51, 0x50, 0x4e, 0x4c, 0x4a, 0x4b, 0x4c, + 0x4c, 0x4b, 0x4b, 0x4b, 0x4b, 0x49, 0x4a, 0x49, + 0x49, 0x48, 0x46, 0x44, 0x42, 0x41, 0x40, 0x3f, + 0x3f, 0x40, 0x40, 0x40, 0x40, 0x42, 0x46, 0x49, + 0x4b, 0x4c, 0x4f, 0x4f, 0x50, 0x52, 0x51, 0x51, + 0x50, 0x4f, 0x4c, 0x4a, 0x48, 0x46, 0x45, 0x44, + 0x44, 0x45, 0x46, }, + { 0x21, 0x2a, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, + 0x27, 0x29, 0x2d, 0x2f, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x2f, + 0x2f, 0x2d, 0x2a, 0x2a, 0x27, 0x26, 0x25, 0x24, + 0x22, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3c, + 0x3d, 0x3e, 0x3f, 0x40, 0x3f, 0x3d, 0x3d, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26, + 0x25, 0x22, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x18, + 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f, + 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0c, + 0x0c, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x13, 0x13, 0x14, 0x15, 0x17, 0x19, + 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x27, 0x2a, 0x2e, + 0x31, 0x33, 0x35, 0x38, 0x39, 0x3a, 0x3b, 0x3b, + 0x3c, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x38, 0x37, + 0x36, 0x36, 0x37, 0x36, 0x37, 0x38, 0x38, 0x3a, + 0x3b, 0x3e, 0x40, 0x40, 0x41, 0x42, 0x43, 0x42, + 0x43, 0x42, 0x40, 0x40, 0x3f, 0x3c, 0x3b, 0x39, + 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x33, + 0x32, 0x32, 0x34, 0x35, 0x35, 0x36, 0x39, 0x39, + 0x3a, 0x3c, 0x3c, 0x3f, 0x40, 0x41, 0x43, 0x45, + 0x45, 0x47, 0x48, 0x4a, 0x4b, 0x4d, 0x50, 0x53, + 0x56, 0x59, 0x5c, 0x5f, 0x60, 0x65, 0x64, 0x66, + 0x68, 0x66, 0x64, 0x61, 0x5e, 0x5a, 0x59, 0x56, + 0x54, 0x52, 0x51, 0x50, 0x4e, 0x4c, 0x4d, 0x4f, + 0x4f, 0x4f, 0x50, 0x50, 0x4f, 0x4f, 0x4e, 0x4d, + 0x4c, 0x4b, 0x49, 0x47, 0x45, 0x44, 0x43, 0x43, + 0x42, 0x43, 0x44, 0x44, 0x46, 0x47, 0x49, 0x4d, + 0x4f, 0x51, 0x53, 0x54, 0x53, 0x54, 0x54, 0x53, + 0x53, 0x51, 0x4e, 0x4b, 0x4a, 0x47, 0x45, 0x44, + 0x44, 0x45, 0x46, }, + { 0x20, 0x28, 0x26, 0x26, 0x25, 0x24, 0x27, 0x27, + 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4a, 0x49, + 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3f, 0x40, 0x3e, 0x3d, 0x3d, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2f, 0x2c, 0x29, 0x27, + 0x25, 0x21, 0x21, 0x1f, 0x1c, 0x1d, 0x19, 0x18, + 0x16, 0x15, 0x15, 0x13, 0x12, 0x11, 0x11, 0x0f, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x10, + 0x10, 0x10, 0x12, 0x13, 0x15, 0x16, 0x18, 0x1a, + 0x1c, 0x1d, 0x20, 0x22, 0x25, 0x27, 0x2a, 0x2e, + 0x30, 0x34, 0x38, 0x39, 0x3a, 0x3b, 0x3b, 0x3b, + 0x3c, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, + 0x36, 0x36, 0x38, 0x37, 0x37, 0x37, 0x38, 0x3a, + 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x40, 0x40, + 0x42, 0x40, 0x3f, 0x3e, 0x3d, 0x3b, 0x3a, 0x39, + 0x37, 0x36, 0x36, 0x35, 0x34, 0x34, 0x33, 0x33, + 0x33, 0x34, 0x35, 0x35, 0x35, 0x36, 0x38, 0x39, + 0x3a, 0x3b, 0x3d, 0x3f, 0x42, 0x43, 0x45, 0x45, + 0x46, 0x48, 0x49, 0x4b, 0x4b, 0x4d, 0x50, 0x53, + 0x56, 0x57, 0x5a, 0x5c, 0x5e, 0x61, 0x63, 0x65, + 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x59, 0x58, 0x56, + 0x55, 0x54, 0x52, 0x51, 0x50, 0x51, 0x51, 0x52, + 0x52, 0x52, 0x52, 0x52, 0x51, 0x51, 0x51, 0x50, + 0x4f, 0x4e, 0x4c, 0x4a, 0x47, 0x46, 0x45, 0x45, + 0x45, 0x46, 0x46, 0x46, 0x4a, 0x4c, 0x4d, 0x52, + 0x54, 0x56, 0x58, 0x58, 0x56, 0x57, 0x57, 0x56, + 0x55, 0x53, 0x50, 0x4d, 0x49, 0x45, 0x44, 0x44, + 0x43, 0x44, 0x45, }, + { 0x1f, 0x27, 0x24, 0x23, 0x25, 0x24, 0x25, 0x26, + 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3d, 0x41, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x49, + 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x25, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3e, 0x3f, 0x3f, 0x40, 0x3e, 0x3d, 0x3c, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x30, 0x2c, 0x29, 0x28, + 0x25, 0x23, 0x22, 0x1f, 0x1c, 0x1c, 0x18, 0x18, + 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0c, 0x0b, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0d, 0x0e, 0x0e, 0x0f, 0x0d, 0x0f, 0x10, 0x10, + 0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x19, 0x1a, + 0x1c, 0x1f, 0x20, 0x23, 0x26, 0x28, 0x2a, 0x2e, + 0x31, 0x35, 0x38, 0x39, 0x3a, 0x3c, 0x3d, 0x3d, + 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, 0x3a, 0x39, 0x39, + 0x38, 0x37, 0x38, 0x38, 0x37, 0x38, 0x39, 0x3a, + 0x3c, 0x3c, 0x3d, 0x3e, 0x3f, 0x3f, 0x40, 0x3f, + 0x41, 0x40, 0x3e, 0x3e, 0x3d, 0x3b, 0x3b, 0x39, + 0x37, 0x37, 0x35, 0x36, 0x34, 0x34, 0x34, 0x35, + 0x35, 0x34, 0x34, 0x35, 0x35, 0x37, 0x38, 0x39, + 0x3a, 0x3c, 0x3f, 0x3f, 0x43, 0x43, 0x45, 0x47, + 0x48, 0x48, 0x4a, 0x4b, 0x4e, 0x4d, 0x51, 0x53, + 0x56, 0x58, 0x59, 0x5b, 0x5d, 0x60, 0x62, 0x63, + 0x64, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x57, 0x56, + 0x55, 0x54, 0x53, 0x52, 0x51, 0x51, 0x52, 0x52, + 0x54, 0x54, 0x55, 0x55, 0x55, 0x54, 0x54, 0x53, + 0x52, 0x50, 0x4e, 0x4d, 0x4b, 0x4a, 0x48, 0x48, + 0x48, 0x48, 0x4a, 0x4b, 0x4d, 0x4f, 0x52, 0x55, + 0x58, 0x5a, 0x5b, 0x5b, 0x5b, 0x5b, 0x5a, 0x59, + 0x58, 0x55, 0x51, 0x4e, 0x4a, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1e, 0x26, 0x23, 0x23, 0x25, 0x24, 0x25, 0x26, + 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x48, + 0x46, 0x44, 0x3f, 0x3b, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3d, + 0x3e, 0x3f, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3b, + 0x38, 0x37, 0x34, 0x32, 0x30, 0x2c, 0x2a, 0x27, + 0x26, 0x23, 0x22, 0x20, 0x1d, 0x1b, 0x1a, 0x19, + 0x17, 0x15, 0x15, 0x13, 0x12, 0x12, 0x11, 0x0f, + 0x11, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0e, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x11, + 0x11, 0x13, 0x16, 0x15, 0x15, 0x18, 0x1a, 0x1b, + 0x1d, 0x20, 0x22, 0x24, 0x27, 0x29, 0x2c, 0x30, + 0x33, 0x37, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3e, + 0x40, 0x40, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3a, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3b, 0x3d, + 0x3d, 0x3f, 0x40, 0x40, 0x3f, 0x41, 0x41, 0x41, + 0x41, 0x41, 0x40, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, + 0x3a, 0x39, 0x37, 0x36, 0x36, 0x35, 0x35, 0x36, + 0x36, 0x35, 0x35, 0x36, 0x36, 0x38, 0x39, 0x39, + 0x3b, 0x3c, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x47, + 0x48, 0x48, 0x4b, 0x4c, 0x4d, 0x4f, 0x51, 0x53, + 0x56, 0x56, 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x62, + 0x63, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x59, 0x57, + 0x56, 0x54, 0x54, 0x53, 0x52, 0x53, 0x53, 0x55, + 0x56, 0x56, 0x57, 0x57, 0x57, 0x57, 0x56, 0x56, + 0x55, 0x53, 0x51, 0x4f, 0x4d, 0x4b, 0x49, 0x4b, + 0x4b, 0x4c, 0x4d, 0x4e, 0x51, 0x53, 0x55, 0x58, + 0x5b, 0x5c, 0x60, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, + 0x5a, 0x57, 0x53, 0x4f, 0x4b, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1d, 0x25, 0x22, 0x22, 0x23, 0x23, 0x24, 0x25, + 0x25, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x47, + 0x45, 0x43, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2b, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3e, 0x3d, 0x3b, + 0x39, 0x36, 0x34, 0x32, 0x30, 0x2d, 0x2a, 0x26, + 0x26, 0x24, 0x22, 0x1f, 0x1d, 0x1c, 0x1a, 0x19, + 0x18, 0x16, 0x15, 0x14, 0x12, 0x12, 0x12, 0x10, + 0x10, 0x0f, 0x0e, 0x10, 0x0e, 0x0e, 0x0d, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e, + 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x11, 0x12, + 0x13, 0x14, 0x16, 0x16, 0x18, 0x1a, 0x1b, 0x1c, + 0x1e, 0x21, 0x23, 0x25, 0x28, 0x2a, 0x2e, 0x32, + 0x34, 0x38, 0x3a, 0x3c, 0x3d, 0x3f, 0x40, 0x42, + 0x43, 0x43, 0x43, 0x42, 0x40, 0x3e, 0x3e, 0x3c, + 0x3b, 0x3b, 0x3c, 0x3a, 0x3b, 0x3b, 0x3e, 0x3e, + 0x40, 0x3f, 0x41, 0x41, 0x41, 0x42, 0x42, 0x43, + 0x42, 0x41, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3c, + 0x3b, 0x3a, 0x39, 0x37, 0x36, 0x35, 0x36, 0x37, + 0x35, 0x36, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, + 0x3b, 0x3d, 0x3e, 0x40, 0x41, 0x41, 0x44, 0x46, + 0x48, 0x48, 0x4a, 0x4c, 0x4d, 0x4f, 0x51, 0x53, + 0x55, 0x57, 0x59, 0x5a, 0x5b, 0x5e, 0x5f, 0x61, + 0x62, 0x61, 0x60, 0x5e, 0x5c, 0x5a, 0x59, 0x58, + 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, 0x54, 0x55, + 0x57, 0x57, 0x58, 0x59, 0x5a, 0x58, 0x59, 0x58, + 0x57, 0x55, 0x53, 0x52, 0x4f, 0x4e, 0x4d, 0x4d, + 0x4d, 0x4f, 0x51, 0x50, 0x54, 0x56, 0x59, 0x5c, + 0x5f, 0x61, 0x64, 0x64, 0x63, 0x61, 0x5e, 0x5e, + 0x5c, 0x59, 0x54, 0x50, 0x4c, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1c, 0x24, 0x21, 0x21, 0x21, 0x22, 0x23, 0x23, + 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4c, 0x50, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4a, 0x49, + 0x45, 0x42, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2b, 0x2f, 0x32, 0x34, 0x38, 0x39, 0x3c, 0x3d, + 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3e, 0x3c, 0x3a, + 0x39, 0x37, 0x35, 0x33, 0x30, 0x2d, 0x2b, 0x28, + 0x26, 0x23, 0x23, 0x20, 0x1e, 0x1b, 0x19, 0x19, + 0x17, 0x16, 0x15, 0x14, 0x12, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0e, 0x10, 0x0e, 0x0d, 0x0c, 0x0c, + 0x0c, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e, + 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x12, 0x14, + 0x14, 0x14, 0x16, 0x18, 0x19, 0x1b, 0x1c, 0x1e, + 0x20, 0x23, 0x26, 0x27, 0x29, 0x2c, 0x2f, 0x33, + 0x36, 0x38, 0x3b, 0x3e, 0x3e, 0x42, 0x43, 0x46, + 0x46, 0x46, 0x46, 0x44, 0x42, 0x41, 0x3f, 0x3e, + 0x3d, 0x3d, 0x3e, 0x3d, 0x3d, 0x3e, 0x3e, 0x40, + 0x40, 0x40, 0x43, 0x43, 0x42, 0x43, 0x45, 0x43, + 0x43, 0x43, 0x42, 0x42, 0x41, 0x40, 0x40, 0x3e, + 0x3c, 0x3a, 0x3a, 0x38, 0x36, 0x36, 0x36, 0x36, + 0x37, 0x37, 0x36, 0x38, 0x38, 0x39, 0x3b, 0x3b, + 0x3e, 0x3e, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x46, + 0x46, 0x49, 0x4c, 0x4c, 0x4d, 0x4f, 0x51, 0x54, + 0x56, 0x57, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x60, + 0x61, 0x61, 0x60, 0x5f, 0x5c, 0x5a, 0x59, 0x58, + 0x57, 0x57, 0x55, 0x54, 0x53, 0x55, 0x55, 0x58, + 0x58, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, 0x5b, 0x5b, + 0x5a, 0x59, 0x56, 0x54, 0x53, 0x4e, 0x4e, 0x50, + 0x50, 0x51, 0x52, 0x52, 0x57, 0x59, 0x5d, 0x60, + 0x63, 0x63, 0x66, 0x66, 0x66, 0x64, 0x63, 0x61, + 0x60, 0x5b, 0x55, 0x51, 0x4d, 0x48, 0x45, 0x44, + 0x43, 0x43, 0x43, }, + { 0x1b, 0x23, 0x20, 0x21, 0x22, 0x22, 0x23, 0x24, + 0x26, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3d, 0x42, 0x46, 0x49, 0x4a, 0x4c, 0x4f, 0x4f, + 0x50, 0x50, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x49, + 0x45, 0x42, 0x3e, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3f, 0x3d, 0x3b, + 0x3a, 0x38, 0x36, 0x33, 0x30, 0x2d, 0x2b, 0x29, + 0x27, 0x24, 0x24, 0x21, 0x1e, 0x1c, 0x1b, 0x1a, + 0x18, 0x17, 0x16, 0x15, 0x13, 0x12, 0x10, 0x0f, + 0x10, 0x0f, 0x0e, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x0e, 0x0f, + 0x10, 0x11, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, + 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x1d, 0x1e, 0x20, + 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2d, 0x31, 0x35, + 0x37, 0x39, 0x3c, 0x3f, 0x40, 0x43, 0x46, 0x47, + 0x4a, 0x49, 0x48, 0x46, 0x45, 0x43, 0x42, 0x41, + 0x3f, 0x40, 0x3f, 0x3f, 0x40, 0x3f, 0x41, 0x43, + 0x43, 0x43, 0x44, 0x45, 0x45, 0x45, 0x45, 0x45, + 0x45, 0x45, 0x44, 0x43, 0x43, 0x42, 0x42, 0x40, + 0x3e, 0x3d, 0x3c, 0x39, 0x38, 0x38, 0x38, 0x38, + 0x38, 0x36, 0x38, 0x39, 0x39, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3e, 0x3f, 0x41, 0x42, 0x42, 0x43, 0x45, + 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x50, 0x53, 0x54, + 0x57, 0x58, 0x5a, 0x5c, 0x5b, 0x5e, 0x60, 0x61, + 0x60, 0x60, 0x5f, 0x5f, 0x5d, 0x5b, 0x5b, 0x59, + 0x58, 0x57, 0x56, 0x55, 0x55, 0x55, 0x57, 0x59, + 0x5b, 0x5b, 0x5d, 0x5c, 0x5c, 0x5e, 0x5e, 0x5e, + 0x5d, 0x5b, 0x59, 0x56, 0x54, 0x51, 0x51, 0x51, + 0x52, 0x55, 0x56, 0x56, 0x5a, 0x5d, 0x5f, 0x63, + 0x66, 0x68, 0x6b, 0x6b, 0x68, 0x67, 0x66, 0x64, + 0x61, 0x5d, 0x57, 0x52, 0x4f, 0x49, 0x46, 0x45, + 0x43, 0x43, 0x43, }, + { 0x1a, 0x22, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, + 0x26, 0x27, 0x2a, 0x2d, 0x31, 0x33, 0x37, 0x3b, + 0x3d, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x48, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2d, 0x2f, 0x32, 0x35, 0x39, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c, + 0x3a, 0x38, 0x36, 0x33, 0x31, 0x2d, 0x2c, 0x29, + 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, + 0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10, + 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, + 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x11, 0x12, 0x12, 0x13, 0x15, 0x15, 0x16, 0x16, + 0x17, 0x18, 0x1a, 0x1b, 0x1c, 0x1e, 0x1f, 0x21, + 0x22, 0x25, 0x27, 0x2a, 0x2c, 0x2e, 0x33, 0x36, + 0x39, 0x3a, 0x3d, 0x40, 0x41, 0x45, 0x47, 0x4a, + 0x4c, 0x4d, 0x4c, 0x4a, 0x48, 0x45, 0x44, 0x41, + 0x42, 0x42, 0x42, 0x42, 0x42, 0x43, 0x43, 0x44, + 0x45, 0x47, 0x47, 0x48, 0x47, 0x48, 0x47, 0x47, + 0x48, 0x48, 0x46, 0x46, 0x46, 0x43, 0x43, 0x41, + 0x3f, 0x3e, 0x3b, 0x39, 0x38, 0x37, 0x37, 0x37, + 0x38, 0x38, 0x37, 0x39, 0x39, 0x3a, 0x3c, 0x3e, + 0x3e, 0x3f, 0x3f, 0x3f, 0x42, 0x43, 0x43, 0x45, + 0x47, 0x48, 0x4b, 0x4c, 0x4e, 0x50, 0x51, 0x54, + 0x56, 0x58, 0x5a, 0x5c, 0x5c, 0x5f, 0x5f, 0x5f, + 0x61, 0x60, 0x5f, 0x5f, 0x5e, 0x5b, 0x5c, 0x5b, + 0x59, 0x59, 0x57, 0x56, 0x55, 0x56, 0x57, 0x59, + 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5e, 0x5d, + 0x5e, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x51, 0x52, + 0x53, 0x55, 0x57, 0x58, 0x5c, 0x5e, 0x61, 0x65, + 0x69, 0x6b, 0x6c, 0x6b, 0x6a, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x59, 0x53, 0x4d, 0x48, 0x46, 0x45, + 0x44, 0x44, 0x43, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24, + 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3e, 0x41, 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x30, + 0x2f, 0x2d, 0x29, 0x27, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x26, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3b, 0x3c, 0x3e, + 0x3f, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c, + 0x3a, 0x39, 0x36, 0x34, 0x31, 0x2d, 0x2c, 0x29, + 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, + 0x19, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10, + 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x11, 0x13, 0x14, 0x14, 0x15, 0x16, 0x17, 0x19, + 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x22, 0x24, + 0x25, 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x35, 0x38, + 0x3a, 0x3d, 0x41, 0x42, 0x45, 0x48, 0x4c, 0x4e, + 0x4f, 0x4f, 0x4f, 0x4d, 0x4b, 0x49, 0x47, 0x47, + 0x46, 0x45, 0x45, 0x45, 0x44, 0x44, 0x46, 0x47, + 0x48, 0x49, 0x4b, 0x4b, 0x4a, 0x4b, 0x4b, 0x4a, + 0x4b, 0x4a, 0x49, 0x49, 0x48, 0x46, 0x46, 0x44, + 0x42, 0x41, 0x3d, 0x3b, 0x3a, 0x38, 0x38, 0x38, + 0x37, 0x37, 0x39, 0x38, 0x3a, 0x3a, 0x3c, 0x3c, + 0x3e, 0x40, 0x40, 0x41, 0x43, 0x43, 0x45, 0x46, + 0x48, 0x49, 0x4b, 0x4e, 0x4f, 0x50, 0x53, 0x55, + 0x57, 0x59, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, + 0x60, 0x60, 0x5f, 0x5f, 0x5e, 0x5c, 0x5b, 0x5a, + 0x59, 0x58, 0x57, 0x57, 0x56, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5d, + 0x5c, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x52, 0x53, + 0x54, 0x57, 0x58, 0x58, 0x5b, 0x5e, 0x62, 0x65, + 0x69, 0x6b, 0x6d, 0x6c, 0x6a, 0x69, 0x67, 0x64, + 0x62, 0x5e, 0x59, 0x54, 0x4d, 0x48, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24, + 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x42, 0x47, 0x49, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x51, 0x50, 0x50, 0x4c, 0x4a, 0x47, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x31, + 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x25, 0x25, 0x26, 0x27, 0x29, 0x2b, + 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x40, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d, + 0x3b, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a, + 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1d, 0x1c, 0x1b, + 0x19, 0x17, 0x17, 0x16, 0x15, 0x14, 0x12, 0x11, + 0x11, 0x11, 0x10, 0x10, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x10, 0x11, 0x10, 0x11, 0x11, 0x12, + 0x11, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1b, + 0x1c, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x23, 0x25, + 0x27, 0x2a, 0x2c, 0x2f, 0x31, 0x35, 0x38, 0x3b, + 0x3d, 0x40, 0x44, 0x47, 0x49, 0x4c, 0x4f, 0x51, + 0x53, 0x53, 0x53, 0x51, 0x50, 0x4e, 0x4c, 0x4b, + 0x4a, 0x49, 0x49, 0x49, 0x49, 0x4a, 0x4a, 0x4d, + 0x4e, 0x4e, 0x4f, 0x50, 0x4f, 0x50, 0x51, 0x50, + 0x50, 0x4e, 0x4d, 0x4c, 0x4b, 0x48, 0x48, 0x47, + 0x44, 0x42, 0x3f, 0x3d, 0x3b, 0x3a, 0x39, 0x39, + 0x39, 0x38, 0x39, 0x3b, 0x3a, 0x3c, 0x3e, 0x3d, + 0x40, 0x40, 0x40, 0x42, 0x42, 0x42, 0x45, 0x46, + 0x47, 0x49, 0x4c, 0x4e, 0x50, 0x50, 0x53, 0x56, + 0x58, 0x59, 0x5d, 0x5d, 0x5e, 0x60, 0x61, 0x61, + 0x62, 0x61, 0x60, 0x60, 0x5e, 0x5d, 0x5d, 0x5b, + 0x57, 0x58, 0x56, 0x55, 0x55, 0x56, 0x56, 0x59, + 0x59, 0x58, 0x5a, 0x5a, 0x5a, 0x5c, 0x5c, 0x5c, + 0x5b, 0x5b, 0x58, 0x57, 0x54, 0x53, 0x52, 0x53, + 0x54, 0x57, 0x58, 0x59, 0x5c, 0x5f, 0x63, 0x67, + 0x6b, 0x6d, 0x6e, 0x6e, 0x6b, 0x6a, 0x68, 0x64, + 0x62, 0x5e, 0x58, 0x53, 0x4f, 0x49, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x19, 0x20, 0x1e, 0x1e, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x51, 0x4f, 0x4f, 0x4d, 0x49, 0x47, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x36, 0x32, 0x31, + 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x25, 0x25, 0x26, 0x28, 0x29, 0x2b, + 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d, + 0x3c, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a, + 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1e, 0x1d, 0x1b, + 0x1a, 0x17, 0x17, 0x17, 0x14, 0x14, 0x12, 0x11, + 0x11, 0x12, 0x11, 0x11, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x11, 0x11, 0x11, 0x12, 0x13, 0x14, + 0x14, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1c, 0x1e, + 0x1e, 0x1f, 0x22, 0x23, 0x23, 0x24, 0x25, 0x27, + 0x2a, 0x2d, 0x2f, 0x31, 0x35, 0x38, 0x3a, 0x3e, + 0x41, 0x44, 0x48, 0x4b, 0x4d, 0x51, 0x53, 0x55, + 0x57, 0x57, 0x56, 0x55, 0x54, 0x52, 0x52, 0x50, + 0x4e, 0x50, 0x4e, 0x4d, 0x4d, 0x4d, 0x4f, 0x51, + 0x51, 0x52, 0x54, 0x55, 0x55, 0x55, 0x57, 0x55, + 0x54, 0x53, 0x52, 0x4e, 0x4d, 0x4b, 0x4a, 0x49, + 0x46, 0x44, 0x41, 0x3f, 0x3d, 0x3b, 0x3a, 0x3a, + 0x39, 0x39, 0x39, 0x39, 0x3a, 0x3b, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x42, 0x44, 0x44, 0x45, 0x47, + 0x49, 0x49, 0x4a, 0x4d, 0x50, 0x51, 0x53, 0x57, + 0x5a, 0x5b, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x62, + 0x63, 0x62, 0x60, 0x60, 0x5e, 0x5c, 0x5c, 0x59, + 0x58, 0x56, 0x55, 0x55, 0x55, 0x55, 0x55, 0x54, + 0x56, 0x56, 0x57, 0x58, 0x58, 0x59, 0x5a, 0x59, + 0x58, 0x57, 0x56, 0x55, 0x54, 0x52, 0x53, 0x53, + 0x53, 0x56, 0x57, 0x59, 0x5b, 0x5e, 0x62, 0x66, + 0x6a, 0x6c, 0x6d, 0x6e, 0x6b, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x58, 0x54, 0x50, 0x4a, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x49, 0x4a, 0x4d, 0x4e, 0x4e, + 0x51, 0x52, 0x50, 0x4f, 0x4f, 0x4c, 0x49, 0x48, + 0x45, 0x42, 0x3e, 0x3b, 0x39, 0x36, 0x32, 0x32, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x25, 0x28, 0x29, 0x2b, + 0x2d, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x33, 0x32, 0x2f, 0x2d, 0x2b, + 0x28, 0x26, 0x25, 0x22, 0x20, 0x1e, 0x1d, 0x1b, + 0x1a, 0x17, 0x17, 0x16, 0x14, 0x14, 0x12, 0x11, + 0x12, 0x11, 0x11, 0x11, 0x11, 0x10, 0x10, 0x10, + 0x10, 0x11, 0x12, 0x12, 0x12, 0x13, 0x14, 0x14, + 0x16, 0x18, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, + 0x21, 0x22, 0x23, 0x25, 0x26, 0x26, 0x28, 0x2a, + 0x2c, 0x2e, 0x32, 0x34, 0x39, 0x39, 0x3d, 0x41, + 0x45, 0x47, 0x4c, 0x4e, 0x51, 0x54, 0x56, 0x58, + 0x5b, 0x5c, 0x5a, 0x59, 0x58, 0x56, 0x55, 0x53, + 0x53, 0x52, 0x52, 0x51, 0x52, 0x52, 0x53, 0x55, + 0x57, 0x58, 0x5a, 0x5a, 0x59, 0x5b, 0x59, 0x59, + 0x58, 0x57, 0x55, 0x53, 0x51, 0x4e, 0x4c, 0x4a, + 0x48, 0x46, 0x43, 0x40, 0x3e, 0x3c, 0x3b, 0x3b, + 0x38, 0x39, 0x38, 0x39, 0x3a, 0x3d, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x43, 0x44, 0x45, 0x46, 0x48, + 0x4a, 0x4b, 0x4d, 0x4e, 0x50, 0x52, 0x54, 0x56, + 0x59, 0x5c, 0x5e, 0x5f, 0x60, 0x62, 0x62, 0x63, + 0x63, 0x63, 0x61, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, + 0x59, 0x56, 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, + 0x55, 0x54, 0x55, 0x55, 0x55, 0x57, 0x58, 0x57, + 0x57, 0x56, 0x55, 0x54, 0x54, 0x52, 0x52, 0x53, + 0x54, 0x55, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x65, + 0x69, 0x6b, 0x6d, 0x6e, 0x6a, 0x69, 0x67, 0x63, + 0x61, 0x5d, 0x58, 0x54, 0x4f, 0x4b, 0x48, 0x47, + 0x46, 0x45, 0x45, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4e, 0x4f, + 0x51, 0x52, 0x50, 0x50, 0x4f, 0x4c, 0x4a, 0x48, + 0x45, 0x42, 0x3f, 0x3b, 0x39, 0x36, 0x32, 0x31, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b, + 0x2d, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x42, 0x43, 0x42, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x32, 0x2f, 0x2d, 0x2c, + 0x2a, 0x27, 0x26, 0x23, 0x20, 0x1e, 0x1d, 0x1c, + 0x1a, 0x18, 0x18, 0x17, 0x15, 0x16, 0x14, 0x12, + 0x12, 0x12, 0x12, 0x12, 0x12, 0x11, 0x11, 0x12, + 0x12, 0x12, 0x13, 0x14, 0x14, 0x14, 0x15, 0x16, + 0x17, 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x20, 0x22, + 0x24, 0x25, 0x26, 0x27, 0x28, 0x2a, 0x2c, 0x2c, + 0x2f, 0x32, 0x35, 0x37, 0x3b, 0x3c, 0x41, 0x45, + 0x48, 0x4c, 0x50, 0x52, 0x54, 0x57, 0x5a, 0x5c, + 0x5f, 0x5f, 0x5f, 0x5d, 0x5c, 0x5b, 0x5a, 0x58, + 0x57, 0x57, 0x57, 0x56, 0x56, 0x57, 0x57, 0x5a, + 0x5c, 0x5e, 0x5f, 0x61, 0x5f, 0x5f, 0x5f, 0x5e, + 0x5d, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x4f, 0x4e, + 0x4a, 0x47, 0x46, 0x42, 0x41, 0x3e, 0x3d, 0x3c, + 0x3b, 0x3a, 0x39, 0x39, 0x3b, 0x3c, 0x3d, 0x3f, + 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x49, 0x49, + 0x4b, 0x4c, 0x4e, 0x4f, 0x51, 0x54, 0x57, 0x58, + 0x5b, 0x5d, 0x61, 0x61, 0x61, 0x63, 0x65, 0x65, + 0x64, 0x64, 0x62, 0x61, 0x60, 0x5e, 0x5d, 0x5c, + 0x59, 0x58, 0x56, 0x54, 0x53, 0x53, 0x53, 0x54, + 0x54, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, 0x55, + 0x56, 0x55, 0x54, 0x53, 0x53, 0x52, 0x52, 0x53, + 0x55, 0x56, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x66, + 0x69, 0x6b, 0x6d, 0x6d, 0x6b, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x58, 0x55, 0x50, 0x4b, 0x48, 0x47, + 0x46, 0x46, 0x46, }, + { 0x1a, 0x20, 0x1e, 0x1f, 0x1f, 0x21, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f, + 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x45, 0x42, 0x3f, 0x3b, 0x38, 0x36, 0x32, 0x31, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b, + 0x2e, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b, + 0x29, 0x26, 0x24, 0x24, 0x20, 0x1f, 0x1d, 0x1d, + 0x1a, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14, + 0x13, 0x12, 0x13, 0x13, 0x13, 0x12, 0x12, 0x13, + 0x13, 0x14, 0x15, 0x15, 0x14, 0x15, 0x16, 0x18, + 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x24, + 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2c, 0x2d, 0x2f, + 0x32, 0x35, 0x37, 0x3a, 0x3c, 0x3e, 0x44, 0x48, + 0x4c, 0x50, 0x54, 0x56, 0x58, 0x5b, 0x5e, 0x60, + 0x61, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5e, + 0x5c, 0x5c, 0x5b, 0x5a, 0x5a, 0x5b, 0x5c, 0x5e, + 0x60, 0x63, 0x64, 0x65, 0x63, 0x62, 0x63, 0x63, + 0x61, 0x60, 0x5e, 0x5b, 0x58, 0x55, 0x51, 0x4f, + 0x4c, 0x4a, 0x47, 0x44, 0x42, 0x41, 0x3e, 0x3c, + 0x3b, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3e, 0x3f, + 0x40, 0x42, 0x43, 0x45, 0x46, 0x47, 0x49, 0x4a, + 0x4c, 0x4c, 0x4f, 0x51, 0x52, 0x55, 0x58, 0x5b, + 0x5c, 0x5f, 0x61, 0x62, 0x63, 0x64, 0x64, 0x65, + 0x66, 0x65, 0x63, 0x62, 0x5f, 0x5e, 0x5e, 0x5c, + 0x5b, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x53, + 0x52, 0x52, 0x52, 0x52, 0x52, 0x53, 0x55, 0x55, + 0x55, 0x53, 0x53, 0x53, 0x52, 0x51, 0x52, 0x52, + 0x55, 0x55, 0x58, 0x58, 0x5b, 0x5d, 0x61, 0x65, + 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64, + 0x61, 0x5e, 0x58, 0x54, 0x4f, 0x4b, 0x49, 0x48, + 0x47, 0x46, 0x45, }, + { 0x19, 0x20, 0x1d, 0x1f, 0x1f, 0x20, 0x23, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f, + 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x44, 0x42, 0x3f, 0x3a, 0x38, 0x36, 0x32, 0x30, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2b, + 0x2e, 0x30, 0x34, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b, + 0x29, 0x27, 0x25, 0x24, 0x21, 0x1f, 0x1e, 0x1c, + 0x1b, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14, + 0x13, 0x12, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x14, 0x15, 0x14, 0x14, 0x14, 0x17, 0x19, + 0x1a, 0x1c, 0x1e, 0x20, 0x21, 0x23, 0x24, 0x26, + 0x29, 0x29, 0x2b, 0x2c, 0x2d, 0x2e, 0x30, 0x31, + 0x34, 0x38, 0x3b, 0x3c, 0x3f, 0x42, 0x47, 0x4c, + 0x50, 0x54, 0x57, 0x5b, 0x5c, 0x5e, 0x62, 0x63, + 0x66, 0x66, 0x66, 0x65, 0x64, 0x63, 0x61, 0x62, + 0x60, 0x60, 0x5f, 0x5e, 0x5e, 0x5f, 0x60, 0x62, + 0x65, 0x67, 0x69, 0x6a, 0x69, 0x68, 0x69, 0x67, + 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x58, 0x54, 0x51, + 0x4e, 0x4b, 0x49, 0x45, 0x43, 0x41, 0x40, 0x3e, + 0x3c, 0x3a, 0x3b, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x41, 0x42, 0x44, 0x46, 0x46, 0x48, 0x49, 0x4b, + 0x4d, 0x50, 0x51, 0x53, 0x55, 0x57, 0x58, 0x5c, + 0x5f, 0x60, 0x63, 0x64, 0x64, 0x65, 0x66, 0x66, + 0x66, 0x65, 0x65, 0x63, 0x61, 0x5f, 0x5e, 0x5c, + 0x5a, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x52, + 0x53, 0x52, 0x52, 0x52, 0x52, 0x53, 0x53, 0x53, + 0x54, 0x53, 0x53, 0x52, 0x53, 0x51, 0x53, 0x53, + 0x55, 0x57, 0x58, 0x59, 0x5b, 0x5d, 0x62, 0x64, + 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64, + 0x61, 0x5d, 0x57, 0x54, 0x50, 0x4a, 0x48, 0x47, + 0x46, 0x45, 0x45, }, diff --git a/tests/tcg/hexagon/hvx_histogram_row.S b/tests/tcg/hexagon/hvx_histogram_row.S new file mode 100644 index 0000000000..5e42c33145 --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_row.S @@ -0,0 +1,294 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + +/* + * void hvx_histogram_row(uint8_t *src, => r0 + * int stride, => r1 + * int width, => r2 + * int height, => r3 + * int *hist => r4) + */ + .text + .p2align 2 + .global hvx_histogram_row + .type hvx_histogram_row, @function +hvx_histogram_row: + { r2 = lsr(r2, #7) /* size / VLEN */ + r5 = and(r2, #127) /* size % VLEN */ + v1 = #0 + v0 = #0 + } + /* + * Step 1: Clean the whole vector register file + */ + { v3:2 = v1:0 + v5:4 = v1:0 + p0 = cmp.gt(r2, #0) /* P0 = (width / VLEN > 0) */ + p1 = cmp.eq(r5, #0) /* P1 = (width % VLEN == 0) */ + } + { q0 = vsetq(r5) + v7:6 = v1:0 + } + { v9:8 = v1:0 + v11:10 = v1:0 + } + { v13:12 = v1:0 + v15:14 = v1:0 + } + { v17:16 = v1:0 + v19:18 = v1:0 + } + { v21:20 = v1:0 + v23:22 = v1:0 + } + { v25:24 = v1:0 + v27:26 = v1:0 + } + { v29:28 = v1:0 + v31:30 = v1:0 + r10 = add(r0, r1) /* R10 = &src[2 * stride] */ + loop1(.outerloop, r3) + } + + /* + * Step 2: vhist + */ + .falign +.outerloop: + { if (!p0) jump .loopend + loop0(.innerloop, r2) + } + + .falign +.innerloop: + { v12.tmp = vmem(R0++#1) + vhist + }:endloop0 + + .falign +.loopend: + if (p1) jump .skip /* if (width % VLEN == 0) done with current row */ + { v13.tmp = vmem(r0 + #0) + vhist(q0) + } + + .falign +.skip: + { r0 = r10 /* R0 = &src[(i + 1) * stride] */ + r10 = add(r10, r1) /* R10 = &src[(i + 2) * stride] */ + }:endloop1 + + + /* + * Step 3: Sum up the data + */ + { v0.h = vshuff(v0.h) + r10 = ##0x00010001 + } + v1.h = vshuff(v1.h) + { V2.h = vshuff(v2.h) + v0.w = vdmpy(v0.h, r10.h):sat + } + { v3.h = vshuff(v3.h) + v1.w = vdmpy(v1.h, r10.h):sat + } + { v4.h = vshuff(V4.h) + v2.w = vdmpy(v2.h, r10.h):sat + } + { v5.h = vshuff(v5.h) + v3.w = vdmpy(v3.h, r10.h):sat + } + { v6.h = vshuff(v6.h) + v4.w = vdmpy(v4.h, r10.h):sat + } + { v7.h = vshuff(v7.h) + v5.w = vdmpy(v5.h, r10.h):sat + } + { v8.h = vshuff(V8.h) + v6.w = vdmpy(v6.h, r10.h):sat + } + { v9.h = vshuff(V9.h) + v7.w = vdmpy(v7.h, r10.h):sat + } + { v10.h = vshuff(v10.h) + v8.w = vdmpy(v8.h, r10.h):sat + } + { v11.h = vshuff(v11.h) + v9.w = vdmpy(v9.h, r10.h):sat + } + { v12.h = vshuff(v12.h) + v10.w = vdmpy(v10.h, r10.h):sat + } + { v13.h = vshuff(V13.h) + v11.w = vdmpy(v11.h, r10.h):sat + } + { v14.h = vshuff(v14.h) + v12.w = vdmpy(v12.h, r10.h):sat + } + { v15.h = vshuff(v15.h) + v13.w = vdmpy(v13.h, r10.h):sat + } + { v16.h = vshuff(v16.h) + v14.w = vdmpy(v14.h, r10.h):sat + } + { v17.h = vshuff(v17.h) + v15.w = vdmpy(v15.h, r10.h):sat + } + { v18.h = vshuff(v18.h) + v16.w = vdmpy(v16.h, r10.h):sat + } + { v19.h = vshuff(v19.h) + v17.w = vdmpy(v17.h, r10.h):sat + } + { v20.h = vshuff(v20.h) + v18.W = vdmpy(v18.h, r10.h):sat + } + { v21.h = vshuff(v21.h) + v19.w = vdmpy(v19.h, r10.h):sat + } + { v22.h = vshuff(v22.h) + v20.w = vdmpy(v20.h, r10.h):sat + } + { v23.h = vshuff(v23.h) + v21.w = vdmpy(v21.h, r10.h):sat + } + { v24.h = vshuff(v24.h) + v22.w = vdmpy(v22.h, r10.h):sat + } + { v25.h = vshuff(v25.h) + v23.w = vdmpy(v23.h, r10.h):sat + } + { v26.h = vshuff(v26.h) + v24.w = vdmpy(v24.h, r10.h):sat + } + { v27.h = vshuff(V27.h) + v25.w = vdmpy(v25.h, r10.h):sat + } + { v28.h = vshuff(v28.h) + v26.w = vdmpy(v26.h, r10.h):sat + } + { v29.h = vshuff(v29.h) + v27.w = vdmpy(v27.h, r10.h):sat + } + { v30.h = vshuff(v30.h) + v28.w = vdmpy(v28.h, r10.h):sat + } + { v31.h = vshuff(v31.h) + v29.w = vdmpy(v29.h, r10.h):sat + r28 = #32 + } + { vshuff(v1, v0, r28) + v30.w = vdmpy(v30.h, r10.h):sat + } + { vshuff(v3, v2, r28) + v31.w = vdmpy(v31.h, r10.h):sat + } + { vshuff(v5, v4, r28) + v0.w = vadd(v1.w, v0.w) + v2.w = vadd(v3.w, v2.w) + } + { vshuff(v7, v6, r28) + r7 = #64 + } + { vshuff(v9, v8, r28) + v4.w = vadd(v5.w, v4.w) + v6.w = vadd(v7.w, v6.w) + } + vshuff(v11, v10, r28) + { vshuff(v13, v12, r28) + v8.w = vadd(v9.w, v8.w) + v10.w = vadd(v11.w, v10.w) + } + vshuff(v15, v14, r28) + { vshuff(v17, v16, r28) + v12.w = vadd(v13.w, v12.w) + v14.w = vadd(v15.w, v14.w) + } + vshuff(v19, v18, r28) + { vshuff(v21, v20, r28) + v16.w = vadd(v17.w, v16.w) + v18.w = vadd(v19.w, v18.w) + } + vshuff(v23, v22, r28) + { vshuff(v25, v24, r28) + v20.w = vadd(v21.w, v20.w) + v22.w = vadd(v23.w, v22.w) + } + vshuff(v27, v26, r28) + { vshuff(v29, v28, r28) + v24.w = vadd(v25.w, v24.w) + v26.w = vadd(v27.w, v26.w) + } + vshuff(v31, v30, r28) + { v28.w = vadd(v29.w, v28.w) + vshuff(v2, v0, r7) + } + { v30.w = vadd(v31.w, v30.w) + vshuff(v6, v4, r7) + v0.w = vadd(v0.w, v2.w) + } + { vshuff(v10, v8, r7) + v1.tmp = vmem(r4 + #0) /* update hist[0-31] */ + v0.w = vadd(v0.w, v1.w) + vmem(r4++#1) = v0.new + } + { vshuff(v14, v12, r7) + v4.w = vadd(v4.w, v6.w) + v8.w = vadd(v8.w, v10.w) + } + { vshuff(v18, v16, r7) + v1.tmp = vmem(r4 + #0) /* update hist[32-63] */ + v4.w = vadd(v4.w, v1.w) + vmem(r4++#1) = v4.new + } + { vshuff(v22, v20, r7) + v12.w = vadd(v12.w, v14.w) + V16.w = vadd(v16.w, v18.w) + } + { vshuff(v26, v24, r7) + v1.tmp = vmem(r4 + #0) /* update hist[64-95] */ + v8.w = vadd(v8.w, v1.w) + vmem(r4++#1) = v8.new + } + { vshuff(v30, v28, r7) + v1.tmp = vmem(r4 + #0) /* update hist[96-127] */ + v12.w = vadd(v12.w, v1.w) + vmem(r4++#1) = v12.new + } + + { v20.w = vadd(v20.w, v22.w) + v1.tmp = vmem(r4 + #0) /* update hist[128-159] */ + v16.w = vadd(v16.w, v1.w) + vmem(r4++#1) = v16.new + } + { v24.w = vadd(v24.w, v26.w) + v1.tmp = vmem(r4 + #0) /* update hist[160-191] */ + v20.w = vadd(v20.w, v1.w) + vmem(r4++#1) = v20.new + } + { v28.w = vadd(v28.w, v30.w) + v1.tmp = vmem(r4 + #0) /* update hist[192-223] */ + v24.w = vadd(v24.w, v1.w) + vmem(r4++#1) = v24.new + } + { v1.tmp = vmem(r4 + #0) /* update hist[224-255] */ + v28.w = vadd(v28.w, v1.w) + vmem(r4++#1) = v28.new + } + jumpr r31 + .size hvx_histogram_row, .-hvx_histogram_row diff --git a/tests/tcg/hexagon/hvx_histogram_row.h b/tests/tcg/hexagon/hvx_histogram_row.h new file mode 100644 index 0000000000..6a4531a92d --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_row.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HVX_HISTOGRAM_ROW_H +#define HVX_HISTOGRAM_ROW_H + +void hvx_histogram_row(uint8_t *src, int stride, int width, int height, + int *hist); + +#endif diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c new file mode 100644 index 0000000000..312bb98b41 --- /dev/null +++ b/tests/tcg/hexagon/hvx_misc.c @@ -0,0 +1,469 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +int err; + +static void __check(int line, int i, int j, uint64_t result, uint64_t expect) +{ + if (result != expect) { + printf("ERROR at line %d: [%d][%d] 0x%016llx != 0x%016llx\n", + line, i, j, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +#define MAX_VEC_SIZE_BYTES 128 + +typedef union { + uint64_t ud[MAX_VEC_SIZE_BYTES / 8]; + int64_t d[MAX_VEC_SIZE_BYTES / 8]; + uint32_t uw[MAX_VEC_SIZE_BYTES / 4]; + int32_t w[MAX_VEC_SIZE_BYTES / 4]; + uint16_t uh[MAX_VEC_SIZE_BYTES / 2]; + int16_t h[MAX_VEC_SIZE_BYTES / 2]; + uint8_t ub[MAX_VEC_SIZE_BYTES / 1]; + int8_t b[MAX_VEC_SIZE_BYTES / 1]; +} MMVector; + +#define BUFSIZE 16 +#define OUTSIZE 16 +#define MASKMOD 3 + +MMVector buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector mask[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector output[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector expect[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); + +#define CHECK_OUTPUT_FUNC(FIELD, FIELDSZ) \ +static void check_output_##FIELD(int line, size_t num_vectors) \ +{ \ + for (int i = 0; i < num_vectors; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + __check(line, i, j, output[i].FIELD[j], expect[i].FIELD[j]); \ + } \ + } \ +} + +CHECK_OUTPUT_FUNC(d, 8) +CHECK_OUTPUT_FUNC(w, 4) +CHECK_OUTPUT_FUNC(h, 2) +CHECK_OUTPUT_FUNC(b, 1) + +static void init_buffers(void) +{ + int counter0 = 0; + int counter1 = 17; + for (int i = 0; i < BUFSIZE; i++) { + for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { + buffer0[i].b[j] = counter0++; + buffer1[i].b[j] = counter1++; + } + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + mask[i].w[j] = (i + j % MASKMOD == 0) ? 0 : 1; + } + } +} + +static void test_load_tmp(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + for (int i = 0; i < BUFSIZE; i++) { + /* + * Load into v12 as .tmp, then use it in the next packet + * Should get the new value within the same packet and + * the old value in the next packet + */ + asm("v3 = vmem(%0 + #0)\n\t" + "r1 = #1\n\t" + "v12 = vsplat(r1)\n\t" + "{\n\t" + " v12.tmp = vmem(%1 + #0)\n\t" + " v4.w = vadd(v12.w, v3.w)\n\t" + "}\n\t" + "v4.w = vadd(v4.w, v12.w)\n\t" + "vmem(%2 + #0) = v4\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "r1", "v12", "v3", "v4", "v6", "memory"); + p0 += sizeof(MMVector); + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].w[j] = buffer0[i].w[j] + buffer1[i].w[j] + 1; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_load_cur(void) +{ + void *p0 = buffer0; + void *pout = output; + + for (int i = 0; i < BUFSIZE; i++) { + asm("{\n\t" + " v2.cur = vmem(%0 + #0)\n\t" + " vmem(%1 + #0) = v2\n\t" + "}\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + p0 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].uw[j] = buffer0[i].uw[j]; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_load_aligned(void) +{ + /* Aligned loads ignore the low bits of the address */ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 13; + + p0 += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_load_unaligned(void) +{ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 12; + + p0 += offset; /* Create an unaligned address */ + asm("v2 = vmemu(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + memcpy(expect, &buffer0[0].ub[offset], sizeof(MMVector)); + + check_output_w(__LINE__, 1); +} + +static void test_store_aligned(void) +{ + /* Aligned stores ignore the low bits of the address */ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 13; + + pout += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_store_unaligned(void) +{ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 12; + + pout += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmemu(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + memcpy(expect, buffer0, 2 * sizeof(MMVector)); + memcpy(&expect[0].ub[offset], buffer0, sizeof(MMVector)); + + check_output_w(__LINE__, 2); +} + +static void test_masked_store(bool invert) +{ + void *p0 = buffer0; + void *pmask = mask; + void *pout = output; + + memset(expect, 0xff, sizeof(expect)); + memset(output, 0xff, sizeof(expect)); + + for (int i = 0; i < BUFSIZE; i++) { + if (invert) { + asm("r4 = #0\n\t" + "v4 = vsplat(r4)\n\t" + "v5 = vmem(%0 + #0)\n\t" + "q0 = vcmp.eq(v4.w, v5.w)\n\t" + "v5 = vmem(%1)\n\t" + "if (!q0) vmem(%2) = v5\n\t" /* Inverted test */ + : : "r"(pmask), "r"(p0), "r"(pout) + : "r4", "v4", "v5", "q0", "memory"); + } else { + asm("r4 = #0\n\t" + "v4 = vsplat(r4)\n\t" + "v5 = vmem(%0 + #0)\n\t" + "q0 = vcmp.eq(v4.w, v5.w)\n\t" + "v5 = vmem(%1)\n\t" + "if (q0) vmem(%2) = v5\n\t" /* Non-inverted test */ + : : "r"(pmask), "r"(p0), "r"(pout) + : "r4", "v4", "v5", "q0", "memory"); + } + p0 += sizeof(MMVector); + pmask += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + if (invert) { + if (i + j % MASKMOD != 0) { + expect[i].w[j] = buffer0[i].w[j]; + } + } else { + if (i + j % MASKMOD == 0) { + expect[i].w[j] = buffer0[i].w[j]; + } + } + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_new_value_store(void) +{ + void *p0 = buffer0; + void *pout = output; + + asm("{\n\t" + " v2 = vmem(%0 + #0)\n\t" + " vmem(%1 + #0) = v2.new\n\t" + "}\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_max_temps() +{ + void *p0 = buffer0; + void *pout = output; + + asm("v0 = vmem(%0 + #0)\n\t" + "v1 = vmem(%0 + #1)\n\t" + "v2 = vmem(%0 + #2)\n\t" + "v3 = vmem(%0 + #3)\n\t" + "v4 = vmem(%0 + #4)\n\t" + "{\n\t" + " v1:0.w = vadd(v3:2.w, v1:0.w)\n\t" + " v2.b = vshuffe(v3.b, v2.b)\n\t" + " v3.w = vadd(v1.w, v4.w)\n\t" + " v4.tmp = vmem(%0 + #5)\n\t" + "}\n\t" + "vmem(%1 + #0) = v0\n\t" + "vmem(%1 + #1) = v1\n\t" + "vmem(%1 + #2) = v2\n\t" + "vmem(%1 + #3) = v3\n\t" + "vmem(%1 + #4) = v4\n\t" + : : "r"(p0), "r"(pout) : "memory"); + + /* The first two vectors come from the vadd-pair instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[0].w[i] = buffer0[0].w[i] + buffer0[2].w[i]; + expect[1].w[i] = buffer0[1].w[i] + buffer0[3].w[i]; + } + /* The third vector comes from the vshuffe instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 2; i++) { + expect[2].uh[i] = (buffer0[2].uh[i] & 0xff) | + (buffer0[3].uh[i] & 0xff) << 8; + } + /* The fourth vector comes from the vadd-single instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[3].w[i] = buffer0[1].w[i] + buffer0[5].w[i]; + } + /* + * The fifth vector comes from the load to v4 + * make sure the .tmp is dropped + */ + expect[4] = buffer0[4]; + + check_output_b(__LINE__, 5); +} + +#define VEC_OP1(ASM, EL, IN, OUT) \ + asm("v2 = vmem(%0 + #0)\n\t" \ + "v2" #EL " = " #ASM "(v2" #EL ")\n\t" \ + "vmem(%1 + #0) = v2\n\t" \ + : : "r"(IN), "r"(OUT) : "v2", "memory") + +#define VEC_OP2(ASM, EL, IN0, IN1, OUT) \ + asm("v2 = vmem(%0 + #0)\n\t" \ + "v3 = vmem(%1 + #0)\n\t" \ + "v2" #EL " = " #ASM "(v2" #EL ", v3" #EL ")\n\t" \ + "vmem(%2 + #0) = v2\n\t" \ + : : "r"(IN0), "r"(IN1), "r"(OUT) : "v2", "v3", "memory") + +#define TEST_VEC_OP1(NAME, ASM, EL, FIELD, FIELDSZ, OP) \ +static void test_##NAME(void) \ +{ \ + void *pin = buffer0; \ + void *pout = output; \ + for (int i = 0; i < BUFSIZE; i++) { \ + VEC_OP1(ASM, EL, pin, pout); \ + pin += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + expect[i].FIELD[j] = OP buffer0[i].FIELD[j]; \ + } \ + } \ + check_output_##FIELD(__LINE__, BUFSIZE); \ +} + +#define TEST_VEC_OP2(NAME, ASM, EL, FIELD, FIELDSZ, OP) \ +static void test_##NAME(void) \ +{ \ + void *p0 = buffer0; \ + void *p1 = buffer1; \ + void *pout = output; \ + for (int i = 0; i < BUFSIZE; i++) { \ + VEC_OP2(ASM, EL, p0, p1, pout); \ + p0 += sizeof(MMVector); \ + p1 += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + expect[i].FIELD[j] = buffer0[i].FIELD[j] OP buffer1[i].FIELD[j]; \ + } \ + } \ + check_output_##FIELD(__LINE__, BUFSIZE); \ +} + +#define THRESHOLD 31 + +#define PRED_OP2(ASM, IN0, IN1, OUT, INV) \ + asm("r4 = #%3\n\t" \ + "v1.b = vsplat(r4)\n\t" \ + "v2 = vmem(%0 + #0)\n\t" \ + "q0 = vcmp.gt(v2.b, v1.b)\n\t" \ + "v3 = vmem(%1 + #0)\n\t" \ + "q1 = vcmp.gt(v3.b, v1.b)\n\t" \ + "q2 = " #ASM "(q0, " INV "q1)\n\t" \ + "r4 = #0xff\n\t" \ + "v1.b = vsplat(r4)\n\t" \ + "if (q2) vmem(%2 + #0) = v1\n\t" \ + : : "r"(IN0), "r"(IN1), "r"(OUT), "i"(THRESHOLD) \ + : "r4", "v1", "v2", "v3", "q0", "q1", "q2", "memory") + +#define TEST_PRED_OP2(NAME, ASM, OP, INV) \ +static void test_##NAME(bool invert) \ +{ \ + void *p0 = buffer0; \ + void *p1 = buffer1; \ + void *pout = output; \ + memset(output, 0, sizeof(expect)); \ + for (int i = 0; i < BUFSIZE; i++) { \ + PRED_OP2(ASM, p0, p1, pout, INV); \ + p0 += sizeof(MMVector); \ + p1 += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { \ + bool p0 = (buffer0[i].b[j] > THRESHOLD); \ + bool p1 = (buffer1[i].b[j] > THRESHOLD); \ + if (invert) { \ + expect[i].b[j] = (p0 OP !p1) ? 0xff : 0x00; \ + } else { \ + expect[i].b[j] = (p0 OP p1) ? 0xff : 0x00; \ + } \ + } \ + } \ + check_output_b(__LINE__, BUFSIZE); \ +} + +TEST_VEC_OP2(vadd_w, vadd, .w, w, 4, +) +TEST_VEC_OP2(vadd_h, vadd, .h, h, 2, +) +TEST_VEC_OP2(vadd_b, vadd, .b, b, 1, +) +TEST_VEC_OP2(vsub_w, vsub, .w, w, 4, -) +TEST_VEC_OP2(vsub_h, vsub, .h, h, 2, -) +TEST_VEC_OP2(vsub_b, vsub, .b, b, 1, -) +TEST_VEC_OP2(vxor, vxor, , d, 8, ^) +TEST_VEC_OP2(vand, vand, , d, 8, &) +TEST_VEC_OP2(vor, vor, , d, 8, |) +TEST_VEC_OP1(vnot, vnot, , d, 8, ~) + +TEST_PRED_OP2(pred_or, or, |, "") +TEST_PRED_OP2(pred_or_n, or, |, "!") +TEST_PRED_OP2(pred_and, and, &, "") +TEST_PRED_OP2(pred_and_n, and, &, "!") +TEST_PRED_OP2(pred_xor, xor, ^, "") + +int main() +{ + init_buffers(); + + test_load_tmp(); + test_load_cur(); + test_load_aligned(); + test_load_unaligned(); + test_store_aligned(); + test_store_unaligned(); + test_masked_store(false); + test_masked_store(true); + test_new_value_store(); + test_max_temps(); + + test_vadd_w(); + test_vadd_h(); + test_vadd_b(); + test_vsub_w(); + test_vsub_h(); + test_vsub_b(); + test_vxor(); + test_vand(); + test_vor(); + test_vnot(); + + test_pred_or(false); + test_pred_or_n(true); + test_pred_and(false); + test_pred_and_n(true); + test_pred_xor(false); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/scatter_gather.c b/tests/tcg/hexagon/scatter_gather.c new file mode 100644 index 0000000000..b93eb18133 --- /dev/null +++ b/tests/tcg/hexagon/scatter_gather.c @@ -0,0 +1,1011 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This example tests the HVX scatter/gather instructions + * + * See section 5.13 of the V68 HVX Programmer's Reference + * + * There are 3 main classes operations + * _16 16-bit elements and 16-bit offsets + * _32 32-bit elements and 32-bit offsets + * _16_32 16-bit elements and 32-bit offsets + * + * There are also masked and accumulate versions + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> + +typedef long HVX_Vector __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); +typedef long HVX_VectorPair __attribute__((__vector_size__(256))) + __attribute__((aligned(128))); +typedef long HVX_VectorPred __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + +#define VSCATTER_16(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_32(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS) + +#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) +#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) +#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) + +#define VSHUFF_H(V) \ + __builtin_HEXAGON_V6_vshuffh_128B(V) +#define VSPLAT_H(X) \ + __builtin_HEXAGON_V6_lvsplath_128B(X) +#define VAND_VAL(PRED, VAL) \ + __builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL) +#define VDEAL_H(V) \ + __builtin_HEXAGON_V6_vdealh_128B(V) + +int err; + +/* define the number of rows/cols in a square matrix */ +#define MATRIX_SIZE 64 + +/* define the size of the scatter buffer */ +#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE) + +/* fake vtcm - put buffers together and force alignment */ +static struct { + unsigned short vscatter16[SCATTER_BUFFER_SIZE]; + unsigned short vgather16[MATRIX_SIZE]; + unsigned int vscatter32[SCATTER_BUFFER_SIZE]; + unsigned int vgather32[MATRIX_SIZE]; + unsigned short vscatter16_32[SCATTER_BUFFER_SIZE]; + unsigned short vgather16_32[MATRIX_SIZE]; +} vtcm __attribute__((aligned(0x10000))); + +/* declare the arrays of reference values */ +unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_ref[MATRIX_SIZE]; +unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE]; +unsigned int vgather32_ref[MATRIX_SIZE]; +unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_32_ref[MATRIX_SIZE]; + +/* declare the arrays of offsets */ +unsigned short half_offsets[MATRIX_SIZE]; +unsigned int word_offsets[MATRIX_SIZE]; + +/* declare the arrays of values */ +unsigned short half_values[MATRIX_SIZE]; +unsigned short half_values_acc[MATRIX_SIZE]; +unsigned short half_values_masked[MATRIX_SIZE]; +unsigned int word_values[MATRIX_SIZE]; +unsigned int word_values_acc[MATRIX_SIZE]; +unsigned int word_values_masked[MATRIX_SIZE]; + +/* declare the arrays of predicates */ +unsigned short half_predicates[MATRIX_SIZE]; +unsigned int word_predicates[MATRIX_SIZE]; + +/* make this big enough for all the intrinsics */ +const size_t region_len = sizeof(vtcm); + +/* optionally add sync instructions */ +#define SYNC_VECTOR 1 + +static void sync_scatter(void *addr) +{ +#if SYNC_VECTOR + /* + * Do the scatter release followed by a dummy load to complete the + * synchronization. Normally the dummy load would be deferred as + * long as possible to minimize stalls. + */ + asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(addr)); + /* use volatile to force the load */ + volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy; +#endif +} + +static void sync_gather(void *addr) +{ +#if SYNC_VECTOR + /* use volatile to force the load */ + volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy; +#endif +} + +/* optionally print the results */ +#define PRINT_DATA 0 + +#define FILL_CHAR '.' + +/* fill vtcm scratch with ee */ +void prefill_vtcm_scratch(void) +{ + memset(&vtcm, FILL_CHAR, sizeof(vtcm)); +} + +/* create byte offsets to be a diagonal of the matrix with 16 bit elements */ +void create_offsets_values_preds_16(void) +{ + unsigned short half_element = 0; + unsigned short half_element_masked = 0; + char letter = 'A'; + char letter_masked = '@'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + half_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_element_masked = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_element_masked |= letter_masked << j * 8; + } + + half_values[i] = half_element; + half_values_acc[i] = ((i % 10) << 8) + (i % 10); + half_values_masked[i] = half_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + + half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0; + } +} + +/* create byte offsets to be a diagonal of the matrix with 32 bit elements */ +void create_offsets_values_preds_32(void) +{ + unsigned int word_element = 0; + unsigned int word_element_masked = 0; + char letter = 'A'; + char letter_masked = '&'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (4 * MATRIX_SIZE + 4); + + word_element = 0; + word_element_masked = 0; + for (int j = 0; j < 4; j++) { + word_element |= letter << j * 8; + word_element_masked |= letter_masked << j * 8; + } + + word_values[i] = word_element; + word_values_acc[i] = ((i % 10) << 8) + (i % 10); + word_values_masked[i] = word_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + + word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ? ~0 : 0; + } +} + +/* + * create byte offsets to be a diagonal of the matrix with 16 bit elements + * and 32 bit offsets + */ +void create_offsets_values_preds_16_32(void) +{ + unsigned short half_element = 0; + unsigned short half_element_masked = 0; + char letter = 'D'; + char letter_masked = '$'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_element_masked = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_element_masked |= letter_masked << j * 8; + } + + half_values[i] = half_element; + half_values_acc[i] = ((i % 10) << 8) + (i % 10); + half_values_masked[i] = half_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'P') { + letter = 'D'; + } + + half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? ~0 : 0; + } +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values; + + VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter-accumulate the 16 bit elements using intrinsics */ +void vector_scatter_16_acc(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values_acc; + + VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16_masked(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values_masked; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2]; + + VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo); + VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi); + + sync_scatter(vtcm.vscatter32); +} + +/* scatter-acc the 32 bit elements using intrinsics */ +void vector_scatter_32_acc(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values_acc; + HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2]; + + VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo); + VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi); + + sync_scatter(vtcm.vscatter32); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32_masked(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values_masked; + HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); + HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); + + VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo, + valueslo); + VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi, + valueshi); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values; + values = VSHUFF_H(values); + + VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32_acc(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values_acc; + values = VSHUFF_H(values); + + VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32_masked(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + HVX_Vector pred_reg; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values_masked; + values = VSHUFF_H(values); + + pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = VSHUFF_H(pred_reg); + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets, + values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* gather the elements from the scatter16 buffer */ +void vector_gather_16(void) +{ + HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + + VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets); + + sync_gather(vgather); +} + +static unsigned short gather_16_masked_init(void) +{ + char letter = '?'; + return letter | (letter << 8); +} + +void vector_gather_16_masked(void) +{ + HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + *vgather = VSPLAT_H(gather_16_masked_init()); + VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets); + + sync_gather(vgather); +} + +/* gather the elements from the scatter32 buffer */ +void vector_gather_32(void) +{ + HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; + HVX_Vector *vgatherhi = + (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + + VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo); + VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi); + + sync_gather(vgatherhi); +} + +static unsigned int gather_32_masked_init(void) +{ + char letter = '?'; + return letter | (letter << 8) | (letter << 16) | (letter << 24); +} + +void vector_gather_32_masked(void) +{ + HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; + HVX_Vector *vgatherhi = + (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); + + *vgatherlo = VSPLAT_H(gather_32_masked_init()); + *vgatherhi = VSPLAT_H(gather_32_masked_init()); + VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len, + offsetslo); + VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len, + offsetshi); + + sync_gather(vgatherlo); + sync_gather(vgatherhi); +} + +/* gather the elements from the scatter16_32 buffer */ +void vector_gather_16_32(void) +{ + HVX_Vector *vgather; + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the vtcm address to gather from */ + vgather = (HVX_Vector *)&vtcm.vgather16_32; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets); + + /* deal the elements to get the order back */ + values = *(HVX_Vector *)vgather; + values = VDEAL_H(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; +} + +void vector_gather_16_32_masked(void) +{ + HVX_Vector *vgather; + HVX_VectorPair offsets; + HVX_Vector pred_reg; + HVX_VectorPred preds; + HVX_Vector values; + + /* get the vtcm address to gather from */ + vgather = (HVX_Vector *)&vtcm.vgather16_32; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = VSHUFF_H(pred_reg); + preds = VAND_VAL(pred_reg, ~0); + + *vgather = VSPLAT_H(gather_16_masked_init()); + VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len, + offsets); + + /* deal the elements to get the order back */ + values = *(HVX_Vector *)vgather; + values = VDEAL_H(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; +} + +static void check_buffer(const char *name, void *c, void *r, size_t size) +{ + char *check = (char *)c; + char *ref = (char *)r; + for (int i = 0; i < size; i++) { + if (check[i] != ref[i]) { + printf("ERROR %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i, + check[i], check[i], ref[i], ref[i]); + err++; + } + } +} + +/* + * These scalar functions are the C equivalents of the vector functions that + * use HVX + */ + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] = half_values[i]; + } +} + +void check_scatter_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16_acc(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] += half_values_acc[i]; + } +} + +void check_scatter_16_acc() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_16_acc(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16_masked(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16[half_offsets[i] / 2] = half_values_masked[i]; + } + } + +} + +void check_scatter_16_masked() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_16_acc(vscatter16_ref); + scalar_scatter_16_masked(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] = word_values[i]; + } +} + +void check_scatter_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32_acc(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] += word_values_acc[i]; + } +} + +void check_scatter_32_acc() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_32_acc(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32_masked(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (word_predicates[i]) { + vscatter32[word_offsets[i] / 4] = word_values_masked[i]; + } + } +} + +void check_scatter_32_masked() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_32_acc(vscatter32_ref); + scalar_scatter_32_masked(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_16_32(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] = half_values[i]; + } +} + +void check_scatter_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_16_32_acc(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] += half_values_acc[i]; + } +} + +void check_scatter_16_32_acc() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatter_16_32_acc(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void scalar_scatter_16_32_masked(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16_32[word_offsets[i] / 2] = half_values_masked[i]; + } + } +} + +void check_scatter_16_32_masked() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatter_16_32_acc(vscatter16_32_ref); + scalar_scatter_16_32_masked(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16(unsigned short *vgather16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2]; + } +} + +void check_gather_16() +{ + memset(vgather16_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16(vgather16_ref); + check_buffer(__func__, vtcm.vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void scalar_gather_16_masked(unsigned short *vgather16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2]; + } + } +} + +void check_gather_16_masked() +{ + memset(vgather16_ref, gather_16_masked_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_masked(vgather16_ref); + check_buffer(__func__, vtcm.vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_32(unsigned int *vgather32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4]; + } +} + +void check_gather_32(void) +{ + memset(vgather32_ref, 0, MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32(vgather32_ref); + check_buffer(__func__, vtcm.vgather32, vgather32_ref, + MATRIX_SIZE * sizeof(unsigned int)); +} + +void scalar_gather_32_masked(unsigned int *vgather32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (word_predicates[i]) { + vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4]; + } + } +} + + +void check_gather_32_masked(void) +{ + memset(vgather32_ref, gather_32_masked_init(), + MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32_masked(vgather32_ref); + check_buffer(__func__, vtcm.vgather32, + vgather32_ref, MATRIX_SIZE * sizeof(unsigned int)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16_32(unsigned short *vgather16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2]; + } +} + +void check_gather_16_32(void) +{ + memset(vgather16_32_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32(vgather16_32_ref); + check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void scalar_gather_16_32_masked(unsigned short *vgather16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2]; + } + } + +} + +void check_gather_16_32_masked(void) +{ + memset(vgather16_32_ref, gather_16_masked_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32_masked(vgather16_32_ref); + check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* print scatter16 buffer */ +void print_scatter16_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtcm.vscatter16[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 16 buffer */ +void print_gather_result_16(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtcm.vgather16[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the scatter32 buffer */ +void print_scatter32_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtcm.vscatter32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 32 buffer */ +void print_gather_result_32(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtcm.vgather32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the scatter16_32 buffer */ +void print_scatter16_32_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16_32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", + (unsigned char)((vtcm.vscatter16_32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 16_32 buffer */ +void print_gather_result_16_32(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16_32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", + (unsigned char)((vtcm.vgather16_32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +int main() +{ + prefill_vtcm_scratch(); + + /* 16 bit elements with 16 bit offsets */ + create_offsets_values_preds_16(); + + vector_scatter_16(); + print_scatter16_buffer(); + check_scatter_16(); + + vector_gather_16(); + print_gather_result_16(); + check_gather_16(); + + vector_gather_16_masked(); + print_gather_result_16(); + check_gather_16_masked(); + + vector_scatter_16_acc(); + print_scatter16_buffer(); + check_scatter_16_acc(); + + vector_scatter_16_masked(); + print_scatter16_buffer(); + check_scatter_16_masked(); + + /* 32 bit elements with 32 bit offsets */ + create_offsets_values_preds_32(); + + vector_scatter_32(); + print_scatter32_buffer(); + check_scatter_32(); + + vector_gather_32(); + print_gather_result_32(); + check_gather_32(); + + vector_gather_32_masked(); + print_gather_result_32(); + check_gather_32_masked(); + + vector_scatter_32_acc(); + print_scatter32_buffer(); + check_scatter_32_acc(); + + vector_scatter_32_masked(); + print_scatter32_buffer(); + check_scatter_32_masked(); + + /* 16 bit elements with 32 bit offsets */ + create_offsets_values_preds_16_32(); + + vector_scatter_16_32(); + print_scatter16_32_buffer(); + check_scatter_16_32(); + + vector_gather_16_32(); + print_gather_result_16_32(); + check_gather_16_32(); + + vector_gather_16_32_masked(); + print_gather_result_16_32(); + check_gather_16_32_masked(); + + vector_scatter_16_32_acc(); + print_scatter16_32_buffer(); + check_scatter_16_32_acc(); + + vector_scatter_16_32_masked(); + print_scatter16_32_buffer(); + check_scatter_16_32_masked(); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/vector_add_int.c b/tests/tcg/hexagon/vector_add_int.c new file mode 100644 index 0000000000..d6010ea14b --- /dev/null +++ b/tests/tcg/hexagon/vector_add_int.c @@ -0,0 +1,61 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +int gA[401]; +int gB[401]; +int gC[401]; + +void vector_add_int() +{ + int i; + for (i = 0; i < 400; i++) { + gA[i] = gB[i] + gC[i]; + } +} + +int main() +{ + int error = 0; + int i; + for (i = 0; i < 400; i++) { + gB[i] = i * 2; + gC[i] = i * 3; + } + gA[400] = 17; + vector_add_int(); + for (i = 0; i < 400; i++) { + if (gA[i] != i * 5) { + error++; + printf("ERROR: gB[%d] = %d\t", i, gB[i]); + printf("gC[%d] = %d\t", i, gC[i]); + printf("gA[%d] = %d\n", i, gA[i]); + } + } + if (gA[400] != 17) { + error++; + printf("ERROR: Overran the buffer\n"); + } + if (!error) { + printf("PASS\n"); + return 0; + } else { + printf("FAIL\n"); + return 1; + } +} |