diff options
128 files changed, 3310 insertions, 1305 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 564af9d633..95c957d587 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2902,9 +2902,11 @@ T: git https://gitlab.com/ehabkost/qemu.git machine-next Cryptodev Backends M: Gonglei <arei.gonglei@huawei.com> +M: zhenwei pi <pizhenwei@bytedance.com> S: Maintained F: include/sysemu/cryptodev*.h F: backends/cryptodev*.c +F: qapi/cryptodev.json Python library M: John Snow <jsnow@redhat.com> diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c index cda6ca3b71..39d0455280 100644 --- a/backends/cryptodev-builtin.c +++ b/backends/cryptodev-builtin.c @@ -59,6 +59,19 @@ struct CryptoDevBackendBuiltin { CryptoDevBackendBuiltinSession *sessions[MAX_NUM_SESSIONS]; }; +static void cryptodev_builtin_init_akcipher(CryptoDevBackend *backend) +{ + QCryptoAkCipherOptions opts; + + opts.alg = QCRYPTO_AKCIPHER_ALG_RSA; + opts.u.rsa.padding_alg = QCRYPTO_RSA_PADDING_ALG_RAW; + if (qcrypto_akcipher_supports(&opts)) { + backend->conf.crypto_services |= + (1u << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER); + backend->conf.akcipher_algo = 1u << VIRTIO_CRYPTO_AKCIPHER_RSA; + } +} + static void cryptodev_builtin_init( CryptoDevBackend *backend, Error **errp) { @@ -72,21 +85,18 @@ static void cryptodev_builtin_init( return; } - cc = cryptodev_backend_new_client( - "cryptodev-builtin", NULL); + cc = cryptodev_backend_new_client(); cc->info_str = g_strdup_printf("cryptodev-builtin0"); cc->queue_index = 0; - cc->type = CRYPTODEV_BACKEND_TYPE_BUILTIN; + cc->type = QCRYPTODEV_BACKEND_TYPE_BUILTIN; backend->conf.peers.ccs[0] = cc; backend->conf.crypto_services = - 1u << VIRTIO_CRYPTO_SERVICE_CIPHER | - 1u << VIRTIO_CRYPTO_SERVICE_HASH | - 1u << VIRTIO_CRYPTO_SERVICE_MAC | - 1u << VIRTIO_CRYPTO_SERVICE_AKCIPHER; + 1u << QCRYPTODEV_BACKEND_SERVICE_CIPHER | + 1u << QCRYPTODEV_BACKEND_SERVICE_HASH | + 1u << QCRYPTODEV_BACKEND_SERVICE_MAC; backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC; backend->conf.hash_algo = 1u << 
VIRTIO_CRYPTO_HASH_SHA1; - backend->conf.akcipher_algo = 1u << VIRTIO_CRYPTO_AKCIPHER_RSA; /* * Set the Maximum length of crypto request. * Why this value? Just avoid to overflow when @@ -95,6 +105,7 @@ static void cryptodev_builtin_init( backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendOpInfo); backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN; backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN; + cryptodev_builtin_init_akcipher(backend); cryptodev_backend_set_ready(backend, true); } @@ -528,17 +539,14 @@ static int cryptodev_builtin_asym_operation( static int cryptodev_builtin_operation( CryptoDevBackend *backend, - CryptoDevBackendOpInfo *op_info, - uint32_t queue_index, - CryptoDevCompletionFunc cb, - void *opaque) + CryptoDevBackendOpInfo *op_info) { CryptoDevBackendBuiltin *builtin = CRYPTODEV_BACKEND_BUILTIN(backend); CryptoDevBackendBuiltinSession *sess; CryptoDevBackendSymOpInfo *sym_op_info; CryptoDevBackendAsymOpInfo *asym_op_info; - enum CryptoDevBackendAlgType algtype = op_info->algtype; + QCryptodevBackendAlgType algtype = op_info->algtype; int status = -VIRTIO_CRYPTO_ERR; Error *local_error = NULL; @@ -550,11 +558,11 @@ static int cryptodev_builtin_operation( } sess = builtin->sessions[op_info->session_id]; - if (algtype == CRYPTODEV_BACKEND_ALG_SYM) { + if (algtype == QCRYPTODEV_BACKEND_ALG_SYM) { sym_op_info = op_info->u.sym_op_info; status = cryptodev_builtin_sym_operation(sess, sym_op_info, &local_error); - } else if (algtype == CRYPTODEV_BACKEND_ALG_ASYM) { + } else if (algtype == QCRYPTODEV_BACKEND_ALG_ASYM) { asym_op_info = op_info->u.asym_op_info; status = cryptodev_builtin_asym_operation(sess, op_info->op_code, asym_op_info, &local_error); @@ -563,8 +571,8 @@ static int cryptodev_builtin_operation( if (local_error) { error_report_err(local_error); } - if (cb) { - cb(opaque, status); + if (op_info->cb) { + op_info->cb(op_info->opaque, status); } return 0; } diff --git 
a/backends/cryptodev-hmp-cmds.c b/backends/cryptodev-hmp-cmds.c
new file mode 100644
index 0000000000..4f7220bb13
--- /dev/null
+++ b/backends/cryptodev-hmp-cmds.c
@@ -0,0 +1,64 @@
+/*
+ * HMP commands related to cryptodev
+ *
+ * Copyright (c) 2023 Bytedance.Inc
+ *
+ * Authors:
+ *    zhenwei pi<pizhenwei@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/qapi-commands-cryptodev.h"
+#include "qapi/qmp/qdict.h"
+
+
+/*
+ * HMP "info cryptodev": print every entry returned by
+ * qmp_query_cryptodev() as one "<id>: service=[...]" line followed by
+ * one line per client queue.  The queried list is freed before returning.
+ */
+void hmp_info_cryptodev(Monitor *mon, const QDict *qdict)
+{
+    /* Keep the list head: the loop cursor is NULL once iteration ends. */
+    QCryptodevInfoList *head = qmp_query_cryptodev(NULL);
+    QCryptodevInfoList *il;
+    QCryptodevBackendServiceTypeList *sl;
+    QCryptodevBackendClientList *cl;
+
+    for (il = head; il; il = il->next) {
+        g_autofree char *services = NULL;
+        QCryptodevInfo *info = il->value;
+        char *tmp_services;
+
+        /* build a string like 'service=[akcipher|mac|hash|cipher]' */
+        for (sl = info->service; sl; sl = sl->next) {
+            const char *service = QCryptodevBackendServiceType_str(sl->value);
+
+            if (!services) {
+                services = g_strdup(service);
+            } else {
+                tmp_services = g_strjoin("|", services, service, NULL);
+                g_free(services);
+                services = tmp_services;
+            }
+        }
+        /* An empty service list leaves the string NULL; print "[]" then. */
+        monitor_printf(mon, "%s: service=[%s]\n", info->id,
+                       services ? services : "");
+
+        for (cl = info->client; cl; cl = cl->next) {
+            QCryptodevBackendClient *client = cl->value;
+            monitor_printf(mon, " queue %" PRIu32 ": type=%s\n",
+                           client->queue,
+                           QCryptodevBackendType_str(client->type));
+        }
+    }
+
+    /* Free from the head; freeing the exhausted cursor leaked the list. */
+    qapi_free_QCryptodevInfoList(head);
+}
diff --git a/backends/cryptodev-lkcf.c b/backends/cryptodev-lkcf.c
index 133bd706a4..45aba1ff67 100644
--- a/backends/cryptodev-lkcf.c
+++ b/backends/cryptodev-lkcf.c
@@ -223,14 +223,14 @@ static void cryptodev_lkcf_init(CryptoDevBackend *backend, Error **errp)
         return;
     }
 
-    cc = cryptodev_backend_new_client("cryptodev-lkcf", NULL);
+    cc = cryptodev_backend_new_client();
     cc->info_str = 
g_strdup_printf("cryptodev-lkcf0"); cc->queue_index = 0; - cc->type = CRYPTODEV_BACKEND_TYPE_LKCF; + cc->type = QCRYPTODEV_BACKEND_TYPE_LKCF; backend->conf.peers.ccs[0] = cc; backend->conf.crypto_services = - 1u << VIRTIO_CRYPTO_SERVICE_AKCIPHER; + 1u << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER; backend->conf.akcipher_algo = 1u << VIRTIO_CRYPTO_AKCIPHER_RSA; lkcf->running = true; @@ -469,15 +469,12 @@ static void *cryptodev_lkcf_worker(void *arg) static int cryptodev_lkcf_operation( CryptoDevBackend *backend, - CryptoDevBackendOpInfo *op_info, - uint32_t queue_index, - CryptoDevCompletionFunc cb, - void *opaque) + CryptoDevBackendOpInfo *op_info) { CryptoDevBackendLKCF *lkcf = CRYPTODEV_BACKEND_LKCF(backend); CryptoDevBackendLKCFSession *sess; - enum CryptoDevBackendAlgType algtype = op_info->algtype; + QCryptodevBackendAlgType algtype = op_info->algtype; CryptoDevLKCFTask *task; if (op_info->session_id >= MAX_SESSIONS || @@ -488,15 +485,15 @@ static int cryptodev_lkcf_operation( } sess = lkcf->sess[op_info->session_id]; - if (algtype != CRYPTODEV_BACKEND_ALG_ASYM) { + if (algtype != QCRYPTODEV_BACKEND_ALG_ASYM) { error_report("algtype not supported: %u", algtype); return -VIRTIO_CRYPTO_NOTSUPP; } task = g_new0(CryptoDevLKCFTask, 1); task->op_info = op_info; - task->cb = cb; - task->opaque = opaque; + task->cb = op_info->cb; + task->opaque = op_info->opaque; task->sess = sess; task->lkcf = lkcf; task->status = -VIRTIO_CRYPTO_ERR; diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c index ab3028e045..b1d9eb735f 100644 --- a/backends/cryptodev-vhost-user.c +++ b/backends/cryptodev-vhost-user.c @@ -67,7 +67,7 @@ cryptodev_vhost_user_get_vhost( { CryptoDevBackendVhostUser *s = CRYPTODEV_BACKEND_VHOST_USER(b); - assert(cc->type == CRYPTODEV_BACKEND_TYPE_VHOST_USER); + assert(cc->type == QCRYPTODEV_BACKEND_TYPE_VHOST_USER); assert(queue < MAX_CRYPTO_QUEUE_NUM); return s->vhost_crypto[queue]; @@ -198,12 +198,11 @@ static void 
cryptodev_vhost_user_init( s->opened = true; for (i = 0; i < queues; i++) { - cc = cryptodev_backend_new_client( - "cryptodev-vhost-user", NULL); + cc = cryptodev_backend_new_client(); cc->info_str = g_strdup_printf("cryptodev-vhost-user%zu to %s ", i, chr->label); cc->queue_index = i; - cc->type = CRYPTODEV_BACKEND_TYPE_VHOST_USER; + cc->type = QCRYPTODEV_BACKEND_TYPE_VHOST_USER; backend->conf.peers.ccs[i] = cc; @@ -222,9 +221,9 @@ static void cryptodev_vhost_user_init( cryptodev_vhost_user_event, NULL, s, NULL, true); backend->conf.crypto_services = - 1u << VIRTIO_CRYPTO_SERVICE_CIPHER | - 1u << VIRTIO_CRYPTO_SERVICE_HASH | - 1u << VIRTIO_CRYPTO_SERVICE_MAC; + 1u << QCRYPTODEV_BACKEND_SERVICE_CIPHER | + 1u << QCRYPTODEV_BACKEND_SERVICE_HASH | + 1u << QCRYPTODEV_BACKEND_SERVICE_MAC; backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC; backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1; diff --git a/backends/cryptodev-vhost.c b/backends/cryptodev-vhost.c index 74ea0ad63d..93523732f3 100644 --- a/backends/cryptodev-vhost.c +++ b/backends/cryptodev-vhost.c @@ -127,7 +127,7 @@ cryptodev_get_vhost(CryptoDevBackendClient *cc, switch (cc->type) { #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) - case CRYPTODEV_BACKEND_TYPE_VHOST_USER: + case QCRYPTODEV_BACKEND_TYPE_VHOST_USER: vhost_crypto = cryptodev_vhost_user_get_vhost(cc, b, queue); break; #endif @@ -195,7 +195,7 @@ int cryptodev_vhost_start(VirtIODevice *dev, int total_queues) * because vhost user doesn't interrupt masking/unmasking * properly. 
*/ - if (cc->type == CRYPTODEV_BACKEND_TYPE_VHOST_USER) { + if (cc->type == QCRYPTODEV_BACKEND_TYPE_VHOST_USER) { dev->use_guest_notifier_mask = false; } } diff --git a/backends/cryptodev.c b/backends/cryptodev.c index 54ee8c81f5..94ca393cee 100644 --- a/backends/cryptodev.c +++ b/backends/cryptodev.c @@ -23,29 +23,92 @@ #include "qemu/osdep.h" #include "sysemu/cryptodev.h" +#include "sysemu/stats.h" #include "qapi/error.h" +#include "qapi/qapi-commands-cryptodev.h" +#include "qapi/qapi-types-stats.h" #include "qapi/visitor.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qemu/main-loop.h" #include "qom/object_interfaces.h" #include "hw/virtio/virtio-crypto.h" +#define SYM_ENCRYPT_OPS_STR "sym-encrypt-ops" +#define SYM_DECRYPT_OPS_STR "sym-decrypt-ops" +#define SYM_ENCRYPT_BYTES_STR "sym-encrypt-bytes" +#define SYM_DECRYPT_BYTES_STR "sym-decrypt-bytes" + +#define ASYM_ENCRYPT_OPS_STR "asym-encrypt-ops" +#define ASYM_DECRYPT_OPS_STR "asym-decrypt-ops" +#define ASYM_SIGN_OPS_STR "asym-sign-ops" +#define ASYM_VERIFY_OPS_STR "asym-verify-ops" +#define ASYM_ENCRYPT_BYTES_STR "asym-encrypt-bytes" +#define ASYM_DECRYPT_BYTES_STR "asym-decrypt-bytes" +#define ASYM_SIGN_BYTES_STR "asym-sign-bytes" +#define ASYM_VERIFY_BYTES_STR "asym-verify-bytes" + +typedef struct StatsArgs { + union StatsResultsType { + StatsResultList **stats; + StatsSchemaList **schema; + } result; + strList *names; + Error **errp; +} StatsArgs; static QTAILQ_HEAD(, CryptoDevBackendClient) crypto_clients; +static int qmp_query_cryptodev_foreach(Object *obj, void *data) +{ + CryptoDevBackend *backend; + QCryptodevInfoList **infolist = data; + uint32_t services, i; + + if (!object_dynamic_cast(obj, TYPE_CRYPTODEV_BACKEND)) { + return 0; + } + + QCryptodevInfo *info = g_new0(QCryptodevInfo, 1); + info->id = g_strdup(object_get_canonical_path_component(obj)); + + backend = CRYPTODEV_BACKEND(obj); + services = backend->conf.crypto_services; + for (i = 0; i < 
QCRYPTODEV_BACKEND_SERVICE__MAX; i++) { + if (services & (1 << i)) { + QAPI_LIST_PREPEND(info->service, i); + } + } + + for (i = 0; i < backend->conf.peers.queues; i++) { + CryptoDevBackendClient *cc = backend->conf.peers.ccs[i]; + QCryptodevBackendClient *client = g_new0(QCryptodevBackendClient, 1); + + client->queue = cc->queue_index; + client->type = cc->type; + QAPI_LIST_PREPEND(info->client, client); + } + + QAPI_LIST_PREPEND(*infolist, info); + + return 0; +} -CryptoDevBackendClient * -cryptodev_backend_new_client(const char *model, - const char *name) +QCryptodevInfoList *qmp_query_cryptodev(Error **errp) +{ + QCryptodevInfoList *list = NULL; + Object *objs = container_get(object_get_root(), "/objects"); + + object_child_foreach(objs, qmp_query_cryptodev_foreach, &list); + + return list; +} + +CryptoDevBackendClient *cryptodev_backend_new_client(void) { CryptoDevBackendClient *cc; cc = g_new0(CryptoDevBackendClient, 1); - cc->model = g_strdup(model); - if (name) { - cc->name = g_strdup(name); - } - QTAILQ_INSERT_TAIL(&crypto_clients, cc, next); return cc; @@ -55,8 +118,6 @@ void cryptodev_backend_free_client( CryptoDevBackendClient *cc) { QTAILQ_REMOVE(&crypto_clients, cc, next); - g_free(cc->name); - g_free(cc->model); g_free(cc->info_str); g_free(cc); } @@ -71,6 +132,9 @@ void cryptodev_backend_cleanup( if (bc->cleanup) { bc->cleanup(backend, errp); } + + g_free(backend->sym_stat); + g_free(backend->asym_stat); } int cryptodev_backend_create_session( @@ -107,38 +171,111 @@ int cryptodev_backend_close_session( static int cryptodev_backend_operation( CryptoDevBackend *backend, - CryptoDevBackendOpInfo *op_info, - uint32_t queue_index, - CryptoDevCompletionFunc cb, - void *opaque) + CryptoDevBackendOpInfo *op_info) { CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(backend); if (bc->do_op) { - return bc->do_op(backend, op_info, queue_index, cb, opaque); + return bc->do_op(backend, op_info); } return -VIRTIO_CRYPTO_NOTSUPP; } +static int 
cryptodev_backend_account(CryptoDevBackend *backend, + CryptoDevBackendOpInfo *op_info) +{ + enum QCryptodevBackendAlgType algtype = op_info->algtype; + int len; + + if (algtype == QCRYPTODEV_BACKEND_ALG_ASYM) { + CryptoDevBackendAsymOpInfo *asym_op_info = op_info->u.asym_op_info; + len = asym_op_info->src_len; + switch (op_info->op_code) { + case VIRTIO_CRYPTO_AKCIPHER_ENCRYPT: + CryptodevAsymStatIncEncrypt(backend, len); + break; + case VIRTIO_CRYPTO_AKCIPHER_DECRYPT: + CryptodevAsymStatIncDecrypt(backend, len); + break; + case VIRTIO_CRYPTO_AKCIPHER_SIGN: + CryptodevAsymStatIncSign(backend, len); + break; + case VIRTIO_CRYPTO_AKCIPHER_VERIFY: + CryptodevAsymStatIncVerify(backend, len); + break; + default: + return -VIRTIO_CRYPTO_NOTSUPP; + } + } else if (algtype == QCRYPTODEV_BACKEND_ALG_SYM) { + CryptoDevBackendSymOpInfo *sym_op_info = op_info->u.sym_op_info; + len = sym_op_info->src_len; + switch (op_info->op_code) { + case VIRTIO_CRYPTO_CIPHER_ENCRYPT: + CryptodevSymStatIncEncrypt(backend, len); + break; + case VIRTIO_CRYPTO_CIPHER_DECRYPT: + CryptodevSymStatIncDecrypt(backend, len); + break; + default: + return -VIRTIO_CRYPTO_NOTSUPP; + } + } else { + error_report("Unsupported cryptodev alg type: %" PRIu32 "", algtype); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + return len; +} + +static void cryptodev_backend_throttle_timer_cb(void *opaque) +{ + CryptoDevBackend *backend = (CryptoDevBackend *)opaque; + CryptoDevBackendOpInfo *op_info, *tmpop; + int ret; + + QTAILQ_FOREACH_SAFE(op_info, &backend->opinfos, next, tmpop) { + QTAILQ_REMOVE(&backend->opinfos, op_info, next); + ret = cryptodev_backend_account(backend, op_info); + if (ret < 0) { + op_info->cb(op_info->opaque, ret); + continue; + } + + throttle_account(&backend->ts, true, ret); + cryptodev_backend_operation(backend, op_info); + if (throttle_enabled(&backend->tc) && + throttle_schedule_timer(&backend->ts, &backend->tt, true)) { + break; + } + } +} + int cryptodev_backend_crypto_operation( 
CryptoDevBackend *backend, - void *opaque1, - uint32_t queue_index, - CryptoDevCompletionFunc cb, void *opaque2) + CryptoDevBackendOpInfo *op_info) { - VirtIOCryptoReq *req = opaque1; - CryptoDevBackendOpInfo *op_info = &req->op_info; - enum CryptoDevBackendAlgType algtype = req->flags; + int ret; - if ((algtype != CRYPTODEV_BACKEND_ALG_SYM) - && (algtype != CRYPTODEV_BACKEND_ALG_ASYM)) { - error_report("Unsupported cryptodev alg type: %" PRIu32 "", algtype); - return -VIRTIO_CRYPTO_NOTSUPP; + if (!throttle_enabled(&backend->tc)) { + goto do_account; + } + + if (throttle_schedule_timer(&backend->ts, &backend->tt, true) || + !QTAILQ_EMPTY(&backend->opinfos)) { + QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next); + return 0; } - return cryptodev_backend_operation(backend, op_info, queue_index, - cb, opaque2); +do_account: + ret = cryptodev_backend_account(backend, op_info); + if (ret < 0) { + return ret; + } + + throttle_account(&backend->ts, true, ret); + + return cryptodev_backend_operation(backend, op_info); } static void @@ -169,15 +306,111 @@ cryptodev_backend_set_queues(Object *obj, Visitor *v, const char *name, backend->conf.peers.queues = value; } +static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field, + uint64_t value, Error **errp) +{ + uint64_t orig = backend->tc.buckets[field].avg; + bool enabled = throttle_enabled(&backend->tc); + + if (orig == value) { + return; + } + + backend->tc.buckets[field].avg = value; + if (!throttle_enabled(&backend->tc)) { + throttle_timers_destroy(&backend->tt); + cryptodev_backend_throttle_timer_cb(backend); /* drain opinfos */ + return; + } + + if (!throttle_is_valid(&backend->tc, errp)) { + backend->tc.buckets[field].avg = orig; /* revert change */ + return; + } + + if (!enabled) { + throttle_init(&backend->ts); + throttle_timers_init(&backend->tt, qemu_get_aio_context(), + QEMU_CLOCK_REALTIME, + cryptodev_backend_throttle_timer_cb, /* FIXME */ + cryptodev_backend_throttle_timer_cb, backend); + 
} + + throttle_config(&backend->ts, QEMU_CLOCK_REALTIME, &backend->tc); +} + +static void cryptodev_backend_get_bps(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + uint64_t value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg; + + visit_type_uint64(v, name, &value, errp); +} + +static void cryptodev_backend_set_bps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + uint64_t value; + + if (!visit_type_uint64(v, name, &value, errp)) { + return; + } + + cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp); +} + +static void cryptodev_backend_get_ops(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + uint64_t value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg; + + visit_type_uint64(v, name, &value, errp); +} + +static void cryptodev_backend_set_ops(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + uint64_t value; + + if (!visit_type_uint64(v, name, &value, errp)) { + return; + } + + cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp); +} + static void cryptodev_backend_complete(UserCreatable *uc, Error **errp) { CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc); CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc); + uint32_t services; + uint64_t value; + + QTAILQ_INIT(&backend->opinfos); + value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg; + cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp); + value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg; + cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp); if (bc->init) { bc->init(backend, errp); } + + services = backend->conf.crypto_services; + if (services & (1 << QCRYPTODEV_BACKEND_SERVICE_CIPHER)) { + 
backend->sym_stat = g_new0(CryptodevBackendSymStat, 1); + } + + if (services & (1 << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER)) { + backend->asym_stat = g_new0(CryptodevBackendAsymStat, 1); + } } void cryptodev_backend_set_used(CryptoDevBackend *backend, bool used) @@ -208,8 +441,12 @@ cryptodev_backend_can_be_deleted(UserCreatable *uc) static void cryptodev_backend_instance_init(Object *obj) { + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + /* Initialize devices' queues property to 1 */ object_property_set_int(obj, "queues", 1, NULL); + + throttle_config_init(&backend->tc); } static void cryptodev_backend_finalize(Object *obj) @@ -217,6 +454,137 @@ static void cryptodev_backend_finalize(Object *obj) CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); cryptodev_backend_cleanup(backend, NULL); + if (throttle_enabled(&backend->tc)) { + throttle_timers_destroy(&backend->tt); + } +} + +static StatsList *cryptodev_backend_stats_add(const char *name, int64_t *val, + StatsList *stats_list) +{ + Stats *stats = g_new0(Stats, 1); + + stats->name = g_strdup(name); + stats->value = g_new0(StatsValue, 1); + stats->value->type = QTYPE_QNUM; + stats->value->u.scalar = *val; + + QAPI_LIST_PREPEND(stats_list, stats); + return stats_list; +} + +static int cryptodev_backend_stats_query(Object *obj, void *data) +{ + StatsArgs *stats_args = data; + StatsResultList **stats_results = stats_args->result.stats; + StatsList *stats_list = NULL; + StatsResult *entry; + CryptoDevBackend *backend; + CryptodevBackendSymStat *sym_stat; + CryptodevBackendAsymStat *asym_stat; + + if (!object_dynamic_cast(obj, TYPE_CRYPTODEV_BACKEND)) { + return 0; + } + + backend = CRYPTODEV_BACKEND(obj); + sym_stat = backend->sym_stat; + if (sym_stat) { + stats_list = cryptodev_backend_stats_add(SYM_ENCRYPT_OPS_STR, + &sym_stat->encrypt_ops, stats_list); + stats_list = cryptodev_backend_stats_add(SYM_DECRYPT_OPS_STR, + &sym_stat->decrypt_ops, stats_list); + stats_list = 
cryptodev_backend_stats_add(SYM_ENCRYPT_BYTES_STR,
+                         &sym_stat->encrypt_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(SYM_DECRYPT_BYTES_STR,
+                         &sym_stat->decrypt_bytes, stats_list);
+    }
+
+    asym_stat = backend->asym_stat;
+    if (asym_stat) {
+        stats_list = cryptodev_backend_stats_add(ASYM_ENCRYPT_OPS_STR,
+                         &asym_stat->encrypt_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_DECRYPT_OPS_STR,
+                         &asym_stat->decrypt_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_SIGN_OPS_STR,
+                         &asym_stat->sign_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_VERIFY_OPS_STR,
+                         &asym_stat->verify_ops, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_ENCRYPT_BYTES_STR,
+                         &asym_stat->encrypt_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_DECRYPT_BYTES_STR,
+                         &asym_stat->decrypt_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_SIGN_BYTES_STR,
+                         &asym_stat->sign_bytes, stats_list);
+        stats_list = cryptodev_backend_stats_add(ASYM_VERIFY_BYTES_STR,
+                         &asym_stat->verify_bytes, stats_list);
+    }
+
+    entry = g_new0(StatsResult, 1);
+    entry->provider = STATS_PROVIDER_CRYPTODEV;
+    entry->qom_path = object_get_canonical_path(obj); /* takes ownership */
+    entry->stats = stats_list;
+    QAPI_LIST_PREPEND(*stats_results, entry);
+
+    return 0;
+}
+
+static void cryptodev_backend_stats_cb(StatsResultList **result,
+                                       StatsTarget target,
+                                       strList *names, strList *targets,
+                                       Error **errp)
+{
+    switch (target) {
+    case STATS_TARGET_CRYPTODEV:
+    {
+        Object *objs = container_get(object_get_root(), "/objects");
+        StatsArgs stats_args;
+        stats_args.result.stats = result;
+        stats_args.names = names;
+        stats_args.errp = errp;
+
+        object_child_foreach(objs, cryptodev_backend_stats_query, &stats_args);
+        break;
+    }
+    default:
+        break;
+    }
+}
+
+static StatsSchemaValueList *cryptodev_backend_schemas_add(const char *name,
+                                 StatsSchemaValueList *list)
+{
+    StatsSchemaValueList *schema_entry = 
g_new0(StatsSchemaValueList, 1); + + schema_entry->value = g_new0(StatsSchemaValue, 1); + schema_entry->value->type = STATS_TYPE_CUMULATIVE; + schema_entry->value->name = g_strdup(name); + schema_entry->next = list; + + return schema_entry; +} + +static void cryptodev_backend_schemas_cb(StatsSchemaList **result, + Error **errp) +{ + StatsSchemaValueList *stats_list = NULL; + const char *sym_stats[] = { SYM_ENCRYPT_OPS_STR, SYM_DECRYPT_OPS_STR, + SYM_ENCRYPT_BYTES_STR, SYM_DECRYPT_BYTES_STR }; + const char *asym_stats[] = { ASYM_ENCRYPT_OPS_STR, ASYM_DECRYPT_OPS_STR, + ASYM_SIGN_OPS_STR, ASYM_VERIFY_OPS_STR, + ASYM_ENCRYPT_BYTES_STR, ASYM_DECRYPT_BYTES_STR, + ASYM_SIGN_BYTES_STR, ASYM_VERIFY_BYTES_STR }; + + for (int i = 0; i < ARRAY_SIZE(sym_stats); i++) { + stats_list = cryptodev_backend_schemas_add(sym_stats[i], stats_list); + } + + for (int i = 0; i < ARRAY_SIZE(asym_stats); i++) { + stats_list = cryptodev_backend_schemas_add(asym_stats[i], stats_list); + } + + add_stats_schema(result, STATS_PROVIDER_CRYPTODEV, STATS_TARGET_CRYPTODEV, + stats_list); } static void @@ -232,6 +600,17 @@ cryptodev_backend_class_init(ObjectClass *oc, void *data) cryptodev_backend_get_queues, cryptodev_backend_set_queues, NULL, NULL); + object_class_property_add(oc, "throttle-bps", "uint64", + cryptodev_backend_get_bps, + cryptodev_backend_set_bps, + NULL, NULL); + object_class_property_add(oc, "throttle-ops", "uint64", + cryptodev_backend_get_ops, + cryptodev_backend_set_ops, + NULL, NULL); + + add_stats_callbacks(STATS_PROVIDER_CRYPTODEV, cryptodev_backend_stats_cb, + cryptodev_backend_schemas_cb); } static const TypeInfo cryptodev_backend_info = { diff --git a/backends/meson.build b/backends/meson.build index 954e658b25..b369e0a9d0 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -1,5 +1,6 @@ softmmu_ss.add([files( 'cryptodev-builtin.c', + 'cryptodev-hmp-cmds.c', 'cryptodev.c', 'hostmem-ram.c', 'hostmem.c', diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx 
index 754b1e8408..47d63d26db 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -993,3 +993,17 @@ SRST ``info virtio-queue-element`` *path* *queue* [*index*] Display element of a given virtio queue ERST + + { + .name = "cryptodev", + .args_type = "", + .params = "", + .help = "show the crypto devices", + .cmd = hmp_info_cryptodev, + .flags = "p", + }, + +SRST + ``info cryptodev`` + Show the crypto devices. +ERST diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c index a43f6dafc9..dcee3ad7a1 100644 --- a/hw/acpi/acpi-pci-hotplug-stub.c +++ b/hw/acpi/acpi-pci-hotplug-stub.c @@ -5,8 +5,7 @@ const VMStateDescription vmstate_acpi_pcihp_pci_status; void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, - MemoryRegion *address_space_io, bool bridges_enabled, - uint16_t io_base) + MemoryRegion *address_space_io, uint16_t io_base) { return; } @@ -36,8 +35,12 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, return; } -void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) +void acpi_pcihp_reset(AcpiPciHpState *s) { return; } +bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus) +{ + return true; +} diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index d23bfcaa6b..25e2c7243e 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -218,7 +218,7 @@ static bool vmstate_test_use_pcihp(void *opaque) { ICH9LPCPMRegs *s = opaque; - return s->use_acpi_hotplug_bridge; + return s->acpi_pci_hotplug.use_acpi_hotplug_bridge; } static const VMStateDescription vmstate_pcihp_state = { @@ -277,8 +277,8 @@ static void pm_reset(void *opaque) } pm->smi_en_wmask = ~0; - if (pm->use_acpi_hotplug_bridge) { - acpi_pcihp_reset(&pm->acpi_pci_hotplug, true); + if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) { + acpi_pcihp_reset(&pm->acpi_pci_hotplug); } acpi_update_sci(&pm->acpi_regs, pm->irq); @@ -316,12 +316,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq) 
acpi_pm_tco_init(&pm->tco_regs, &pm->io); } - if (pm->use_acpi_hotplug_bridge) { + if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) { acpi_pcihp_init(OBJECT(lpc_pci), &pm->acpi_pci_hotplug, pci_get_bus(lpc_pci), pci_address_space_io(lpc_pci), - true, ACPI_PCIHP_ADDR_ICH9); qbus_set_hotplug_handler(BUS(pci_get_bus(lpc_pci)), @@ -403,14 +402,14 @@ static bool ich9_pm_get_acpi_pci_hotplug(Object *obj, Error **errp) { ICH9LPCState *s = ICH9_LPC_DEVICE(obj); - return s->pm.use_acpi_hotplug_bridge; + return s->pm.acpi_pci_hotplug.use_acpi_hotplug_bridge; } static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) { ICH9LPCState *s = ICH9_LPC_DEVICE(obj); - s->pm.use_acpi_hotplug_bridge = value; + s->pm.acpi_pci_hotplug.use_acpi_hotplug_bridge = value; } static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp) @@ -435,7 +434,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s3 = 0; pm->disable_s4 = 0; pm->s4_val = 2; - pm->use_acpi_hotplug_bridge = true; + pm->acpi_pci_hotplug.use_acpi_hotplug_bridge = true; pm->keep_pci_slot_hpc = true; pm->enable_tco = true; @@ -579,6 +578,12 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } } +bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus) +{ + ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); + return acpi_pcihp_is_hotpluggbale_bus(&lpc->pm.acpi_pci_hotplug, bus); +} + void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) { ICH9LPCState *s = ICH9_LPC_DEVICE(adev); diff --git a/hw/acpi/pci-bridge.c b/hw/acpi/pci-bridge.c index 5f3ee5157f..7baa7034a1 100644 --- a/hw/acpi/pci-bridge.c +++ b/hw/acpi/pci-bridge.c @@ -21,7 +21,17 @@ void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope) { PCIBridge *br = PCI_BRIDGE(adev); - if (object_property_find(OBJECT(&br->sec_bus), ACPI_PCIHP_PROP_BSEL)) { - build_append_pci_bus_devices(scope, pci_bridge_get_sec_bus(br)); + if (!DEVICE(br)->hotplugged) { + 
PCIBus *sec_bus = pci_bridge_get_sec_bus(br); + + build_append_pci_bus_devices(scope, sec_bus); + + /* + * generate hotplug slots descriptors if + * bridge has ACPI PCI hotplug attached, + */ + if (object_property_find(OBJECT(sec_bus), ACPI_PCIHP_PROP_BSEL)) { + build_append_pcihp_slots(scope, sec_bus); + } } } diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 5dc7377411..dcfb779a7a 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -54,21 +54,6 @@ typedef struct AcpiPciHpFind { PCIBus *bus; } AcpiPciHpFind; -static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data) -{ - return a - b; -} - -static GSequence *pci_acpi_index_list(void) -{ - static GSequence *used_acpi_index_list; - - if (!used_acpi_index_list) { - used_acpi_index_list = g_sequence_new(NULL); - } - return used_acpi_index_list; -} - static int acpi_pcihp_get_bsel(PCIBus *bus) { Error *local_err = NULL; @@ -136,20 +121,6 @@ static void acpi_set_pci_info(bool has_bridge_hotplug) } } -static void acpi_pcihp_disable_root_bus(void) -{ - Object *host = acpi_get_i386_pci_host(); - PCIBus *bus; - - bus = PCI_HOST_BRIDGE(host)->bus; - if (bus && qbus_is_hotpluggable(BUS(bus))) { - /* setting the hotplug handler to NULL makes the bus non-hotpluggable */ - qbus_set_hotplug_handler(BUS(bus), NULL); - } - - return; -} - static void acpi_pcihp_test_hotplug_bus(PCIBus *bus, void *opaque) { AcpiPciHpFind *find = opaque; @@ -291,17 +262,12 @@ static void acpi_pcihp_update(AcpiPciHpState *s) } } -void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) +void acpi_pcihp_reset(AcpiPciHpState *s) { - if (acpihp_root_off) { - acpi_pcihp_disable_root_bus(); - } - acpi_set_pci_info(!s->legacy_piix); + acpi_set_pci_info(s->use_acpi_hotplug_bridge); acpi_pcihp_update(s); } -#define ONBOARD_INDEX_MAX (16 * 1024 - 1) - void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -314,34 +280,6 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler 
*hotplug_dev, ACPI_PCIHP_PROP_BSEL "' set"); return; } - - /* - * capped by systemd (see: udev-builtin-net_id.c) - * as it's the only known user honor it to avoid users - * misconfigure QEMU and then wonder why acpi-index doesn't work - */ - if (pdev->acpi_index > ONBOARD_INDEX_MAX) { - error_setg(errp, "acpi-index should be less or equal to %u", - ONBOARD_INDEX_MAX); - return; - } - - /* - * make sure that acpi-index is unique across all present PCI devices - */ - if (pdev->acpi_index) { - GSequence *used_indexes = pci_acpi_index_list(); - - if (g_sequence_lookup(used_indexes, GINT_TO_POINTER(pdev->acpi_index), - g_cmp_uint32, NULL)) { - error_setg(errp, "a PCI device with acpi-index = %" PRIu32 - " already exist", pdev->acpi_index); - return; - } - g_sequence_insert_sorted(used_indexes, - GINT_TO_POINTER(pdev->acpi_index), - g_cmp_uint32, NULL); - } } void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, @@ -361,17 +299,10 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, * Overwrite the default hotplug handler with the ACPI PCI one * for cold plugged bridges only. 
*/ - if (!s->legacy_piix && + if (s->use_acpi_hotplug_bridge && object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); - /* Remove all hot-plug handlers if hot-plug is disabled on slot */ - if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT) && - !PCIE_SLOT(pdev)->hotplug) { - qbus_set_hotplug_handler(BUS(sec), NULL); - return; - } - qbus_set_hotplug_handler(BUS(sec), OBJECT(hotplug_dev)); /* We don't have to overwrite any other hotplug handler yet */ assert(QLIST_EMPTY(&sec->child)); @@ -401,17 +332,6 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, trace_acpi_pci_unplug(PCI_SLOT(pdev->devfn), acpi_pcihp_get_bsel(pci_get_bus(pdev))); - /* - * clean up acpi-index so it could reused by another device - */ - if (pdev->acpi_index) { - GSequence *used_indexes = pci_acpi_index_list(); - - g_sequence_remove(g_sequence_lookup(used_indexes, - GINT_TO_POINTER(pdev->acpi_index), - g_cmp_uint32, NULL)); - } - qdev_unrealize(dev); } @@ -441,6 +361,24 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } +bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus) +{ + Object *o = OBJECT(bus->parent); + + if (s->use_acpi_hotplug_bridge && + object_dynamic_cast(o, TYPE_PCI_BRIDGE)) { + if (object_dynamic_cast(o, TYPE_PCIE_SLOT) && !PCIE_SLOT(o)->hotplug) { + return false; + } + return true; + } + + if (s->use_acpi_root_pci_hotplug) { + return true; + } + return false; +} + static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) { AcpiPciHpState *s = opaque; @@ -454,7 +392,7 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) switch (addr) { case PCI_UP_BASE: val = s->acpi_pcihp_pci_status[bsel].up; - if (!s->legacy_piix) { + if (s->use_acpi_hotplug_bridge) { s->acpi_pcihp_pci_status[bsel].up = 0; } trace_acpi_pci_up_read(val); @@ -529,7 +467,8 @@ static void 
pci_write(void *opaque, hwaddr addr, uint64_t data, trace_acpi_pci_ej_write(addr, data); break; case PCI_SEL_BASE: - s->hotplug_select = s->legacy_piix ? ACPI_PCIHP_BSEL_DEFAULT : data; + s->hotplug_select = s->use_acpi_hotplug_bridge ? data : + ACPI_PCIHP_BSEL_DEFAULT; trace_acpi_pci_sel_write(addr, data); default: break; @@ -547,14 +486,13 @@ static const MemoryRegionOps acpi_pcihp_io_ops = { }; void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, - MemoryRegion *address_space_io, bool bridges_enabled, + MemoryRegion *address_space_io, uint16_t io_base) { s->io_len = ACPI_PCIHP_SIZE; s->io_base = io_base; s->root = root_bus; - s->legacy_piix = !bridges_enabled; memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s, "acpi-pci-hotplug", s->io_len); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index eac2125abd..63d2113b86 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -170,14 +170,14 @@ static const VMStateDescription vmstate_pci_status = { static bool vmstate_test_use_acpi_hotplug_bridge(void *opaque, int version_id) { PIIX4PMState *s = opaque; - return s->use_acpi_hotplug_bridge; + return s->acpi_pci_hotplug.use_acpi_hotplug_bridge; } static bool vmstate_test_no_use_acpi_hotplug_bridge(void *opaque, int version_id) { PIIX4PMState *s = opaque; - return !s->use_acpi_hotplug_bridge; + return !s->acpi_pci_hotplug.use_acpi_hotplug_bridge; } static bool vmstate_test_use_memhp(void *opaque) @@ -234,7 +234,8 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) { PIIX4PMState *s = PIIX4_PM(opaque); - return s->use_acpi_hotplug_bridge && !s->not_migrate_acpi_index; + return s->acpi_pci_hotplug.use_acpi_hotplug_bridge && + !s->not_migrate_acpi_index; } /* qemu-kvm 1.2 uses version 3 but advertised as 2 @@ -303,8 +304,9 @@ static void piix4_pm_reset(DeviceState *dev) acpi_update_sci(&s->ar, s->irq); pm_io_space_update(s); - if 
(s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) { - acpi_pcihp_reset(&s->acpi_pci_hotplug, !s->use_acpi_root_pci_hotplug); + if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge || + s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) { + acpi_pcihp_reset(&s->acpi_pci_hotplug); } } @@ -402,6 +404,13 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, } } +static bool piix4_is_hotpluggable_bus(HotplugHandler *hotplug_dev, + BusState *bus) +{ + PIIX4PMState *s = PIIX4_PM(hotplug_dev); + return acpi_pcihp_is_hotpluggbale_bus(&s->acpi_pci_hotplug, bus); +} + static void piix4_pm_machine_ready(Notifier *n, void *opaque) { PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready); @@ -487,12 +496,11 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp) qemu_add_machine_init_done_notifier(&s->machine_ready); if (xen_enabled()) { - s->use_acpi_hotplug_bridge = false; + s->acpi_pci_hotplug.use_acpi_hotplug_bridge = false; } piix4_acpi_system_hot_add_init(pci_address_space_io(dev), pci_get_bus(dev), s); - qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), OBJECT(s)); piix4_pm_add_properties(s); } @@ -561,9 +569,11 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, "acpi-gpe0", GPE_LEN); memory_region_add_subregion(parent, GPE_BASE, &s->io_gpe); - if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) { + if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge || + s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) { acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent, - s->use_acpi_hotplug_bridge, ACPI_PCIHP_ADDR_PIIX4); + ACPI_PCIHP_ADDR_PIIX4); + qbus_set_hotplug_handler(BUS(pci_get_bus(PCI_DEVICE(s))), OBJECT(s)); } s->cpu_hotplug_legacy = true; @@ -602,9 +612,9 @@ static Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, - 
use_acpi_hotplug_bridge, true), + acpi_pci_hotplug.use_acpi_hotplug_bridge, true), DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState, - use_acpi_root_pci_hotplug, true), + acpi_pci_hotplug.use_acpi_root_pci_hotplug, true), DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState, acpi_memory_hotplug.is_enabled, true), DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false), @@ -641,6 +651,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) hc->plug = piix4_device_plug_cb; hc->unplug_request = piix4_device_unplug_request_cb; hc->unplug = piix4_device_unplug_cb; + hc->is_hotpluggable_bus = piix4_is_hotpluggable_bus; adevc->ospm_status = piix4_ospm_status; adevc->send_event = piix4_send_gpe; adevc->madt_cpu = pc_madt_cpu_entry; diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 3edd303a33..b665d4f565 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -141,17 +141,19 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk) * Error status is RW1C but given bits are not yet set, it can * be handled as RO. 
*/ - reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0; + stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0); + stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff); /* Bits 12-13 and 17-31 reserved in CXL 2.0 */ - reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff; - write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff; - reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff; - write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff; - reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0; - reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f; - write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f; + stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff); + stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff); + stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff); + stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff); + stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0); + stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f); + stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f); + stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f); /* CXL switches and devices must set */ - reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00; + stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200); } static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index 3c1ec8732a..6e923ceeaf 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -146,21 +146,28 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) return NULL; } - hb_cstate = cxl_get_hb_cstate(hb); - if (!hb_cstate) { - return NULL; - } + if (cxl_get_hb_passthrough(hb)) { + rp = pcie_find_port_first(hb->bus); + if (!rp) { + return NULL; + } + } else { + hb_cstate = cxl_get_hb_cstate(hb); + if (!hb_cstate) { + return NULL; + } - cache_mem = hb_cstate->crb.cache_mem_registers; + cache_mem = hb_cstate->crb.cache_mem_registers; - target_found = cxl_hdm_find_target(cache_mem, addr, &target); - if (!target_found) { - return NULL; - } + target_found = cxl_hdm_find_target(cache_mem, addr, &target); + if 
(!target_found) { + return NULL; + } - rp = pcie_find_port_by_pn(hb->bus, target); - if (!rp) { - return NULL; + rp = pcie_find_port_by_pn(hb->bus, target); + if (!rp) { + return NULL; + } } d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0]; diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index b19fb4259e..ec857a117e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -373,6 +373,104 @@ Aml *aml_pci_device_dsm(void) return method; } +static void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar) +{ + Aml *UUID, *ifctx1; + uint8_t byte_list[1] = { 0 }; /* nothing supported yet */ + + aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar)); + /* + * PCI Firmware Specification 3.1 + * 4.6. _DSM Definitions for PCI + */ + UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); + ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID))); + { + /* call is for unsupported UUID, bail out */ + aml_append(ifctx1, aml_return(retvar)); + } + aml_append(ctx, ifctx1); + + ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2))); + { + /* call is for unsupported REV, bail out */ + aml_append(ifctx1, aml_return(retvar)); + } + aml_append(ctx, ifctx1); +} + +static Aml *aml_pci_edsm(void) +{ + Aml *method, *ifctx; + Aml *zero = aml_int(0); + Aml *func = aml_arg(2); + Aml *ret = aml_local(0); + Aml *aidx = aml_local(1); + Aml *params = aml_arg(4); + + method = aml_method("EDSM", 5, AML_SERIALIZED); + + /* get supported functions */ + ifctx = aml_if(aml_equal(func, zero)); + { + /* 1: have supported functions */ + /* 7: support for function 7 */ + const uint8_t caps = 1 | BIT(7); + build_append_pci_dsm_func0_common(ifctx, ret); + aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero))); + aml_append(ifctx, aml_return(ret)); + } + aml_append(method, ifctx); + + /* handle specific functions requests */ + /* + * PCI Firmware Specification 3.1 + * 4.6.7. 
_DSM for Naming a PCI or PCI Express Device Under + * Operating Systems + */ + ifctx = aml_if(aml_equal(func, aml_int(7))); + { + Aml *pkg = aml_package(2); + aml_append(pkg, zero); + /* optional, if not impl. should return null string */ + aml_append(pkg, aml_string("%s", "")); + aml_append(ifctx, aml_store(pkg, ret)); + + /* + * IASL is fine when initializing Package with computational data, + * however it makes guest unhappy /it fails to process such AML/. + * So use runtime assignment to set acpi-index after initializer + * to make OSPM happy. + */ + aml_append(ifctx, + aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx)); + aml_append(ifctx, aml_store(aidx, aml_index(ret, zero))); + aml_append(ifctx, aml_return(ret)); + } + aml_append(method, ifctx); + + return method; +} + +static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev) +{ + Aml *method; + + g_assert(pdev->acpi_index != 0); + method = aml_method("_DSM", 4, AML_SERIALIZED); + { + Aml *params = aml_local(0); + Aml *pkg = aml_package(1); + aml_append(pkg, aml_int(pdev->acpi_index)); + aml_append(method, aml_store(pkg, params)); + aml_append(method, + aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1), + aml_arg(2), aml_arg(3), params)) + ); + } + return method; +} + static void build_append_pcihp_notify_entry(Aml *method, int slot) { Aml *if_ctx; @@ -396,12 +494,6 @@ static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus) if (DEVICE(pdev)->hotplugged) { return true; } - } else if (!get_dev_aml_func(DEVICE(pdev))) { - /* - * Ignore all other devices on !0 functions unless they - * have AML description (i.e have get_dev_aml_func() != 0) - */ - return true; } } return false; @@ -428,12 +520,14 @@ static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus) return false; } -static void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus, - QObject *bsel) +void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus) { int devfn; Aml *dev, *notify_method = NULL, 
*method; + QObject *bsel = object_property_get_qobject(OBJECT(bus), + ACPI_PCIHP_PROP_BSEL, NULL); uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel)); + qobject_unref(bsel); aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val))); notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED); @@ -478,12 +572,9 @@ static void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus, void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus) { - QObject *bsel; int devfn; Aml *dev; - bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); - for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */ int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn); @@ -498,16 +589,16 @@ void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus) aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); call_dev_aml_func(DEVICE(bus->devices[devfn]), dev); + /* add _DSM if device has acpi-index set */ + if (pdev->acpi_index && + !object_property_get_bool(OBJECT(pdev), "hotpluggable", + &error_abort)) { + aml_append(dev, aml_pci_static_endpoint_dsm(pdev)); + } /* device descriptor has been composed, add it into parent context */ aml_append(parent_scope, dev); } - - if (bsel) { - build_append_pcihp_slots(parent_scope, bus, bsel); - } - - qobject_unref(bsel); } static bool build_append_notfication_callback(Aml *parent_scope, @@ -517,16 +608,24 @@ static bool build_append_notfication_callback(Aml *parent_scope, PCIBus *sec; QObject *bsel; int nr_notifiers = 0; + GQueue *pcnt_bus_list = g_queue_new(); QLIST_FOREACH(sec, &bus->child, sibling) { Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn); - if (pci_bus_is_root(sec) || - !object_property_find(OBJECT(sec), ACPI_PCIHP_PROP_BSEL)) { + if (pci_bus_is_root(sec)) { continue; } nr_notifiers = nr_notifiers + build_append_notfication_callback(br_scope, sec); - aml_append(parent_scope, br_scope); + /* + * add new child scope to 
parent + * and keep track of bus that have PCNT, + * bus list is used later to call children PCNTs from this level PCNT + */ + if (nr_notifiers) { + g_queue_push_tail(pcnt_bus_list, sec); + aml_append(parent_scope, br_scope); + } } /* @@ -550,30 +649,25 @@ static bool build_append_notfication_callback(Aml *parent_scope, } /* Notify about child bus events in any case */ - QLIST_FOREACH(sec, &bus->child, sibling) { - if (pci_bus_is_root(sec) || - !object_property_find(OBJECT(sec), ACPI_PCIHP_PROP_BSEL)) { - continue; - } - + while ((sec = g_queue_pop_head(pcnt_bus_list))) { aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn)); } aml_append(parent_scope, method); qobject_unref(bsel); + g_queue_free(pcnt_bus_list); return !!nr_notifiers; } static Aml *aml_pci_pdsm(void) { - Aml *method, *UUID, *ifctx, *ifctx1; + Aml *method, *ifctx, *ifctx1; Aml *ret = aml_local(0); Aml *caps = aml_local(1); Aml *acpi_index = aml_local(2); Aml *zero = aml_int(0); Aml *one = aml_int(1); Aml *func = aml_arg(2); - Aml *rev = aml_arg(1); Aml *params = aml_arg(4); Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); Aml *sunum = aml_derefof(aml_index(params, aml_int(1))); @@ -583,29 +677,9 @@ static Aml *aml_pci_pdsm(void) /* get supported functions */ ifctx = aml_if(aml_equal(func, zero)); { - uint8_t byte_list[1] = { 0 }; /* nothing supported yet */ - aml_append(ifctx, aml_store(aml_buffer(1, byte_list), ret)); - aml_append(ifctx, aml_store(zero, caps)); - - /* - * PCI Firmware Specification 3.1 - * 4.6. 
_DSM Definitions for PCI - */ - UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); - ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID))); - { - /* call is for unsupported UUID, bail out */ - aml_append(ifctx1, aml_return(ret)); - } - aml_append(ifctx, ifctx1); - - ifctx1 = aml_if(aml_lless(rev, aml_int(2))); - { - /* call is for unsupported REV, bail out */ - aml_append(ifctx1, aml_return(ret)); - } - aml_append(ifctx, ifctx1); + build_append_pci_dsm_func0_common(ifctx, ret); + aml_append(ifctx, aml_store(zero, caps)); aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); /* @@ -1388,6 +1462,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); + aml_append(dev, aml_pci_edsm()); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); @@ -1403,6 +1478,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); + aml_append(dev, aml_pci_edsm()); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1710,6 +1786,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, Aml *scope = aml_scope("PCI0"); /* Scan all PCI buses. Generate tables to support hotplug. 
*/ build_append_pci_bus_devices(scope, bus); + if (object_property_find(OBJECT(bus), ACPI_PCIHP_PROP_BSEL)) { + build_append_pcihp_slots(scope, bus); + } aml_append(sb_scope, scope); } } diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index d8303d0322..9714b0001e 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -865,6 +865,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) hc->plug = ich9_pm_device_plug_cb; hc->unplug_request = ich9_pm_device_unplug_request_cb; hc->unplug = ich9_pm_device_unplug_cb; + hc->is_hotpluggable_bus = ich9_pm_is_hotpluggable_bus; adevc->ospm_status = ich9_pm_ospm_status; adevc->send_event = ich9_send_gpe; adevc->madt_cpu = pc_madt_cpu_entry; diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 217a5e639b..abe60b362c 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qemu/error-report.h" +#include "qapi/qapi-commands-cxl.h" #include "hw/mem/memory-device.h" #include "hw/mem/pc-dimm.h" #include "hw/pci/pci.h" @@ -250,6 +251,7 @@ static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val, pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size); pci_default_write_config(pci_dev, addr, val, size); + pcie_aer_write_config(pci_dev, addr, val, size); } /* @@ -322,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); } +static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) +{ + switch (qmp_err) { + case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY: + return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY; + case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY: + return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY; + case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY: + return CXL_RAS_UNC_ERR_CACHE_BE_PARITY; + case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC: + return CXL_RAS_UNC_ERR_CACHE_DATA_ECC; + case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY: + return 
CXL_RAS_UNC_ERR_MEM_DATA_PARITY; + case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY: + return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY; + case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY: + return CXL_RAS_UNC_ERR_MEM_BE_PARITY; + case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC: + return CXL_RAS_UNC_ERR_MEM_DATA_ECC; + case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD: + return CXL_RAS_UNC_ERR_REINIT_THRESHOLD; + case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING: + return CXL_RAS_UNC_ERR_RSVD_ENCODING; + case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED: + return CXL_RAS_UNC_ERR_POISON_RECEIVED; + case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW: + return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW; + case CXL_UNCOR_ERROR_TYPE_INTERNAL: + return CXL_RAS_UNC_ERR_INTERNAL; + case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX: + return CXL_RAS_UNC_ERR_CXL_IDE_TX; + case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX: + return CXL_RAS_UNC_ERR_CXL_IDE_RX; + default: + return -EINVAL; + } +} + +static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err) +{ + switch (qmp_err) { + case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC: + return CXL_RAS_COR_ERR_CACHE_DATA_ECC; + case CXL_COR_ERROR_TYPE_MEM_DATA_ECC: + return CXL_RAS_COR_ERR_MEM_DATA_ECC; + case CXL_COR_ERROR_TYPE_CRC_THRESHOLD: + return CXL_RAS_COR_ERR_CRC_THRESHOLD; + case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD: + return CXL_RAS_COR_ERR_RETRY_THRESHOLD; + case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED: + return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED; + case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED: + return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED; + case CXL_COR_ERROR_TYPE_PHYSICAL: + return CXL_RAS_COR_ERR_PHYSICAL; + default: + return -EINVAL; + } +} + static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -340,6 +402,83 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); which_hdm = 0; break; + case A_CXL_RAS_UNC_ERR_STATUS: + { + uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL); + 
uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER); + CXLError *cxl_err; + uint32_t unc_err; + + /* + * If single bit written that corresponds to the first error + * pointer being cleared, update the status and header log. + */ + if (!QTAILQ_EMPTY(&ct3d->error_list)) { + if ((1 << fe) ^ value) { + CXLError *cxl_next; + /* + * Software is using wrong flow for multiple header recording + * Following behavior in PCIe r6.0 and assuming multiple + * header support. Implementation defined choice to clear all + * matching records if more than one bit set - which corresponds + * closest to behavior of hardware not capable of multiple + * header recording. + */ + QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node, cxl_next) { + if ((1 << cxl_err->type) & value) { + QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node); + g_free(cxl_err); + } + } + } else { + /* Done with previous FE, so drop from list */ + cxl_err = QTAILQ_FIRST(&ct3d->error_list); + QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node); + g_free(cxl_err); + } + + /* + * If there is another FE, then put that in place and update + * the header log + */ + if (!QTAILQ_EMPTY(&ct3d->error_list)) { + uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0]; + int i; + + cxl_err = QTAILQ_FIRST(&ct3d->error_list); + for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) { + stl_le_p(header_log + i, cxl_err->header[i]); + } + capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL, + FIRST_ERROR_POINTER, cxl_err->type); + } else { + /* + * If no more errors, then follow recommendation of PCI spec + * r6.0 6.2.4.2 to set the first error pointer to a status + * bit that will never be used. 
+ */ + capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL, + FIRST_ERROR_POINTER, + CXL_RAS_UNC_ERR_CXL_UNUSED); + } + stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl); + } + unc_err = 0; + QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) { + unc_err |= 1 << cxl_err->type; + } + stl_le_p((uint8_t *)cache_mem + offset, unc_err); + + return; + } + case A_CXL_RAS_COR_ERR_STATUS: + { + uint32_t rw1c = value; + uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset); + temp &= ~rw1c; + stl_le_p((uint8_t *)cache_mem + offset, temp); + return; + } default: break; } @@ -403,6 +542,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) unsigned short msix_num = 1; int i, rc; + QTAILQ_INIT(&ct3d->error_list); + if (!cxl_setup_memory(ct3d, errp)) { return; } @@ -452,8 +593,19 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table; cxl_cstate->cdat.private = ct3d; cxl_doe_cdat_init(cxl_cstate, errp); + + pcie_cap_deverr_init(pci_dev); + /* Leave a bit of room for expansion */ + rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL); + if (rc) { + goto err_release_cdat; + } + return; +err_release_cdat: + cxl_doe_cdat_release(cxl_cstate); + g_free(regs->special_ops); err_address_space_free: address_space_destroy(&ct3d->hostmem_as); return; @@ -465,6 +617,7 @@ static void ct3_exit(PCIDevice *pci_dev) CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; ComponentRegisters *regs = &cxl_cstate->crb; + pcie_aer_exit(pci_dev); cxl_doe_cdat_release(cxl_cstate); g_free(regs->special_ops); address_space_destroy(&ct3d->hostmem_as); @@ -618,6 +771,147 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, */ } +/* For uncorrectable errors include support for multiple header recording */ +void qmp_cxl_inject_uncorrectable_errors(const char *path, + CXLUncorErrorRecordList *errors, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + static PCIEAERErr err = 
{}; + CXLType3Dev *ct3d; + CXLError *cxl_err; + uint32_t *reg_state; + uint32_t unc_err; + bool first; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + + err.status = PCI_ERR_UNC_INTN; + err.source_id = pci_requester_id(PCI_DEVICE(obj)); + err.flags = 0; + + ct3d = CXL_TYPE3(obj); + + first = QTAILQ_EMPTY(&ct3d->error_list); + reg_state = ct3d->cxl_cstate.crb.cache_mem_registers; + while (errors) { + uint32List *header = errors->value->header; + uint8_t header_count = 0; + int cxl_err_code; + + cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type); + if (cxl_err_code < 0) { + error_setg(errp, "Unknown error code"); + return; + } + + /* If the error is masked, nothing to do here */ + if (!((1 << cxl_err_code) & + ~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) { + errors = errors->next; + continue; + } + + cxl_err = g_malloc0(sizeof(*cxl_err)); + if (!cxl_err) { + return; + } + + cxl_err->type = cxl_err_code; + while (header && header_count < 32) { + cxl_err->header[header_count++] = header->value; + header = header->next; + } + if (header_count > 32) { + error_setg(errp, "Header must be 32 DWORD or less"); + return; + } + QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node); + + errors = errors->next; + } + + if (first && !QTAILQ_EMPTY(&ct3d->error_list)) { + uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; + uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL); + uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0]; + int i; + + cxl_err = QTAILQ_FIRST(&ct3d->error_list); + for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) { + stl_le_p(header_log + i, cxl_err->header[i]); + } + + capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL, + FIRST_ERROR_POINTER, cxl_err->type); + stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl); + } + + unc_err = 0; + QTAILQ_FOREACH(cxl_err, 
&ct3d->error_list, node) { + unc_err |= (1 << cxl_err->type); + } + if (!unc_err) { + return; + } + + stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err); + pcie_aer_inject_error(PCI_DEVICE(obj), &err); + + return; +} + +void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, + Error **errp) +{ + static PCIEAERErr err = {}; + Object *obj = object_resolve_path(path, NULL); + CXLType3Dev *ct3d; + uint32_t *reg_state; + uint32_t cor_err; + int cxl_err_type; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + + err.status = PCI_ERR_COR_INTERNAL; + err.source_id = pci_requester_id(PCI_DEVICE(obj)); + err.flags = PCIE_AER_ERR_IS_CORRECTABLE; + + ct3d = CXL_TYPE3(obj); + reg_state = ct3d->cxl_cstate.crb.cache_mem_registers; + cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS); + + cxl_err_type = ct3d_qmp_cor_err_to_cxl(type); + if (cxl_err_type < 0) { + error_setg(errp, "Invalid COR error"); + return; + } + /* If the error is masked, nothing to do here */ + if (!((1 << cxl_err_type) & ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) { + return; + } + + cor_err |= (1 << cxl_err_type); + stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err); + + pcie_aer_inject_error(PCI_DEVICE(obj), &err); +} + static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c new file mode 100644 index 0000000000..d574c58f9a --- /dev/null +++ b/hw/mem/cxl_type3_stubs.c @@ -0,0 +1,17 @@ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-cxl.h" + +void qmp_cxl_inject_uncorrectable_errors(const char *path, + CXLUncorErrorRecordList *errors, + Error **errp) +{ + error_setg(errp, "CXL Type 3 support is not compiled in"); +} + +void qmp_cxl_inject_correctable_error(const char *path, 
CxlCorErrorType type, + Error **errp) +{ + error_setg(errp, "CXL Type 3 support is not compiled in"); +} diff --git a/hw/mem/meson.build b/hw/mem/meson.build index 609b2b36fc..56c2618b84 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -4,6 +4,8 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) +softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c')) +softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c')) softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c index 6664783974..7dfd20aa67 100644 --- a/hw/pci-bridge/cxl_root_port.c +++ b/hw/pci-bridge/cxl_root_port.c @@ -22,6 +22,7 @@ #include "qemu/range.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pcie_port.h" +#include "hw/pci/msi.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qapi/error.h" @@ -29,6 +30,10 @@ #define CXL_ROOT_PORT_DID 0x7075 +#define CXL_RP_MSI_OFFSET 0x60 +#define CXL_RP_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_MASKBIT +#define CXL_RP_MSI_NR_VECTOR 2 + /* Copied from the gen root port which we derive */ #define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100 #define GEN_PCIE_ROOT_PORT_ACS_OFFSET \ @@ -47,6 +52,49 @@ typedef struct CXLRootPort { #define TYPE_CXL_ROOT_PORT "cxl-rp" DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT) +/* + * If two MSI vectors are allocated, Advanced Error Interrupt Message Number + * is 1, otherwise 0. + * 17.12.5.10 RPERRSTS, 32:27 bit Advanced Error Interrupt Message Number. 
+ */ +static uint8_t cxl_rp_aer_vector(const PCIDevice *d) +{ + switch (msi_nr_vectors_allocated(d)) { + case 1: + return 0; + case 2: + return 1; + case 4: + case 8: + case 16: + case 32: + default: + break; + } + abort(); + return 0; +} + +static int cxl_rp_interrupts_init(PCIDevice *d, Error **errp) +{ + int rc; + + rc = msi_init(d, CXL_RP_MSI_OFFSET, CXL_RP_MSI_NR_VECTOR, + CXL_RP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT, + CXL_RP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT, + errp); + if (rc < 0) { + assert(rc == -ENOTSUP); + } + + return rc; +} + +static void cxl_rp_interrupts_uninit(PCIDevice *d) +{ + msi_uninit(d); +} + static void latch_registers(CXLRootPort *crp) { uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers; @@ -183,16 +231,29 @@ static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr, } } +static void cxl_rp_aer_vector_update(PCIDevice *d) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d); + + if (rpc->aer_vector) { + pcie_aer_root_set_vector(d, rpc->aer_vector(d)); + } +} + static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { uint16_t slt_ctl, slt_sta; + uint32_t root_cmd = + pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND); pcie_cap_slot_get(d, &slt_ctl, &slt_sta); pci_bridge_write_config(d, address, val, len); + cxl_rp_aer_vector_update(d); pcie_cap_flr_write_config(d, address, val, len); pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); pcie_aer_write_config(d, address, val, len); + pcie_aer_root_write_config(d, address, val, len, root_cmd); cxl_rp_dvsec_write_config(d, address, val, len); } @@ -217,6 +278,9 @@ static void cxl_root_port_class_init(ObjectClass *oc, void *data) rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET; rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET; + rpc->aer_vector = cxl_rp_aer_vector; + rpc->interrupts_init = cxl_rp_interrupts_init; + rpc->interrupts_uninit = cxl_rp_interrupts_uninit; dc->hotpluggable = false; } diff 
--git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index e752a21292..ead33f0c05 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -15,6 +15,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" +#include "hw/pci/pcie_port.h" #include "hw/qdev-properties.h" #include "hw/pci/pci_bridge.h" #include "hw/pci-bridge/pci_expander_bridge.h" @@ -79,6 +80,13 @@ CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb) return &host->cxl_cstate; } +bool cxl_get_hb_passthrough(PCIHostState *hb) +{ + CXLHost *host = PXB_CXL_HOST(hb); + + return host->passthrough; +} + static int pxb_bus_num(PCIBus *bus) { PXBDev *pxb = convert_to_pxb(bus->parent_dev); @@ -289,15 +297,32 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) return pin - PCI_SLOT(pxb->devfn); } -static void pxb_dev_reset(DeviceState *dev) +static void pxb_cxl_dev_reset(DeviceState *dev) { CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge; CXLComponentState *cxl_cstate = &cxl->cxl_cstate; + PCIHostState *hb = PCI_HOST_BRIDGE(cxl); uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask; + int dsp_count = 0; cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT); - ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8); + /* + * The CXL specification allows for host bridges with no HDM decoders + * if they only have a single root port. 
+ */ + if (!PXB_DEV(dev)->hdm_for_passthrough) { + dsp_count = pcie_count_ds_ports(hb->bus); + } + /* Initial reset will have 0 dsp so wait until > 0 */ + if (dsp_count == 1) { + cxl->passthrough = true; + /* Set Capability ID in header to NONE */ + ARRAY_FIELD_DP32(reg_state, CXL_HDM_CAPABILITY_HEADER, ID, 0); + } else { + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, + 8); + } } static gint pxb_compare(gconstpointer a, gconstpointer b) @@ -481,9 +506,18 @@ static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp) } pxb_dev_realize_common(dev, CXL, errp); - pxb_dev_reset(DEVICE(dev)); + pxb_cxl_dev_reset(DEVICE(dev)); } +static Property pxb_cxl_dev_properties[] = { + /* Note: 0 is not a legal PXB bus number. */ + DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), + DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), + DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false), + DEFINE_PROP_BOOL("hdm_for_passthrough", PXBDev, hdm_for_passthrough, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -497,12 +531,12 @@ static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data) */ dc->desc = "CXL Host Bridge"; - device_class_set_props(dc, pxb_dev_properties); + device_class_set_props(dc, pxb_cxl_dev_properties); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); /* Host bridges aren't hotpluggable. 
FIXME: spec reference */ dc->hotpluggable = false; - dc->reset = pxb_dev_reset; + dc->reset = pxb_cxl_dev_reset; } static const TypeInfo pxb_cxl_dev_info = { diff --git a/hw/pci/pci-internal.h b/hw/pci/pci-internal.h index 2ea356bdf5..a7d6d8a732 100644 --- a/hw/pci/pci-internal.h +++ b/hw/pci/pci-internal.h @@ -20,6 +20,5 @@ void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); int pcie_aer_parse_error_string(const char *error_name, uint32_t *status, bool *correctable); -int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err); #endif diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 034fe49e9a..def5000e7b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -95,6 +95,21 @@ static const VMStateDescription vmstate_pcibus = { } }; +static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data) +{ + return a - b; +} + +static GSequence *pci_acpi_index_list(void) +{ + static GSequence *used_acpi_index_list; + + if (!used_acpi_index_list) { + used_acpi_index_list = g_sequence_new(NULL); + } + return used_acpi_index_list; +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -1246,6 +1261,17 @@ static void pci_qdev_unrealize(DeviceState *dev) do_pci_unregister_device(pci_dev); pci_dev->msi_trigger = NULL; + + /* + * clean up acpi-index so it could reused by another device + */ + if (pci_dev->acpi_index) { + GSequence *used_indexes = pci_acpi_index_list(); + + g_sequence_remove(g_sequence_lookup(used_indexes, + GINT_TO_POINTER(pci_dev->acpi_index), + g_cmp_uint32, NULL)); + } } void pci_register_bar(PCIDevice *pci_dev, int region_num, @@ -2005,6 +2031,8 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) return bus->devices[devfn]; } +#define ONBOARD_INDEX_MAX (16 * 1024 - 1) + static void pci_qdev_realize(DeviceState *qdev, Error **errp) { PCIDevice *pci_dev = (PCIDevice *)qdev; @@ -2014,6 +2042,35 @@ static void pci_qdev_realize(DeviceState *qdev, 
Error **errp) bool is_default_rom; uint16_t class_id; + /* + * capped by systemd (see: udev-builtin-net_id.c) + * as it's the only known user honor it to avoid users + * misconfigure QEMU and then wonder why acpi-index doesn't work + */ + if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) { + error_setg(errp, "acpi-index should be less or equal to %u", + ONBOARD_INDEX_MAX); + return; + } + + /* + * make sure that acpi-index is unique across all present PCI devices + */ + if (pci_dev->acpi_index) { + GSequence *used_indexes = pci_acpi_index_list(); + + if (g_sequence_lookup(used_indexes, + GINT_TO_POINTER(pci_dev->acpi_index), + g_cmp_uint32, NULL)) { + error_setg(errp, "a PCI device with acpi-index = %" PRIu32 + " already exist", pci_dev->acpi_index); + return; + } + g_sequence_insert_sorted(used_indexes, + GINT_TO_POINTER(pci_dev->acpi_index), + g_cmp_uint32, NULL); + } + if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) { error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize); return; diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index 9a19be44ae..103667c368 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, PCI_ERR_UNC_SUPPORTED); + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, + PCI_ERR_UNC_MASK_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, + PCI_ERR_UNC_SUPPORTED); pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, PCI_ERR_UNC_SEVERITY_DEFAULT); @@ -188,8 +192,16 @@ static void pcie_aer_update_uncor_status(PCIDevice *dev) static bool pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg) { + uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap + + PCI_EXP_DEVCTL); if (!(pcie_aer_msg_is_uncor(msg) && - (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) { + (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) && + 
!((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) && + (devctl & PCI_EXP_DEVCTL_NFERE)) && + !((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) && + (devctl & PCI_EXP_DEVCTL_CERE)) && + !((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) && + (devctl & PCI_EXP_DEVCTL_FERE))) { return false; } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 65a397ad23..20ff2b39e8 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -161,6 +161,51 @@ PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn) return NULL; } +/* Find first port in devfn number order */ +PCIDevice *pcie_find_port_first(PCIBus *bus) +{ + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + + if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + return d; + } + } + + return NULL; +} + +int pcie_count_ds_ports(PCIBus *bus) +{ + int dsp_count = 0; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + dsp_count++; + } + } + return dsp_count; +} + +static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler, + BusState *bus) +{ + PCIESlot *s = PCIE_SLOT(bus->parent); + return s->hotplug; +} + static const TypeInfo pcie_port_type_info = { .name = TYPE_PCIE_PORT, .parent = TYPE_PCI_BRIDGE, @@ -188,6 +233,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data) hc->plug = pcie_cap_slot_plug_cb; hc->unplug = pcie_cap_slot_unplug_cb; hc->unplug_request = pcie_cap_slot_unplug_request_cb; + hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus; } static const TypeInfo pcie_slot_type_info = { diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index a87c5f39a2..8f8d05cf9b 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -50,6 +50,7 @@ 
vhost_vdpa_set_vring_ready(void *dev) "dev: %p" vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +vhost_vdpa_suspend(void *dev) "dev: %p" vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d" vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p" vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64 diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 515ccf870d..8361e70d1b 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -694,13 +694,17 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) g_autofree VirtQueueElement *elem = NULL; elem = g_steal_pointer(&svq->desc_state[i].elem); if (elem) { - virtqueue_detach_element(svq->vq, elem, 0); + /* + * TODO: This is ok for networking, but other kinds of devices + * might have problems with just unpop these. 
+ */ + virtqueue_unpop(svq->vq, elem, 0); } } next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem); if (next_avail_elem) { - virtqueue_detach_element(svq->vq, next_avail_elem, 0); + virtqueue_unpop(svq->vq, next_avail_elem, 0); } svq->vq = NULL; g_free(svq->desc_next); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 8968541514..e5285df4ba 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2031,8 +2031,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } else { if (virtio_has_feature(protocol_features, VHOST_USER_PROTOCOL_F_CONFIG)) { - warn_reportf_err(*errp, "vhost-user backend supports " - "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); + warn_report("vhost-user backend supports " + "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); } } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index df3a1e92ac..bc6bad23d5 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -431,16 +431,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) trace_vhost_vdpa_init(dev, opaque); int ret; - /* - * Similar to VFIO, we end up pinning all guest memory and have to - * disable discarding of RAM. 
- */ - ret = ram_block_discard_disable(true); - if (ret) { - error_report("Cannot set discarding of RAM broken"); - return ret; - } - v = opaque; v->dev = dev; dev->opaque = opaque ; @@ -448,10 +438,36 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->msg_type = VHOST_IOTLB_MSG_V2; vhost_vdpa_init_svq(dev, v); + error_propagate(&dev->migration_blocker, v->migration_blocker); if (!vhost_vdpa_first_dev(dev)) { return 0; } + /* + * If dev->shadow_vqs_enabled at initialization that means the device has + * been started with x-svq=on, so don't block migration + */ + if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) { + /* We don't have dev->features yet */ + uint64_t features; + ret = vhost_vdpa_get_dev_features(dev, &features); + if (unlikely(ret)) { + error_setg_errno(errp, -ret, "Could not get device features"); + return ret; + } + vhost_svq_valid_features(features, &dev->migration_blocker); + } + + /* + * Similar to VFIO, we end up pinning all guest memory and have to + * disable discarding of RAM. 
+ */ + ret = ram_block_discard_disable(true); + if (ret) { + error_report("Cannot set discarding of RAM broken"); + return ret; + } + vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); @@ -577,12 +593,15 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev) assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); v = dev->opaque; trace_vhost_vdpa_cleanup(dev, v); + if (vhost_vdpa_first_dev(dev)) { + ram_block_discard_disable(false); + } + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); memory_listener_unregister(&v->listener); vhost_vdpa_svq_cleanup(dev); dev->opaque = NULL; - ram_block_discard_disable(false); return 0; } @@ -659,7 +678,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) uint64_t features; uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | - 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | + 0x1ULL << VHOST_BACKEND_F_SUSPEND; int r; if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { @@ -691,11 +711,13 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, static int vhost_vdpa_reset_device(struct vhost_dev *dev) { + struct vhost_vdpa *v = dev->opaque; int ret; uint8_t status = 0; ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev, status); + v->suspended = false; return ret; } @@ -1094,6 +1116,29 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) } } +static void vhost_vdpa_suspend(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + int r; + + if (!vhost_vdpa_first_dev(dev)) { + return; + } + + if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) { + trace_vhost_vdpa_suspend(dev); + r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); + if (unlikely(r)) { + error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno); + } else { + v->suspended = true; + return; + } + } + + vhost_vdpa_reset_device(dev); +} + static int 
vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) { struct vhost_vdpa *v = dev->opaque; @@ -1108,6 +1153,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) } vhost_vdpa_set_vring_ready(dev); } else { + vhost_vdpa_suspend(dev); vhost_vdpa_svqs_stop(dev); vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } @@ -1119,14 +1165,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) if (started) { memory_listener_register(&v->listener, &address_space_memory); return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); - } else { - vhost_vdpa_reset_device(dev); - vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | - VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->listener); + } - return 0; + return 0; +} + +static void vhost_vdpa_reset_status(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return; } + + vhost_vdpa_reset_device(dev); + vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER); + memory_listener_unregister(&v->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, @@ -1169,18 +1224,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { struct vhost_vdpa *v = dev->opaque; - VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); - /* - * vhost-vdpa devices does not support in-flight requests. Set all of them - * as available. - * - * TODO: This is ok for networking, but other kinds of devices might - * have problems with these retransmissions. - */ - while (virtqueue_rewind(vq, 1)) { - continue; - } if (v->shadow_vqs_enabled) { /* * Device vring base was set at device start. SVQ base is handled by @@ -1203,6 +1247,14 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, return 0; } + if (!v->suspended) { + /* + * Cannot trust in value returned by device, let vhost recover used + * idx from guest. 
+ */ + return -1; + } + ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); return ret; @@ -1227,25 +1279,24 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, struct vhost_vring_file *file) { struct vhost_vdpa *v = dev->opaque; + int vdpa_idx = file->index - dev->vq_index; + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); + /* Remember last call fd because we can switch to SVQ anytime. */ + vhost_svq_set_svq_call_fd(svq, file->fd); if (v->shadow_vqs_enabled) { - int vdpa_idx = file->index - dev->vq_index; - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); - - vhost_svq_set_svq_call_fd(svq, file->fd); return 0; - } else { - return vhost_vdpa_set_vring_dev_call(dev, file); } + + return vhost_vdpa_set_vring_dev_call(dev, file); } static int vhost_vdpa_get_features(struct vhost_dev *dev, uint64_t *features) { - struct vhost_vdpa *v = dev->opaque; int ret = vhost_vdpa_get_dev_features(dev, features); - if (ret == 0 && v->shadow_vqs_enabled) { + if (ret == 0) { /* Add SVQ logging capabilities */ *features |= BIT_ULL(VHOST_F_LOG_ALL); } @@ -1313,4 +1364,5 @@ const VhostOps vdpa_ops = { .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, .vhost_force_iommu = vhost_vdpa_force_iommu, .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index eb8c4c378c..a266396576 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) hdev->vqs + i, hdev->vq_index + i); } + if (hdev->vhost_ops->vhost_reset_status) { + hdev->vhost_ops->vhost_reset_status(hdev); + } if (vhost_dev_has_iommu(hdev)) { if (hdev->vhost_ops->vhost_set_iotlb_callback) { diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 516425e26a..802e1b9659 100644 --- a/hw/virtio/virtio-crypto.c 
+++ b/hw/virtio/virtio-crypto.c @@ -462,7 +462,7 @@ static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq, req->in_iov = NULL; req->in_num = 0; req->in_len = 0; - req->flags = CRYPTODEV_BACKEND_ALG__MAX; + req->flags = QCRYPTODEV_BACKEND_ALG__MAX; memset(&req->op_info, 0x00, sizeof(req->op_info)); } @@ -472,7 +472,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req) return; } - if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) { size_t max_len; CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info; @@ -485,7 +485,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req) /* Zeroize and free request data structure */ memset(op_info, 0, sizeof(*op_info) + max_len); g_free(op_info); - } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) { + } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) { CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info; if (op_info) { g_free(op_info->src); @@ -570,10 +570,10 @@ static void virtio_crypto_req_complete(void *opaque, int ret) VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); uint8_t status = -ret; - if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) { virtio_crypto_sym_input_data_helper(vdev, req, status, req->op_info.u.sym_op_info); - } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) { + } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) { virtio_crypto_akcipher_input_data_helper(vdev, req, status, req->op_info.u.asym_op_info); } @@ -871,11 +871,14 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) opcode = ldl_le_p(&req.header.opcode); op_info->session_id = ldq_le_p(&req.header.session_id); op_info->op_code = opcode; + op_info->queue_index = queue_index; + op_info->cb = virtio_crypto_req_complete; + op_info->opaque = request; switch (opcode) { case VIRTIO_CRYPTO_CIPHER_ENCRYPT: case VIRTIO_CRYPTO_CIPHER_DECRYPT: - op_info->algtype = request->flags = 
CRYPTODEV_BACKEND_ALG_SYM; + op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_SYM; ret = virtio_crypto_handle_sym_req(vcrypto, &req.u.sym_req, op_info, out_iov, out_num); @@ -885,7 +888,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) case VIRTIO_CRYPTO_AKCIPHER_DECRYPT: case VIRTIO_CRYPTO_AKCIPHER_SIGN: case VIRTIO_CRYPTO_AKCIPHER_VERIFY: - op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_ASYM; + op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_ASYM; ret = virtio_crypto_handle_asym_req(vcrypto, &req.u.akcipher_req, op_info, out_iov, out_num); @@ -898,9 +901,7 @@ check_result: virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP); } else { ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev, - request, queue_index, - virtio_crypto_req_complete, - request); + op_info); if (ret < 0) { virtio_crypto_req_complete(request, ret); } @@ -997,12 +998,35 @@ static void virtio_crypto_reset(VirtIODevice *vdev) } } +static uint32_t virtio_crypto_init_services(uint32_t qservices) +{ + uint32_t vservices = 0; + + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_CIPHER)) { + vservices |= (1 << VIRTIO_CRYPTO_SERVICE_CIPHER); + } + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_HASH)) { + vservices |= (1 << VIRTIO_CRYPTO_SERVICE_HASH); + } + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_MAC)) { + vservices |= (1 << VIRTIO_CRYPTO_SERVICE_MAC); + } + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AEAD)) { + vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AEAD); + } + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER)) { + vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AKCIPHER); + } + + return vservices; +} + static void virtio_crypto_init_config(VirtIODevice *vdev) { VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); - vcrypto->conf.crypto_services = - vcrypto->conf.cryptodev->conf.crypto_services; + vcrypto->conf.crypto_services = virtio_crypto_init_services( + vcrypto->conf.cryptodev->conf.crypto_services); 
vcrypto->conf.cipher_algo_l = vcrypto->conf.cryptodev->conf.cipher_algo_l; vcrypto->conf.cipher_algo_h = diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f35178f5fc..98c4819fcc 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1069,7 +1069,7 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, VRingMemoryRegionCaches *caches) { VirtIODevice *vdev = vq->vdev; - unsigned int max, idx; + unsigned int idx; unsigned int total_bufs, in_total, out_total; MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; @@ -1078,13 +1078,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; - max = vq->vring.num; - while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = &caches->desc; unsigned int num_bufs; VRingDesc desc; unsigned int i; + unsigned int max = vq->vring.num; num_bufs = total_bufs; @@ -1206,7 +1205,7 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, VRingMemoryRegionCaches *caches) { VirtIODevice *vdev = vq->vdev; - unsigned int max, idx; + unsigned int idx; unsigned int total_bufs, in_total, out_total; MemoryRegionCache *desc_cache; MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; @@ -1218,14 +1217,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, wrap_counter = vq->last_avail_wrap_counter; total_bufs = in_total = out_total = 0; - max = vq->vring.num; - for (;;) { unsigned int num_bufs = total_bufs; unsigned int i = idx; int rc; + unsigned int max = vq->vring.num; desc_cache = &caches->desc; + vring_packed_desc_read(vdev, &desc, desc_cache, idx, true); if (!is_desc_avail(desc.flags, wrap_counter)) { break; diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index 57a542c4b8..2faf7f0cae 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -87,6 +87,7 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, 
Error **errp); void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); +bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus); void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); #endif /* HW_ACPI_ICH9_H */ diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 7e268c2c9c..ef59810c17 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -49,15 +49,16 @@ typedef struct AcpiPciHpState { uint32_t acpi_index; PCIBus *root; MemoryRegion io; - bool legacy_piix; uint16_t io_base; uint16_t io_len; + bool use_acpi_hotplug_bridge; + bool use_acpi_root_pci_hotplug; } AcpiPciHpState; void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, - MemoryRegion *address_space_io, bool bridges_enabled, - uint16_t io_base); + MemoryRegion *address_space_io, uint16_t io_base); +bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, @@ -69,7 +70,9 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, Error **errp); /* Called on reset */ -void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off); +void acpi_pcihp_reset(AcpiPciHpState *s); + +void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus); extern const VMStateDescription vmstate_acpi_pcihp_pci_status; diff --git a/include/hw/acpi/piix4.h b/include/hw/acpi/piix4.h index be1f8ea80e..eb1c122d80 100644 --- a/include/hw/acpi/piix4.h +++ b/include/hw/acpi/piix4.h @@ -57,8 +57,6 @@ struct PIIX4PMState { Notifier powerdown_notifier; AcpiPciHpState acpi_pci_hotplug; - bool use_acpi_hotplug_bridge; - bool use_acpi_root_pci_hotplug; bool not_migrate_acpi_index; uint8_t disable_s3; diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h index b161be59b7..b2cffbb364 100644 --- a/include/hw/cxl/cxl.h +++ 
b/include/hw/cxl/cxl.h @@ -49,6 +49,7 @@ struct CXLHost { PCIHostState parent_obj; CXLComponentState cxl_cstate; + bool passthrough; }; #define TYPE_PXB_CXL_HOST "pxb-cxl-host" diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 692d7a5507..42c7e581a7 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -65,11 +65,37 @@ CXLx_CAPABILITY_HEADER(SNOOP, 0x14) #define CXL_RAS_REGISTERS_OFFSET 0x80 #define CXL_RAS_REGISTERS_SIZE 0x58 REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET) +#define CXL_RAS_UNC_ERR_CACHE_DATA_PARITY 0 +#define CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY 1 +#define CXL_RAS_UNC_ERR_CACHE_BE_PARITY 2 +#define CXL_RAS_UNC_ERR_CACHE_DATA_ECC 3 +#define CXL_RAS_UNC_ERR_MEM_DATA_PARITY 4 +#define CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY 5 +#define CXL_RAS_UNC_ERR_MEM_BE_PARITY 6 +#define CXL_RAS_UNC_ERR_MEM_DATA_ECC 7 +#define CXL_RAS_UNC_ERR_REINIT_THRESHOLD 8 +#define CXL_RAS_UNC_ERR_RSVD_ENCODING 9 +#define CXL_RAS_UNC_ERR_POISON_RECEIVED 10 +#define CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW 11 +#define CXL_RAS_UNC_ERR_INTERNAL 14 +#define CXL_RAS_UNC_ERR_CXL_IDE_TX 15 +#define CXL_RAS_UNC_ERR_CXL_IDE_RX 16 +#define CXL_RAS_UNC_ERR_CXL_UNUSED 63 /* Magic value */ REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4) REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8) REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc) +#define CXL_RAS_COR_ERR_CACHE_DATA_ECC 0 +#define CXL_RAS_COR_ERR_MEM_DATA_ECC 1 +#define CXL_RAS_COR_ERR_CRC_THRESHOLD 2 +#define CXL_RAS_COR_ERR_RETRY_THRESHOLD 3 +#define CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED 4 +#define CXL_RAS_COR_ERR_MEM_POISON_RECEIVED 5 +#define CXL_RAS_COR_ERR_PHYSICAL 6 REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10) REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14) + FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6) +REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18) +#define 
CXL_RAS_ERR_HEADER_NUM 32 /* Offset 0x18 - 0x58 reserved for RAS logs */ /* 8.2.5.10 - CXL Security Capability Structure */ @@ -221,6 +247,7 @@ static inline hwaddr cxl_decode_ig(int ig) } CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb); +bool cxl_get_hb_passthrough(PCIHostState *hb); void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp); void cxl_doe_cdat_release(CXLComponentState *cxl_cstate); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 7e5ad65c1d..d589f78202 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -232,6 +232,14 @@ REG64(CXL_MEM_DEV_STS, 0) FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1) FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3) +typedef struct CXLError { + QTAILQ_ENTRY(CXLError) node; + int type; /* Error code as per FE definition */ + uint32_t header[32]; +} CXLError; + +typedef QTAILQ_HEAD(, CXLError) CXLErrorList; + struct CXLType3Dev { /* Private */ PCIDevice parent_obj; @@ -248,6 +256,9 @@ struct CXLType3Dev { /* DOE */ DOECap doe_cdat; + + /* Error injection */ + CXLErrorList error_list; }; #define TYPE_CXL_TYPE3 "cxl-type3" diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h index e15f59c8b3..a9840ed485 100644 --- a/include/hw/hotplug.h +++ b/include/hw/hotplug.h @@ -48,6 +48,7 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler, * @unplug: unplug callback. * Used for device removal with devices that implement * asynchronous and synchronous (surprise) removal. 
+ * @is_hotpluggable_bus: called to check if bus/its parent allow hotplug on bus */ struct HotplugHandlerClass { /* <private> */ @@ -58,6 +59,7 @@ struct HotplugHandlerClass { hotplug_fn plug; hotplug_fn unplug_request; hotplug_fn unplug; + bool (*is_hotpluggable_bus)(HotplugHandler *plug_handler, BusState *bus); }; /** diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 63a7521567..81a058bb2c 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -92,6 +92,7 @@ struct PXBDev { uint8_t bus_nr; uint16_t numa_node; bool bypass_iommu; + bool hdm_for_passthrough; struct cxl_dev { CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */ } cxl; diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h index 65e71d98fe..1234fdc4e2 100644 --- a/include/hw/pci/pcie_aer.h +++ b/include/hw/pci/pcie_aer.h @@ -100,4 +100,5 @@ void pcie_aer_root_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len, uint32_t root_cmd_prev); +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err); #endif /* QEMU_PCIE_AER_H */ diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index 6c40e3733f..90e6cf45b8 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -41,6 +41,8 @@ struct PCIEPort { void pcie_port_init_reg(PCIDevice *d); PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn); +PCIDevice *pcie_find_port_first(PCIBus *bus); +int pcie_count_ds_ports(PCIBus *bus); #define TYPE_PCIE_SLOT "pcie-slot" OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT) diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 1fe0bdd25b..4972106c42 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -141,6 +141,9 @@ typedef enum PCIExpLinkWidth { PCI_ERR_UNC_ATOP_EBLOCKED | \ PCI_ERR_UNC_TLP_PRF_BLOCKED) +#define PCI_ERR_UNC_MASK_DEFAULT (PCI_ERR_UNC_INTN | \ + PCI_ERR_UNC_TLP_PRF_BLOCKED) + #define PCI_ERR_UNC_SEVERITY_DEFAULT (PCI_ERR_UNC_DLP | \ 
PCI_ERR_UNC_SDN | \ PCI_ERR_UNC_FCP | \ diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index f5b3b2f89a..bd50ad5ee1 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -812,7 +812,18 @@ void qbus_set_bus_hotplug_handler(BusState *bus); static inline bool qbus_is_hotpluggable(BusState *bus) { - return bus->hotplug_handler; + HotplugHandler *plug_handler = bus->hotplug_handler; + bool ret = !!plug_handler; + + if (plug_handler) { + HotplugHandlerClass *hdc; + + hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler); + if (hdc->is_hotpluggable_bus) { + ret = hdc->is_hotpluggable_bus(plug_handler, bus); + } + } + return ret; } /** diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index c5ab49051e..ec3fbae58d 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, int fd); + +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev); + typedef struct VhostOps { VhostBackendType backend_type; vhost_backend_init vhost_backend_init; @@ -177,6 +180,7 @@ typedef struct VhostOps { vhost_get_device_id_op vhost_get_device_id; vhost_force_iommu_op vhost_force_iommu; vhost_set_config_call_op vhost_set_config_call; + vhost_reset_status_op vhost_reset_status; } VhostOps; int vhost_backend_update_device_iotlb(struct vhost_dev *dev, diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 7997f09a8d..c278a2a8de 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -42,12 +42,15 @@ typedef struct vhost_vdpa { bool shadow_vqs_enabled; /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ bool shadow_data; + /* Device suspended successfully */ + bool suspended; /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; GPtrArray *shadow_vqs; const 
VhostShadowVirtqueueOps *shadow_vq_ops; void *shadow_vq_ops_opaque; struct vhost_dev *dev; + Error *migration_blocker; VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; } VhostVDPA; diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index efae6b06bc..fdb69b7f9c 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -180,5 +180,6 @@ void hmp_ioport_read(Monitor *mon, const QDict *qdict); void hmp_ioport_write(Monitor *mon, const QDict *qdict); void hmp_boot_set(Monitor *mon, const QDict *qdict); void hmp_info_mtree(Monitor *mon, const QDict *qdict); +void hmp_info_cryptodev(Monitor *mon, const QDict *qdict); #endif diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h index cf9b3f07fe..bc021ce847 100644 --- a/include/sysemu/cryptodev.h +++ b/include/sysemu/cryptodev.h @@ -24,7 +24,9 @@ #define CRYPTODEV_H #include "qemu/queue.h" +#include "qemu/throttle.h" #include "qom/object.h" +#include "qapi/qapi-types-cryptodev.h" /** * CryptoDevBackend: @@ -48,12 +50,6 @@ typedef struct CryptoDevBackendPeers CryptoDevBackendPeers; typedef struct CryptoDevBackendClient CryptoDevBackendClient; -enum CryptoDevBackendAlgType { - CRYPTODEV_BACKEND_ALG_SYM, - CRYPTODEV_BACKEND_ALG_ASYM, - CRYPTODEV_BACKEND_ALG__MAX, -}; - /** * CryptoDevBackendSymSessionInfo: * @@ -179,17 +175,22 @@ typedef struct CryptoDevBackendAsymOpInfo { uint8_t *dst; } CryptoDevBackendAsymOpInfo; +typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret); + typedef struct CryptoDevBackendOpInfo { - enum CryptoDevBackendAlgType algtype; + QCryptodevBackendAlgType algtype; uint32_t op_code; + uint32_t queue_index; + CryptoDevCompletionFunc cb; + void *opaque; /* argument for cb */ uint64_t session_id; union { CryptoDevBackendSymOpInfo *sym_op_info; CryptoDevBackendAsymOpInfo *asym_op_info; } u; + QTAILQ_ENTRY(CryptoDevBackendOpInfo) next; } CryptoDevBackendOpInfo; -typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret); struct CryptoDevBackendClass { ObjectClass 
parent_class; @@ -209,24 +210,11 @@ struct CryptoDevBackendClass { void *opaque); int (*do_op)(CryptoDevBackend *backend, - CryptoDevBackendOpInfo *op_info, - uint32_t queue_index, - CryptoDevCompletionFunc cb, - void *opaque); + CryptoDevBackendOpInfo *op_info); }; -typedef enum CryptoDevBackendOptionsType { - CRYPTODEV_BACKEND_TYPE_NONE = 0, - CRYPTODEV_BACKEND_TYPE_BUILTIN = 1, - CRYPTODEV_BACKEND_TYPE_VHOST_USER = 2, - CRYPTODEV_BACKEND_TYPE_LKCF = 3, - CRYPTODEV_BACKEND_TYPE__MAX, -} CryptoDevBackendOptionsType; - struct CryptoDevBackendClient { - CryptoDevBackendOptionsType type; - char *model; - char *name; + QCryptodevBackendType type; char *info_str; unsigned int queue_index; int vring_enable; @@ -260,6 +248,24 @@ struct CryptoDevBackendConf { uint64_t max_size; }; +typedef struct CryptodevBackendSymStat { + int64_t encrypt_ops; + int64_t decrypt_ops; + int64_t encrypt_bytes; + int64_t decrypt_bytes; +} CryptodevBackendSymStat; + +typedef struct CryptodevBackendAsymStat { + int64_t encrypt_ops; + int64_t decrypt_ops; + int64_t sign_ops; + int64_t verify_ops; + int64_t encrypt_bytes; + int64_t decrypt_bytes; + int64_t sign_bytes; + int64_t verify_bytes; +} CryptodevBackendAsymStat; + struct CryptoDevBackend { Object parent_obj; @@ -267,15 +273,48 @@ struct CryptoDevBackend { /* Tag the cryptodev backend is used by virtio-crypto or not */ bool is_used; CryptoDevBackendConf conf; + CryptodevBackendSymStat *sym_stat; + CryptodevBackendAsymStat *asym_stat; + + ThrottleState ts; + ThrottleTimers tt; + ThrottleConfig tc; + QTAILQ_HEAD(, CryptoDevBackendOpInfo) opinfos; }; +#define CryptodevSymStatInc(be, op, bytes) do { \ + be->sym_stat->op##_bytes += (bytes); \ + be->sym_stat->op##_ops += 1; \ +} while (/*CONSTCOND*/0) + +#define CryptodevSymStatIncEncrypt(be, bytes) \ + CryptodevSymStatInc(be, encrypt, bytes) + +#define CryptodevSymStatIncDecrypt(be, bytes) \ + CryptodevSymStatInc(be, decrypt, bytes) + +#define CryptodevAsymStatInc(be, op, bytes) do { \ + 
be->asym_stat->op##_bytes += (bytes); \ + be->asym_stat->op##_ops += 1; \ +} while (/*CONSTCOND*/0) + +#define CryptodevAsymStatIncEncrypt(be, bytes) \ + CryptodevAsymStatInc(be, encrypt, bytes) + +#define CryptodevAsymStatIncDecrypt(be, bytes) \ + CryptodevAsymStatInc(be, decrypt, bytes) + +#define CryptodevAsymStatIncSign(be, bytes) \ + CryptodevAsymStatInc(be, sign, bytes) + +#define CryptodevAsymStatIncVerify(be, bytes) \ + CryptodevAsymStatInc(be, verify, bytes) + + /** * cryptodev_backend_new_client: - * @model: the cryptodev backend model - * @name: the cryptodev backend name, can be NULL * - * Creates a new cryptodev backend client object - * with the @name in the model @model. + * Creates a new cryptodev backend client object. * * The returned object must be released with * cryptodev_backend_free_client() when no @@ -283,9 +322,8 @@ struct CryptoDevBackend { * * Returns: a new cryptodev backend client object */ -CryptoDevBackendClient * -cryptodev_backend_new_client(const char *model, - const char *name); +CryptoDevBackendClient *cryptodev_backend_new_client(void); + /** * cryptodev_backend_free_client: * @cc: the cryptodev backend client object @@ -354,24 +392,17 @@ int cryptodev_backend_close_session( /** * cryptodev_backend_crypto_operation: * @backend: the cryptodev backend object - * @opaque1: pointer to a VirtIOCryptoReq object - * @queue_index: queue index of cryptodev backend client - * @errp: pointer to a NULL-initialized error object - * @cb: callbacks when operation is completed - * @opaque2: parameter passed to cb + * @op_info: pointer to a CryptoDevBackendOpInfo object * - * Do crypto operation, such as encryption and - * decryption + * Do crypto operation, such as encryption, decryption, signature and + * verification * * Returns: 0 for success and cb will be called when creation is completed, * negative value for error, and cb will not be called. 
*/ int cryptodev_backend_crypto_operation( CryptoDevBackend *backend, - void *opaque1, - uint32_t queue_index, - CryptoDevCompletionFunc cb, - void *opaque2); + CryptoDevBackendOpInfo *op_info); /** * cryptodev_backend_set_used: diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index de5ed8ff22..99904a0da7 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -26,12 +26,15 @@ #include <err.h> #include "standard-headers/linux/virtio_net.h" #include "monitor/monitor.h" +#include "migration/migration.h" +#include "migration/misc.h" #include "hw/virtio/vhost.h" /* Todo:need to add the multiqueue support here */ typedef struct VhostVDPAState { NetClientState nc; struct vhost_vdpa vhost_vdpa; + Notifier migration_state; VHostNetState *vhost_net; /* Control commands shadow buffers */ @@ -98,6 +101,8 @@ static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | + /* VHOST_F_LOG_ALL is exposed by SVQ */ + BIT_ULL(VHOST_F_LOG_ALL) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY); @@ -178,13 +183,9 @@ err_init: static void vhost_vdpa_cleanup(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_dev *dev = &s->vhost_net->dev; qemu_vfree(s->cvq_cmd_out_buffer); qemu_vfree(s->status); - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); - } if (s->vhost_net) { vhost_net_cleanup(s->vhost_net); g_free(s->vhost_net); @@ -234,10 +235,126 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, return size; } +/** From any vdpa net client, get the netclient of the first queue pair */ +static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s) +{ + NICState *nic = qemu_get_nic(s->nc.peer); + NetClientState *nc0 = qemu_get_peer(nic->ncs, 0); + + return DO_UPCAST(VhostVDPAState, nc, nc0); +} + +static void 
vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable) +{ + struct vhost_vdpa *v = &s->vhost_vdpa; + VirtIONet *n; + VirtIODevice *vdev; + int data_queue_pairs, cvq, r; + + /* We are only called on the first data vqs and only if x-svq is not set */ + if (s->vhost_vdpa.shadow_vqs_enabled == enable) { + return; + } + + vdev = v->dev->vdev; + n = VIRTIO_NET(vdev); + if (!n->vhost_started) { + return; + } + + data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; + cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? + n->max_ncs - n->max_queue_pairs : 0; + /* + * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter + * in the future and resume the device if read-only operations between + * suspend and reset goes wrong. + */ + vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq); + + /* Start will check migration setup_or_active to configure or not SVQ */ + r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq); + if (unlikely(r < 0)) { + error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r); + } +} + +static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data) +{ + MigrationState *migration = data; + VhostVDPAState *s = container_of(notifier, VhostVDPAState, + migration_state); + + if (migration_in_setup(migration)) { + vhost_vdpa_net_log_global_enable(s, true); + } else if (migration_has_failed(migration)) { + vhost_vdpa_net_log_global_enable(s, false); + } +} + +static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) +{ + struct vhost_vdpa *v = &s->vhost_vdpa; + + add_migration_state_change_notifier(&s->migration_state); + if (v->shadow_vqs_enabled) { + v->iova_tree = vhost_iova_tree_new(v->iova_range.first, + v->iova_range.last); + } +} + +static int vhost_vdpa_net_data_start(NetClientState *nc) +{ + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_vdpa *v = &s->vhost_vdpa; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + if 
(s->always_svq || + migration_is_setup_or_active(migrate_get_current()->state)) { + v->shadow_vqs_enabled = true; + v->shadow_data = true; + } else { + v->shadow_vqs_enabled = false; + v->shadow_data = false; + } + + if (v->index == 0) { + vhost_vdpa_net_data_start_first(s); + return 0; + } + + if (v->shadow_vqs_enabled) { + VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s); + v->iova_tree = s0->vhost_vdpa.iova_tree; + } + + return 0; +} + +static void vhost_vdpa_net_client_stop(NetClientState *nc) +{ + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_dev *dev; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + if (s->vhost_vdpa.index == 0) { + remove_migration_state_change_notifier(&s->migration_state); + } + + dev = s->vhost_vdpa.dev; + if (dev->vq_index + dev->nvqs == dev->vq_index_end) { + g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); + } +} + static NetClientInfo net_vhost_vdpa_info = { .type = NET_CLIENT_DRIVER_VHOST_VDPA, .size = sizeof(VhostVDPAState), .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_data_start, + .stop = vhost_vdpa_net_client_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, .has_ufo = vhost_vdpa_has_ufo, @@ -351,7 +468,7 @@ dma_map_err: static int vhost_vdpa_net_cvq_start(NetClientState *nc) { - VhostVDPAState *s; + VhostVDPAState *s, *s0; struct vhost_vdpa *v; uint64_t backend_features; int64_t cvq_group; @@ -362,11 +479,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) s = DO_UPCAST(VhostVDPAState, nc, nc); v = &s->vhost_vdpa; - v->shadow_data = s->always_svq; + s0 = vhost_vdpa_net_first_nc_vdpa(s); + v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; v->shadow_vqs_enabled = s->always_svq; s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; - if (s->always_svq) { + if (s->vhost_vdpa.shadow_data) { /* SVQ is already configured for all virtqueues */ goto out; } @@ -415,8 +533,6 @@ static int 
vhost_vdpa_net_cvq_start(NetClientState *nc) return r; } - v->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); v->shadow_vqs_enabled = true; s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; @@ -425,6 +541,26 @@ out: return 0; } + if (s0->vhost_vdpa.iova_tree) { + /* + * SVQ is already configured for all virtqueues. Reuse IOVA tree for + * simplicity, whether CVQ shares ASID with guest or not, because: + * - Memory listener need access to guest's memory addresses allocated + * in the IOVA tree. + * - There should be plenty of IOVA address space for both ASID not to + * worry about collisions between them. Guest's translations are + * still validated with virtio virtqueue_pop so there is no risk for + * the guest to access memory that it shouldn't. + * + * To allocate a iova tree per ASID is doable but it complicates the + * code and it is not worth it for the moment. + */ + v->iova_tree = s0->vhost_vdpa.iova_tree; + } else { + v->iova_tree = vhost_iova_tree_new(v->iova_range.first, + v->iova_range.last); + } + r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len(), false); if (unlikely(r < 0)) { @@ -449,15 +585,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) if (s->vhost_vdpa.shadow_vqs_enabled) { vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); - if (!s->always_svq) { - /* - * If only the CVQ is shadowed we can delete this safely. - * If all the VQs are shadows this will be needed by the time the - * device is started again to register SVQ vrings and similar. 
- */ - g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); - } } + + vhost_vdpa_net_client_stop(nc); } static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, @@ -668,7 +798,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, bool is_datapath, bool svq, struct vhost_vdpa_iova_range iova_range, - VhostIOVATree *iova_tree) + uint64_t features) { NetClientState *nc = NULL; VhostVDPAState *s; @@ -687,11 +817,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.device_fd = vdpa_device_fd; s->vhost_vdpa.index = queue_pair_index; s->always_svq = svq; + s->migration_state.notify = vdpa_net_migration_state_notifier; s->vhost_vdpa.shadow_vqs_enabled = svq; s->vhost_vdpa.iova_range = iova_range; s->vhost_vdpa.shadow_data = svq; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { + if (queue_pair_index == 0) { + vhost_vdpa_net_valid_svq_features(features, + &s->vhost_vdpa.migration_blocker); + } else if (!is_datapath) { s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), vhost_vdpa_net_cvq_cmd_page_len()); memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); @@ -701,6 +834,15 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; s->vhost_vdpa.shadow_vq_ops_opaque = s; + + /* + * TODO: We cannot migrate devices with CVQ as there is no way to set + * the device state (MAC, MQ, etc) before starting the datapath. + * + * Migration blocker ownership now belongs to s->vhost_vdpa. 
+ */ + error_setg(&s->vhost_vdpa.migration_blocker, + "net vdpa cannot migrate with CVQ feature"); } ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { @@ -760,7 +902,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, uint64_t features; int vdpa_device_fd; g_autofree NetClientState **ncs = NULL; - g_autoptr(VhostIOVATree) iova_tree = NULL; struct vhost_vdpa_iova_range iova_range; NetClientState *nc; int queue_pairs, r, i = 0, has_cvq = 0; @@ -812,12 +953,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, goto err; } - if (opts->x_svq) { - if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - - iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); + if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) { + goto err; } ncs = g_malloc0(sizeof(*ncs) * queue_pairs); @@ -825,7 +962,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, for (i = 0; i < queue_pairs; i++) { ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 2, true, opts->x_svq, - iova_range, iova_tree); + iova_range, features); if (!ncs[i]) goto err; } @@ -833,13 +970,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, if (has_cvq) { nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 1, false, - opts->x_svq, iova_range, iova_tree); + opts->x_svq, iova_range, features); if (!nc) goto err; } - /* iova_tree ownership belongs to last NetClientState */ - g_steal_pointer(&iova_tree); return 0; err: @@ -849,7 +984,6 @@ err: } } -err_svq: qemu_close(vdpa_device_fd); return -1; diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json new file mode 100644 index 0000000000..f33f96a692 --- /dev/null +++ b/qapi/cryptodev.json @@ -0,0 +1,89 @@ +# -*- Mode: Python -*- +# vim: filetype=python +# +# This work is licensed under the terms of the GNU GPL, version 2 or later. 
+# See the COPYING file in the top-level directory. + +## +# @QCryptodevBackendAlgType: +# +# The supported algorithm types of a crypto device. +# +# @sym: symmetric encryption +# @asym: asymmetric encryption +# +# Since: 8.0 +## +{ 'enum': 'QCryptodevBackendAlgType', + 'prefix': 'QCRYPTODEV_BACKEND_ALG', + 'data': ['sym', 'asym']} + +## +# @QCryptodevBackendServiceType: +# +# The supported service types of a crypto device. +# +# Since: 8.0 +## +{ 'enum': 'QCryptodevBackendServiceType', + 'prefix': 'QCRYPTODEV_BACKEND_SERVICE', + 'data': ['cipher', 'hash', 'mac', 'aead', 'akcipher']} + +## +# @QCryptodevBackendType: +# +# The crypto device backend type +# +# @builtin: the QEMU builtin support +# @vhost-user: vhost-user +# @lkcf: Linux kernel cryptographic framework +# +# Since: 8.0 +## +{ 'enum': 'QCryptodevBackendType', + 'prefix': 'QCRYPTODEV_BACKEND_TYPE', + 'data': ['builtin', 'vhost-user', 'lkcf']} + +## +# @QCryptodevBackendClient: +# +# Information about a queue of a crypto device. +# +# @queue: the queue index of the crypto device +# +# @type: the type of the crypto device +# +# Since: 8.0 +## +{ 'struct': 'QCryptodevBackendClient', + 'data': { 'queue': 'uint32', + 'type': 'QCryptodevBackendType' } } + +## +# @QCryptodevInfo: +# +# Information about a crypto device. +# +# @id: the id of the crypto device +# +# @service: supported service types of a crypto device +# +# @client: the additional information of the crypto device +# +# Since: 8.0 +## +{ 'struct': 'QCryptodevInfo', + 'data': { 'id': 'str', + 'service': ['QCryptodevBackendServiceType'], + 'client': ['QCryptodevBackendClient'] } } + +## +# @query-cryptodev: +# +# Returns information about current crypto devices. 
+# +# Returns: a list of @QCryptodevInfo +# +# Since: 8.0 +## +{ 'command': 'query-cryptodev', 'returns': ['QCryptodevInfo']} diff --git a/qapi/cxl.json b/qapi/cxl.json new file mode 100644 index 0000000000..4be7d46041 --- /dev/null +++ b/qapi/cxl.json @@ -0,0 +1,128 @@ +# -*- Mode: Python -*- +# vim: filetype=python + +## +# = CXL devices +## + +## +# @CxlUncorErrorType: +# +# Type of uncorrectable CXL error to inject. These errors are reported via +# an AER uncorrectable internal error with additional information logged at +# the CXL device. +# +# @cache-data-parity: Data error such as data parity or data ECC error CXL.cache +# @cache-address-parity: Address parity or other errors associated with the +# address field on CXL.cache +# @cache-be-parity: Byte enable parity or other byte enable errors on CXL.cache +# @cache-data-ecc: ECC error on CXL.cache +# @mem-data-parity: Data error such as data parity or data ECC error on CXL.mem +# @mem-address-parity: Address parity or other errors associated with the +# address field on CXL.mem +# @mem-be-parity: Byte enable parity or other byte enable errors on CXL.mem. +# @mem-data-ecc: Data ECC error on CXL.mem. +# @reinit-threshold: REINIT threshold hit. +# @rsvd-encoding: Received unrecognized encoding. +# @poison-received: Received poison from the peer. +# @receiver-overflow: Buffer overflows (first 3 bits of header log indicate which) +# @internal: Component specific error +# @cxl-ide-tx: Integrity and data encryption tx error. +# @cxl-ide-rx: Integrity and data encryption rx error. 
+# +# Since: 8.0 +## + +{ 'enum': 'CxlUncorErrorType', + 'data': ['cache-data-parity', + 'cache-address-parity', + 'cache-be-parity', + 'cache-data-ecc', + 'mem-data-parity', + 'mem-address-parity', + 'mem-be-parity', + 'mem-data-ecc', + 'reinit-threshold', + 'rsvd-encoding', + 'poison-received', + 'receiver-overflow', + 'internal', + 'cxl-ide-tx', + 'cxl-ide-rx' + ] + } + +## +# @CXLUncorErrorRecord: +# +# Record of a single error including header log. +# +# @type: Type of error +# @header: 16 DWORD of header. +# +# Since: 8.0 +## +{ 'struct': 'CXLUncorErrorRecord', + 'data': { + 'type': 'CxlUncorErrorType', + 'header': [ 'uint32' ] + } +} + +## +# @cxl-inject-uncorrectable-errors: +# +# Command to allow injection of multiple errors in one go. This allows testing +# of multiple header log handling in the OS. +# +# @path: CXL Type 3 device canonical QOM path +# @errors: Errors to inject +# +# Since: 8.0 +## +{ 'command': 'cxl-inject-uncorrectable-errors', + 'data': { 'path': 'str', + 'errors': [ 'CXLUncorErrorRecord' ] }} + +## +# @CxlCorErrorType: +# +# Type of CXL correctable error to inject +# +# @cache-data-ecc: Data ECC error on CXL.cache +# @mem-data-ecc: Data ECC error on CXL.mem +# @crc-threshold: Component specific and applicable to 68 byte Flit mode only. +# @cache-poison-received: Received poison from a peer on CXL.cache. +# @mem-poison-received: Received poison from a peer on CXL.mem +# @physical: Received error indication from the physical layer. +# +# Since: 8.0 +## +{ 'enum': 'CxlCorErrorType', + 'data': ['cache-data-ecc', + 'mem-data-ecc', + 'crc-threshold', + 'retry-threshold', + 'cache-poison-received', + 'mem-poison-received', + 'physical'] +} + +## +# @cxl-inject-correctable-error: +# +# Command to inject a single correctable error. Multiple error injection +# of this error type is not interesting as there is no associated header log. 
+# These errors are reported via AER as a correctable internal error, with +# additional detail available from the CXL device. +# +# @path: CXL Type 3 device canonical QOM path +# @type: Type of error. +# +# Since: 8.0 +## +{ 'command': 'cxl-inject-correctable-error', + 'data': { 'path': 'str', + 'type': 'CxlCorErrorType' + } +} diff --git a/qapi/meson.build b/qapi/meson.build index fbdb442fdf..9fd480c4d8 100644 --- a/qapi/meson.build +++ b/qapi/meson.build @@ -31,6 +31,7 @@ qapi_all_modules = [ 'compat', 'control', 'crypto', + 'cxl', 'dump', 'error', 'introspect', @@ -56,6 +57,7 @@ if have_system qapi_all_modules += [ 'acpi', 'audio', + 'cryptodev', 'qdev', 'pci', 'rdma', diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json index f000b90744..7c09af5cc8 100644 --- a/qapi/qapi-schema.json +++ b/qapi/qapi-schema.json @@ -95,3 +95,5 @@ { 'include': 'pci.json' } { 'include': 'stats.json' } { 'include': 'virtio.json' } +{ 'include': 'cryptodev.json' } +{ 'include': 'cxl.json' } diff --git a/qapi/qom.json b/qapi/qom.json index 30e76653ad..a877b879b9 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -278,10 +278,16 @@ # cryptodev-backend and must be 1 for cryptodev-backend-builtin. # (default: 1) # +# @throttle-bps: limit total bytes per second (Since 8.0) +# +# @throttle-ops: limit total operations per second (Since 8.0) +# # Since: 2.8 ## { 'struct': 'CryptodevBackendProperties', - 'data': { '*queues': 'uint32' } } + 'data': { '*queues': 'uint32', + '*throttle-bps': 'uint64', + '*throttle-ops': 'uint64' } } ## # @CryptodevVhostUserProperties: diff --git a/qapi/stats.json b/qapi/stats.json index 57db5b1c74..1f5d3c59ab 100644 --- a/qapi/stats.json +++ b/qapi/stats.json @@ -50,10 +50,14 @@ # # Enumeration of statistics providers. 
# +# @kvm: since 7.1 +# +# @cryptodev: since 8.0 +# # Since: 7.1 ## { 'enum': 'StatsProvider', - 'data': [ 'kvm' ] } + 'data': [ 'kvm', 'cryptodev' ] } ## # @StatsTarget: @@ -65,10 +69,12 @@ # # @vcpu: statistics that apply to a single virtual CPU. # +# @cryptodev: statistics that apply to a crypto device. since 8.0 +# # Since: 7.1 ## { 'enum': 'StatsTarget', - 'data': [ 'vm', 'vcpu' ] } + 'data': [ 'vm', 'vcpu', 'cryptodev' ] } ## # @StatsRequest: diff --git a/qga/installer/qemu-ga.wxs b/qga/installer/qemu-ga.wxs index 51340f7ecc..df572adb4a 100644 --- a/qga/installer/qemu-ga.wxs +++ b/qga/installer/qemu-ga.wxs @@ -31,6 +31,7 @@ /> <Media Id="1" Cabinet="qemu_ga.$(var.QEMU_GA_VERSION).cab" EmbedCab="yes" /> <Property Id="WHSLogo">1</Property> + <Property Id="ARPNOMODIFY" Value="yes" Secure="yes" /> <MajorUpgrade DowngradeErrorMessage="Error: A newer version of QEMU guest agent is already installed." /> @@ -121,27 +122,31 @@ <RegistryValue Type="integer" Name="TypesSupported" Value="7" /> <RegistryValue Type="string" Name="EventMessageFile" Value="[qemu_ga_directory]qemu-ga.exe" /> </RegistryKey> + <RegistryKey Root="HKLM" + Key="System\CurrentControlSet\Services\QEMU Guest Agent VSS Provider"> + <RegistryValue Type="integer" Name="VssOption" Value="1" /> + </RegistryKey> </Component> </Directory> </Directory> </Directory> - <Property Id="cmd" Value="cmd.exe"/> + <Property Id="rundll" Value="rundll32.exe"/> <Property Id="REINSTALLMODE" Value="amus"/> <?ifdef var.InstallVss?> <CustomAction Id="RegisterCom" - ExeCommand='/c "[qemu_ga_directory]qemu-ga.exe" -s vss-install' + ExeCommand='"[qemu_ga_directory]qga-vss.dll",DLLCOMRegister' Execute="deferred" - Property="cmd" + Property="rundll" Impersonate="no" Return="check" > </CustomAction> <CustomAction Id="UnRegisterCom" - ExeCommand='/c "[qemu_ga_directory]qemu-ga.exe" -s vss-uninstall' + ExeCommand='"[qemu_ga_directory]qga-vss.dll",DLLCOMUnregister' Execute="deferred" - Property="cmd" + Property="rundll" 
Impersonate="no" Return="check" > diff --git a/qga/vss-win32/install.cpp b/qga/vss-win32/install.cpp index b57508fbe0..68662a6dfc 100644 --- a/qga/vss-win32/install.cpp +++ b/qga/vss-win32/install.cpp @@ -357,6 +357,15 @@ out: return hr; } +STDAPI_(void) CALLBACK DLLCOMRegister(HWND, HINSTANCE, LPSTR, int) +{ + COMRegister(); +} + +STDAPI_(void) CALLBACK DLLCOMUnregister(HWND, HINSTANCE, LPSTR, int) +{ + COMUnregister(); +} static BOOL CreateRegistryKey(LPCTSTR key, LPCTSTR value, LPCTSTR data) { diff --git a/qga/vss-win32/qga-vss.def b/qga/vss-win32/qga-vss.def index 927782c31b..ee97a81427 100644 --- a/qga/vss-win32/qga-vss.def +++ b/qga/vss-win32/qga-vss.def @@ -1,6 +1,8 @@ LIBRARY "QGA-PROVIDER.DLL" EXPORTS + DLLCOMRegister + DLLCOMUnregister COMRegister PRIVATE COMUnregister PRIVATE DllCanUnloadNow PRIVATE diff --git a/qga/vss-win32/requester.cpp b/qga/vss-win32/requester.cpp index b371affeab..3e998af4a8 100644 --- a/qga/vss-win32/requester.cpp +++ b/qga/vss-win32/requester.cpp @@ -23,6 +23,8 @@ /* Call QueryStatus every 10 ms while waiting for frozen event */ #define VSS_TIMEOUT_EVENT_MSEC 10 +#define DEFAULT_VSS_BACKUP_TYPE VSS_BT_FULL + #define err_set(e, err, fmt, ...) 
\ ((e)->error_setg_win32_wrapper((e)->errp, __FILE__, __LINE__, __func__, \ err, fmt, ## __VA_ARGS__)) @@ -234,6 +236,42 @@ out: } } +DWORD get_reg_dword_value(HKEY baseKey, LPCSTR subKey, LPCSTR valueName, + DWORD defaultData) +{ + DWORD regGetValueError; + DWORD dwordData; + DWORD dataSize = sizeof(DWORD); + + regGetValueError = RegGetValue(baseKey, subKey, valueName, RRF_RT_DWORD, + NULL, &dwordData, &dataSize); + if (regGetValueError != ERROR_SUCCESS) { + return defaultData; + } + return dwordData; +} + +bool is_valid_vss_backup_type(VSS_BACKUP_TYPE vssBT) +{ + return (vssBT > VSS_BT_UNDEFINED && vssBT < VSS_BT_OTHER); +} + +VSS_BACKUP_TYPE get_vss_backup_type( + VSS_BACKUP_TYPE defaultVssBT = DEFAULT_VSS_BACKUP_TYPE) +{ + VSS_BACKUP_TYPE vssBackupType; + + vssBackupType = static_cast<VSS_BACKUP_TYPE>( + get_reg_dword_value(HKEY_LOCAL_MACHINE, + QGA_PROVIDER_REGISTRY_ADDRESS, + "VssOption", + defaultVssBT)); + if (!is_valid_vss_backup_type(vssBackupType)) { + return defaultVssBT; + } + return vssBackupType; +} + void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset) { COMPointer<IVssAsync> pAsync; @@ -247,6 +285,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset) DWORD wait_status; int num_fixed_drives = 0, i; int num_mount_points = 0; + VSS_BACKUP_TYPE vss_bt = get_vss_backup_type(); if (vss_ctx.pVssbc) { /* already frozen */ *num_vols = 0; @@ -294,7 +333,7 @@ void requester_freeze(int *num_vols, void *mountpoints, ErrorSet *errset) goto out; } - hr = vss_ctx.pVssbc->SetBackupState(true, true, VSS_BT_FULL, false); + hr = vss_ctx.pVssbc->SetBackupState(true, true, vss_bt, false); if (FAILED(hr)) { err_set(errset, hr, "failed to set backup state"); goto out; diff --git a/qga/vss-win32/vss-handles.h b/qga/vss-win32/vss-handles.h index 0f8a741ad2..1a7d842129 100644 --- a/qga/vss-win32/vss-handles.h +++ b/qga/vss-win32/vss-handles.h @@ -6,6 +6,9 @@ #define QGA_PROVIDER_NAME "QEMU Guest Agent VSS Provider" #define 
QGA_PROVIDER_LNAME L(QGA_PROVIDER_NAME) #define QGA_PROVIDER_VERSION L(QEMU_VERSION) +#define QGA_PROVIDER_REGISTRY_ADDRESS "SYSTEM\\CurrentControlSet"\ + "\\Services"\ + "\\" QGA_PROVIDER_NAME #define EVENT_NAME_FROZEN "Global\\QGAVSSEvent-frozen" #define EVENT_NAME_THAW "Global\\QGAVSSEvent-thaw" diff --git a/stats/stats-hmp-cmds.c b/stats/stats-hmp-cmds.c index 531e35d128..1f91bf8bd5 100644 --- a/stats/stats-hmp-cmds.c +++ b/stats/stats-hmp-cmds.c @@ -155,6 +155,8 @@ static StatsFilter *stats_filter(StatsTarget target, const char *names, filter->u.vcpu.vcpus = vcpu_list; break; } + case STATS_TARGET_CRYPTODEV: + break; default: break; } @@ -226,6 +228,9 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict) int cpu_index = monitor_get_cpu_index(mon); filter = stats_filter(target, names, cpu_index, provider); break; + case STATS_TARGET_CRYPTODEV: + filter = stats_filter(target, names, -1, provider); + break; default: abort(); } diff --git a/stats/stats-qmp-cmds.c b/stats/stats-qmp-cmds.c index bc973747fb..e214b964fd 100644 --- a/stats/stats-qmp-cmds.c +++ b/stats/stats-qmp-cmds.c @@ -64,6 +64,8 @@ static bool invoke_stats_cb(StatsCallbacks *entry, targets = filter->u.vcpu.vcpus; } break; + case STATS_TARGET_CRYPTODEV: + break; default: abort(); } diff --git a/target/hexagon/README b/target/hexagon/README index 251960b862..ebafc78b1c 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -52,6 +52,7 @@ header files in <BUILD_DIR>/target/hexagon gen_tcg_func_table.py -> tcg_func_table_generated.c.inc gen_helper_funcs.py -> helper_funcs_generated.c.inc gen_idef_parser_funcs.py -> idef_parser_input.h + gen_analyze_funcs.py -> analyze_funcs_generated.c.inc Qemu helper functions have 3 parts DEF_HELPER declaration indicates the signature of the helper @@ -87,7 +88,6 @@ tcg_funcs_generated.c.inc TCGv RtV = hex_gpr[insn->regno[2]]; gen_helper_A2_add(RdV, cpu_env, RsV, RtV); gen_log_reg_write(RdN, RdV); - ctx_log_reg_write(ctx, RdN); } 
helper_funcs_generated.c.inc @@ -136,12 +136,9 @@ For HVX vectors, the generator behaves slightly differently. The wide vectors won't fit in a TCGv or TCGv_i64, so we pass TCGv_ptr variables to pass the address to helper functions. Here's an example for an HVX vector-add-word istruction. - static void generate_V6_vaddw( - CPUHexagonState *env, - DisasContext *ctx, - Insn *insn, - Packet *pkt) + static void generate_V6_vaddw(DisasContext *ctx) { + Insn *insn __attribute__((unused)) = ctx->insn; const int VdN = insn->regno[0]; const intptr_t VdV_off = ctx_future_vreg_off(ctx, VdN, 1, true); @@ -157,10 +154,7 @@ istruction. TCGv_ptr VvV = tcg_temp_new_ptr(); tcg_gen_addi_ptr(VuV, cpu_env, VuV_off); tcg_gen_addi_ptr(VvV, cpu_env, VvV_off); - TCGv slot = tcg_constant_tl(insn->slot); - gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV, slot); - gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false); - ctx_log_vreg_write(ctx, VdN, EXT_DFL, false); + gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV); } Notice that we also generate a variable named <operand>_off for each operand of @@ -173,12 +167,9 @@ functions from tcg-op-gvec.h. Here's the override for this instruction. Finally, we notice that the override doesn't use the TCGv_ptr variables, so we don't generate them when an override is present. Here is what we generate when the override is present. - static void generate_V6_vaddw( - CPUHexagonState *env, - DisasContext *ctx, - Insn *insn, - Packet *pkt) + static void generate_V6_vaddw(DisasContext *ctx) { + Insn *insn __attribute__((unused)) = ctx->insn; const int VdN = insn->regno[0]; const intptr_t VdV_off = ctx_future_vreg_off(ctx, VdN, 1, true); @@ -189,10 +180,14 @@ when the override is present. 
const intptr_t VvV_off = vreg_src_off(ctx, VvN); fGEN_TCG_V6_vaddw({ fHIDE(int i;) fVFOREACH(32, i) { VdV.w[i] = VuV.w[i] + VvV.w[i] ; } }); - gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false); - ctx_log_vreg_write(ctx, VdN, EXT_DFL, false); } +We also generate an analyze_<tag> function for each instruction. Currently, +these functions record the writes to registers by calling ctx_log_*. During +gen_start_packet, we invoke the analyze_<tag> function for each instruction in +the packet, and we mark the implicit writes. After the analysis is performed, +we initialize hex_new_value for each of the predicated assignments. + In addition to instruction semantics, we use a generator to create the decode tree. This generation is also a two step process. The first step is to run target/hexagon/gen_dectree_import.c to produce @@ -277,10 +272,8 @@ For Hexagon Vector eXtensions (HVX), the following fields are used VRegs Vector registers future_VRegs Registers to be stored during packet commit tmp_VRegs Temporary registers *not* stored during commit - VRegs_updated Mask of predicated vector writes QRegs Q (vector predicate) registers future_QRegs Registers to be stored during packet commit - QRegs_updated Mask of predicated vector writes *** Debugging *** diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 5d2a102c18..9874d1658f 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -44,6 +44,7 @@ DEF_ATTRIB(MEMSIZE_1B, "Memory width is 1 byte", "", "") DEF_ATTRIB(MEMSIZE_2B, "Memory width is 2 bytes", "", "") DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "") +DEF_ATTRIB(SCALAR_LOAD, "Load is scalar", "", "") DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "") DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "") DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "") diff --git a/target/hexagon/cpu.h 
b/target/hexagon/cpu.h index 34c0ae0a67..81b663ecfb 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -111,11 +111,8 @@ typedef struct CPUArchState { MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16); MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16); - VRegMask VRegs_updated; - MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16); MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16); - QRegMask QRegs_updated; /* Temporaries used within instructions */ MMVectorPair VuuV QEMU_ALIGNED(16); diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py new file mode 100755 index 0000000000..ebd3e7afb9 --- /dev/null +++ b/target/hexagon/gen_analyze_funcs.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 + +## +## Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, see <http://www.gnu.org/licenses/>. 
+## + +import sys +import re +import string +import hex_common + +## +## Helpers for gen_analyze_func +## +def is_predicated(tag): + return 'A_CONDEXEC' in hex_common.attribdict[tag] + +def analyze_opn_old(f, tag, regtype, regid, regno): + regN = "%s%sN" % (regtype, regid) + predicated = "true" if is_predicated(tag) else "false" + if (regtype == "R"): + if (regid in {"ss", "tt"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"dd", "ee", "xx", "yy"}): + f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) + f.write(" ctx_log_reg_write_pair(ctx, %s, %s);\n" % \ + (regN, predicated)) + elif (regid in {"s", "t", "u", "v"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"d", "e", "x", "y"}): + f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) + f.write(" ctx_log_reg_write(ctx, %s, %s);\n" % \ + (regN, predicated)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "P"): + if (regid in {"s", "t", "u", "v"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"d", "e", "x"}): + f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) + f.write(" ctx_log_pred_write(ctx, %s);\n" % (regN)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "C"): + if (regid == "ss"): + f.write("// const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ + (regN, regno)) + elif (regid == "dd"): + f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ + (regN, regno)) + f.write(" ctx_log_reg_write_pair(ctx, %s, %s);\n" % \ + (regN, predicated)) + elif (regid == "s"): + f.write("// const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ + (regN, regno)) + elif (regid == "d"): + f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ + (regN, regno)) + f.write(" ctx_log_reg_write(ctx, %s, %s);\n" % \ + (regN, predicated)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == 
"M"): + if (regid == "u"): + f.write("// const int %s = insn->regno[%d];\n"% \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "V"): + newv = "EXT_DFL" + if (hex_common.is_new_result(tag)): + newv = "EXT_NEW" + elif (hex_common.is_tmp_result(tag)): + newv = "EXT_TMP" + if (regid in {"dd", "xx"}): + f.write(" const int %s = insn->regno[%d];\n" %\ + (regN, regno)) + f.write(" ctx_log_vreg_write_pair(ctx, %s, %s, %s);\n" % \ + (regN, newv, predicated)) + elif (regid in {"uu", "vv"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"s", "u", "v", "w"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"d", "x", "y"}): + f.write(" const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + f.write(" ctx_log_vreg_write(ctx, %s, %s, %s);\n" % \ + (regN, newv, predicated)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "Q"): + if (regid in {"d", "e", "x"}): + f.write(" const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + f.write(" ctx_log_qreg_write(ctx, %s);\n" % (regN)) + elif (regid in {"s", "t", "u", "v"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "G"): + if (regid in {"dd"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"d"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"ss"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"s"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "S"): + if (regid in {"dd"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"d"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"ss"}): 
+ f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + elif (regid in {"s"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + else: + print("Bad register parse: ", regtype, regid) + +def analyze_opn_new(f, tag, regtype, regid, regno): + regN = "%s%sN" % (regtype, regid) + if (regtype == "N"): + if (regid in {"s", "t"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "P"): + if (regid in {"t", "u", "v"}): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + elif (regtype == "O"): + if (regid == "s"): + f.write("// const int %s = insn->regno[%d];\n" % \ + (regN, regno)) + else: + print("Bad register parse: ", regtype, regid) + else: + print("Bad register parse: ", regtype, regid) + +def analyze_opn(f, tag, regtype, regid, toss, numregs, i): + if (hex_common.is_pair(regid)): + analyze_opn_old(f, tag, regtype, regid, i) + elif (hex_common.is_single(regid)): + if hex_common.is_old_val(regtype, regid, tag): + analyze_opn_old(f,tag, regtype, regid, i) + elif hex_common.is_new_val(regtype, regid, tag): + analyze_opn_new(f, tag, regtype, regid, i) + else: + print("Bad register parse: ", regtype, regid, toss, numregs) + else: + print("Bad register parse: ", regtype, regid, toss, numregs) + +## +## Generate the code to analyze the instruction +## For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;} +## We produce: +## static void analyze_A2_add(DisasContext *ctx) +## { +## Insn *insn G_GNUC_UNUSED = ctx->insn; +## const int RdN = insn->regno[0]; +## ctx_log_reg_write(ctx, RdN, false); +## // const int RsN = insn->regno[1]; +## // const int RtN = insn->regno[2]; +## } +## +def gen_analyze_func(f, tag, regs, imms): + f.write("static void analyze_%s(DisasContext *ctx)\n" %tag) + f.write('{\n') + + f.write(" Insn *insn 
G_GNUC_UNUSED = ctx->insn;\n") + + i=0 + ## Analyze all the registers + for regtype, regid, toss, numregs in regs: + analyze_opn(f, tag, regtype, regid, toss, numregs, i) + i += 1 + + has_generated_helper = (not hex_common.skip_qemu_helper(tag) and + not hex_common.is_idef_parser_enabled(tag)) + if (has_generated_helper and + 'A_SCALAR_LOAD' in hex_common.attribdict[tag]): + f.write(" ctx->need_pkt_has_store_s1 = true;\n") + + f.write("}\n\n") + +def main(): + hex_common.read_semantics_file(sys.argv[1]) + hex_common.read_attribs_file(sys.argv[2]) + hex_common.read_overrides_file(sys.argv[3]) + hex_common.read_overrides_file(sys.argv[4]) + ## Whether or not idef-parser is enabled is + ## determined by the number of arguments to + ## this script: + ## + ## 5 args. -> not enabled, + ## 6 args. -> idef-parser enabled. + ## + ## The 6:th arg. then holds a list of the successfully + ## parsed instructions. + is_idef_parser_enabled = len(sys.argv) > 6 + if is_idef_parser_enabled: + hex_common.read_idef_parser_enabled_file(sys.argv[5]) + hex_common.calculate_attribs() + tagregs = hex_common.get_tagregs() + tagimms = hex_common.get_tagimms() + + with open(sys.argv[-1], 'w') as f: + f.write("#ifndef HEXAGON_TCG_FUNCS_H\n") + f.write("#define HEXAGON_TCG_FUNCS_H\n\n") + + for tag in hex_common.tags: + gen_analyze_func(f, tag, tagregs[tag], tagimms[tag]) + + f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n") + +if __name__ == "__main__": + main() diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index 19e9883f4c..7a224b66e6 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -226,6 +226,14 @@ def gen_helper_function(f, tag, tagregs, tagimms): print("Bad register parse: ",regtype,regid,toss,numregs) i += 1 + ## For conditional instructions, we pass in the destination register + if 'A_CONDEXEC' in hex_common.attribdict[tag]: + for regtype, regid, toss, numregs in regs: + if (hex_common.is_writeonly(regid) and + not hex_common.is_hvx_reg(regtype)): + gen_helper_arg_opn(f, regtype, regid, i, tag) + i += 1 + ## Arguments to the helper function are the source regs and immediates for regtype,regid,toss,numregs in regs: if (hex_common.is_read(regid)): @@ -262,10 +270,11 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_ea(tag): gen_decl_ea(f) ## Declare the return variable i=0 - for regtype,regid,toss,numregs in regs: - if (hex_common.is_writeonly(regid)): - gen_helper_dest_decl_opn(f,regtype,regid,i) - i += 1 + if 'A_CONDEXEC' not in hex_common.attribdict[tag]: + for regtype,regid,toss,numregs in regs: + if (hex_common.is_writeonly(regid)): + gen_helper_dest_decl_opn(f,regtype,regid,i) + i += 1 for regtype,regid,toss,numregs in regs: if (hex_common.is_read(regid)): diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index 674bf370fa..ddddc9e4f0 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -87,6 +87,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): if hex_common.need_slot(tag): def_helper_size += 1 if hex_common.need_PC(tag): def_helper_size += 1 if hex_common.helper_needs_next_PC(tag): def_helper_size += 1 + if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1 f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag)) ## The return type is void f.write(', void' ) @@ -96,6 +97,7 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): def_helper_size += 1 if hex_common.need_PC(tag): def_helper_size += 1 + if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1 if hex_common.helper_needs_next_PC(tag): def_helper_size += 1 f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag)) @@ -121,6 +123,14 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i) i += 1 + ## For conditional instructions, we pass in the destination register + if 'A_CONDEXEC' in hex_common.attribdict[tag]: + for regtype, regid, toss, numregs in regs: + if (hex_common.is_writeonly(regid) and + not hex_common.is_hvx_reg(regtype)): + gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i) + i += 1 + ## Generate the qemu type for each input operand (regs and immediates) for regtype,regid,toss,numregs in regs: if (hex_common.is_read(regid)): diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index b2e7880b5c..bcf0cf466a 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -332,8 +332,6 @@ tcg_gen_movi_tl(EA, 0); \ PRED; \ CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \ - PRED_LOAD_CANCEL(LSB, EA); \ - tcg_gen_movi_tl(RdV, 0); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \ fLOAD(1, SIZE, SIGN, EA, RdV); \ gen_set_label(label); \ @@ -391,8 +389,6 @@ tcg_gen_movi_tl(EA, 0); \ PRED; \ CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \ - PRED_LOAD_CANCEL(LSB, EA); \ - tcg_gen_movi_i64(RddV, 0); \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \ fLOAD(1, 8, u, EA, RddV); \ gen_set_label(label); \ @@ -419,16 +415,16 @@ #define fGEN_TCG_STORE(SHORTCODE) \ do { \ - TCGv HALF = tcg_temp_new(); \ - TCGv BYTE = tcg_temp_new(); \ + TCGv HALF G_GNUC_UNUSED = tcg_temp_new(); \ + TCGv BYTE G_GNUC_UNUSED = tcg_temp_new(); \ SHORTCODE; \ } while (0) #define fGEN_TCG_STORE_pcr(SHIFT, STORE) \ do { \ TCGv ireg = tcg_temp_new(); \ - TCGv HALF = tcg_temp_new(); \ - TCGv BYTE = tcg_temp_new(); \ + TCGv HALF G_GNUC_UNUSED = tcg_temp_new(); \ + TCGv BYTE G_GNUC_UNUSED = tcg_temp_new(); \ tcg_gen_mov_tl(EA, RxV); \ gen_read_ireg(ireg, MuV, SHIFT); \ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ @@ -492,6 +488,59 @@ fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN)) /* + * dealloc_return + * Assembler mapped to + * r31:30 = dealloc_return(r30):raw + */ +#define fGEN_TCG_L4_return(SHORTCODE) \ + gen_return(ctx, RddV, RsV) + +/* + * sub-instruction version (no RddV, so handle it manually) + */ +#define fGEN_TCG_SL2_return(SHORTCODE) \ + do { \ + TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \ + gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \ + gen_log_reg_write_pair(HEX_REG_FP, RddV); \ + } while (0) + +/* + * Conditional returns follow this naming convention + * _t predicate true + * _f predicate false + * _tnew_pt predicate.new true predict taken + * _fnew_pt predicate.new false predict taken + * 
_tnew_pnt predicate.new true predict not taken + * _fnew_pnt predicate.new false predict not taken + * Predictions are not modelled in QEMU + * + * Example: + * if (p1) r31:30 = dealloc_return(r30):raw + */ +#define fGEN_TCG_L4_return_t(SHORTCODE) \ + gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_EQ); +#define fGEN_TCG_L4_return_f(SHORTCODE) \ + gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_NE) +#define fGEN_TCG_L4_return_tnew_pt(SHORTCODE) \ + gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ) +#define fGEN_TCG_L4_return_fnew_pt(SHORTCODE) \ + gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE) +#define fGEN_TCG_L4_return_tnew_pnt(SHORTCODE) \ + gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ) +#define fGEN_TCG_L4_return_fnew_pnt(SHORTCODE) \ + gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE) + +#define fGEN_TCG_SL2_return_t(SHORTCODE) \ + gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_pred[0]) +#define fGEN_TCG_SL2_return_f(SHORTCODE) \ + gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0]) +#define fGEN_TCG_SL2_return_tnew(SHORTCODE) \ + gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0]) +#define fGEN_TCG_SL2_return_fnew(SHORTCODE) \ + gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0]) + +/* * Mathematical operations with more than one definition require * special handling */ @@ -589,14 +638,24 @@ #define fGEN_TCG_J2_call(SHORTCODE) \ gen_call(ctx, riV) +#define fGEN_TCG_J2_callr(SHORTCODE) \ + gen_callr(ctx, RsV) #define fGEN_TCG_J2_callt(SHORTCODE) \ gen_cond_call(ctx, PuV, TCG_COND_EQ, riV) #define fGEN_TCG_J2_callf(SHORTCODE) \ gen_cond_call(ctx, PuV, TCG_COND_NE, riV) +#define fGEN_TCG_J2_callrt(SHORTCODE) \ + gen_cond_callr(ctx, TCG_COND_EQ, PuV, RsV) +#define fGEN_TCG_J2_callrf(SHORTCODE) \ + gen_cond_callr(ctx, TCG_COND_NE, PuV, RsV) #define fGEN_TCG_J2_endloop0(SHORTCODE) \ gen_endloop0(ctx) +#define fGEN_TCG_J2_endloop1(SHORTCODE) \ + gen_endloop1(ctx) +#define fGEN_TCG_J2_endloop01(SHORTCODE) \ + gen_endloop01(ctx) /* * 
Compound compare and jump instructions @@ -986,6 +1045,19 @@ #define fGEN_TCG_S2_asl_r_r_sat(SHORTCODE) \ gen_asl_r_r_sat(RdV, RsV, RtV) +#define fGEN_TCG_SL2_jumpr31(SHORTCODE) \ + gen_jumpr(ctx, hex_gpr[HEX_REG_LR]) + +#define fGEN_TCG_SL2_jumpr31_t(SHORTCODE) \ + gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_pred[0]) +#define fGEN_TCG_SL2_jumpr31_f(SHORTCODE) \ + gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0]) + +#define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \ + gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0]) +#define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \ + gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0]) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index 02cb52c21e..fa93e185ce 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -30,37 +30,33 @@ def gen_decl_ea_tcg(f, tag): def genptr_decl_pair_writable(f, tag, regtype, regid, regno): regN="%s%sN" % (regtype,regid) - f.write(" TCGv_i64 %s%sV = tcg_temp_new_i64();\n" % \ - (regtype, regid)) - if (regtype == "C"): + if (regtype == "R"): + f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) + elif (regtype == "C"): f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ (regN, regno)) else: - f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - f.write(" if (!is_preloaded(ctx, %s)) {\n" % regN) - f.write(" tcg_gen_mov_tl(hex_new_value[%s], hex_gpr[%s]);\n" % \ - (regN, regN)) - f.write(" }\n") - f.write(" if (!is_preloaded(ctx, %s + 1)) {\n" % regN) - f.write(" tcg_gen_mov_tl(hex_new_value[%s + 1], hex_gpr[%s + 1]);\n" % \ - (regN, regN)) - f.write(" }\n") + print("Bad register parse: ", regtype, regid) + f.write(" TCGv_i64 %s%sV = get_result_gpr_pair(ctx, %s);\n" % \ + (regtype, regid, regN)) def genptr_decl_writable(f, tag, regtype, regid, regno): regN="%s%sN" % (regtype,regid) - f.write(" TCGv %s%sV = tcg_temp_new();\n" % \ - (regtype, regid)) - if (regtype == "C"): + if (regtype == "R"): + f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) + f.write(" TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \ + (regtype, regid, regN)) + elif (regtype == "C"): f.write(" const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \ (regN, regno)) - else: + f.write(" TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \ + (regtype, regid, regN)) + elif (regtype == "P"): f.write(" const int %s = insn->regno[%d];\n" % (regN, regno)) - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - f.write(" if (!is_preloaded(ctx, %s)) {\n" % regN) - f.write(" tcg_gen_mov_tl(hex_new_value[%s], hex_gpr[%s]);\n" % \ - (regN, regN)) - f.write(" }\n") + 
f.write(" TCGv %s%sV = tcg_temp_new();\n" % \ + (regtype, regid)) + else: + print("Bad register parse: ", regtype, regid) def genptr_decl(f, tag, regtype, regid, regno): regN="%s%sN" % (regtype,regid) @@ -166,17 +162,6 @@ def genptr_decl(f, tag, regtype, regid, regno): f.write(" ctx_future_vreg_off(ctx, %s%sN," % \ (regtype, regid)) f.write(" 1, true);\n"); - if 'A_CONDEXEC' in hex_common.attribdict[tag]: - f.write(" if (!is_vreg_preloaded(ctx, %s)) {\n" % (regN)) - f.write(" intptr_t src_off =") - f.write(" offsetof(CPUHexagonState, VRegs[%s%sN]);\n"% \ - (regtype, regid)) - f.write(" tcg_gen_gvec_mov(MO_64, %s%sV_off,\n" % \ - (regtype, regid)) - f.write(" src_off,\n") - f.write(" sizeof(MMVector),\n") - f.write(" sizeof(MMVector));\n") - f.write(" }\n") if (not hex_common.skip_qemu_helper(tag)): f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \ @@ -191,8 +176,7 @@ def genptr_decl(f, tag, regtype, regid, regno): (regtype, regid, regno)) f.write(" const intptr_t %s%sV_off =\n" % \ (regtype, regid)) - f.write(" offsetof(CPUHexagonState,\n") - f.write(" future_QRegs[%s%sN]);\n" % \ + f.write(" get_result_qreg(ctx, %s%sN);\n" % \ (regtype, regid)) if (not hex_common.skip_qemu_helper(tag)): f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \ @@ -274,8 +258,12 @@ def genptr_src_read(f, tag, regtype, regid): f.write(" hex_gpr[%s%sN + 1]);\n" % \ (regtype, regid)) elif (regid in {"x", "y"}): - f.write(" tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \ - (regtype,regid,regtype,regid)) + ## For read/write registers, we need to get the original value into + ## the result TCGv. For conditional instructions, this is done in + ## gen_start_packet. For unconditional instructions, we do it here. 
+ if ('A_CONDEXEC' not in hex_common.attribdict[tag]): + f.write(" tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \ + (regtype, regid, regtype, regid)) elif (regid not in {"s", "t", "u", "v"}): print("Bad register parse: ", regtype, regid) elif (regtype == "P"): @@ -385,37 +373,22 @@ def gen_helper_call_imm(f,immlett): f.write(", tcgv_%s" % hex_common.imm_name(immlett)) def genptr_dst_write_pair(f, tag, regtype, regid): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - f.write(" gen_log_predicated_reg_write_pair(%s%sN, %s%sV, insn->slot);\n" % \ - (regtype, regid, regtype, regid)) - else: - f.write(" gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \ - (regtype, regid, regtype, regid)) - f.write(" ctx_log_reg_write_pair(ctx, %s%sN);\n" % \ - (regtype, regid)) + f.write(" gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \ + (regtype, regid, regtype, regid)) def genptr_dst_write(f, tag, regtype, regid): if (regtype == "R"): if (regid in {"dd", "xx", "yy"}): genptr_dst_write_pair(f, tag, regtype, regid) elif (regid in {"d", "e", "x", "y"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - f.write(" gen_log_predicated_reg_write(%s%sN, %s%sV,\n" % \ - (regtype, regid, regtype, regid)) - f.write(" insn->slot);\n") - else: - f.write(" gen_log_reg_write(%s%sN, %s%sV);\n" % \ - (regtype, regid, regtype, regid)) - f.write(" ctx_log_reg_write(ctx, %s%sN);\n" % \ - (regtype, regid)) + f.write(" gen_log_reg_write(%s%sN, %s%sV);\n" % \ + (regtype, regid, regtype, regid)) else: print("Bad register parse: ", regtype, regid) elif (regtype == "P"): if (regid in {"d", "e", "x"}): f.write(" gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \ (regtype, regid, regtype, regid)) - f.write(" ctx_log_pred_write(ctx, %s%sN);\n" % \ - (regtype, regid)) else: print("Bad register parse: ", regtype, regid) elif (regtype == "C"): @@ -432,43 +405,18 @@ def genptr_dst_write(f, tag, regtype, regid): def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): if (regtype == "V"): - if (regid in {"dd", "xx", 
"yy"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - is_predicated = "true" - else: - is_predicated = "false" + if (regid in {"xx"}): f.write(" gen_log_vreg_write_pair(ctx, %s%sV_off, %s%sN, " % \ (regtype, regid, regtype, regid)) - f.write("%s, insn->slot, %s);\n" % \ - (newv, is_predicated)) - f.write(" ctx_log_vreg_write_pair(ctx, %s%sN, %s,\n" % \ - (regtype, regid, newv)) - f.write(" %s);\n" % (is_predicated)) - elif (regid in {"d", "x", "y"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - is_predicated = "true" - else: - is_predicated = "false" - f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s, " % \ + f.write("%s);\n" % \ + (newv)) + elif (regid in {"y"}): + f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s);\n" % \ (regtype, regid, regtype, regid, newv)) - f.write("insn->slot, %s);\n" % \ - (is_predicated)) - f.write(" ctx_log_vreg_write(ctx, %s%sN, %s, %s);\n" % \ - (regtype, regid, newv, is_predicated)) - else: + elif (regid not in {"dd", "d", "x"}): print("Bad register parse: ", regtype, regid) elif (regtype == "Q"): - if (regid in {"d", "e", "x"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - is_predicated = "true" - else: - is_predicated = "false" - f.write(" gen_log_qreg_write(%s%sV_off, %s%sN, %s, " % \ - (regtype, regid, regtype, regid, newv)) - f.write("insn->slot, %s);\n" % (is_predicated)) - f.write(" ctx_log_qreg_write(ctx, %s%sN, %s);\n" % \ - (regtype, regid, is_predicated)) - else: + if (regid not in {"d", "e", "x"}): print("Bad register parse: ", regtype, regid) else: print("Bad register parse: ", regtype, regid) @@ -500,15 +448,15 @@ def genptr_dst_write_opn(f,regtype, regid, tag): ## For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;} ## We produce: ## static void generate_A2_add(DisasContext *ctx) -## { -## TCGv RdV = tcg_temp_new(); -## const int RdN = insn->regno[0]; -## TCGv RsV = hex_gpr[insn->regno[1]]; -## TCGv RtV = hex_gpr[insn->regno[2]]; -## <GEN> -## gen_log_reg_write(RdN, RdV); -## 
ctx_log_reg_write(ctx, RdN); -## } +## { +## Insn *insn __attribute__((unused)) = ctx->insn; +## const int RdN = insn->regno[0]; +## TCGv RdV = get_result_gpr(ctx, RdN); +## TCGv RsV = hex_gpr[insn->regno[1]]; +## TCGv RtV = hex_gpr[insn->regno[2]]; +## <GEN> +## gen_log_reg_write(RdN, RdV); +## } ## ## where <GEN> depends on hex_common.skip_qemu_helper(tag) ## if hex_common.skip_qemu_helper(tag) is True @@ -592,6 +540,14 @@ def gen_tcg_func(f, tag, regs, imms): if (i > 0): f.write(", ") f.write("cpu_env") i=1 + ## For conditional instructions, we pass in the destination register + if 'A_CONDEXEC' in hex_common.attribdict[tag]: + for regtype, regid, toss, numregs in regs: + if (hex_common.is_writeonly(regid) and + not hex_common.is_hvx_reg(regtype)): + gen_helper_call_opn(f, tag, regtype, regid, toss, \ + numregs, i) + i += 1 for regtype,regid,toss,numregs in regs: if (hex_common.is_written(regid)): if (not hex_common.is_hvx_reg(regtype)): diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h index 94f272e286..d4aefe8e3f 100644 --- a/target/hexagon/gen_tcg_hvx.h +++ b/target/hexagon/gen_tcg_hvx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -133,16 +133,11 @@ static inline void assert_vhist_tmp(DisasContext *ctx) do { \ TCGv lsb = tcg_temp_new(); \ TCGLabel *false_label = gen_new_label(); \ - TCGLabel *end_label = gen_new_label(); \ tcg_gen_andi_tl(lsb, PsV, 1); \ tcg_gen_brcondi_tl(TCG_COND_NE, lsb, PRED, false_label); \ tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ sizeof(MMVector), sizeof(MMVector)); \ - tcg_gen_br(end_label); \ gen_set_label(false_label); \ - tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \ - 1 << insn->slot); \ - gen_set_label(end_label); \ } while (0) @@ -547,17 +542,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx) do { \ TCGv LSB = tcg_temp_new(); \ TCGLabel *false_label = gen_new_label(); \ - TCGLabel *end_label = gen_new_label(); \ GET_EA; \ PRED; \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \ gen_vreg_load(ctx, DSTOFF, EA, true); \ INC; \ - tcg_gen_br(end_label); \ gen_set_label(false_label); \ - tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \ - 1 << insn->slot); \ - gen_set_label(end_label); \ } while (0) #define fGEN_TCG_PRED_VEC_LOAD_pred_pi \ @@ -717,17 +707,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx) do { \ TCGv LSB = tcg_temp_new(); \ TCGLabel *false_label = gen_new_label(); \ - TCGLabel *end_label = gen_new_label(); \ GET_EA; \ PRED; \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \ gen_vreg_store(ctx, EA, SRCOFF, insn->slot, ALIGN); \ INC; \ - tcg_gen_br(end_label); \ gen_set_label(false_label); \ - tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \ - 1 << insn->slot); \ - gen_set_label(end_label); \ } while (0) #define fGEN_TCG_PRED_VEC_STORE_pred_pi(ALIGN) \ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 86bd093ce8..bb274d4a71 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 
2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -68,26 +68,17 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, } } -static inline void gen_log_predicated_reg_write(int rnum, TCGv val, - uint32_t slot) +static TCGv get_result_gpr(DisasContext *ctx, int rnum) { - TCGv zero = tcg_constant_tl(0); - TCGv slot_mask = tcg_temp_new(); + return hex_new_value[rnum]; +} - tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); - tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, - val, hex_new_value[rnum]); - if (HEX_DEBUG) { - /* - * Do this so HELPER(debug_commit_end) will know - * - * Note that slot_mask indicates the value is not written - * (i.e., slot was cancelled), so we create a true/false value before - * or'ing with hex_reg_written[rnum]. 
- */ - tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); - tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); - } +static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) +{ + TCGv_i64 result = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(result, hex_new_value[rnum], + hex_new_value[rnum + 1]); + return result; } void gen_log_reg_write(int rnum, TCGv val) @@ -102,39 +93,6 @@ void gen_log_reg_write(int rnum, TCGv val) } } -static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, - uint32_t slot) -{ - TCGv val32 = tcg_temp_new(); - TCGv zero = tcg_constant_tl(0); - TCGv slot_mask = tcg_temp_new(); - - tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); - /* Low word */ - tcg_gen_extrl_i64_i32(val32, val); - tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], - slot_mask, zero, - val32, hex_new_value[rnum]); - /* High word */ - tcg_gen_extrh_i64_i32(val32, val); - tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1], - slot_mask, zero, - val32, hex_new_value[rnum + 1]); - if (HEX_DEBUG) { - /* - * Do this so HELPER(debug_commit_end) will know - * - * Note that slot_mask indicates the value is not written - * (i.e., slot was cancelled), so we create a true/false value before - * or'ing with hex_reg_written[rnum]. 
- */ - tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); - tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); - tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1], - slot_mask); - } -} - static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) { const target_ulong reg_mask_low = reg_immut_masks[rnum]; @@ -180,6 +138,7 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) hex_new_pred_value[pnum], base_val); } tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); + set_bit(pnum, ctx->pregs_written); } static inline void gen_read_p3_0(TCGv control_reg) @@ -256,7 +215,6 @@ static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg) for (int i = 0; i < NUM_PREGS; i++) { tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8); gen_log_pred_write(ctx, i, hex_p8); - ctx_log_pred_write(ctx, i); } } @@ -274,7 +232,6 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, gen_write_p3_0(ctx, val); } else { gen_log_reg_write(reg_num, val); - ctx_log_reg_write(ctx, reg_num); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; } @@ -291,15 +248,14 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, TCGv_i64 val) { if (reg_num == HEX_REG_P3_0_ALIASED) { + TCGv result = get_result_gpr(ctx, reg_num + 1); TCGv val32 = tcg_temp_new(); tcg_gen_extrl_i64_i32(val32, val); gen_write_p3_0(ctx, val32); tcg_gen_extrh_i64_i32(val32, val); - gen_log_reg_write(reg_num + 1, val32); - ctx_log_reg_write(ctx, reg_num + 1); + tcg_gen_mov_tl(result, val32); } else { gen_log_reg_write_pair(reg_num, val); - ctx_log_reg_write_pair(ctx, reg_num); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; ctx->num_insns = 0; @@ -571,6 +527,13 @@ static void gen_cond_jumpr(DisasContext *ctx, TCGv dst_pc, gen_write_new_pc_addr(ctx, dst_pc, cond, pred); } +static void gen_cond_jumpr31(DisasContext *ctx, TCGCond cond, TCGv pred) +{ + TCGv LSB = tcg_temp_new(); + tcg_gen_andi_tl(LSB, 
pred, 1); + gen_cond_jumpr(ctx, hex_gpr[HEX_REG_LR], cond, LSB); +} + static void gen_cond_jump(DisasContext *ctx, TCGCond cond, TCGv pred, int pc_off) { @@ -669,27 +632,99 @@ static void gen_jumpr(DisasContext *ctx, TCGv new_pc) static void gen_call(DisasContext *ctx, int pc_off) { - TCGv next_PC = - tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes); - gen_log_reg_write(HEX_REG_LR, next_PC); + TCGv lr = get_result_gpr(ctx, HEX_REG_LR); + tcg_gen_movi_tl(lr, ctx->next_PC); gen_write_new_pc_pcrel(ctx, pc_off, TCG_COND_ALWAYS, NULL); } +static void gen_callr(DisasContext *ctx, TCGv new_pc) +{ + TCGv lr = get_result_gpr(ctx, HEX_REG_LR); + tcg_gen_movi_tl(lr, ctx->next_PC); + gen_write_new_pc_addr(ctx, new_pc, TCG_COND_ALWAYS, NULL); +} + static void gen_cond_call(DisasContext *ctx, TCGv pred, TCGCond cond, int pc_off) { - TCGv next_PC; + TCGv lr = get_result_gpr(ctx, HEX_REG_LR); TCGv lsb = tcg_temp_new(); TCGLabel *skip = gen_new_label(); tcg_gen_andi_tl(lsb, pred, 1); gen_write_new_pc_pcrel(ctx, pc_off, cond, lsb); tcg_gen_brcondi_tl(cond, lsb, 0, skip); - next_PC = - tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes); - gen_log_reg_write(HEX_REG_LR, next_PC); + tcg_gen_movi_tl(lr, ctx->next_PC); gen_set_label(skip); } +static void gen_cond_callr(DisasContext *ctx, + TCGCond cond, TCGv pred, TCGv new_pc) +{ + TCGv lsb = tcg_temp_new(); + TCGLabel *skip = gen_new_label(); + tcg_gen_andi_tl(lsb, pred, 1); + tcg_gen_brcondi_tl(cond, lsb, 0, skip); + gen_callr(ctx, new_pc); + gen_set_label(skip); +} + +/* frame ^= (int64_t)FRAMEKEY << 32 */ +static void gen_frame_unscramble(TCGv_i64 frame) +{ + TCGv_i64 framekey = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(framekey, hex_gpr[HEX_REG_FRAMEKEY]); + tcg_gen_shli_i64(framekey, framekey, 32); + tcg_gen_xor_i64(frame, frame, framekey); +} + +static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA) +{ + Insn *insn = ctx->insn; /* Needed for CHECK_NOSHUF */ + CHECK_NOSHUF(EA, 8); + 
tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx); +} + +static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src) +{ + /* + * frame = *src + * dst = frame_unscramble(frame) + * SP = src + 8 + * PC = dst.w[1] + */ + TCGv_i64 frame = tcg_temp_new_i64(); + TCGv r31 = tcg_temp_new(); + TCGv r29 = get_result_gpr(ctx, HEX_REG_SP); + + gen_load_frame(ctx, frame, src); + gen_frame_unscramble(frame); + tcg_gen_mov_i64(dst, frame); + tcg_gen_addi_tl(r29, src, 8); + tcg_gen_extrh_i64_i32(r31, dst); + gen_jumpr(ctx, r31); +} + +/* if (pred) dst = dealloc_return(src):raw */ +static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src, + TCGv pred, TCGCond cond) +{ + TCGv LSB = tcg_temp_new(); + TCGLabel *skip = gen_new_label(); + tcg_gen_andi_tl(LSB, pred, 1); + + tcg_gen_brcondi_tl(cond, LSB, 0, skip); + gen_return(ctx, dst, src); + gen_set_label(skip); +} + +/* sub-instruction version (no RddV, so handle it manually) */ +static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred) +{ + TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); + gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond); + gen_log_reg_write_pair(HEX_REG_FP, RddV); +} + static void gen_endloop0(DisasContext *ctx) { TCGv lpcfg = tcg_temp_new(); @@ -737,14 +772,95 @@ static void gen_endloop0(DisasContext *ctx) TCGLabel *label3 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3); { + TCGv lc0 = get_result_gpr(ctx, HEX_REG_LC0); gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]); - tcg_gen_subi_tl(hex_new_value[HEX_REG_LC0], - hex_gpr[HEX_REG_LC0], 1); + tcg_gen_subi_tl(lc0, hex_gpr[HEX_REG_LC0], 1); } gen_set_label(label3); } } +static void gen_endloop1(DisasContext *ctx) +{ + /* + * if (hex_gpr[HEX_REG_LC1] > 1) { + * PC = hex_gpr[HEX_REG_SA1]; + * hex_new_value[HEX_REG_LC1] = hex_gpr[HEX_REG_LC1] - 1; + * } + */ + TCGLabel *label = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC1], 1, label); + { + TCGv lc1 = 
get_result_gpr(ctx, HEX_REG_LC1); + gen_jumpr(ctx, hex_gpr[HEX_REG_SA1]); + tcg_gen_subi_tl(lc1, hex_gpr[HEX_REG_LC1], 1); + } + gen_set_label(label); +} + +static void gen_endloop01(DisasContext *ctx) +{ + TCGv lpcfg = tcg_temp_new(); + TCGLabel *label1 = gen_new_label(); + TCGLabel *label2 = gen_new_label(); + TCGLabel *label3 = gen_new_label(); + TCGLabel *done = gen_new_label(); + + GET_USR_FIELD(USR_LPCFG, lpcfg); + + /* + * if (lpcfg == 1) { + * hex_new_pred_value[3] = 0xff; + * hex_pred_written |= 1 << 3; + * } + */ + tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); + { + tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); + tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + } + gen_set_label(label1); + + /* + * if (lpcfg) { + * SET_USR_FIELD(USR_LPCFG, lpcfg - 1); + * } + */ + tcg_gen_brcondi_tl(TCG_COND_EQ, lpcfg, 0, label2); + { + tcg_gen_subi_tl(lpcfg, lpcfg, 1); + SET_USR_FIELD(USR_LPCFG, lpcfg); + } + gen_set_label(label2); + + /* + * if (hex_gpr[HEX_REG_LC0] > 1) { + * PC = hex_gpr[HEX_REG_SA0]; + * hex_new_value[HEX_REG_LC0] = hex_gpr[HEX_REG_LC0] - 1; + * } else { + * if (hex_gpr[HEX_REG_LC1] > 1) { + * hex_next_pc = hex_gpr[HEX_REG_SA1]; + * hex_new_value[HEX_REG_LC1] = hex_gpr[HEX_REG_LC1] - 1; + * } + * } + */ + tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, label3); + { + TCGv lc0 = get_result_gpr(ctx, HEX_REG_LC0); + gen_jumpr(ctx, hex_gpr[HEX_REG_SA0]); + tcg_gen_subi_tl(lc0, hex_gpr[HEX_REG_LC0], 1); + tcg_gen_br(done); + } + gen_set_label(label3); + tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC1], 1, done); + { + TCGv lc1 = get_result_gpr(ctx, HEX_REG_LC1); + gen_jumpr(ctx, hex_gpr[HEX_REG_SA1]); + tcg_gen_subi_tl(lc1, hex_gpr[HEX_REG_LC1], 1); + } + gen_set_label(done); +} + static void gen_cmp_jumpnv(DisasContext *ctx, TCGCond cond, TCGv val, TCGv src, int pc_off) { @@ -869,68 +985,32 @@ static intptr_t vreg_src_off(DisasContext *ctx, int num) } static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int 
num, - VRegWriteType type, int slot_num, - bool is_predicated) + VRegWriteType type) { - TCGLabel *label_end = NULL; intptr_t dstoff; - if (is_predicated) { - TCGv cancelled = tcg_temp_new(); - label_end = gen_new_label(); - - /* Don't do anything if the slot was cancelled */ - tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); - tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); - } - if (type != EXT_TMP) { dstoff = ctx_future_vreg_off(ctx, num, 1, true); tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMVector), sizeof(MMVector)); - tcg_gen_ori_tl(hex_VRegs_updated, hex_VRegs_updated, 1 << num); } else { dstoff = ctx_tmp_vreg_off(ctx, num, 1, false); tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMVector), sizeof(MMVector)); } - - if (is_predicated) { - gen_set_label(label_end); - } } static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num, - VRegWriteType type, int slot_num, - bool is_predicated) + VRegWriteType type) { - gen_log_vreg_write(ctx, srcoff, num ^ 0, type, slot_num, is_predicated); + gen_log_vreg_write(ctx, srcoff, num ^ 0, type); srcoff += sizeof(MMVector); - gen_log_vreg_write(ctx, srcoff, num ^ 1, type, slot_num, is_predicated); + gen_log_vreg_write(ctx, srcoff, num ^ 1, type); } -static void gen_log_qreg_write(intptr_t srcoff, int num, int vnew, - int slot_num, bool is_predicated) +static intptr_t get_result_qreg(DisasContext *ctx, int qnum) { - TCGLabel *label_end = NULL; - intptr_t dstoff; - - if (is_predicated) { - TCGv cancelled = tcg_temp_new(); - label_end = gen_new_label(); - - /* Don't do anything if the slot was cancelled */ - tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); - tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); - } - - dstoff = offsetof(CPUHexagonState, future_QRegs[num]); - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMQReg), sizeof(MMQReg)); - - if (is_predicated) { - tcg_gen_ori_tl(hex_QRegs_updated, hex_QRegs_updated, 1 << num); - 
gen_set_label(label_end); - } + return offsetof(CPUHexagonState, future_QRegs[qnum]); } static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index a29f61bb4f..0200a66cb6 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -89,6 +89,7 @@ def calculate_attribs(): add_qemu_macro_attrib('fWRITE_P3', 'A_WRITES_PRED_REG') add_qemu_macro_attrib('fSET_OVERFLOW', 'A_IMPLICIT_WRITES_USR') add_qemu_macro_attrib('fSET_LPCFG', 'A_IMPLICIT_WRITES_USR') + add_qemu_macro_attrib('fLOAD', 'A_SCALAR_LOAD') add_qemu_macro_attrib('fSTORE', 'A_SCALAR_STORE') # Recurse down macros, find attributes from sub-macros @@ -236,6 +237,13 @@ def helper_needs_next_PC(tag): def need_pkt_has_multi_cof(tag): return 'A_COF' in attribdict[tag] +def need_condexec_reg(tag, regs): + if 'A_CONDEXEC' in attribdict[tag]: + for regtype, regid, toss, numregs in regs: + if is_writeonly(regid) and not is_hvx_reg(regtype): + return True + return False + def skip_qemu_helper(tag): return tag in overrides.keys() diff --git a/target/hexagon/idef-parser/idef-parser.h b/target/hexagon/idef-parser/idef-parser.h index 17d2ebfaf6..d23e71f13b 100644 --- a/target/hexagon/idef-parser/idef-parser.h +++ b/target/hexagon/idef-parser/idef-parser.h @@ -82,7 +82,6 @@ enum ImmUnionTag { VALUE, QEMU_TMP, IMM_PC, - IMM_NPC, IMM_CONSTEXT, }; diff --git a/target/hexagon/idef-parser/idef-parser.lex b/target/hexagon/idef-parser/idef-parser.lex index ff87a02c3a..5eb8ac5a80 100644 --- a/target/hexagon/idef-parser/idef-parser.lex +++ 
b/target/hexagon/idef-parser/idef-parser.lex @@ -5,7 +5,7 @@ %{ /* - * Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved. + * Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -140,8 +140,6 @@ STRING_LIT \"(\\.|[^"\\])*\" yylval->rvalue.is_dotnew = true; yylval->rvalue.signedness = SIGNED; return PRED; } -"IV1DEAD()" | -"fPAUSE(uiV);" { return ';'; } "+=" { return INC; } "-=" { return DEC; } "++" { return PLUSPLUS; } @@ -159,9 +157,8 @@ STRING_LIT \"(\\.|[^"\\])*\" "else" { return ELSE; } "for" { return FOR; } "fREAD_IREG" { return ICIRC; } -"fPART1" { return PART1; } "if" { return IF; } -"fFRAME_SCRAMBLE" { return FSCR; } +"fFRAME_SCRAMBLE" | "fFRAME_UNSCRAMBLE" { return FSCR; } "fFRAMECHECK" { return FCHK; } "Constant_extended" { return CONSTEXT; } @@ -312,14 +309,10 @@ STRING_LIT \"(\\.|[^"\\])*\" "(unsigned int)" { yylval->cast.bit_width = 32; yylval->cast.signedness = UNSIGNED; return CAST; } -"fREAD_PC()" | -"PC" { return PC; } -"fREAD_NPC()" | -"NPC" { return NPC; } -"fGET_LPCFG" | +"fREAD_PC()" { return PC; } "USR.LPCFG" { return LPCFG; } "LOAD_CANCEL(EA)" { return LOAD_CANCEL; } -"STORE_CANCEL(EA)" | +"STORE_CANCEL(EA)" { return STORE_CANCEL; } "CANCEL" { return CANCEL; } "N"{LOWER_ID}"N" { yylval->rvalue.type = REGISTER_ARG; yylval->rvalue.reg.type = DOTNEW; @@ -360,14 +353,6 @@ STRING_LIT \"(\\.|[^"\\])*\" yylval->rvalue.bit_width = 32; yylval->rvalue.signedness = UNSIGNED; return REG; } -"fREAD_LC"[01] { yylval->rvalue.type = REGISTER; - yylval->rvalue.reg.type = CONTROL; - yylval->rvalue.reg.id = HEX_REG_LC0 - + (yytext[8] - '0') * 2; - yylval->rvalue.reg.bit_width = 32; - yylval->rvalue.bit_width = 32; - yylval->rvalue.signedness = UNSIGNED; - return REG; } "LC"[01] { yylval->rvalue.type = REGISTER; yylval->rvalue.reg.type = CONTROL; yylval->rvalue.reg.id = HEX_REG_LC0 @@ -376,14 +361,6 
@@ STRING_LIT \"(\\.|[^"\\])*\" yylval->rvalue.bit_width = 32; yylval->rvalue.signedness = UNSIGNED; return REG; } -"fREAD_SA"[01] { yylval->rvalue.type = REGISTER; - yylval->rvalue.reg.type = CONTROL; - yylval->rvalue.reg.id = HEX_REG_SA0 - + (yytext[8] - '0') * 2; - yylval->rvalue.reg.bit_width = 32; - yylval->rvalue.bit_width = 32; - yylval->rvalue.signedness = UNSIGNED; - return REG; } "SA"[01] { yylval->rvalue.type = REGISTER; yylval->rvalue.reg.type = CONTROL; yylval->rvalue.reg.id = HEX_REG_SA0 diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y index c784726d41..7d05773b67 100644 --- a/target/hexagon/idef-parser/idef-parser.y +++ b/target/hexagon/idef-parser/idef-parser.y @@ -1,6 +1,6 @@ %{ /* - * Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved. + * Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -52,8 +52,8 @@ %token IN INAME VAR %token ABS CROUND ROUND CIRCADD COUNTONES INC DEC ANDA ORA XORA PLUSPLUS ASL %token ASR LSR EQ NEQ LTE GTE MIN MAX ANDL FOR ICIRC IF MUN FSCR FCHK SXT -%token ZXT CONSTEXT LOCNT BREV SIGN LOAD STORE PC NPC LPCFG -%token LOAD_CANCEL CANCEL IDENTITY PART1 ROTL INSBITS SETBITS EXTRANGE +%token ZXT CONSTEXT LOCNT BREV SIGN LOAD STORE PC LPCFG +%token LOAD_CANCEL STORE_CANCEL CANCEL IDENTITY ROTL INSBITS SETBITS EXTRANGE %token CAST4_8U FAIL CARRY_FROM_ADD ADDSAT64 LSBNEW %token TYPE_SIZE_T TYPE_INT TYPE_SIGNED TYPE_UNSIGNED TYPE_LONG @@ -336,15 +336,6 @@ assign_statement : lvalue '=' rvalue OUT(c, &@1, &$1, " = ", &$3, ";\n"); $$ = $1; } - | PC '=' rvalue - { - @1.last_column = @3.last_column; - yyassert(c, &@1, !is_inside_ternary(c), - "Assignment side-effect not modeled!"); - $3 = gen_rvalue_truncate(c, &@1, &$3); - $3 = rvalue_materialize(c, &@1, &$3); - OUT(c, &@1, "gen_write_new_pc(", &$3, ");\n"); - } | LOAD '(' IMM ',' IMM 
',' SIGN ',' var ',' lvalue ')' { @1.last_column = @12.last_column; @@ -412,7 +403,6 @@ control_statement : frame_check | cancel_statement | if_statement | for_statement - | fpart1_statement ; frame_check : FCHK '(' rvalue ',' rvalue ')' ';' @@ -422,10 +412,11 @@ cancel_statement : LOAD_CANCEL { gen_load_cancel(c, &@1); } - | CANCEL + | STORE_CANCEL { gen_cancel(c, &@1); } + | CANCEL ; if_statement : if_stmt @@ -462,17 +453,6 @@ for_statement : FOR '(' IMM '=' IMM ';' IMM '<' IMM ';' IMM PLUSPLUS ')' } ; -fpart1_statement : PART1 - { - OUT(c, &@1, "if (insn->part1) {\n"); - } - '(' statements ')' - { - @1.last_column = @3.last_column; - OUT(c, &@1, "return; }\n"); - } - ; - if_stmt : IF '(' rvalue ')' { @1.last_column = @3.last_column; @@ -512,20 +492,6 @@ rvalue : FAIL rvalue.signedness = UNSIGNED; $$ = rvalue; } - | NPC - { - /* - * NPC is only read from CALLs, so we can hardcode it - * at translation time - */ - HexValue rvalue; - memset(&rvalue, 0, sizeof(HexValue)); - rvalue.type = IMMEDIATE; - rvalue.imm.type = IMM_NPC; - rvalue.bit_width = 32; - rvalue.signedness = UNSIGNED; - $$ = rvalue; - } | CONSTEXT { HexValue rvalue; @@ -781,11 +747,6 @@ rvalue : FAIL /* Ones count */ $$ = gen_ctpop_op(c, &@1, &$3); } - | LPCFG - { - $$ = gen_tmp(c, &@1, 32, UNSIGNED); - OUT(c, &@1, "GET_USR_FIELD(USR_LPCFG, ", &$$, ");\n"); - } | EXTRACT '(' rvalue ',' rvalue ')' { @1.last_column = @6.last_column; diff --git a/target/hexagon/idef-parser/macros.inc b/target/hexagon/idef-parser/macros.inc index 6b697da87a..7478d4db17 100644 --- a/target/hexagon/idef-parser/macros.inc +++ b/target/hexagon/idef-parser/macros.inc @@ -97,16 +97,8 @@ #define fWRITE_LR(A) (LR = A) #define fWRITE_FP(A) (FP = A) #define fWRITE_SP(A) (SP = A) -/* - * Note: There is a rule in the parser that matches `PC = ...` and emits - * a call to `gen_write_new_pc`. We need to call `gen_write_new_pc` to - * get the correct semantics when there are multiple stores in a packet. 
- */ -#define fBRANCH(LOC, TYPE) (PC = LOC) -#define fJUMPR(REGNO, TARGET, TYPE) (PC = TARGET) #define fWRITE_LOOP_REGS0(START, COUNT) SA0 = START; (LC0 = COUNT) #define fWRITE_LOOP_REGS1(START, COUNT) SA1 = START; (LC1 = COUNT) -#define fWRITE_LC0(VAL) (LC0 = VAL) #define fWRITE_LC1(VAL) (LC1 = VAL) #define fSET_LPCFG(VAL) (USR.LPCFG = VAL) #define fWRITE_P0(VAL) P0 = VAL; @@ -121,7 +113,6 @@ #define fEA_GPI(IMM) (EA = fREAD_GP() + IMM) #define fPM_I(REG, IMM) (REG = REG + IMM) #define fPM_M(REG, MVAL) (REG = REG + MVAL) -#define fWRITE_NPC(VAL) (PC = VAL) /* Unary operators */ #define fROUND(A) (A + 0x8000) diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index e1a55412c8..18cde6a1be 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 rev.ng Labs Srl. All Rights Reserved. + * Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -185,9 +185,6 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) case IMM_PC: EMIT(c, "ctx->base.pc_next"); break; - case IMM_NPC: - EMIT(c, "ctx->npc"); - break; case IMM_CONSTEXT: EMIT(c, "insn->extension_valid"); break; @@ -1323,10 +1320,6 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value) locp, "gen_log_reg_write(", ®->reg.id, ", ", &value_m, ");\n"); - OUT(c, - locp, - "ctx_log_reg_write(ctx, ", ®->reg.id, - ");\n"); } void gen_assign(Context *c, @@ -1675,9 +1668,7 @@ void gen_inst_init_args(Context *c, YYLTYPE *locp) for (unsigned i = 0; i < c->inst.init_list->len; i++) { HexValue *val = &g_array_index(c->inst.init_list, HexValue, i); if (val->type == REGISTER_ARG) { - char reg_id[5]; - reg_compose(c, locp, &val->reg, reg_id); - EMIT_HEAD(c, "tcg_gen_movi_i%u(%s, 0);\n", val->bit_width, reg_id); + /* Nothing to do here */ } else if (val->type == PREDICATE) { char suffix = val->is_dotnew ? 
'N' : 'V'; EMIT_HEAD(c, "tcg_gen_movi_i%u(P%c%c, 0);\n", val->bit_width, @@ -1722,13 +1713,10 @@ void gen_pred_assign(Context *c, YYLTYPE *locp, HexValue *left_pred, *left_pred = gen_tmp(c, locp, 32, UNSIGNED); } /* Extract first 8 bits, and store new predicate value */ - OUT(c, locp, "tcg_gen_mov_i32(", left_pred, ", ", &r, ");\n"); - OUT(c, locp, "tcg_gen_andi_i32(", left_pred, ", ", left_pred, - ", 0xff);\n"); + OUT(c, locp, "tcg_gen_andi_i32(", left_pred, ", ", &r, ", 0xff);\n"); if (is_direct) { OUT(c, locp, "gen_log_pred_write(ctx, ", pred_id, ", ", left_pred, ");\n"); - OUT(c, locp, "ctx_log_pred_write(ctx, ", pred_id, ");\n"); } } @@ -1739,7 +1727,6 @@ void gen_cancel(Context *c, YYLTYPE *locp) void gen_load_cancel(Context *c, YYLTYPE *locp) { - gen_cancel(c, locp); OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n"); OUT(c, locp, "ctx->s1_store_processed = false;\n"); OUT(c, locp, "process_store(ctx, 1);\n"); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 17facadaad..482a9c787f 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -205,26 +205,11 @@ static inline void gen_cancel(uint32_t slot) #define CANCEL gen_cancel(slot); #else -#define CANCEL cancel_slot(env, slot) +#define CANCEL do { } while (0) #endif #define LOAD_CANCEL(EA) do { CANCEL; } while (0) -#ifdef QEMU_GENERATE -static inline void gen_pred_cancel(TCGv pred, uint32_t slot_num) - { - TCGv slot_mask = tcg_temp_new(); - TCGv tmp = tcg_temp_new(); - TCGv zero = tcg_constant_tl(0); - tcg_gen_ori_tl(slot_mask, hex_slot_cancelled, 1 << slot_num); - tcg_gen_andi_tl(tmp, pred, 1); - tcg_gen_movcond_tl(TCG_COND_EQ, hex_slot_cancelled, tmp, zero, - slot_mask, hex_slot_cancelled); -} -#define PRED_LOAD_CANCEL(PRED, EA) \ - gen_pred_cancel(PRED, insn->is_endloop ? 4 : insn->slot) -#endif - #define STORE_CANCEL(EA) { env->slot_cancelled |= (1 << slot); } #define fMAX(A, B) (((A) > (B)) ? (A) : (B)) @@ -415,16 +400,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC) #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR) #define fHINTJR(TARGET) { /* Not modelled in qemu */} -#define fCALL(A) \ - do { \ - fWRITE_LR(fREAD_NPC()); \ - fBRANCH(A, COF_TYPE_CALL); \ - } while (0) -#define fCALLR(A) \ - do { \ - fWRITE_LR(fREAD_NPC()); \ - fBRANCH(A, COF_TYPE_CALLR); \ - } while (0) #define fWRITE_LOOP_REGS0(START, COUNT) \ do { \ WRITE_RREG(HEX_REG_LC0, COUNT); \ diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index 42b03c81e6..da8e608d00 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -1,5 +1,5 @@ ## -## Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -276,4 +276,13 @@ tcg_funcs_generated = custom_target( ) hexagon_ss.add(tcg_funcs_generated) +analyze_funcs_generated = custom_target( + 'analyze_funcs_generated.c.inc', + output: 'analyze_funcs_generated.c.inc', + depends: helper_dep, + depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h], + command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'], +) +hexagon_ss.add(analyze_funcs_generated) + target_arch += {'hexagon': hexagon_ss} diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 35449ef524..c9a156030e 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include "mmvec/mmvec.h" #include "mmvec/macros.h" #include "op_helper.h" +#include "translate.h" #define SF_BIAS 127 #define SF_MANTBITS 23 @@ -105,30 +106,6 @@ void log_store64(CPUHexagonState *env, target_ulong addr, env->mem_log_stores[slot].data64 = val; } -void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, - target_ulong addr) -{ - HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr); - - if (pkt_has_multi_cof) { - /* - * If more than one branch is taken in a packet, only the first one - * is actually done. 
- */ - if (env->branch_taken) { - HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, " - "ignoring the second one\n"); - } else { - fCHECK_PCALIGN(addr); - env->gpr[HEX_REG_PC] = addr; - env->branch_taken = 1; - } - } else { - fCHECK_PCALIGN(addr); - env->gpr[HEX_REG_PC] = addr; - } -} - /* Handy place to set a breakpoint */ void HELPER(debug_start_packet)(CPUHexagonState *env) { @@ -439,9 +416,10 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env, return PeV; } -static void probe_store(CPUHexagonState *env, int slot, int mmu_idx) +static void probe_store(CPUHexagonState *env, int slot, int mmu_idx, + bool is_predicated) { - if (!(env->slot_cancelled & (1 << slot))) { + if (!is_predicated || !(env->slot_cancelled & (1 << slot))) { size1u_t width = env->mem_log_stores[slot].width; target_ulong va = env->mem_log_stores[slot].va; uintptr_t ra = GETPC(); @@ -461,9 +439,12 @@ void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va, } /* Called during packet commit when there are two scalar stores */ -void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx) +void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args) { - probe_store(env, 0, mmu_idx); + int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX); + bool is_predicated = + FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED); + probe_store(env, 0, mmu_idx, is_predicated); } void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx) @@ -510,15 +491,18 @@ void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx) void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask, int mmu_idx) { - bool has_st0 = (mask >> 0) & 1; - bool has_st1 = (mask >> 1) & 1; - bool has_hvx_stores = (mask >> 2) & 1; + bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0); + bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1); + bool has_hvx_stores = + FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, 
HAS_HVX_STORES); + bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED); + bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED); if (has_st0) { - probe_store(env, 0, mmu_idx); + probe_store(env, 0, mmu_idx, s0_is_pred); } if (has_st1) { - probe_store(env, 1, mmu_idx); + probe_store(env, 1, mmu_idx, s1_is_pred); } if (has_hvx_stores) { HELPER(probe_hvx_stores)(env, mmu_idx); @@ -1193,7 +1177,7 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV, { float32 neg_RsV; arch_fpop_start(env); - neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status); + neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1); RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status); arch_fpop_end(env); return RxV; @@ -1468,12 +1452,6 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV) } } -void cancel_slot(CPUHexagonState *env, uint32_t slot) -{ - HEX_DEBUG_LOG("Slot %d cancelled\n", slot); - env->slot_cancelled |= (1 << slot); -} - /* These macros can be referenced in the generated helper functions */ #define warn(...) /* Nothing */ #define fatal(...) g_assert_not_reached(); diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h index 02347edee8..34b3a53975 100644 --- a/target/hexagon/op_helper.h +++ b/target/hexagon/op_helper.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,7 +19,6 @@ #define HEXAGON_OP_HELPER_H /* Misc functions */ -void cancel_slot(CPUHexagonState *env, uint32_t slot); void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr); uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 93fd1b55e3..665476ab48 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,6 +29,15 @@ #include "translate.h" #include "printinsn.h" +#include "analyze_funcs_generated.c.inc" + +typedef void (*AnalyzeInsn)(DisasContext *ctx); +static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { +#define OPCODE(X) [X] = analyze_##X +#include "opcodes_def_generated.h.inc" +#undef OPCODE +}; + TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_pred[NUM_PREGS]; TCGv hex_this_PC; @@ -47,8 +56,6 @@ TCGv hex_dczero_addr; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; -TCGv hex_VRegs_updated; -TCGv hex_QRegs_updated; TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; @@ -239,7 +246,15 @@ static bool check_for_attrib(Packet *pkt, int attrib) static bool need_slot_cancelled(Packet *pkt) { - return check_for_attrib(pkt, A_CONDEXEC); + /* We only need slot_cancelled for conditional store instructions */ + for (int i = 0; i < pkt->num_insns; i++) { + uint16_t opcode = pkt->insn[i].opcode; + if (GET_ATTRIB(opcode, A_CONDEXEC) && + GET_ATTRIB(opcode, A_SCALAR_STORE)) { + return 
true; + } + } + return false; } static bool need_pred_written(Packet *pkt) @@ -265,6 +280,77 @@ static bool need_next_PC(DisasContext *ctx) return false; } +/* + * The opcode_analyze functions mark most of the writes in a packet + * However, there are some implicit writes marked as attributes + * of the applicable instructions. + */ +static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) +{ + uint16_t opcode = ctx->insn->opcode; + if (GET_ATTRIB(opcode, attrib)) { + /* + * USR is used to set overflow and FP exceptions, + * so treat it as conditional + */ + bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || + rnum == HEX_REG_USR; + + /* LC0/LC1 is conditionally written by endloop instructions */ + if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && + (opcode == J2_endloop0 || + opcode == J2_endloop1 || + opcode == J2_endloop01)) { + is_predicated = true; + } + + ctx_log_reg_write(ctx, rnum, is_predicated); + } +} + +static void mark_implicit_reg_writes(DisasContext *ctx) +{ + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); + mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); + mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); +} + +static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) +{ + if (GET_ATTRIB(ctx->insn->opcode, attrib)) { + ctx_log_pred_write(ctx, pnum); + } +} + +static void mark_implicit_pred_writes(DisasContext *ctx) +{ + mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); + mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); + mark_implicit_pred_write(ctx, 
A_IMPLICIT_WRITES_P2, 2); + mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); +} + +static void analyze_packet(DisasContext *ctx) +{ + Packet *pkt = ctx->pkt; + ctx->need_pkt_has_store_s1 = false; + for (int i = 0; i < pkt->num_insns; i++) { + Insn *insn = &pkt->insn[i]; + ctx->insn = insn; + if (opcode_analyze[insn->opcode]) { + opcode_analyze[insn->opcode](ctx); + } + mark_implicit_reg_writes(ctx); + mark_implicit_pred_writes(ctx); + } +} + static void gen_start_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -275,6 +361,7 @@ static void gen_start_packet(DisasContext *ctx) ctx->next_PC = next_PC; ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); + bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); ctx->future_vregs_idx = 0; @@ -283,14 +370,27 @@ static void gen_start_packet(DisasContext *ctx) bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); bitmap_zero(ctx->vregs_updated, NUM_VREGS); bitmap_zero(ctx->vregs_select, NUM_VREGS); + bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); + bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); ctx->qreg_log_idx = 0; for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; } - tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); ctx->s1_store_processed = false; ctx->pre_commit = true; + analyze_packet(ctx); + + if (ctx->need_pkt_has_store_s1) { + tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); + } + + /* + * pregs_written is used both in the analyze phase as well as the code + * gen phase, so clear it again. 
+ */ + bitmap_zero(ctx->pregs_written, NUM_PREGS); + if (HEX_DEBUG) { /* Handy place to set a breakpoint before the packet executes */ gen_helper_debug_start_packet(cpu_env); @@ -313,9 +413,42 @@ static void gen_start_packet(DisasContext *ctx) tcg_gen_movi_tl(hex_pred_written, 0); } - if (pkt->pkt_has_hvx) { - tcg_gen_movi_tl(hex_VRegs_updated, 0); - tcg_gen_movi_tl(hex_QRegs_updated, 0); + /* Preload the predicated registers into hex_new_value[i] */ + if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { + int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); + while (i < TOTAL_PER_THREAD_REGS) { + tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); + i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, + i + 1); + } + } + + /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ + if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { + int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); + while (i < NUM_VREGS) { + const intptr_t VdV_off = + ctx_future_vreg_off(ctx, i, 1, true); + intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); + tcg_gen_gvec_mov(MO_64, VdV_off, + src_off, + sizeof(MMVector), + sizeof(MMVector)); + i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1); + } + } + if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) { + int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS); + while (i < NUM_VREGS) { + const intptr_t VdV_off = + ctx_tmp_vreg_off(ctx, i, 1, true); + intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); + tcg_gen_gvec_mov(MO_64, VdV_off, + src_off, + sizeof(MMVector), + sizeof(MMVector)); + i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); + } } } @@ -336,66 +469,6 @@ bool is_gather_store_insn(DisasContext *ctx) return false; } -/* - * The LOG_*_WRITE macros mark most of the writes in a packet - * However, there are some implicit writes marked as attributes - * of the applicable instructions. 
- */ -static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) -{ - uint16_t opcode = ctx->insn->opcode; - if (GET_ATTRIB(opcode, attrib)) { - /* - * USR is used to set overflow and FP exceptions, - * so treat it as conditional - */ - bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || - rnum == HEX_REG_USR; - - /* LC0/LC1 is conditionally written by endloop instructions */ - if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && - (opcode == J2_endloop0 || - opcode == J2_endloop1 || - opcode == J2_endloop01)) { - is_predicated = true; - } - - if (is_predicated && !is_preloaded(ctx, rnum)) { - tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]); - } - - ctx_log_reg_write(ctx, rnum); - } -} - -static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) -{ - if (GET_ATTRIB(ctx->insn->opcode, attrib)) { - ctx_log_pred_write(ctx, pnum); - } -} - -static void mark_implicit_reg_writes(DisasContext *ctx) -{ - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); - mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); - mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); -} - -static void mark_implicit_pred_writes(DisasContext *ctx) -{ - mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); - mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); - mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2); - mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); -} - static void mark_store_width(DisasContext *ctx) { uint16_t opcode = ctx->insn->opcode; @@ -423,9 +496,7 @@ static void mark_store_width(DisasContext *ctx) 
static void gen_insn(DisasContext *ctx) { if (ctx->insn->generate) { - mark_implicit_reg_writes(ctx); ctx->insn->generate(ctx); - mark_implicit_pred_writes(ctx); mark_store_width(ctx); } else { gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); @@ -646,65 +717,31 @@ static void gen_commit_hvx(DisasContext *ctx) /* * for (i = 0; i < ctx->vreg_log_idx; i++) { * int rnum = ctx->vreg_log[i]; - * if (ctx->vreg_is_predicated[i]) { - * if (env->VRegs_updated & (1 << rnum)) { - * env->VRegs[rnum] = env->future_VRegs[rnum]; - * } - * } else { - * env->VRegs[rnum] = env->future_VRegs[rnum]; - * } + * env->VRegs[rnum] = env->future_VRegs[rnum]; * } */ for (i = 0; i < ctx->vreg_log_idx; i++) { int rnum = ctx->vreg_log[i]; - bool is_predicated = ctx->vreg_is_predicated[i]; intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); size_t size = sizeof(MMVector); - if (is_predicated) { - TCGv cmp = tcg_temp_new(); - TCGLabel *label_skip = gen_new_label(); - - tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum); - tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip); - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - gen_set_label(label_skip); - } else { - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - } + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); } /* * for (i = 0; i < ctx->qreg_log_idx; i++) { * int rnum = ctx->qreg_log[i]; - * if (ctx->qreg_is_predicated[i]) { - * if (env->QRegs_updated) & (1 << rnum)) { - * env->QRegs[rnum] = env->future_QRegs[rnum]; - * } - * } else { - * env->QRegs[rnum] = env->future_QRegs[rnum]; - * } + * env->QRegs[rnum] = env->future_QRegs[rnum]; * } */ for (i = 0; i < ctx->qreg_log_idx; i++) { int rnum = ctx->qreg_log[i]; - bool is_predicated = ctx->qreg_is_predicated[i]; intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); size_t size = sizeof(MMQReg); - if (is_predicated) { - TCGv cmp = 
tcg_temp_new(); - TCGLabel *label_skip = gen_new_label(); - - tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum); - tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip); - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - gen_set_label(label_skip); - } else { - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - } + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); } if (pkt_has_hvx_store(ctx->pkt)) { @@ -775,13 +812,27 @@ static void gen_commit_packet(DisasContext *ctx) TCGv mask_tcgv; if (has_store_s0) { - mask |= (1 << 0); + mask = + FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1); } if (has_store_s1) { - mask |= (1 << 1); + mask = + FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1); } if (has_hvx_store) { - mask |= (1 << 2); + mask = + FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, + HAS_HVX_STORES, 1); + } + if (has_store_s0 && slot_is_predicated(pkt, 0)) { + mask = + FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, + S0_IS_PRED, 1); + } + if (has_store_s1 && slot_is_predicated(pkt, 1)) { + mask = + FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, + S1_IS_PRED, 1); } mask_tcgv = tcg_constant_tl(mask); gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx); @@ -791,8 +842,15 @@ static void gen_commit_packet(DisasContext *ctx) * process_store_log will execute the slot 1 store first, * so we only have to probe the store in slot 0 */ - TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); - gen_helper_probe_pkt_scalar_store_s0(cpu_env, mem_idx); + int args = 0; + args = + FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx); + if (slot_is_predicated(pkt, 0)) { + args = + FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1); + } + TCGv args_tcgv = tcg_constant_tl(args); + gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv); } process_store_log(ctx); @@ -1029,10 +1087,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val), "llsc_val"); hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env, 
offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); - hex_VRegs_updated = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated"); - hex_QRegs_updated = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated"); for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index d971f4f095..db832b0f88 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,6 +38,7 @@ typedef struct DisasContext { int reg_log[REG_WRITES_MAX]; int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); + DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); @@ -48,52 +49,54 @@ typedef struct DisasContext { int tmp_vregs_idx; int tmp_vregs_num[VECTOR_TEMPS_MAX]; int vreg_log[NUM_VREGS]; - bool vreg_is_predicated[NUM_VREGS]; int vreg_log_idx; DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS); DECLARE_BITMAP(vregs_updated, NUM_VREGS); DECLARE_BITMAP(vregs_select, NUM_VREGS); + DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS); + DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS); int qreg_log[NUM_QREGS]; - bool qreg_is_predicated[NUM_QREGS]; int qreg_log_idx; bool pre_commit; TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; + bool need_pkt_has_store_s1; } DisasContext; -static inline void ctx_log_reg_write(DisasContext *ctx, int rnum) -{ - if (test_bit(rnum, ctx->regs_written)) { - HEX_DEBUG_LOG("WARNING: Multiple 
writes to r%d\n", rnum); - } - ctx->reg_log[ctx->reg_log_idx] = rnum; - ctx->reg_log_idx++; - set_bit(rnum, ctx->regs_written); -} - -static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum) -{ - ctx_log_reg_write(ctx, rnum); - ctx_log_reg_write(ctx, rnum + 1); -} - static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) { - ctx->preg_log[ctx->preg_log_idx] = pnum; - ctx->preg_log_idx++; - set_bit(pnum, ctx->pregs_written); + if (!test_bit(pnum, ctx->pregs_written)) { + ctx->preg_log[ctx->preg_log_idx] = pnum; + ctx->preg_log_idx++; + set_bit(pnum, ctx->pregs_written); + } } -static inline bool is_preloaded(DisasContext *ctx, int num) +static inline void ctx_log_reg_write(DisasContext *ctx, int rnum, + bool is_predicated) { - return test_bit(num, ctx->regs_written); + if (rnum == HEX_REG_P3_0_ALIASED) { + for (int i = 0; i < NUM_PREGS; i++) { + ctx_log_pred_write(ctx, i); + } + } else { + if (!test_bit(rnum, ctx->regs_written)) { + ctx->reg_log[ctx->reg_log_idx] = rnum; + ctx->reg_log_idx++; + set_bit(rnum, ctx->regs_written); + } + if (is_predicated) { + set_bit(rnum, ctx->predicated_regs); + } + } } -static inline bool is_vreg_preloaded(DisasContext *ctx, int num) +static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum, + bool is_predicated) { - return test_bit(num, ctx->vregs_updated) || - test_bit(num, ctx->vregs_updated_tmp); + ctx_log_reg_write(ctx, rnum, is_predicated); + ctx_log_reg_write(ctx, rnum + 1, is_predicated); } intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, @@ -106,17 +109,25 @@ static inline void ctx_log_vreg_write(DisasContext *ctx, bool is_predicated) { if (type != EXT_TMP) { - ctx->vreg_log[ctx->vreg_log_idx] = rnum; - ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated; - ctx->vreg_log_idx++; + if (!test_bit(rnum, ctx->vregs_updated)) { + ctx->vreg_log[ctx->vreg_log_idx] = rnum; + ctx->vreg_log_idx++; + set_bit(rnum, ctx->vregs_updated); + } set_bit(rnum, ctx->vregs_updated); + if 
(is_predicated) { + set_bit(rnum, ctx->predicated_future_vregs); + } } if (type == EXT_NEW) { set_bit(rnum, ctx->vregs_select); } if (type == EXT_TMP) { set_bit(rnum, ctx->vregs_updated_tmp); + if (is_predicated) { + set_bit(rnum, ctx->predicated_tmp_vregs); + } } } @@ -129,10 +140,9 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx, } static inline void ctx_log_qreg_write(DisasContext *ctx, - int rnum, bool is_predicated) + int rnum) { ctx->qreg_log[ctx->qreg_log_idx] = rnum; - ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated; ctx->qreg_log_idx++; } @@ -153,12 +163,20 @@ extern TCGv hex_dczero_addr; extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; -extern TCGv hex_VRegs_updated; -extern TCGv hex_QRegs_updated; extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX]; bool is_gather_store_insn(DisasContext *ctx); void process_store(DisasContext *ctx, int slot_num); + +FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2) +FIELD(PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 2, 1) + +FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 0, 1) +FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1, 1) +FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES, 2, 1) +FIELD(PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED, 3, 1) +FIELD(PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED, 4, 1) + #endif diff --git a/tests/data/acpi/pc/DSDT b/tests/data/acpi/pc/DSDT Binary files differindex 0b475fb5a9..32d255cfc0 100644 --- a/tests/data/acpi/pc/DSDT +++ b/tests/data/acpi/pc/DSDT diff --git a/tests/data/acpi/pc/DSDT.acpierst b/tests/data/acpi/pc/DSDT.acpierst Binary files differindex 17ef7caeb6..33e872b2fa 100644 --- a/tests/data/acpi/pc/DSDT.acpierst +++ b/tests/data/acpi/pc/DSDT.acpierst diff --git a/tests/data/acpi/pc/DSDT.acpihmat b/tests/data/acpi/pc/DSDT.acpihmat Binary files differindex 675b674eaa..cd84abc1b1 100644 --- a/tests/data/acpi/pc/DSDT.acpihmat +++ 
b/tests/data/acpi/pc/DSDT.acpihmat diff --git a/tests/data/acpi/pc/DSDT.bridge b/tests/data/acpi/pc/DSDT.bridge Binary files differindex c1ce061366..69a73ea2a6 100644 --- a/tests/data/acpi/pc/DSDT.bridge +++ b/tests/data/acpi/pc/DSDT.bridge diff --git a/tests/data/acpi/pc/DSDT.cphp b/tests/data/acpi/pc/DSDT.cphp Binary files differindex 754ab854dc..20379056b3 100644 --- a/tests/data/acpi/pc/DSDT.cphp +++ b/tests/data/acpi/pc/DSDT.cphp diff --git a/tests/data/acpi/pc/DSDT.dimmpxm b/tests/data/acpi/pc/DSDT.dimmpxm Binary files differindex 170503336b..435496e836 100644 --- a/tests/data/acpi/pc/DSDT.dimmpxm +++ b/tests/data/acpi/pc/DSDT.dimmpxm diff --git a/tests/data/acpi/pc/DSDT.hpbridge b/tests/data/acpi/pc/DSDT.hpbridge Binary files differindex 834c27002e..b6eafab250 100644 --- a/tests/data/acpi/pc/DSDT.hpbridge +++ b/tests/data/acpi/pc/DSDT.hpbridge diff --git a/tests/data/acpi/pc/DSDT.hpbrroot b/tests/data/acpi/pc/DSDT.hpbrroot Binary files differindex a71ed4fbaa..a4073f36d6 100644 --- a/tests/data/acpi/pc/DSDT.hpbrroot +++ b/tests/data/acpi/pc/DSDT.hpbrroot diff --git a/tests/data/acpi/pc/DSDT.ipmikcs b/tests/data/acpi/pc/DSDT.ipmikcs Binary files differindex dd71356027..06aa7bfdec 100644 --- a/tests/data/acpi/pc/DSDT.ipmikcs +++ b/tests/data/acpi/pc/DSDT.ipmikcs diff --git a/tests/data/acpi/pc/DSDT.memhp b/tests/data/acpi/pc/DSDT.memhp Binary files differindex 2f895e9b38..10a0e44d61 100644 --- a/tests/data/acpi/pc/DSDT.memhp +++ b/tests/data/acpi/pc/DSDT.memhp diff --git a/tests/data/acpi/pc/DSDT.nohpet b/tests/data/acpi/pc/DSDT.nohpet Binary files differindex c012b63ace..6905312d82 100644 --- a/tests/data/acpi/pc/DSDT.nohpet +++ b/tests/data/acpi/pc/DSDT.nohpet diff --git a/tests/data/acpi/pc/DSDT.numamem b/tests/data/acpi/pc/DSDT.numamem Binary files differindex f2ef4b9729..59e31338ee 100644 --- a/tests/data/acpi/pc/DSDT.numamem +++ b/tests/data/acpi/pc/DSDT.numamem diff --git a/tests/data/acpi/pc/DSDT.roothp b/tests/data/acpi/pc/DSDT.roothp Binary files 
differindex 657c8263f0..448d596cf4 100644 --- a/tests/data/acpi/pc/DSDT.roothp +++ b/tests/data/acpi/pc/DSDT.roothp diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT Binary files differindex d68c472b46..720e8cbbbb 100644 --- a/tests/data/acpi/q35/DSDT +++ b/tests/data/acpi/q35/DSDT diff --git a/tests/data/acpi/q35/DSDT.acpierst b/tests/data/acpi/q35/DSDT.acpierst Binary files differindex de7ae27125..f26b1f2a19 100644 --- a/tests/data/acpi/q35/DSDT.acpierst +++ b/tests/data/acpi/q35/DSDT.acpierst diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat Binary files differindex 48e2862257..86771f1746 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat +++ b/tests/data/acpi/q35/DSDT.acpihmat diff --git a/tests/data/acpi/q35/DSDT.acpihmat-noinitiator b/tests/data/acpi/q35/DSDT.acpihmat-noinitiator Binary files differindex 30a4aa2ec8..a894a2d16c 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat-noinitiator +++ b/tests/data/acpi/q35/DSDT.acpihmat-noinitiator diff --git a/tests/data/acpi/q35/DSDT.applesmc b/tests/data/acpi/q35/DSDT.applesmc Binary files differindex 84e2b5cbc4..276ae1df51 100644 --- a/tests/data/acpi/q35/DSDT.applesmc +++ b/tests/data/acpi/q35/DSDT.applesmc diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge Binary files differindex e411d40fd1..9f8a208aaa 100644 --- a/tests/data/acpi/q35/DSDT.bridge +++ b/tests/data/acpi/q35/DSDT.bridge diff --git a/tests/data/acpi/q35/DSDT.core-count2 b/tests/data/acpi/q35/DSDT.core-count2 Binary files differindex 0603db8cc6..2ec11fe3c3 100644 --- a/tests/data/acpi/q35/DSDT.core-count2 +++ b/tests/data/acpi/q35/DSDT.core-count2 diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp Binary files differindex beeb83c33b..612c85b1b4 100644 --- a/tests/data/acpi/q35/DSDT.cphp +++ b/tests/data/acpi/q35/DSDT.cphp diff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl Binary files differindex 4586b9a18b..f049f414f0 100644 --- 
a/tests/data/acpi/q35/DSDT.cxl +++ b/tests/data/acpi/q35/DSDT.cxl diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm Binary files differindex 99a93e12a7..23dabeacb0 100644 --- a/tests/data/acpi/q35/DSDT.dimmpxm +++ b/tests/data/acpi/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt Binary files differindex 7f7601dbff..541bb70522 100644 --- a/tests/data/acpi/q35/DSDT.ipmibt +++ b/tests/data/acpi/q35/DSDT.ipmibt diff --git a/tests/data/acpi/q35/DSDT.ipmismbus b/tests/data/acpi/q35/DSDT.ipmismbus Binary files differindex 6c5d1afe44..e2d57a3318 100644 --- a/tests/data/acpi/q35/DSDT.ipmismbus +++ b/tests/data/acpi/q35/DSDT.ipmismbus diff --git a/tests/data/acpi/q35/DSDT.ivrs b/tests/data/acpi/q35/DSDT.ivrs Binary files differindex de7ae27125..f26b1f2a19 100644 --- a/tests/data/acpi/q35/DSDT.ivrs +++ b/tests/data/acpi/q35/DSDT.ivrs diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp Binary files differindex 79bce5c8f0..809d7e2f0f 100644 --- a/tests/data/acpi/q35/DSDT.memhp +++ b/tests/data/acpi/q35/DSDT.memhp diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 Binary files differindex c249929add..ab3fe3c1b5 100644 --- a/tests/data/acpi/q35/DSDT.mmio64 +++ b/tests/data/acpi/q35/DSDT.mmio64 diff --git a/tests/data/acpi/q35/DSDT.multi-bridge b/tests/data/acpi/q35/DSDT.multi-bridge Binary files differindex 66b39be294..9ae8ee0b41 100644 --- a/tests/data/acpi/q35/DSDT.multi-bridge +++ b/tests/data/acpi/q35/DSDT.multi-bridge diff --git a/tests/data/acpi/q35/DSDT.noacpihp b/tests/data/acpi/q35/DSDT.noacpihp Binary files differnew file mode 100644 index 0000000000..6ab1f0e525 --- /dev/null +++ b/tests/data/acpi/q35/DSDT.noacpihp diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet Binary files differindex 9ff9983a80..becb5f7cad 100644 --- a/tests/data/acpi/q35/DSDT.nohpet +++ b/tests/data/acpi/q35/DSDT.nohpet diff --git 
a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem Binary files differindex 1e7c45ef3c..0cdec0b4c5 100644 --- a/tests/data/acpi/q35/DSDT.numamem +++ b/tests/data/acpi/q35/DSDT.numamem diff --git a/tests/data/acpi/q35/DSDT.pvpanic-isa b/tests/data/acpi/q35/DSDT.pvpanic-isa Binary files differindex ed47451c44..6a9904ec94 100644 --- a/tests/data/acpi/q35/DSDT.pvpanic-isa +++ b/tests/data/acpi/q35/DSDT.pvpanic-isa diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12 Binary files differindex efc2efc19f..628bf628f6 100644 --- a/tests/data/acpi/q35/DSDT.tis.tpm12 +++ b/tests/data/acpi/q35/DSDT.tis.tpm12 diff --git a/tests/data/acpi/q35/DSDT.tis.tpm2 b/tests/data/acpi/q35/DSDT.tis.tpm2 Binary files differindex 675339715f..35c6b08068 100644 --- a/tests/data/acpi/q35/DSDT.tis.tpm2 +++ b/tests/data/acpi/q35/DSDT.tis.tpm2 diff --git a/tests/data/acpi/q35/DSDT.viot b/tests/data/acpi/q35/DSDT.viot Binary files differindex eeb40b360f..3ad4d26b7f 100644 --- a/tests/data/acpi/q35/DSDT.viot +++ b/tests/data/acpi/q35/DSDT.viot diff --git a/tests/data/acpi/q35/DSDT.xapic b/tests/data/acpi/q35/DSDT.xapic Binary files differindex 3aa86f0724..d4a34e2351 100644 --- a/tests/data/acpi/q35/DSDT.xapic +++ b/tests/data/acpi/q35/DSDT.xapic diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index d29a4e47af..76d5100911 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -949,9 +949,14 @@ static void test_acpi_piix4_no_acpi_pci_hotplug(void) data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); test_acpi_one("-global PIIX4_PM.acpi-root-pci-hotplug=off " "-global PIIX4_PM.acpi-pci-hotplug-with-bridge-support=off " - "-device pci-bridge,chassis_nr=1 " - "-device pci-testdev,bus=pci.0 " - "-device pci-testdev,bus=pci.1", &data); + "-device pci-bridge,chassis_nr=1,addr=4.0 " + "-device pci-testdev,bus=pci.0,addr=5.0 " + "-device pci-testdev,bus=pci.0,addr=6.0,acpi-index=101 
" + "-device pci-testdev,bus=pci.1,addr=1.0 " + "-device pci-testdev,bus=pci.1,addr=2.0,acpi-index=201 " + "-device pci-bridge,id=nhpbr,chassis_nr=2,shpc=off,addr=7.0 " + "-device pci-testdev,bus=nhpbr,addr=1.0,acpi-index=301 " + , &data); free_test_data(&data); } @@ -1002,18 +1007,42 @@ static void test_acpi_q35_tcg_bridge(void) free_test_data(&data); } +static void test_acpi_q35_tcg_no_acpi_hotplug(void) +{ + test_data data; + + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_Q35; + data.variant = ".noacpihp"; + data.required_struct_types = base_required_struct_types; + data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); + test_acpi_one("-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off" + " -device pci-testdev,bus=pcie.0,acpi-index=101,addr=3.0" + " -device pci-bridge,chassis_nr=1,id=shpcbr,addr=4.0" + " -device pci-testdev,bus=shpcbr,addr=1.0,acpi-index=201" + " -device pci-bridge,chassis_nr=2,shpc=off,id=noshpcbr,addr=5.0" + " -device pci-testdev,bus=noshpcbr,addr=1.0,acpi-index=301" + " -device pcie-root-port,id=hprp,port=0x0,chassis=1,addr=6.0" + " -device pci-testdev,bus=hprp,acpi-index=401" + " -device pcie-root-port,id=nohprp,port=0x0,chassis=2,hotplug=off," + "addr=7.0" + " -device pci-testdev,bus=nohprp,acpi-index=501" + " -device pcie-root-port,id=nohprpint,port=0x0,chassis=3,hotplug=off," + "multifunction=on,addr=8.0" + " -device pci-testdev,bus=nohprpint,acpi-index=601,addr=8.1" + " -device pcie-root-port,id=hprp2,port=0x0,chassis=4,bus=nohprpint," + "addr=9.0" + " -device pci-testdev,bus=hprp2,acpi-index=602" + , &data); + free_test_data(&data); +} + static void test_acpi_q35_multif_bridge(void) { test_data data = { .machine = MACHINE_Q35, .variant = ".multi-bridge", }; - - if (!qtest_has_device("pcie-root-port")) { - g_test_skip("Device pcie-root-port is not available"); - goto out; - } - test_vm_prepare("-S" " -device virtio-balloon,id=balloon0,addr=0x4.0x2" " -device pcie-root-port,id=rp0,multifunction=on," 
@@ -1025,9 +1054,14 @@ static void test_acpi_q35_multif_bridge(void) " -device pcie-root-port,id=rphptgt2,port=0x0,chassis=6,addr=2.2" " -device pcie-root-port,id=rphptgt3,port=0x0,chassis=7,addr=2.3" " -device pci-testdev,bus=pcie.0,addr=2.4" + " -device pci-testdev,bus=pcie.0,addr=2.5,acpi-index=102" " -device pci-testdev,bus=pcie.0,addr=5.0" + " -device pci-testdev,bus=pcie.0,addr=0xf.0,acpi-index=101" " -device pci-testdev,bus=rp0,addr=0.0" - " -device pci-testdev,bus=br1", &data); + " -device pci-testdev,bus=br1" + " -device pcie-root-port,id=rpnohp,chassis=8,addr=0xA.0,hotplug=off" + " -device pcie-root-port,id=rp3,chassis=9,bus=rpnohp" + , &data); /* hotplugged bridges section */ qtest_qmp_device_add(data.qts, "pci-bridge", "hpbr1", @@ -1049,7 +1083,6 @@ static void test_acpi_q35_multif_bridge(void) /* check that reboot/reset doesn't change any ACPI tables */ qtest_qmp_send(data.qts, "{'execute':'system_reset' }"); process_acpi_tables(&data); -out: free_test_data(&data); } @@ -1403,11 +1436,6 @@ static void test_acpi_tcg_dimm_pxm(const char *machine) { test_data data; - if (!qtest_has_device("nvdimm")) { - g_test_skip("Device nvdimm is not available"); - return; - } - memset(&data, 0, sizeof(data)); data.machine = machine; data.variant = ".dimmpxm"; @@ -1456,11 +1484,6 @@ static void test_acpi_virt_tcg_memhp(void) .scan_len = 256ULL * 1024 * 1024, }; - if (!qtest_has_device("nvdimm")) { - g_test_skip("Device nvdimm is not available"); - goto out; - } - data.variant = ".memhp"; test_acpi_one(" -machine nvdimm=on" " -cpu cortex-a57" @@ -1474,7 +1497,7 @@ static void test_acpi_virt_tcg_memhp(void) " -device pc-dimm,id=dimm0,memdev=ram2,node=0" " -device nvdimm,id=dimm1,memdev=nvm0,node=1", &data); -out: + free_test_data(&data); } @@ -1492,11 +1515,6 @@ static void test_acpi_microvm_tcg(void) { test_data data; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - return; - } - 
test_acpi_microvm_prepare(&data); test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=off", &data); @@ -1507,11 +1525,6 @@ static void test_acpi_microvm_usb_tcg(void) { test_data data; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - return; - } - test_acpi_microvm_prepare(&data); data.variant = ".usb"; test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,usb=on,rtc=off", @@ -1523,11 +1536,6 @@ static void test_acpi_microvm_rtc_tcg(void) { test_data data; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - return; - } - test_acpi_microvm_prepare(&data); data.variant = ".rtc"; test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=on", @@ -1539,11 +1547,6 @@ static void test_acpi_microvm_pcie_tcg(void) { test_data data; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - return; - } - test_acpi_microvm_prepare(&data); data.variant = ".pcie"; data.tcg_only = true; /* need constant host-phys-bits */ @@ -1556,11 +1559,6 @@ static void test_acpi_microvm_ioapic2_tcg(void) { test_data data; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - return; - } - test_acpi_microvm_prepare(&data); data.variant = ".ioapic2"; test_acpi_one(" -machine microvm,acpi=on,ioapic2=on,rtc=off", @@ -1600,12 +1598,6 @@ static void test_acpi_virt_tcg_pxb(void) .ram_start = 0x40000000ULL, .scan_len = 128ULL * 1024 * 1024, }; - - if (!qtest_has_device("pcie-root-port")) { - g_test_skip("Device pcie-root-port is not available"); - goto out; - } - /* * While using -cdrom, the cdrom would auto plugged into pxb-pcie, * the reason is the bus of pxb-pcie is also root bus, it would lead @@ -1624,7 +1616,7 @@ static void test_acpi_virt_tcg_pxb(void) " -cpu cortex-a57" " -device pxb-pcie,bus_nr=128", &data); -out: + free_test_data(&data); } @@ -1812,12 +1804,6 @@ 
static void test_acpi_microvm_acpi_erst(void) gchar *params; test_data data; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - g_free(tmp_path); - return; - } - test_acpi_microvm_prepare(&data); data.variant = ".pcie"; data.tcg_only = true; /* need constant host-phys-bits */ @@ -1878,11 +1864,6 @@ static void test_acpi_q35_viot(void) .variant = ".viot", }; - if (!qtest_has_device("virtio-iommu")) { - g_test_skip("Device virtio-iommu is not available"); - goto out; - } - /* * To keep things interesting, two buses bypass the IOMMU. * VIOT should only describes the other two buses. @@ -1893,7 +1874,6 @@ static void test_acpi_q35_viot(void) "-device pxb-pcie,bus_nr=0x20,id=pcie.200,bus=pcie.0,bypass_iommu=on " "-device pxb-pcie,bus_nr=0x30,id=pcie.300,bus=pcie.0", &data); -out: free_test_data(&data); } @@ -1954,10 +1934,8 @@ static void test_acpi_virt_viot(void) .scan_len = 128ULL * 1024 * 1024, }; - if (qtest_has_device("virtio-iommu")) { - test_acpi_one("-cpu cortex-a57 " - "-device virtio-iommu-pci", &data); - } + test_acpi_one("-cpu cortex-a57 " + "-device virtio-iommu-pci", &data); free_test_data(&data); } @@ -2066,11 +2044,6 @@ static void test_acpi_microvm_oem_fields(void) test_data data; char *args; - if (!qtest_has_device("virtio-blk-device")) { - g_test_skip("Device virtio-blk-device is not available"); - return; - } - test_acpi_microvm_prepare(&data); args = test_acpi_create_args(&data, @@ -2161,6 +2134,8 @@ int main(int argc, char *argv[]) test_acpi_q35_tcg_tpm12_tis); } qtest_add_func("acpi/q35/bridge", test_acpi_q35_tcg_bridge); + qtest_add_func("acpi/q35/no-acpi-hotplug", + test_acpi_q35_tcg_no_acpi_hotplug); qtest_add_func("acpi/q35/multif-bridge", test_acpi_q35_multif_bridge); qtest_add_func("acpi/q35/mmio64", test_acpi_q35_tcg_mmio64); diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 18e6a5969e..0d82dfa76e 100644 --- a/tests/tcg/hexagon/Makefile.target 
+++ b/tests/tcg/hexagon/Makefile.target @@ -1,5 +1,5 @@ ## -## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -45,6 +45,10 @@ HEX_TESTS += fpstuff HEX_TESTS += overflow HEX_TESTS += signal_context HEX_TESTS += reg_mut +HEX_TESTS += vector_add_int +HEX_TESTS += scatter_gather +HEX_TESTS += hvx_misc +HEX_TESTS += hvx_histogram HEX_TESTS += test_abs HEX_TESTS += test_bitcnt @@ -78,3 +82,10 @@ TESTS += $(HEX_TESTS) usr: usr.c $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS) +scatter_gather: CFLAGS += -mhvx +vector_add_int: CFLAGS += -mhvx -fvectorize +hvx_misc: CFLAGS += -mhvx +hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant + +hvx_histogram: hvx_histogram.c hvx_histogram_row.S + $(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS) diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 56bf562a40..90ce9a6ef3 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2020-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +40,7 @@ const int SF_HEX_NAN = 0xffffffff; const int SF_small_neg = 0xab98fba8; const int SF_denorm = 0x00000001; const int SF_random = 0x346001d6; +const int SF_neg_zero = 0x80000000; const long long DF_QNaN = 0x7ff8000000000000ULL; const long long DF_SNaN = 0x7ff7000000000000ULL; @@ -536,6 +537,33 @@ static void check_sffixupd(void) check32(result, 0x146001d6); } +static void check_sffms(void) +{ + int result; + + /* Check that sffms properly deals with -0 */ + result = SF_neg_zero; + asm ("%0 -= sfmpy(%1 , %2)\n\t" + : "+r"(result) + : "r"(SF_ZERO), "r"(SF_ZERO) + : "r12", "r8"); + check32(result, SF_neg_zero); + + result = SF_ZERO; + asm ("%0 -= sfmpy(%1 , %2)\n\t" + : "+r"(result) + : "r"(SF_neg_zero), "r"(SF_ZERO) + : "r12", "r8"); + check32(result, SF_ZERO); + + result = SF_ZERO; + asm ("%0 -= sfmpy(%1 , %2)\n\t" + : "+r"(result) + : "r"(SF_ZERO), "r"(SF_neg_zero) + : "r12", "r8"); + check32(result, SF_ZERO); +} + static void check_float2int_convs() { int res32; @@ -688,6 +716,7 @@ int main() check_invsqrta(); check_sffixupn(); check_sffixupd(); + check_sffms(); check_float2int_convs(); puts(err ? "FAIL" : "PASS"); diff --git a/tests/tcg/hexagon/preg_alias.c b/tests/tcg/hexagon/preg_alias.c index b44a8112b4..8798fbcaf3 100644 --- a/tests/tcg/hexagon/preg_alias.c +++ b/tests/tcg/hexagon/preg_alias.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -65,7 +65,7 @@ static inline void creg_alias(int cval, PRegs *pregs) : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3) : "r"(cval) - : "p0", "p1", "p2", "p3"); + : "c4", "p0", "p1", "p2", "p3"); } int err; @@ -92,7 +92,7 @@ static inline void creg_alias_pair(unsigned int cval, PRegs *pregs) : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5) : "r"(cval_pair) - : "p0", "p1", "p2", "p3"); + : "c4", "c5", "p0", "p1", "p2", "p3"); check(c5, 0xdeadbeef); } @@ -117,7 +117,7 @@ static void test_packet(void) "}\n\t" : "+r"(result) : "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653) - : "p0", "p1", "p2", "p3"); + : "c4", "p0", "p1", "p2", "p3"); check(result, old_val); /* Test a predicated store */ @@ -129,7 +129,7 @@ static void test_packet(void) "}\n\t" : : "r"(0), "r"(0xffffffff), "r"(&result) - : "p0", "p1", "p2", "p3", "memory"); + : "c4", "p0", "p1", "p2", "p3", "memory"); check(result, 0x0); } diff --git a/tests/tcg/hexagon/scatter_gather.c b/tests/tcg/hexagon/scatter_gather.c index b93eb18133..bf8b5e0317 100644 --- a/tests/tcg/hexagon/scatter_gather.c +++ b/tests/tcg/hexagon/scatter_gather.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,47 +40,6 @@ typedef long HVX_VectorPair __attribute__((__vector_size__(256))) typedef long HVX_VectorPred __attribute__((__vector_size__(128))) __attribute__((aligned(128))); -#define VSCATTER_16(BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS) -#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS) -#define VSCATTER_32(BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS) -#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS) -#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, RGN, OFF, VALS) -#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS) -#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS) -#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS) -#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \ - __builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS) - -#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \ - __builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF) -#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ - __builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) -#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \ - __builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF) -#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ - __builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) -#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \ - 
__builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF) -#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ - __builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) - -#define VSHUFF_H(V) \ - __builtin_HEXAGON_V6_vshuffh_128B(V) -#define VSPLAT_H(X) \ - __builtin_HEXAGON_V6_lvsplath_128B(X) -#define VAND_VAL(PRED, VAL) \ - __builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL) -#define VDEAL_H(V) \ - __builtin_HEXAGON_V6_vdealh_128B(V) - int err; /* define the number of rows/cols in a square matrix */ @@ -108,22 +67,22 @@ unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE]; unsigned short vgather16_32_ref[MATRIX_SIZE]; /* declare the arrays of offsets */ -unsigned short half_offsets[MATRIX_SIZE]; -unsigned int word_offsets[MATRIX_SIZE]; +unsigned short half_offsets[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned int word_offsets[MATRIX_SIZE] __attribute__((aligned(128))); /* declare the arrays of values */ -unsigned short half_values[MATRIX_SIZE]; -unsigned short half_values_acc[MATRIX_SIZE]; -unsigned short half_values_masked[MATRIX_SIZE]; -unsigned int word_values[MATRIX_SIZE]; -unsigned int word_values_acc[MATRIX_SIZE]; -unsigned int word_values_masked[MATRIX_SIZE]; +unsigned short half_values[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned short half_values_acc[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned short half_values_masked[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned int word_values[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned int word_values_acc[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned int word_values_masked[MATRIX_SIZE] __attribute__((aligned(128))); /* declare the arrays of predicates */ -unsigned short half_predicates[MATRIX_SIZE]; -unsigned int word_predicates[MATRIX_SIZE]; +unsigned short half_predicates[MATRIX_SIZE] __attribute__((aligned(128))); +unsigned int word_predicates[MATRIX_SIZE] __attribute__((aligned(128))); -/* make this big enough for all the 
intrinsics */ +/* make this big enough for all the operations */ const size_t region_len = sizeof(vtcm); /* optionally add sync instructions */ @@ -261,164 +220,201 @@ void create_offsets_values_preds_16_32(void) } } -/* scatter the 16 bit elements using intrinsics */ +/* scatter the 16 bit elements using HVX */ void vector_scatter_16(void) { - /* copy the offsets and values to vectors */ - HVX_Vector offsets = *(HVX_Vector *)half_offsets; - HVX_Vector values = *(HVX_Vector *)half_values; - - VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%3 + #0)\n\t" + "vscatter(%0, m0, v0.h).h = v1\n\t" + : : "r"(vtcm.vscatter16), "r"(region_len), + "r"(half_offsets), "r"(half_values) + : "m0", "v0", "v1", "memory"); sync_scatter(vtcm.vscatter16); } -/* scatter-accumulate the 16 bit elements using intrinsics */ +/* scatter-accumulate the 16 bit elements using HVX */ void vector_scatter_16_acc(void) { - /* copy the offsets and values to vectors */ - HVX_Vector offsets = *(HVX_Vector *)half_offsets; - HVX_Vector values = *(HVX_Vector *)half_values_acc; - - VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%3 + #0)\n\t" + "vscatter(%0, m0, v0.h).h += v1\n\t" + : : "r"(vtcm.vscatter16), "r"(region_len), + "r"(half_offsets), "r"(half_values_acc) + : "m0", "v0", "v1", "memory"); sync_scatter(vtcm.vscatter16); } -/* scatter the 16 bit elements using intrinsics */ +/* masked scatter the 16 bit elements using HVX */ void vector_scatter_16_masked(void) { - /* copy the offsets and values to vectors */ - HVX_Vector offsets = *(HVX_Vector *)half_offsets; - HVX_Vector values = *(HVX_Vector *)half_values_masked; - HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; - HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); - - VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values); + asm ("r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "q0 = 
vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "v1 = vmem(%4 + #0)\n\t" + "if (q0) vscatter(%1, m0, v0.h).h = v1\n\t" + : : "r"(half_predicates), "r"(vtcm.vscatter16), "r"(region_len), + "r"(half_offsets), "r"(half_values_masked) + : "r1", "q0", "m0", "q0", "v0", "v1", "memory"); sync_scatter(vtcm.vscatter16); } -/* scatter the 32 bit elements using intrinsics */ +/* scatter the 32 bit elements using HVX */ void vector_scatter_32(void) { - /* copy the offsets and values to vectors */ - HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; - HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; - HVX_Vector valueslo = *(HVX_Vector *)word_values; - HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2]; - - VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo); - VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi); + HVX_Vector *offsetslo = (HVX_Vector *)word_offsets; + HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector *valueslo = (HVX_Vector *)word_values; + HVX_Vector *valueshi = (HVX_Vector *)&word_values[MATRIX_SIZE / 2]; + + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%3 + #0)\n\t" + "vscatter(%0, m0, v0.w).w = v1\n\t" + : : "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetslo), "r"(valueslo) + : "m0", "v0", "v1", "memory"); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%3 + #0)\n\t" + "vscatter(%0, m0, v0.w).w = v1\n\t" + : : "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetshi), "r"(valueshi) + : "m0", "v0", "v1", "memory"); sync_scatter(vtcm.vscatter32); } -/* scatter-acc the 32 bit elements using intrinsics */ +/* scatter-accumulate the 32 bit elements using HVX */ void vector_scatter_32_acc(void) { - /* copy the offsets and values to vectors */ - HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; - HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; - HVX_Vector valueslo = *(HVX_Vector *)word_values_acc; - 
HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2]; - - VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo); - VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi); + HVX_Vector *offsetslo = (HVX_Vector *)word_offsets; + HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector *valueslo = (HVX_Vector *)word_values_acc; + HVX_Vector *valueshi = (HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2]; + + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%3 + #0)\n\t" + "vscatter(%0, m0, v0.w).w += v1\n\t" + : : "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetslo), "r"(valueslo) + : "m0", "v0", "v1", "memory"); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%3 + #0)\n\t" + "vscatter(%0, m0, v0.w).w += v1\n\t" + : : "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetshi), "r"(valueshi) + : "m0", "v0", "v1", "memory"); sync_scatter(vtcm.vscatter32); } -/* scatter the 32 bit elements using intrinsics */ +/* masked scatter the 32 bit elements using HVX */ void vector_scatter_32_masked(void) { - /* copy the offsets and values to vectors */ - HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; - HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; - HVX_Vector valueslo = *(HVX_Vector *)word_values_masked; - HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2]; - HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; - HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; - HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); - HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); - - VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo, - valueslo); - VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi, - valueshi); + HVX_Vector *offsetslo = (HVX_Vector *)word_offsets; + HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector *valueslo = (HVX_Vector *)word_values_masked; 
+ HVX_Vector *valueshi = (HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2]; + HVX_Vector *predslo = (HVX_Vector *)word_predicates; + HVX_Vector *predshi = (HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + + asm ("r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "v1 = vmem(%4 + #0)\n\t" + "if (q0) vscatter(%1, m0, v0.w).w = v1\n\t" + : : "r"(predslo), "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetslo), "r"(valueslo) + : "r1", "q0", "m0", "q0", "v0", "v1", "memory"); + asm ("r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "v1 = vmem(%4 + #0)\n\t" + "if (q0) vscatter(%1, m0, v0.w).w = v1\n\t" + : : "r"(predshi), "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetshi), "r"(valueshi) + : "r1", "q0", "m0", "q0", "v0", "v1", "memory"); - sync_scatter(vtcm.vscatter16); + sync_scatter(vtcm.vscatter32); } -/* scatter the 16 bit elements with 32 bit offsets using intrinsics */ +/* scatter the 16 bit elements with 32 bit offsets using HVX */ void vector_scatter_16_32(void) { - HVX_VectorPair offsets; - HVX_Vector values; - - /* get the word offsets in a vector pair */ - offsets = *(HVX_VectorPair *)word_offsets; - - /* these values need to be shuffled for the scatter */ - values = *(HVX_Vector *)half_values; - values = VSHUFF_H(values); - - VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%2 + #1)\n\t" + "v2 = vmem(%3 + #0)\n\t" + "v2.h = vshuff(v2.h)\n\t" /* shuffle the values for the scatter */ + "vscatter(%0, m0, v1:0.w).h = v2\n\t" + : : "r"(vtcm.vscatter16_32), "r"(region_len), + "r"(word_offsets), "r"(half_values) + : "m0", "v0", "v1", "v2", "memory"); sync_scatter(vtcm.vscatter16_32); } -/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +/* scatter-accumulate the 16 bit elements with 32 bit offsets using HVX */ void 
vector_scatter_16_32_acc(void) { - HVX_VectorPair offsets; - HVX_Vector values; - - /* get the word offsets in a vector pair */ - offsets = *(HVX_VectorPair *)word_offsets; - - /* these values need to be shuffled for the scatter */ - values = *(HVX_Vector *)half_values_acc; - values = VSHUFF_H(values); - - VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%2 + #1)\n\t" + "v2 = vmem(%3 + #0)\n\t" \ + "v2.h = vshuff(v2.h)\n\t" /* shuffle the values for the scatter */ + "vscatter(%0, m0, v1:0.w).h += v2\n\t" + : : "r"(vtcm.vscatter16_32), "r"(region_len), + "r"(word_offsets), "r"(half_values_acc) + : "m0", "v0", "v1", "v2", "memory"); sync_scatter(vtcm.vscatter16_32); } -/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */ +/* masked scatter the 16 bit elements with 32 bit offsets using HVX */ void vector_scatter_16_32_masked(void) { - HVX_VectorPair offsets; - HVX_Vector values; - HVX_Vector pred_reg; - - /* get the word offsets in a vector pair */ - offsets = *(HVX_VectorPair *)word_offsets; - - /* these values need to be shuffled for the scatter */ - values = *(HVX_Vector *)half_values_masked; - values = VSHUFF_H(values); - - pred_reg = *(HVX_Vector *)half_predicates; - pred_reg = VSHUFF_H(pred_reg); - HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); - - VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets, - values); + asm ("r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "v0.h = vshuff(v0.h)\n\t" /* shuffle the predicates */ + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "v1 = vmem(%3 + #1)\n\t" + "v2 = vmem(%4 + #0)\n\t" \ + "v2.h = vshuff(v2.h)\n\t" /* shuffle the values for the scatter */ + "if (q0) vscatter(%1, m0, v1:0.w).h = v2\n\t" + : : "r"(half_predicates), "r"(vtcm.vscatter16_32), "r"(region_len), + "r"(word_offsets), "r"(half_values_masked) + : "r1", "q0", "m0", "v0", "v1", "v2", "memory"); 
sync_scatter(vtcm.vscatter16_32); } -/* gather the elements from the scatter16 buffer */ +/* gather the elements from the scatter16 buffer using HVX */ void vector_gather_16(void) { - HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; - HVX_Vector offsets = *(HVX_Vector *)half_offsets; - - VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets); - - sync_gather(vgather); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "{ vtmp.h = vgather(%0, m0, v0.h).h\n\t" + " vmem(%3 + #0) = vtmp.new }\n\t" + : : "r"(vtcm.vscatter16), "r"(region_len), + "r"(half_offsets), "r"(vtcm.vgather16) + : "m0", "v0", "memory"); + + sync_gather(vtcm.vgather16); } static unsigned short gather_16_masked_init(void) @@ -427,31 +423,51 @@ static unsigned short gather_16_masked_init(void) return letter | (letter << 8); } +/* masked gather the elements from the scatter16 buffer using HVX */ void vector_gather_16_masked(void) { - HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; - HVX_Vector offsets = *(HVX_Vector *)half_offsets; - HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; - HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); - - *vgather = VSPLAT_H(gather_16_masked_init()); - VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets); - - sync_gather(vgather); + unsigned short init = gather_16_masked_init(); + + asm ("v0.h = vsplat(%5)\n\t" + "vmem(%4 + #0) = v0\n\t" /* initialize the write area */ + "r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "{ if (q0) vtmp.h = vgather(%1, m0, v0.h).h\n\t" + " vmem(%4 + #0) = vtmp.new }\n\t" + : : "r"(half_predicates), "r"(vtcm.vscatter16), "r"(region_len), + "r"(half_offsets), "r"(vtcm.vgather16), "r"(init) + : "r1", "q0", "m0", "v0", "memory"); + + sync_gather(vtcm.vgather16); } -/* gather the elements from the scatter32 buffer */ +/* gather the elements from the scatter32 buffer using HVX */ void vector_gather_32(void) { - HVX_Vector *vgatherlo = (HVX_Vector 
*)&vtcm.vgather32; - HVX_Vector *vgatherhi = - (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); - HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; - HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; - - VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo); - VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi); + HVX_Vector *vgatherlo = (HVX_Vector *)vtcm.vgather32; + HVX_Vector *vgatherhi = (HVX_Vector *)&vtcm.vgather32[MATRIX_SIZE / 2]; + HVX_Vector *offsetslo = (HVX_Vector *)word_offsets; + HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "{ vtmp.w = vgather(%0, m0, v0.w).w\n\t" + " vmem(%3 + #0) = vtmp.new }\n\t" + : : "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetslo), "r"(vgatherlo) + : "m0", "v0", "memory"); + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "{ vtmp.w = vgather(%0, m0, v0.w).w\n\t" + " vmem(%3 + #0) = vtmp.new }\n\t" + : : "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetshi), "r"(vgatherhi) + : "m0", "v0", "memory"); + sync_gather(vgatherlo); sync_gather(vgatherhi); } @@ -461,79 +477,88 @@ static unsigned int gather_32_masked_init(void) return letter | (letter << 8) | (letter << 16) | (letter << 24); } +/* masked gather the elements from the scatter32 buffer using HVX */ void vector_gather_32_masked(void) { - HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; - HVX_Vector *vgatherhi = - (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); - HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; - HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; - HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; - HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); - HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; - HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); - - *vgatherlo = VSPLAT_H(gather_32_masked_init()); - *vgatherhi = VSPLAT_H(gather_32_masked_init()); - 
VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len, - offsetslo); - VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len, - offsetshi); + unsigned int init = gather_32_masked_init(); + HVX_Vector *vgatherlo = (HVX_Vector *)vtcm.vgather32; + HVX_Vector *vgatherhi = (HVX_Vector *)&vtcm.vgather32[MATRIX_SIZE / 2]; + HVX_Vector *offsetslo = (HVX_Vector *)word_offsets; + HVX_Vector *offsetshi = (HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector *predslo = (HVX_Vector *)word_predicates; + HVX_Vector *predshi = (HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + + asm ("v0.h = vsplat(%5)\n\t" + "vmem(%4 + #0) = v0\n\t" /* initialize the write area */ + "r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "{ if (q0) vtmp.w = vgather(%1, m0, v0.w).w\n\t" + " vmem(%4 + #0) = vtmp.new }\n\t" + : : "r"(predslo), "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetslo), "r"(vgatherlo), "r"(init) + : "r1", "q0", "m0", "v0", "memory"); + asm ("v0.h = vsplat(%5)\n\t" + "vmem(%4 + #0) = v0\n\t" /* initialize the write area */ + "r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "{ if (q0) vtmp.w = vgather(%1, m0, v0.w).w\n\t" + " vmem(%4 + #0) = vtmp.new }\n\t" + : : "r"(predshi), "r"(vtcm.vscatter32), "r"(region_len), + "r"(offsetshi), "r"(vgatherhi), "r"(init) + : "r1", "q0", "m0", "v0", "memory"); sync_gather(vgatherlo); sync_gather(vgatherhi); } -/* gather the elements from the scatter16_32 buffer */ +/* gather the elements from the scatter16_32 buffer using HVX */ void vector_gather_16_32(void) { - HVX_Vector *vgather; - HVX_VectorPair offsets; - HVX_Vector values; - - /* get the vtcm address to gather from */ - vgather = (HVX_Vector *)&vtcm.vgather16_32; - - /* get the word offsets in a vector pair */ - offsets = *(HVX_VectorPair *)word_offsets; - - VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets); - - /* 
deal the elements to get the order back */ - values = *(HVX_Vector *)vgather; - values = VDEAL_H(values); - - /* write it back to vtcm address */ - *(HVX_Vector *)vgather = values; + asm ("m0 = %1\n\t" + "v0 = vmem(%2 + #0)\n\t" + "v1 = vmem(%2 + #1)\n\t" + "{ vtmp.h = vgather(%0, m0, v1:0.w).h\n\t" + " vmem(%3 + #0) = vtmp.new }\n\t" + "v0 = vmem(%3 + #0)\n\t" + "v0.h = vdeal(v0.h)\n\t" /* deal the elements to get the order back */ + "vmem(%3 + #0) = v0\n\t" + : : "r"(vtcm.vscatter16_32), "r"(region_len), + "r"(word_offsets), "r"(vtcm.vgather16_32) + : "m0", "v0", "v1", "memory"); + + sync_gather(vtcm.vgather16_32); } +/* masked gather the elements from the scatter16_32 buffer using HVX */ void vector_gather_16_32_masked(void) { - HVX_Vector *vgather; - HVX_VectorPair offsets; - HVX_Vector pred_reg; - HVX_VectorPred preds; - HVX_Vector values; - - /* get the vtcm address to gather from */ - vgather = (HVX_Vector *)&vtcm.vgather16_32; - - /* get the word offsets in a vector pair */ - offsets = *(HVX_VectorPair *)word_offsets; - pred_reg = *(HVX_Vector *)half_predicates; - pred_reg = VSHUFF_H(pred_reg); - preds = VAND_VAL(pred_reg, ~0); - - *vgather = VSPLAT_H(gather_16_masked_init()); - VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len, - offsets); - - /* deal the elements to get the order back */ - values = *(HVX_Vector *)vgather; - values = VDEAL_H(values); - - /* write it back to vtcm address */ - *(HVX_Vector *)vgather = values; + unsigned short init = gather_16_masked_init(); + + asm ("v0.h = vsplat(%5)\n\t" + "vmem(%4 + #0) = v0\n\t" /* initialize the write area */ + "r1 = #-1\n\t" + "v0 = vmem(%0 + #0)\n\t" + "v0.h = vshuff(v0.h)\n\t" /* shuffle the predicates */ + "q0 = vand(v0, r1)\n\t" + "m0 = %2\n\t" + "v0 = vmem(%3 + #0)\n\t" + "v1 = vmem(%3 + #1)\n\t" + "{ if (q0) vtmp.h = vgather(%1, m0, v1:0.w).h\n\t" + " vmem(%4 + #0) = vtmp.new }\n\t" + "v0 = vmem(%4 + #0)\n\t" + "v0.h = vdeal(v0.h)\n\t" /* deal the elements to get the order back 
*/ + "vmem(%4 + #0) = v0\n\t" + : : "r"(half_predicates), "r"(vtcm.vscatter16_32), "r"(region_len), + "r"(word_offsets), "r"(vtcm.vgather16_32), "r"(init) + : "r1", "q0", "m0", "v0", "v1", "memory"); + + sync_gather(vtcm.vgather16_32); } static void check_buffer(const char *name, void *c, void *r, size_t size) @@ -579,6 +604,7 @@ void scalar_scatter_16_acc(unsigned short *vscatter16) } } +/* scatter-accumulate the 16 bit elements using C */ void check_scatter_16_acc() { memset(vscatter16_ref, FILL_CHAR, @@ -589,7 +615,7 @@ void check_scatter_16_acc() SCATTER_BUFFER_SIZE * sizeof(unsigned short)); } -/* scatter the 16 bit elements using C */ +/* masked scatter the 16 bit elements using C */ void scalar_scatter_16_masked(unsigned short *vscatter16) { for (int i = 0; i < MATRIX_SIZE; i++) { @@ -628,7 +654,7 @@ void check_scatter_32() SCATTER_BUFFER_SIZE * sizeof(unsigned int)); } -/* scatter the 32 bit elements using C */ +/* scatter-accumulate the 32 bit elements using C */ void scalar_scatter_32_acc(unsigned int *vscatter32) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -646,7 +672,7 @@ void check_scatter_32_acc() SCATTER_BUFFER_SIZE * sizeof(unsigned int)); } -/* scatter the 32 bit elements using C */ +/* masked scatter the 32 bit elements using C */ void scalar_scatter_32_masked(unsigned int *vscatter32) { for (int i = 0; i < MATRIX_SIZE; i++) { @@ -667,7 +693,7 @@ void check_scatter_32_masked() SCATTER_BUFFER_SIZE * sizeof(unsigned int)); } -/* scatter the 32 bit elements using C */ +/* scatter the 16 bit elements with 32 bit offsets using C */ void scalar_scatter_16_32(unsigned short *vscatter16_32) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -684,7 +710,7 @@ void check_scatter_16_32() SCATTER_BUFFER_SIZE * sizeof(unsigned short)); } -/* scatter the 32 bit elements using C */ +/* scatter-accumulate the 16 bit elements with 32 bit offsets using C */ void scalar_scatter_16_32_acc(unsigned short *vscatter16_32) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -702,6 
+728,7 @@ void check_scatter_16_32_acc() SCATTER_BUFFER_SIZE * sizeof(unsigned short)); } +/* masked scatter the 16 bit elements with 32 bit offsets using C */ void scalar_scatter_16_32_masked(unsigned short *vscatter16_32) { for (int i = 0; i < MATRIX_SIZE; i++) { @@ -738,6 +765,7 @@ void check_gather_16() MATRIX_SIZE * sizeof(unsigned short)); } +/* masked gather the elements from the scatter buffer using C */ void scalar_gather_16_masked(unsigned short *vgather16) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -756,7 +784,7 @@ void check_gather_16_masked() MATRIX_SIZE * sizeof(unsigned short)); } -/* gather the elements from the scatter buffer using C */ +/* gather the elements from the scatter32 buffer using C */ void scalar_gather_32(unsigned int *vgather32) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -772,6 +800,7 @@ void check_gather_32(void) MATRIX_SIZE * sizeof(unsigned int)); } +/* masked gather the elements from the scatter32 buffer using C */ void scalar_gather_32_masked(unsigned int *vgather32) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -781,7 +810,6 @@ void scalar_gather_32_masked(unsigned int *vgather32) } } - void check_gather_32_masked(void) { memset(vgather32_ref, gather_32_masked_init(), @@ -791,7 +819,7 @@ void check_gather_32_masked(void) vgather32_ref, MATRIX_SIZE * sizeof(unsigned int)); } -/* gather the elements from the scatter buffer using C */ +/* gather the elements from the scatter16_32 buffer using C */ void scalar_gather_16_32(unsigned short *vgather16_32) { for (int i = 0; i < MATRIX_SIZE; ++i) { @@ -807,6 +835,7 @@ void check_gather_16_32(void) MATRIX_SIZE * sizeof(unsigned short)); } +/* masked gather the elements from the scatter16_32 buffer using C */ void scalar_gather_16_32_masked(unsigned short *vgather16_32) { for (int i = 0; i < MATRIX_SIZE; ++i) { |