diff options
59 files changed, 1437 insertions, 426 deletions
@@ -440,7 +440,10 @@ endif install: all $(if $(BUILD_DOCS),install-doc) \ install-datadir install-localstatedir ifneq ($(TOOLS),) - $(call install-prog,$(TOOLS),$(DESTDIR)$(bindir)) + $(call install-prog,$(filter-out qemu-ga,$(TOOLS)),$(DESTDIR)$(bindir)) +ifneq (,$(findstring qemu-ga,$(TOOLS))) + $(call install-prog,qemu-ga$(EXESUF),$(DESTDIR)$(bindir)) +endif endif ifneq ($(CONFIG_MODULES),) $(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)" @@ -1 +1 @@ -2.4.50 +2.4.90 diff --git a/aio-posix.c b/aio-posix.c index 06148a9ba3..482b316502 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -210,6 +210,7 @@ void aio_set_fd_handler(AioContext *ctx, { AioHandler *node; bool is_new = false; + bool deleted = false; node = find_aio_handler(ctx, fd); @@ -228,7 +229,7 @@ void aio_set_fd_handler(AioContext *ctx, * releasing the walking_handlers lock. */ QLIST_REMOVE(node, node); - g_free(node); + deleted = true; } } } else { @@ -253,6 +254,9 @@ void aio_set_fd_handler(AioContext *ctx, aio_epoll_update(ctx, node, is_new); aio_notify(ctx); + if (deleted) { + g_free(node); + } } void aio_set_event_notifier(AioContext *ctx, diff --git a/block/block-backend.c b/block/block-backend.c index 9889e813b6..36ccc9e616 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -642,8 +642,9 @@ static void error_callback_bh(void *opaque) qemu_aio_unref(acb); } -static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb, - void *opaque, int ret) +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret) { struct BlockBackendAIOCB *acb; QEMUBH *bh; @@ -665,7 +666,7 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags, @@ -725,7 +726,7 @@ BlockAIOCB 
*blk_aio_readv(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -737,7 +738,7 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -747,7 +748,7 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { if (!blk_is_available(blk)) { - return abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } return bdrv_aio_flush(blk->bs, cb, opaque); @@ -759,7 +760,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque); @@ -802,7 +803,7 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { if (!blk_is_available(blk)) { - return abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque); diff --git a/block/snapshot.c b/block/snapshot.c index 89500f2f18..6e9fa8da98 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -253,9 +253,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs, return -ENOTSUP; } -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, - const char *id_or_name, - Error **errp) +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + 
const char *id_or_name, + Error **errp) { int ret; Error *local_err = NULL; @@ -270,6 +270,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, if (ret < 0) { error_propagate(errp, local_err); } + return ret; } int bdrv_snapshot_list(BlockDriverState *bs, @@ -356,3 +357,130 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, return ret; } + + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers) */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) +{ + bool ok = true; + BlockDriverState *bs = NULL; + + while (ok && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) { + ok = bdrv_can_snapshot(bs); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return ok; +} + +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **err) +{ + int ret = 0; + BlockDriverState *bs = NULL; + QEMUSnapshotInfo sn1, *snapshot = &sn1; + + while (ret == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs) && + bdrv_snapshot_find(bs, snapshot, name) >= 0) { + ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return ret; +} + + +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) +{ + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + err = bdrv_snapshot_goto(bs, name); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) +{ + QEMUSnapshotInfo sn; + int err 
= 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + err = bdrv_snapshot_find(bs, &sn, name); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs) +{ + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + err = bdrv_snapshot_create(bs, sn); + } else if (bdrv_can_snapshot(bs)) { + sn->vm_state_size = 0; + err = bdrv_snapshot_create(bs, sn); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +BlockDriverState *bdrv_all_find_vmstate_bs(void) +{ + bool not_found = true; + BlockDriverState *bs = NULL; + + while (not_found && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + not_found = !bdrv_can_snapshot(bs); + aio_context_release(ctx); + } + return bs; +} diff --git a/blockdev.c b/blockdev.c index fc85128e94..313841b0b4 100644 --- a/blockdev.c +++ b/blockdev.c @@ -300,6 +300,45 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) } } +static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals, + Error **errp) +{ + const QListEntry *entry; + for (entry = qlist_first(intervals); entry; entry = qlist_next(entry)) { + switch (qobject_type(entry->value)) { + + case QTYPE_QSTRING: { + unsigned long long length; + const char *str = qstring_get_str(qobject_to_qstring(entry->value)); + if (parse_uint_full(str, &length, 10) == 0 && + length > 0 && length <= UINT_MAX) { + block_acct_add_interval(stats, (unsigned) length); + } else { + error_setg(errp, "Invalid interval 
length: %s", str); + return false; + } + break; + } + + case QTYPE_QINT: { + int64_t length = qint_get_int(qobject_to_qint(entry->value)); + if (length > 0 && length <= UINT_MAX) { + block_acct_add_interval(stats, (unsigned) length); + } else { + error_setg(errp, "Invalid interval length: %" PRId64, length); + return false; + } + break; + } + + default: + error_setg(errp, "The specification of stats-intervals is invalid"); + return false; + } + } + return true; +} + static bool check_throttle_config(ThrottleConfig *cfg, Error **errp) { if (throttle_conflicting(cfg)) { @@ -442,13 +481,14 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, int bdrv_flags = 0; int on_read_error, on_write_error; bool account_invalid, account_failed; - const char *stats_intervals; BlockBackend *blk; BlockDriverState *bs; ThrottleConfig cfg; int snapshot = 0; Error *error = NULL; QemuOpts *opts; + QDict *interval_dict = NULL; + QList *interval_list = NULL; const char *id; bool has_driver_specific_opts; BlockdevDetectZeroesOptions detect_zeroes = @@ -482,7 +522,14 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true); account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true); - stats_intervals = qemu_opt_get(opts, "stats-intervals"); + qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals."); + qdict_array_split(interval_dict, &interval_list); + + if (qdict_size(interval_dict) != 0) { + error_setg(errp, "Invalid option stats-intervals.%s", + qdict_first(interval_dict)->key); + goto early_err; + } extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg, &detect_zeroes, &error); @@ -583,33 +630,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, block_acct_init(blk_get_stats(blk), account_invalid, account_failed); - if (stats_intervals) { - char **intervals = g_strsplit(stats_intervals, ":", 0); - unsigned i; - - if 
(*stats_intervals == '\0') { - error_setg(&error, "stats-intervals can't have an empty value"); - } - - for (i = 0; !error && intervals[i] != NULL; i++) { - unsigned long long val; - if (parse_uint_full(intervals[i], &val, 10) == 0 && - val > 0 && val <= UINT_MAX) { - block_acct_add_interval(blk_get_stats(blk), val); - } else { - error_setg(&error, "Invalid interval length: '%s'", - intervals[i]); - } - } - - g_strfreev(intervals); - - if (error) { - error_propagate(errp, error); - blk_unref(blk); - blk = NULL; - goto err_no_bs_opts; - } + if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) { + blk_unref(blk); + blk = NULL; + goto err_no_bs_opts; } } @@ -617,10 +641,14 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, err_no_bs_opts: qemu_opts_del(opts); + QDECREF(interval_dict); + QDECREF(interval_list); return blk; early_err: qemu_opts_del(opts); + QDECREF(interval_dict); + QDECREF(interval_list); err_no_opts: QDECREF(bs_opts); return NULL; @@ -2070,6 +2098,7 @@ static const BlkActionOps actions[] = { .prepare = external_snapshot_prepare, .commit = external_snapshot_commit, .abort = external_snapshot_abort, + .clean = external_snapshot_clean, }, [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { .instance_size = sizeof(ExternalSnapshotState), @@ -3143,6 +3172,7 @@ static void do_drive_backup(const char *device, const char *target, bmap = bdrv_find_dirty_bitmap(bs, bitmap); if (!bmap) { error_setg(errp, "Bitmap '%s' could not be found", bitmap); + bdrv_unref(target_bs); goto out; } } @@ -3948,11 +3978,6 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_BOOL, .help = "whether to account for failed I/O operations " "in the statistics", - },{ - .name = "stats-intervals", - .type = QEMU_OPT_STRING, - .help = "colon-separated list of intervals " - "for collecting I/O statistics, in seconds", }, { /* end of list */ } }, @@ -1888,16 +1888,34 @@ fi # libseccomp check if test "$seccomp" != "no" ; then - if test "$cpu" = 
"i386" || test "$cpu" = "x86_64" && - $pkg_config --atleast-version=2.1.1 libseccomp; then + case "$cpu" in + i386|x86_64) + libseccomp_minver="2.1.0" + ;; + arm|aarch64) + libseccomp_minver="2.2.3" + ;; + *) + libseccomp_minver="" + ;; + esac + + if test "$libseccomp_minver" != "" && + $pkg_config --atleast-version=$libseccomp_minver libseccomp ; then libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`" QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`" - seccomp="yes" + seccomp="yes" else - if test "$seccomp" = "yes"; then - feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.1" - fi - seccomp="no" + if test "$seccomp" = "yes" ; then + if test "$libseccomp_minver" != "" ; then + feature_not_found "libseccomp" \ + "Install libseccomp devel >= $libseccomp_minver" + else + feature_not_found "libseccomp" \ + "libseccomp is not supported for host cpu $cpu" + fi + fi + seccomp="no" fi fi ########################################## @@ -5663,6 +5681,7 @@ case "$target_name" in echo "CONFIG_KVM=y" >> $config_target_mak if test "$vhost_net" = "yes" ; then echo "CONFIG_VHOST_NET=y" >> $config_target_mak + echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak fi fi esac diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c index dc46bc40f7..d080deb83e 100644 --- a/crypto/tlscredsx509.c +++ b/crypto/tlscredsx509.c @@ -485,7 +485,8 @@ qcrypto_tls_creds_x509_sanity_check(QCryptoTLSCredsX509 *creds, int ret = -1; memset(cacerts, 0, sizeof(cacerts)); - if (access(certFile, R_OK) == 0) { + if (certFile && + access(certFile, R_OK) == 0) { cert = qcrypto_tls_creds_load_cert(creds, certFile, isServer, errp); @@ -654,6 +655,10 @@ qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds) gnutls_certificate_free_credentials(creds->data); creds->data = NULL; } + if (creds->parent_obj.dh_params) { + gnutls_dh_params_deinit(creds->parent_obj.dh_params); + creds->parent_obj.dh_params = NULL; + } } diff --git a/crypto/tlssession.c 
b/crypto/tlssession.c index ffc5c47949..373552942c 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -304,9 +304,9 @@ qcrypto_tls_session_check_certificate(QCryptoTLSSession *session, allow = qemu_acl_party_is_allowed(acl, session->peername); - error_setg(errp, "TLS x509 ACL check for %s is %s", - session->peername, allow ? "allowed" : "denied"); if (!allow) { + error_setg(errp, "TLS x509 ACL check for %s is denied", + session->peername); goto error; } } diff --git a/disas/arm.c b/disas/arm.c index 6165246539..7a7354b76a 100644 --- a/disas/arm.c +++ b/disas/arm.c @@ -1779,7 +1779,7 @@ print_insn_coprocessor (bfd_vma pc, struct disassemble_info *info, long given, /* Is ``imm'' a negative number? */ if (imm & 0x40) - imm |= (-1 << 7); + imm |= (~0u << 7); func (stream, "%d", imm); } diff --git a/docs/bitmaps.md b/docs/bitmaps.md index 9fd8ea65ea..a2e8d51163 100644 --- a/docs/bitmaps.md +++ b/docs/bitmaps.md @@ -19,12 +19,20 @@ which is included at the end of this document. * A dirty bitmap's name is unique to the node, but bitmaps attached to different nodes can share the same name. +* Dirty bitmaps created for internal use by QEMU may be anonymous and have no + name, but any user-created bitmaps may not be. There can be any number of + anonymous bitmaps per node. + +* The name of a user-created bitmap must not be empty (""). + ## Bitmap Modes * A Bitmap can be "frozen," which means that it is currently in-use by a backup operation and cannot be deleted, renamed, written to, reset, etc. +* The normal operating mode for a bitmap is "active." + ## Basic QMP Usage ### Supported Commands ### @@ -319,6 +327,155 @@ full backup as a backing image. "event": "BLOCK_JOB_COMPLETED" } ``` +### Partial Transactional Failures + +* Sometimes, a transaction will succeed in launching and return success, + but then later the backup jobs themselves may fail. 
It is possible that + a management application may have to deal with a partial backup failure + after a successful transaction. + +* If multiple backup jobs are specified in a single transaction, when one of + them fails, it will not interact with the other backup jobs in any way. + +* The job(s) that succeeded will clear the dirty bitmap associated with the + operation, but the job(s) that failed will not. It is not "safe" to delete + any incremental backups that were created successfully in this scenario, + even though others failed. + +#### Example + +* QMP example highlighting two backup jobs: + + ```json + { "execute": "transaction", + "arguments": { + "actions": [ + { "type": "drive-backup", + "data": { "device": "drive0", "bitmap": "bitmap0", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d0-incr-1.qcow2" } }, + { "type": "drive-backup", + "data": { "device": "drive1", "bitmap": "bitmap1", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d1-incr-1.qcow2" } }, + ] + } + } + ``` + +* QMP example response, highlighting one success and one failure: + * Acknowledgement that the Transaction was accepted and jobs were launched: + ```json + { "return": {} } + ``` + + * Later, QEMU sends notice that the first job was completed: + ```json + { "timestamp": { "seconds": 1447192343, "microseconds": 615698 }, + "data": { "device": "drive0", "type": "backup", + "speed": 0, "len": 67108864, "offset": 67108864 }, + "event": "BLOCK_JOB_COMPLETED" + } + ``` + + * Later yet, QEMU sends notice that the second job has failed: + ```json + { "timestamp": { "seconds": 1447192399, "microseconds": 683015 }, + "data": { "device": "drive1", "action": "report", + "operation": "read" }, + "event": "BLOCK_JOB_ERROR" } + ``` + + ```json + { "timestamp": { "seconds": 1447192399, "microseconds": 685853 }, + "data": { "speed": 0, "offset": 0, "len": 67108864, + "error": "Input/output error", + "device": "drive1", "type": "backup" }, + 
"event": "BLOCK_JOB_COMPLETED" } + +* In the above example, "d0-incr-1.qcow2" is valid and must be kept, + but "d1-incr-1.qcow2" is invalid and should be deleted. If a VM-wide + incremental backup of all drives at a point-in-time is to be made, + new backups for both drives will need to be made, taking into account + that a new incremental backup for drive0 needs to be based on top of + "d0-incr-1.qcow2." + +### Grouped Completion Mode + +* While jobs launched by transactions normally complete or fail on their own, + it is possible to instruct them to complete or fail together as a group. + +* QMP transactions take an optional properties structure that can affect + the semantics of the transaction. + +* The "completion-mode" transaction property can be either "individual" + which is the default, legacy behavior described above, or "grouped," + a new behavior detailed below. + +* Delayed Completion: In grouped completion mode, no jobs will report + success until all jobs are ready to report success. + +* Grouped failure: If any job fails in grouped completion mode, all remaining + jobs will be cancelled. Any incremental backups will restore their dirty + bitmap objects as if no backup command was ever issued. + + * Regardless of if QEMU reports a particular incremental backup job as + CANCELLED or as an ERROR, the in-memory bitmap will be restored. 
+ +#### Example + +* Here's the same example scenario from above with the new property: + + ```json + { "execute": "transaction", + "arguments": { + "actions": [ + { "type": "drive-backup", + "data": { "device": "drive0", "bitmap": "bitmap0", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d0-incr-1.qcow2" } }, + { "type": "drive-backup", + "data": { "device": "drive1", "bitmap": "bitmap1", + "format": "qcow2", "mode": "existing", + "sync": "incremental", "target": "d1-incr-1.qcow2" } }, + ], + "properties": { + "completion-mode": "grouped" + } + } + } + ``` + +* QMP example response, highlighting a failure for drive2: + * Acknowledgement that the Transaction was accepted and jobs were launched: + ```json + { "return": {} } + ``` + + * Later, QEMU sends notice that the second job has errored out, + but that the first job was also cancelled: + ```json + { "timestamp": { "seconds": 1447193702, "microseconds": 632377 }, + "data": { "device": "drive1", "action": "report", + "operation": "read" }, + "event": "BLOCK_JOB_ERROR" } + ``` + + ```json + { "timestamp": { "seconds": 1447193702, "microseconds": 640074 }, + "data": { "speed": 0, "offset": 0, "len": 67108864, + "error": "Input/output error", + "device": "drive1", "type": "backup" }, + "event": "BLOCK_JOB_COMPLETED" } + ``` + + ```json + { "timestamp": { "seconds": 1447193702, "microseconds": 640163 }, + "data": { "device": "drive0", "type": "backup", "speed": 0, + "len": 67108864, "offset": 16777216 }, + "event": "BLOCK_JOB_CANCELLED" } + ``` + <!-- The FreeBSD Documentation License diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt index f9fa6f3d96..ceb9a782d0 100644 --- a/docs/qapi-code-gen.txt +++ b/docs/qapi-code-gen.txt @@ -514,6 +514,17 @@ exactly the server (QEMU) supports. For this purpose, QMP provides introspection via command query-qmp-schema. QGA currently doesn't support introspection. 
+While Client JSON Protocol wire compatibility should be maintained +between qemu versions, we cannot make the same guarantees for +introspection stability. For example, one version of qemu may provide +a non-variant optional member of a struct, and a later version rework +the member to instead be non-optional and associated with a variant. +Likewise, one version of qemu may list a member with open-ended type +'str', and a later version could convert it to a finite set of strings +via an enum type; or a member may be converted from a specific type to +an alternate that represents a choice between the original type and +something else. + query-qmp-schema returns a JSON array of SchemaInfo objects. These objects together describe the wire ABI, as defined in the QAPI schema. There is no specified order to the SchemaInfo objects returned; a diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index 26dde2ec42..7b9cd6d0dd 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -87,6 +87,14 @@ Depending on the request type, payload can be: User address: a 64-bit user address mmap offset: 64-bit offset where region starts in the mapped memory +* Log description + --------------------------- + | log size | log offset | + --------------------------- + log size: size of area used for logging + log offset: offset from start of supplied file descriptor + where logging starts (i.e. where guest address 0 would be logged) + In QEMU the vhost-user message is implemented with the following struct: typedef struct VhostUserMsg { @@ -138,6 +146,29 @@ As older slaves don't support negotiating protocol features, a feature bit was dedicated for this purpose: #define VHOST_USER_F_PROTOCOL_FEATURES 30 +Starting and stopping rings +---------------------- +Client must only process each ring when it is both started and enabled. + +If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is initialized +in an enabled state. 
+ +If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized +in a disabled state. Client must not process it until ring is enabled by +VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by +VHOST_USER_SET_VRING_ENABLE with parameter 0. + +Each ring is initialized in a stopped state, client must not process it until +ring is started, or after it has been stopped. + +Client must start ring upon receiving a kick (that is, detecting that file +descriptor is readable) on the descriptor specified by +VHOST_USER_SET_VRING_KICK, and stop ring upon receiving +VHOST_USER_GET_VRING_BASE. + +While processing the rings (when they are started and enabled), client must +support changing some configuration aspects on the fly. + Multiple queue support ---------------------- @@ -162,9 +193,13 @@ the slave makes to the memory mapped regions. The client should mark the dirty pages in a log. Once it complies to this logging, it may declare the VHOST_F_LOG_ALL vhost feature. +To start/stop logging of data/used ring writes, server may send messages +VHOST_USER_SET_FEATURES with VHOST_F_LOG_ALL and VHOST_USER_SET_VRING_ADDR with +VHOST_VRING_F_LOG in ring's flags set to 1/0, respectively. + All the modifications to memory pointed by vring "descriptor" should be marked. Modifications to "used" vring should be marked if -VHOST_VRING_F_LOG is part of ring's features. +VHOST_VRING_F_LOG is part of ring's flags. Dirty pages are of size: #define VHOST_LOG_PAGE 0x1000 @@ -173,22 +208,35 @@ The log memory fd is provided in the ancillary data of VHOST_USER_SET_LOG_BASE message when the slave has VHOST_USER_PROTOCOL_F_LOG_SHMFD protocol feature. -The size of the log may be computed by using all the known guest -addresses. The log covers from address 0 to the maximum of guest +The size of the log is supplied as part of VhostUserMsg +which should be large enough to cover all known guest +addresses. 
Log starts at the supplied offset in the +supplied file descriptor. +The log covers from address 0 to the maximum of guest regions. In pseudo-code, to mark page at "addr" as dirty: page = addr / VHOST_LOG_PAGE log[page / 8] |= 1 << page % 8 +Where addr is the guest physical address. + Use atomic operations, as the log may be concurrently manipulated. +Note that when logging modifications to the used ring (when VHOST_VRING_F_LOG +is set for this ring), log_guest_addr should be used to calculate the log +offset: the write to first byte of the used ring is logged at this offset from +log start. Also note that this value might be outside the legal guest physical +address range (i.e. does not have to be covered by the VhostUserMemory table), +but the bit offset of the last byte of the ring must fall within +the size supplied by VhostUserLog. + VHOST_USER_SET_LOG_FD is an optional message with an eventfd in ancillary data, it may be used to inform the master that the log has been modified. -Once the source has finished migration, VHOST_USER_RESET_OWNER message -will be sent by the source. No further update must be done before the -destination takes over with new regions & rings. +Once the source has finished migration, rings will be stopped by +the source. No further update must be done before rings are +restarted. Protocol features ----------------- @@ -259,11 +307,13 @@ Message types * VHOST_USER_RESET_OWNER Id: 4 - Equivalent ioctl: VHOST_RESET_OWNER Master payload: N/A - Issued when a new connection is about to be closed. The Master will no - longer own this connection (and will usually close it). + This is no longer used. Used to be sent to request stopping + all rings, but some clients interpreted it to also discard + connection state (this interpretation would lead to bugs). + It is recommended that clients either ignore this message, + or use it to stop all rings. 
* VHOST_USER_SET_MEM_TABLE @@ -388,6 +438,8 @@ Message types Master payload: vring state description Signal slave to enable or disable corresponding vring. + This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES + has been negotiated. * VHOST_USER_SEND_RARP @@ -51,6 +51,7 @@ #include "qemu/main-loop.h" #include "translate-all.h" #include "sysemu/replay.h" +#include "sysemu/qtest.h" #include "exec/memory-internal.h" #include "exec/ram_addr.h" @@ -1196,8 +1197,10 @@ static long gethugepagesize(const char *path, Error **errp) return 0; } - if (fs.f_type != HUGETLBFS_MAGIC) + if (!qtest_driver() && + fs.f_type != HUGETLBFS_MAGIC) { fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path); + } return fs.f_bsize; } diff --git a/hw/acpi/core.c b/hw/acpi/core.c index fe6215af4a..21e113d713 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -625,8 +625,12 @@ void acpi_pm1_cnt_reset(ACPIREGS *ar) void acpi_gpe_init(ACPIREGS *ar, uint8_t len) { ar->gpe.len = len; - ar->gpe.sts = g_malloc0(len / 2); - ar->gpe.en = g_malloc0(len / 2); + /* Only first len / 2 bytes are ever used, + * but the caller in ich9.c migrates full len bytes. + * TODO: fix ich9.c and drop the extra allocation. 
+ */ + ar->gpe.sts = g_malloc0(len); + ar->gpe.en = g_malloc0(len); } void acpi_gpe_reset(ACPIREGS *ar) diff --git a/hw/block/nand.c b/hw/block/nand.c index 61d2cec032..a68266f887 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -522,8 +522,8 @@ void nand_setio(DeviceState *dev, uint32_t value) if (s->ale) { unsigned int shift = s->addrlen * 8; - unsigned int mask = ~(0xff << shift); - unsigned int v = value << shift; + uint64_t mask = ~(0xffull << shift); + uint64_t v = (uint64_t)value << shift; s->addr = (s->addr & mask) | v; s->addrlen ++; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e70fccf80c..848f3fe3e1 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -112,6 +112,10 @@ static void virtio_blk_rw_complete(void *opaque, int ret) * happen on the other side of the migration). */ if (virtio_blk_handle_rw_error(req, -ret, is_read)) { + /* Break the link in case the next request is added to the + * restart queue and is going to be parsed from the ring again. 
+ */ + req->mr_next = NULL; continue; } } diff --git a/hw/core/machine.c b/hw/core/machine.c index f4db340468..acca00db22 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -462,11 +462,6 @@ bool machine_usb(MachineState *machine) return machine->usb; } -bool machine_iommu(MachineState *machine) -{ - return machine->iommu; -} - bool machine_kernel_irqchip_allowed(MachineState *machine) { return machine->kernel_irqchip_allowed; diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index cf0b78e3a4..7b9f74c3b5 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -105,20 +105,27 @@ static void cd_data_to_raw(uint8_t *buf, int lba) memset(buf, 0, 288); } -static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) +static int +cd_read_sector_sync(IDEState *s) { int ret; block_acct_start(blk_get_stats(s->blk), &s->acct, 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - switch(sector_size) { +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector_sync: lba=%d\n", s->lba); +#endif + + switch (s->cd_sector_size) { case 2048: - ret = blk_read(s->blk, (int64_t)lba << 2, buf, 4); + ret = blk_read(s->blk, (int64_t)s->lba << 2, + s->io_buffer, 4); break; case 2352: - ret = blk_read(s->blk, (int64_t)lba << 2, buf + 16, 4); + ret = blk_read(s->blk, (int64_t)s->lba << 2, + s->io_buffer + 16, 4); if (ret >= 0) { - cd_data_to_raw(buf, lba); + cd_data_to_raw(s->io_buffer, s->lba); } break; default: @@ -130,11 +137,65 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t *buf, int sector_size) block_acct_failed(blk_get_stats(s->blk), &s->acct); } else { block_acct_done(blk_get_stats(s->blk), &s->acct); + s->lba++; + s->io_buffer_index = 0; } return ret; } +static void cd_read_sector_cb(void *opaque, int ret) +{ + IDEState *s = opaque; + + block_acct_done(blk_get_stats(s->blk), &s->acct); + +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector_cb: lba=%d ret=%d\n", s->lba, ret); +#endif + + if (ret < 0) { + ide_atapi_io_error(s, ret); + return; + } + + if (s->cd_sector_size == 2352) { 
+ cd_data_to_raw(s->io_buffer, s->lba); + } + + s->lba++; + s->io_buffer_index = 0; + s->status &= ~BUSY_STAT; + + ide_atapi_cmd_reply_end(s); +} + +static int cd_read_sector(IDEState *s) +{ + if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) { + return -EINVAL; + } + + s->iov.iov_base = (s->cd_sector_size == 2352) ? + s->io_buffer + 16 : s->io_buffer; + + s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; + qemu_iovec_init_external(&s->qiov, &s->iov, 1); + +#ifdef DEBUG_IDE_ATAPI + printf("cd_read_sector: lba=%d\n", s->lba); +#endif + + block_acct_start(blk_get_stats(s->blk), &s->acct, + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + + ide_buffered_readv(s, (int64_t)s->lba << 2, &s->qiov, 4, + cd_read_sector_cb, s); + + s->status |= BUSY_STAT; + return 0; +} + void ide_atapi_cmd_ok(IDEState *s) { s->error = 0; @@ -170,6 +231,17 @@ void ide_atapi_io_error(IDEState *s, int ret) } } +static uint16_t atapi_byte_count_limit(IDEState *s) +{ + uint16_t bcl; + + bcl = s->lcyl | (s->hcyl << 8); + if (bcl == 0xffff) { + return 0xfffe; + } + return bcl; +} + /* The whole ATAPI transfer logic is handled in this function */ void ide_atapi_cmd_reply_end(IDEState *s) { @@ -185,18 +257,27 @@ void ide_atapi_cmd_reply_end(IDEState *s) ide_atapi_cmd_ok(s); ide_set_irq(s->bus); #ifdef DEBUG_IDE_ATAPI - printf("status=0x%x\n", s->status); + printf("end of transfer, status=0x%x\n", s->status); #endif } else { /* see if a new sector must be read */ if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) { - ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size); - if (ret < 0) { - ide_atapi_io_error(s, ret); + if (!s->elementary_transfer_size) { + ret = cd_read_sector(s); + if (ret < 0) { + ide_atapi_io_error(s, ret); + } return; + } else { + /* rebuffering within an elementary transfer is + * only possible with a sync request because we + * end up with a race condition otherwise */ + ret = cd_read_sector_sync(s); + if (ret < 0) { + ide_atapi_io_error(s, ret); + return; + } } - 
s->lba++; - s->io_buffer_index = 0; } if (s->elementary_transfer_size > 0) { /* there are some data left to transmit in this elementary @@ -212,12 +293,10 @@ void ide_atapi_cmd_reply_end(IDEState *s) } else { /* a new transfer is needed */ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO; - byte_count_limit = s->lcyl | (s->hcyl << 8); + byte_count_limit = atapi_byte_count_limit(s); #ifdef DEBUG_IDE_ATAPI printf("byte_count_limit=%d\n", byte_count_limit); #endif - if (byte_count_limit == 0xffff) - byte_count_limit--; size = s->packet_transfer_size; if (size > byte_count_limit) { /* byte count limit must be even if this case */ @@ -278,7 +357,6 @@ static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, s->io_buffer_index = sector_size; s->cd_sector_size = sector_size; - s->status = READY_STAT | SEEK_STAT; ide_atapi_cmd_reply_end(s); } @@ -354,16 +432,16 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) s->bus->dma->iov.iov_len = n * 4 * 512; qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1); - s->bus->dma->aiocb = blk_aio_readv(s->blk, (int64_t)s->lba << 2, - &s->bus->dma->qiov, n * 4, - ide_atapi_cmd_read_dma_cb, s); + s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2, + &s->bus->dma->qiov, n * 4, + ide_atapi_cmd_read_dma_cb, s); return; eot: if (ret < 0) { block_acct_failed(blk_get_stats(s->blk), &s->acct); } else { - block_acct_done(blk_get_stats(s->blk), &s->acct); + block_acct_done(blk_get_stats(s->blk), &s->acct); } ide_set_inactive(s, false); } @@ -1186,7 +1264,7 @@ enum { NONDATA = 0x04, }; -static const struct { +static const struct AtapiCmd { void (*handler)(IDEState *s, uint8_t *buf); int flags; } atapi_cmd_table[0x100] = { @@ -1213,9 +1291,9 @@ static const struct { void ide_atapi_cmd(IDEState *s) { - uint8_t *buf; + uint8_t *buf = s->io_buffer; + const struct AtapiCmd *cmd = &atapi_cmd_table[s->io_buffer[0]]; - buf = s->io_buffer; #ifdef DEBUG_IDE_ATAPI { int i; @@ -1226,14 +1304,14 @@ 
void ide_atapi_cmd(IDEState *s) printf("\n"); } #endif + /* * If there's a UNIT_ATTENTION condition pending, only command flagged with * ALLOW_UA are allowed to complete. with other commands getting a CHECK * condition response unless a higher priority status, defined by the drive * here, is pending. */ - if (s->sense_key == UNIT_ATTENTION && - !(atapi_cmd_table[s->io_buffer[0]].flags & ALLOW_UA)) { + if (s->sense_key == UNIT_ATTENTION && !(cmd->flags & ALLOW_UA)) { ide_atapi_cmd_check_status(s); return; } @@ -1244,7 +1322,7 @@ void ide_atapi_cmd(IDEState *s) * GET_EVENT_STATUS_NOTIFICATION to detect such tray open/close * states rely on this behavior. */ - if (!(atapi_cmd_table[s->io_buffer[0]].flags & ALLOW_UA) && + if (!(cmd->flags & ALLOW_UA) && !s->tray_open && blk_is_inserted(s->blk) && s->cdrom_changed) { if (s->cdrom_changed == 1) { @@ -1259,7 +1337,7 @@ void ide_atapi_cmd(IDEState *s) } /* Report a Not Ready condition if appropriate for the command */ - if ((atapi_cmd_table[s->io_buffer[0]].flags & CHECK_READY) && + if ((cmd->flags & CHECK_READY) && (!media_present(s) || !blk_is_inserted(s->blk))) { ide_atapi_cmd_error(s, NOT_READY, ASC_MEDIUM_NOT_PRESENT); @@ -1270,10 +1348,9 @@ void ide_atapi_cmd(IDEState *s) * If this is a data-transferring PIO command and BCL is 0, * we abort at the /ATA/ level, not the ATAPI level. 
* See ATA8 ACS3 section 7.17.6.49 and 7.21.5 */ - if (!(atapi_cmd_table[s->io_buffer[0]].flags & NONDATA)) { + if (cmd->handler && !(cmd->flags & NONDATA)) { /* TODO: Check IDENTIFY data word 125 for default BCL (currently 0) */ - uint16_t byte_count_limit = s->lcyl | (s->hcyl << 8); - if (!(byte_count_limit || s->atapi_dma)) { + if (!(atapi_byte_count_limit(s) || s->atapi_dma)) { /* TODO: Move abort back into core.c and make static inline again */ ide_abort_command(s); return; @@ -1281,8 +1358,8 @@ void ide_atapi_cmd(IDEState *s) } /* Execute the command */ - if (atapi_cmd_table[s->io_buffer[0]].handler) { - atapi_cmd_table[s->io_buffer[0]].handler(s, buf); + if (cmd->handler) { + cmd->handler(s, buf); return; } diff --git a/hw/ide/core.c b/hw/ide/core.c index 2725dd3b81..da3baab1eb 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -561,6 +561,53 @@ static bool ide_sect_range_ok(IDEState *s, return true; } +static void ide_buffered_readv_cb(void *opaque, int ret) +{ + IDEBufferedRequest *req = opaque; + if (!req->orphaned) { + if (!ret) { + qemu_iovec_from_buf(req->original_qiov, 0, req->iov.iov_base, + req->original_qiov->size); + } + req->original_cb(req->original_opaque, ret); + } + QLIST_REMOVE(req, list); + qemu_vfree(req->iov.iov_base); + g_free(req); +} + +#define MAX_BUFFERED_REQS 16 + +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque) +{ + BlockAIOCB *aioreq; + IDEBufferedRequest *req; + int c = 0; + + QLIST_FOREACH(req, &s->buffered_requests, list) { + c++; + } + if (c > MAX_BUFFERED_REQS) { + return blk_abort_aio_request(s->blk, cb, opaque, -EIO); + } + + req = g_new0(IDEBufferedRequest, 1); + req->original_qiov = iov; + req->original_cb = cb; + req->original_opaque = opaque; + req->iov.iov_base = qemu_blockalign(blk_bs(s->blk), iov->size); + req->iov.iov_len = iov->size; + qemu_iovec_init_external(&req->qiov, &req->iov, 1); + + aioreq = blk_aio_readv(s->blk, 
sector_num, &req->qiov, nb_sectors, + ide_buffered_readv_cb, req); + + QLIST_INSERT_HEAD(&s->buffered_requests, req, list); + return aioreq; +} + static void ide_sector_read(IDEState *s); static void ide_sector_read_cb(void *opaque, int ret) @@ -632,8 +679,8 @@ static void ide_sector_read(IDEState *s) block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); - s->pio_aiocb = blk_aio_readv(s->blk, sector_num, &s->qiov, n, - ide_sector_read_cb, s); + s->pio_aiocb = ide_buffered_readv(s, sector_num, &s->qiov, n, + ide_sector_read_cb, s); } void dma_buf_commit(IDEState *s, uint32_t tx_bytes) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index e4629b023a..2d1e2d2d2f 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -343,6 +343,16 @@ enum ide_dma_cmd { #define ide_cmd_is_read(s) \ ((s)->dma_cmd == IDE_DMA_READ) +typedef struct IDEBufferedRequest { + QLIST_ENTRY(IDEBufferedRequest) list; + struct iovec iov; + QEMUIOVector qiov; + QEMUIOVector *original_qiov; + BlockCompletionFunc *original_cb; + void *original_opaque; + bool orphaned; +} IDEBufferedRequest; + /* NOTE: IDEState represents in fact one drive */ struct IDEState { IDEBus *bus; @@ -396,6 +406,7 @@ struct IDEState { BlockAIOCB *pio_aiocb; struct iovec iov; QEMUIOVector qiov; + QLIST_HEAD(, IDEBufferedRequest) buffered_requests; /* ATA DMA state */ uint64_t io_buffer_offset; int32_t io_buffer_size; @@ -572,6 +583,9 @@ void ide_set_inactive(IDEState *s, bool more); BlockAIOCB *ide_issue_trim(BlockBackend *blk, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); /* hw/ide/atapi.c */ void ide_atapi_cmd(IDEState *s); diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 9c54b378d6..37dbc291da 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -233,6 +233,22 @@ void bmdma_cmd_writeb(BMDMAState *bm, 
uint32_t val) /* Ignore writes to SSBM if it keeps the old value */ if ((val & BM_CMD_START) != (bm->cmd & BM_CMD_START)) { if (!(val & BM_CMD_START)) { + /* First invoke the callbacks of all buffered requests + * and flag those requests as orphaned. Ideally there + * are no unbuffered (Scatter Gather DMA Requests or + * write requests) pending and we can avoid to drain. */ + IDEBufferedRequest *req; + IDEState *s = idebus_active_if(bm->bus); + QLIST_FOREACH(req, &s->buffered_requests, list) { + if (!req->orphaned) { +#ifdef DEBUG_IDE + printf("%s: invoking cb %p of buffered request %p with" + " -ECANCELED\n", __func__, req->original_cb, req); +#endif + req->original_cb(req->original_opaque, -ECANCELED); + } + req->orphaned = true; + } /* * We can't cancel Scatter Gather DMA in the middle of the * operation or a partial (not full) DMA transfer would reach @@ -246,6 +262,9 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) * aio operation with preadv/pwritev. */ if (bm->bus->dma->aiocb) { +#ifdef DEBUG_IDE + printf("%s: draining all remaining requests", __func__); +#endif blk_drain_all(); assert(bm->bus->dma->aiocb == NULL); } diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index d71aeb8a2a..13e297d52e 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -254,9 +254,9 @@ static void gic_activate_irq(GICState *s, int cpu, int irq) int bitno = preemption_level % 32; if (gic_has_groups(s) && GIC_TEST_GROUP(irq, (1 << cpu))) { - s->nsapr[regno][cpu] &= (1 << bitno); + s->nsapr[regno][cpu] |= (1 << bitno); } else { - s->apr[regno][cpu] &= (1 << bitno); + s->apr[regno][cpu] |= (1 << bitno); } s->running_priority[cpu] = prio; diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index d91b7b155e..318c3e6ad2 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -77,14 +77,8 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_NET_F_STATUS, - VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, - 
VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_CTRL_RX_EXTRA, - VIRTIO_NET_F_CTRL_MAC_ADDR, - VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, + /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, @@ -292,12 +286,6 @@ static void vhost_net_stop_one(struct vhost_net *net, int r = vhost_ops->vhost_net_set_backend(&net->dev, &file); assert(r >= 0); } - } else if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { - const VhostOps *vhost_ops = net->dev.vhost_ops; - int r = vhost_ops->vhost_reset_device(&net->dev); - assert(r >= 0); - } } if (net->nc->info->poll) { net->nc->info->poll(net->nc, true); diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 7b2fbf9598..715208b22a 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -34,6 +34,7 @@ #include "sysemu/sysemu.h" #include "hw/i386/ioapic.h" #include "qapi/visitor.h" +#include "qemu/error-report.h" /* * I440FX chipset data sheet. 
@@ -301,6 +302,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) static void i440fx_realize(PCIDevice *dev, Error **errp) { dev->config[I440FX_SMRAM] = 0x02; + + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { + error_report("warning: i440fx doesn't support emulated iommu"); + } } PCIBus *i440fx_init(const char *host_type, const char *pci_type, diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index c81507d710..1fb470758b 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -506,7 +506,7 @@ static void mch_realize(PCIDevice *d, Error **errp) PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } /* Intel IOMMU (VT-d) */ - if (machine_iommu(current_machine)) { + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { mch_init_dmar(mch); } } diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c index 0806b5f82e..ff073d501a 100644 --- a/hw/tpm/tpm_tis.c +++ b/hw/tpm/tpm_tis.c @@ -141,7 +141,7 @@ #define TPM_TIS_IFACE_ID_SUPPORTED_FLAGS1_3 \ (TPM_TIS_IFACE_ID_INTERFACE_TIS1_3 | \ - (~0 << 4)/* all of it is don't care */) + (~0u << 4)/* all of it is don't care */) /* if backend was a TPM 2.0: */ #define TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0 \ diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c44360219f..1b6c5ac238 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -121,8 +121,8 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { - error_report("Failed to read msg header. Read %d instead of %d.", r, - size); + error_report("Failed to read msg header. Read %d instead of %d." 
+ " Original request %d.", r, size, msg->request); goto fail; } @@ -206,7 +206,7 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, VhostUserMsg msg = { .request = VHOST_USER_SET_LOG_BASE, .flags = VHOST_USER_VERSION, - .payload.log.mmap_size = log->size, + .payload.log.mmap_size = log->size * sizeof(*(log->log)), .payload.log.mmap_offset = 0, .size = sizeof(msg.payload.log), }; @@ -333,18 +333,23 @@ static int vhost_user_set_vring_base(struct vhost_dev *dev, static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) { - struct vhost_vring_state state = { - .index = dev->vq_index, - .num = enable, - }; + int i; - if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { + if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { return -1; } - return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); -} + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_vring_state state = { + .index = dev->vq_index + i, + .num = enable, + }; + + vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); + } + return 0; +} static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 770d9bbc8c..c6910da63a 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -63,9 +63,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, const char *name, Error **errp); -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, - const char *id_or_name, - Error **errp); +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp); int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int bdrv_snapshot_load_tmp(BlockDriverState *bs, @@ -75,4 +75,22 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, const char *id_or_name, Error **errp); + + +/* Group 
operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, + Error **err); +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs); +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs); + +BlockDriverState *bdrv_all_find_vmstate_bs(void); + #endif diff --git a/include/hw/boards.h b/include/hw/boards.h index 3e9a92c055..24eb6f0e77 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -33,7 +33,6 @@ MachineClass *find_default_machine(void); extern MachineState *current_machine; bool machine_usb(MachineState *machine); -bool machine_iommu(MachineState *machine); bool machine_kernel_irqchip_allowed(MachineState *machine); bool machine_kernel_irqchip_required(MachineState *machine); int machine_kvm_shadow_mem(MachineState *machine); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 4bbc0ffc53..854c330b66 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -347,8 +347,25 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "qemu32" "-" TYPE_X86_CPU,\ .property = "popcnt",\ .value = "on",\ + },{\ + .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ + },{\ + .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ + },{\ + .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ + },{\ + .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ + .property = "rdtscp",\ + .value = "on",\ }, + #define PC_COMPAT_2_3 \ PC_COMPAT_2_4 \ HW_COMPAT_2_3 \ diff --git a/include/qemu/buffer.h 
b/include/qemu/buffer.h index b380cec6fa..dead9b77e1 100644 --- a/include/qemu/buffer.h +++ b/include/qemu/buffer.h @@ -34,12 +34,35 @@ typedef struct Buffer Buffer; */ struct Buffer { + char *name; size_t capacity; size_t offset; + uint64_t avg_size; uint8_t *buffer; }; /** + * buffer_init: + * @buffer: the buffer object + * @name: buffer name + * + * Optionally attach a name to the buffer, to make it easier + * to identify in debug traces. + */ +void buffer_init(Buffer *buffer, const char *name, ...) + GCC_FMT_ATTR(2, 3); + +/** + * buffer_shrink: + * @buffer: the buffer object + * + * Try to shrink the buffer. Checks current buffer capacity and size + * and reduces capacity in case only a fraction of the buffer is + * actually used. + */ +void buffer_shrink(Buffer *buffer); + +/** * buffer_reserve: * @buffer: the buffer object * @len: the minimum required free space @@ -115,4 +138,24 @@ uint8_t *buffer_end(Buffer *buffer); */ gboolean buffer_empty(Buffer *buffer); +/** + * buffer_move_empty: + * @to: destination buffer object + * @from: source buffer object + * + * Moves buffer, without copying data. 'to' buffer must be empty. + * 'from' buffer is empty and zero-sized on return. + */ +void buffer_move_empty(Buffer *to, Buffer *from); + +/** + * buffer_move: + * @to: destination buffer object + * @from: source buffer object + * + * Moves buffer, copying data (unless 'to' buffer happens to be empty). + * 'from' buffer is empty and zero-sized on return. 
+ */ +void buffer_move(Buffer *to, Buffer *from); + #endif /* QEMU_BUFFER_H__ */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f4a68e291b..fb068ea47b 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -184,5 +184,8 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); #endif diff --git a/migration/migration.c b/migration/migration.c index 7e4e27b57d..1a42aee412 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1345,7 +1345,7 @@ static void *source_return_path_thread(void *opaque) break; } } - if (rp && qemu_file_get_error(rp)) { + if (qemu_file_get_error(rp)) { trace_source_return_path_thread_bad_end(); mark_source_rp_bad(ms); } @@ -1643,7 +1643,6 @@ static void *migration_thread(void *opaque) if (pending_size && pending_size >= max_size) { /* Still a significant amount to transfer */ - current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); if (migrate_postcopy_ram() && s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE && pend_nonpost <= max_size && diff --git a/migration/ram.c b/migration/ram.c index 7f32696d79..1eb155aedd 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1249,6 +1249,7 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, if (unsentmap) { clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap); } + last_sent_block = block; } return res; diff --git a/migration/savevm.c b/migration/savevm.c index d90e228568..0ad1b93a8b 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1905,46 +1905,6 @@ int qemu_loadvm_state(QEMUFile *f) return ret; } -static BlockDriverState *find_vmstate_bs(void) -{ - BlockDriverState *bs = NULL; - while ((bs 
= bdrv_next(bs))) { - if (bdrv_can_snapshot(bs)) { - return bs; - } - } - return NULL; -} - -/* - * Deletes snapshots of a given name in all opened images. - */ -static int del_existing_snapshots(Monitor *mon, const char *name) -{ - BlockDriverState *bs; - QEMUSnapshotInfo sn1, *snapshot = &sn1; - Error *err = NULL; - - bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs) && - bdrv_snapshot_find(bs, snapshot, name) >= 0) { - bdrv_snapshot_delete_by_id_or_name(bs, name, &err); - if (err) { - monitor_printf(mon, - "Error while deleting snapshot on device '%s':" - " %s\n", - bdrv_get_device_name(bs), - error_get_pretty(err)); - error_free(err); - return -1; - } - } - } - - return 0; -} - void hmp_savevm(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; @@ -1957,27 +1917,29 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) struct tm tm; const char *name = qdict_get_try_str(qdict, "name"); Error *local_err = NULL; + AioContext *aio_context; - /* Verify if there is a device that doesn't support snapshots and is writable */ - bs = NULL; - while ((bs = bdrv_next(bs))) { - - if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { - continue; - } + if (!bdrv_all_can_snapshot(&bs)) { + monitor_printf(mon, "Device '%s' is writable but does not " + "support snapshots.\n", bdrv_get_device_name(bs)); + return; + } - if (!bdrv_can_snapshot(bs)) { - monitor_printf(mon, "Device '%s' is writable but does not support snapshots.\n", - bdrv_get_device_name(bs)); - return; - } + /* Delete old snapshots of the same name */ + if (name && bdrv_all_delete_snapshot(name, &bs1, &local_err) < 0) { + monitor_printf(mon, + "Error while deleting snapshot on device '%s': %s\n", + bdrv_get_device_name(bs1), error_get_pretty(local_err)); + error_free(local_err); + return; } - bs = find_vmstate_bs(); - if (!bs) { + bs = bdrv_all_find_vmstate_bs(); + if (bs == NULL) { monitor_printf(mon, "No block device can accept snapshots\n"); return; } + aio_context = 
bdrv_get_aio_context(bs); saved_vm_running = runstate_is_running(); @@ -1988,6 +1950,8 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) } vm_stop(RUN_STATE_SAVE_VM); + aio_context_acquire(aio_context); + memset(sn, 0, sizeof(*sn)); /* fill auxiliary fields */ @@ -2010,11 +1974,6 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm); } - /* Delete old snapshots of the same name */ - if (name && del_existing_snapshots(mon, name) < 0) { - goto the_end; - } - /* save the VM state */ f = qemu_fopen_bdrv(bs, 1); if (!f) { @@ -2030,22 +1989,14 @@ void hmp_savevm(Monitor *mon, const QDict *qdict) goto the_end; } - /* create the snapshots */ - - bs1 = NULL; - while ((bs1 = bdrv_next(bs1))) { - if (bdrv_can_snapshot(bs1)) { - /* Write VM state size only to the image that contains the state */ - sn->vm_state_size = (bs == bs1 ? vm_state_size : 0); - ret = bdrv_snapshot_create(bs1, sn); - if (ret < 0) { - monitor_printf(mon, "Error while creating snapshot on '%s'\n", - bdrv_get_device_name(bs1)); - } - } + ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs); + if (ret < 0) { + monitor_printf(mon, "Error while creating snapshot on '%s'\n", + bdrv_get_device_name(bs)); } the_end: + aio_context_release(aio_context); if (saved_vm_running) { vm_start(); } @@ -2084,15 +2035,31 @@ int load_vmstate(const char *name) QEMUSnapshotInfo sn; QEMUFile *f; int ret; + AioContext *aio_context; - bs_vm_state = find_vmstate_bs(); + if (!bdrv_all_can_snapshot(&bs)) { + error_report("Device '%s' is writable but does not support snapshots.", + bdrv_get_device_name(bs)); + return -ENOTSUP; + } + ret = bdrv_all_find_snapshot(name, &bs); + if (ret < 0) { + error_report("Device '%s' does not have the requested snapshot '%s'", + bdrv_get_device_name(bs), name); + return ret; + } + + bs_vm_state = bdrv_all_find_vmstate_bs(); if (!bs_vm_state) { error_report("No block device supports snapshots"); return -ENOTSUP; } + aio_context = 
bdrv_get_aio_context(bs_vm_state); /* Don't even try to load empty VM states */ + aio_context_acquire(aio_context); ret = bdrv_snapshot_find(bs_vm_state, &sn, name); + aio_context_release(aio_context); if (ret < 0) { return ret; } else if (sn.vm_state_size == 0) { @@ -2101,42 +2068,14 @@ int load_vmstate(const char *name) return -EINVAL; } - /* Verify if there is any device that doesn't support snapshots and is - writable and check if the requested snapshot is available too. */ - bs = NULL; - while ((bs = bdrv_next(bs))) { - - if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { - continue; - } - - if (!bdrv_can_snapshot(bs)) { - error_report("Device '%s' is writable but does not support snapshots.", - bdrv_get_device_name(bs)); - return -ENOTSUP; - } - - ret = bdrv_snapshot_find(bs, &sn, name); - if (ret < 0) { - error_report("Device '%s' does not have the requested snapshot '%s'", - bdrv_get_device_name(bs), name); - return ret; - } - } - /* Flush all IO requests so they don't interfere with the new state. 
*/ bdrv_drain_all(); - bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs)) { - ret = bdrv_snapshot_goto(bs, name); - if (ret < 0) { - error_report("Error %d while activating snapshot '%s' on '%s'", - ret, name, bdrv_get_device_name(bs)); - return ret; - } - } + ret = bdrv_all_goto_snapshot(name, &bs); + if (ret < 0) { + error_report("Error %d while activating snapshot '%s' on '%s'", + ret, name, bdrv_get_device_name(bs)); + return ret; } /* restore the VM state */ @@ -2148,9 +2087,12 @@ int load_vmstate(const char *name) qemu_system_reset(VMRESET_SILENT); migration_incoming_state_new(f); - ret = qemu_loadvm_state(f); + aio_context_acquire(aio_context); + ret = qemu_loadvm_state(f); qemu_fclose(f); + aio_context_release(aio_context); + migration_incoming_state_destroy(); if (ret < 0) { error_report("Error %d while loading VM state", ret); @@ -2166,43 +2108,34 @@ void hmp_delvm(Monitor *mon, const QDict *qdict) Error *err; const char *name = qdict_get_str(qdict, "name"); - if (!find_vmstate_bs()) { - monitor_printf(mon, "No block device supports snapshots\n"); - return; - } - - bs = NULL; - while ((bs = bdrv_next(bs))) { - if (bdrv_can_snapshot(bs)) { - err = NULL; - bdrv_snapshot_delete_by_id_or_name(bs, name, &err); - if (err) { - monitor_printf(mon, - "Error while deleting snapshot on device '%s':" - " %s\n", - bdrv_get_device_name(bs), - error_get_pretty(err)); - error_free(err); - } - } + if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) { + monitor_printf(mon, + "Error while deleting snapshot on device '%s': %s\n", + bdrv_get_device_name(bs), error_get_pretty(err)); + error_free(err); } } void hmp_info_snapshots(Monitor *mon, const QDict *qdict) { BlockDriverState *bs, *bs1; - QEMUSnapshotInfo *sn_tab, *sn, s, *sn_info = &s; - int nb_sns, i, ret, available; + QEMUSnapshotInfo *sn_tab, *sn; + int nb_sns, i; int total; int *available_snapshots; + AioContext *aio_context; - bs = find_vmstate_bs(); + bs = bdrv_all_find_vmstate_bs(); if (!bs) { 
monitor_printf(mon, "No available block device supports snapshots\n"); return; } + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); nb_sns = bdrv_snapshot_list(bs, &sn_tab); + aio_context_release(aio_context); + if (nb_sns < 0) { monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); return; @@ -2216,21 +2149,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) available_snapshots = g_new0(int, nb_sns); total = 0; for (i = 0; i < nb_sns; i++) { - sn = &sn_tab[i]; - available = 1; - bs1 = NULL; - - while ((bs1 = bdrv_next(bs1))) { - if (bdrv_can_snapshot(bs1) && bs1 != bs) { - ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str); - if (ret < 0) { - available = 0; - break; - } - } - } - - if (available) { + if (bdrv_all_find_snapshot(sn_tab[i].id_str, &bs1) == 0) { available_snapshots[total] = i; total++; } diff --git a/qapi-schema.json b/qapi-schema.json index b65905f4d6..8b1a423fa7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3579,16 +3579,22 @@ # Button of a pointer input device (mouse, tablet). # # Since: 2.0 +# +# Note that the spelling of these values may change when the +# x-input-send-event is promoted out of experimental status. ## { 'enum' : 'InputButton', 'data' : [ 'Left', 'Middle', 'Right', 'WheelUp', 'WheelDown' ] } ## -# @InputButton +# @InputAxis # # Position axis of a pointer input device (mouse, tablet). # # Since: 2.0 +# +# Note that the spelling of these values may change when the +# x-input-send-event is promoted out of experimental status. ## { 'enum' : 'InputAxis', 'data' : [ 'X', 'Y' ] } @@ -3679,7 +3685,10 @@ # # Since: 2.2 # -# Note: this command is experimental, and not a stable API. +# Note: this command is experimental, and not a stable API. Things that +# might change before it becomes stable include the spelling of enum +# values for InputButton and InputAxis, and the notion of how to designate +# which console will receive the event. 
# ## { 'command': 'x-input-send-event', diff --git a/qapi/block-core.json b/qapi/block-core.json index f97c250ce9..a07b13f54a 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1531,9 +1531,8 @@ # @stats-account-failed: #optional whether to include failed # operations when computing latency and last # access statistics (default: true) (Since 2.5) -# @stats-intervals: #optional colon-separated list of intervals for -# collecting I/O statistics, in seconds (default: none) -# (Since 2.5) +# @stats-intervals: #optional list of intervals for collecting I/O +# statistics, in seconds (default: none) (Since 2.5) # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) # (default: off) # @@ -1551,7 +1550,7 @@ '*read-only': 'bool', '*stats-account-invalid': 'bool', '*stats-account-failed': 'bool', - '*stats-intervals': 'str', + '*stats-intervals': ['int'], '*detect-zeroes': 'BlockdevDetectZeroesOptions' } } ## diff --git a/qapi/introspect.json b/qapi/introspect.json index e7c4c3e998..9e9369e160 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -22,6 +22,15 @@ # what's there), not interface specification. The specification is in # the QAPI schema. # +# Furthermore, while we strive to keep the QMP wire format +# backwards-compatible across qemu versions, the introspection output +# is not guaranteed to have the same stability. For example, one +# version of qemu may list an object member as an optional +# non-variant, while another lists the same member only through the +# object's variants; or the type of a member may change from a generic +# string into a specific enum or from one specific type into an +# alternate that includes the original type alongside something else. +# # Returns: array of @SchemaInfo, where each element describes an # entity in the ABI: command, event, type, ... 
# diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 80d034a8d5..c831fe83ad 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -16,6 +16,14 @@ #include <seccomp.h> #include "sysemu/seccomp.h" +#if SCMP_VER_MAJOR >= 3 + #define HAVE_CACHEFLUSH +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 3 + #define HAVE_CACHEFLUSH +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 2 && SCMP_VER_MICRO >= 3 + #define HAVE_CACHEFLUSH +#endif + struct QemuSeccompSyscall { int32_t num; uint8_t priority; @@ -238,7 +246,10 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { { SCMP_SYS(inotify_init1), 240 }, { SCMP_SYS(inotify_add_watch), 240 }, { SCMP_SYS(mbind), 240 }, - { SCMP_SYS(memfd_create), 240 } + { SCMP_SYS(memfd_create), 240 }, +#ifdef HAVE_CACHEFLUSH + { SCMP_SYS(cacheflush), 240 }, +#endif }; int seccomp_start(void) diff --git a/qga/commands.c b/qga/commands.c index 0f80ce65a4..bb73e7dfbf 100644 --- a/qga/commands.c +++ b/qga/commands.c @@ -398,9 +398,12 @@ GuestExec *qmp_guest_exec(const char *path, arglist.next = has_arg ? arg : NULL; argv = guest_exec_get_args(&arglist, true); - envp = guest_exec_get_args(has_env ? env : NULL, false); + envp = has_env ? 
guest_exec_get_args(env, false) : NULL; flags = G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD; +#if GLIB_CHECK_VERSION(2, 33, 2) + flags |= G_SPAWN_SEARCH_PATH_FROM_ENVP; +#endif if (!has_output) { flags |= G_SPAWN_STDOUT_TO_DEV_NULL | G_SPAWN_STDERR_TO_DEV_NULL; } diff --git a/target-arm/translate.c b/target-arm/translate.c index 43518541e1..5d22879755 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -7210,6 +7210,7 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) break; } + gen_set_condexec(s); gen_set_pc_im(s, s->pc - 4); tmpptr = tcg_const_ptr(ri); tcg_syn = tcg_const_i32(syndrome); @@ -11373,6 +11374,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { if (bp->pc == dc->pc) { if (bp->flags & BP_CPU) { + gen_set_condexec(dc); gen_set_pc_im(dc, dc->pc); gen_helper_check_breakpoints(cpu_env); /* End the TB early; it's likely not going to be executed */ diff --git a/target-i386/cpu.c b/target-i386/cpu.c index e5f1c5bcda..11e5e39a75 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -1244,8 +1244,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_DE | CPUID_FP87, .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR | + CPUID_EXT2_LM | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | @@ -1273,8 +1274,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_1_ECX] = CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR | + CPUID_EXT2_LM | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | 
CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | @@ -1305,8 +1307,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | @@ -1340,8 +1343,9 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, + /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE | diff --git a/target-i386/translate.c b/target-i386/translate.c index fbe4f80aa6..a3dd167a9b 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -3848,8 +3848,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; #ifdef TARGET_X86_64 case MO_64: - tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg], + tcg_gen_mulu2_i64(cpu_T[0], cpu_T[1], cpu_T[0], cpu_regs[R_EDX]); + tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T[0]); + tcg_gen_mov_i64(cpu_regs[reg], cpu_T[1]); break; #endif } diff --git a/tests/.gitignore b/tests/.gitignore index e96f569903..1e55722b6a 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -9,6 +9,7 @@ check-qom-proplist rcutorture test-aio test-bitops +test-blockjob-txn test-coroutine test-crypto-cipher test-crypto-hash @@ -45,6 +46,7 @@ test-string-input-visitor test-string-output-visitor test-thread-pool test-throttle +test-timed-average test-visitor-serialization test-vmstate 
test-write-threshold diff --git a/tests/Makefile b/tests/Makefile index 90c4141ac5..b9379841d8 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -197,8 +197,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c check-qtest-i386-y += tests/pc-cpu-test$(EXESUF) check-qtest-i386-y += tests/q35-test$(EXESUF) gcov-files-i386-y += hw/pci-host/q35.c -ifeq ($(CONFIG_VHOST_NET),y) -check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF) +check-qtest-i386-$(CONFIG_VHOST_NET_TEST_i386) += tests/vhost-user-test$(EXESUF) +ifeq ($(CONFIG_VHOST_NET_TEST_i386),) +check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += tests/vhost-user-test$(EXESUF) endif check-qtest-i386-y += tests/test-netfilter$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 59d387c6d0..088850642e 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -39,15 +39,17 @@ #include "hw/pci/pci_ids.h" #include "hw/pci/pci_regs.h" -/* Test-specific defines -- in MiB */ -#define TEST_IMAGE_SIZE_MB (200 * 1024) -#define TEST_IMAGE_SECTORS ((TEST_IMAGE_SIZE_MB / AHCI_SECTOR_SIZE) \ - * 1024 * 1024) +/* Test images sizes in MB */ +#define TEST_IMAGE_SIZE_MB_LARGE (200 * 1024) +#define TEST_IMAGE_SIZE_MB_SMALL 64 /*** Globals ***/ static char tmp_path[] = "/tmp/qtest.XXXXXX"; static char debug_path[] = "/tmp/qtest-blkdebug.XXXXXX"; +static char mig_socket[] = "/tmp/qtest-migration.XXXXXX"; static bool ahci_pedantic; +static const char *imgfmt; +static unsigned test_image_size_mb; /*** Function Declarations ***/ static void ahci_test_port_spec(AHCIQState *ahci, uint8_t port); @@ -60,6 +62,11 @@ static void ahci_test_pmcap(AHCIQState *ahci, uint8_t offset); /*** Utilities ***/ +static uint64_t mb_to_sectors(uint64_t image_size_mb) +{ + return (image_size_mb * 1024 * 1024) / AHCI_SECTOR_SIZE; +} + static void string_bswap16(uint16_t *s, size_t bytes) { g_assert_cmphex((bytes & 1), ==, 0); @@ -114,8 +121,11 @@ static void ahci_migrate(AHCIQState *from, 
AHCIQState *to, const char *uri) { QOSState *tmp = to->parent; QPCIDevice *dev = to->dev; + char *uri_local = NULL; + if (uri == NULL) { - uri = "tcp:127.0.0.1:1234"; + uri_local = g_strdup_printf("%s%s", "unix:", mig_socket); + uri = uri_local; } /* context will be 'to' after completion. */ @@ -135,6 +145,7 @@ static void ahci_migrate(AHCIQState *from, AHCIQState *to, const char *uri) from->dev = dev; verify_state(to); + g_free(uri_local); } /*** Test Setup & Teardown ***/ @@ -170,11 +181,11 @@ static AHCIQState *ahci_boot(const char *cli, ...) va_end(ap); } else { cli = "-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s" - ",format=qcow2" + ",format=%s" " -M q35 " "-device ide-hd,drive=drive0 " "-global ide-hd.ver=%s"; - s = ahci_boot(cli, tmp_path, "testdisk", "version"); + s = ahci_boot(cli, tmp_path, "testdisk", imgfmt, "version"); } return s; @@ -900,7 +911,7 @@ static void ahci_test_max(AHCIQState *ahci) uint64_t nsect; uint8_t port; uint8_t cmd; - uint64_t config_sect = TEST_IMAGE_SECTORS - 1; + uint64_t config_sect = mb_to_sectors(test_image_size_mb) - 1; if (config_sect > 0xFFFFFF) { cmd = CMD_READ_MAX_EXT; @@ -1073,12 +1084,12 @@ static void test_flush_retry(void) prepare_blkdebug_script(debug_path, "flush_to_disk"); ahci = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 ", debug_path, - tmp_path); + tmp_path, imgfmt); /* Issue Flush Command and wait for error */ port = ahci_port_select(ahci); @@ -1105,18 +1116,19 @@ static void test_flush_retry(void) static void test_migrate_sanity(void) { AHCIQState *src, *dst; - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); src = ahci_boot("-m 1024 -M q35 " - "-hda %s ", tmp_path); + "-drive if=ide,file=%s,format=%s ", tmp_path, imgfmt); dst = ahci_boot("-m 1024 -M q35 " - "-hda %s " - "-incoming %s", tmp_path, 
uri); + "-drive if=ide,file=%s,format=%s " + "-incoming %s", tmp_path, imgfmt, uri); ahci_migrate(src, dst, uri); ahci_shutdown(src); ahci_shutdown(dst); + g_free(uri); } /** @@ -1129,13 +1141,14 @@ static void ahci_migrate_simple(uint8_t cmd_read, uint8_t cmd_write) size_t bufsize = 4096; unsigned char *tx = g_malloc(bufsize); unsigned char *rx = g_malloc0(bufsize); - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); src = ahci_boot_and_enable("-m 1024 -M q35 " - "-hda %s ", tmp_path); + "-drive if=ide,format=%s,file=%s ", + imgfmt, tmp_path); dst = ahci_boot("-m 1024 -M q35 " - "-hda %s " - "-incoming %s", tmp_path, uri); + "-drive if=ide,format=%s,file=%s " + "-incoming %s", imgfmt, tmp_path, uri); set_context(src->parent); @@ -1158,6 +1171,7 @@ static void ahci_migrate_simple(uint8_t cmd_read, uint8_t cmd_write) ahci_shutdown(dst); g_free(rx); g_free(tx); + g_free(uri); } static void test_migrate_dma(void) @@ -1190,12 +1204,12 @@ static void ahci_halted_io_test(uint8_t cmd_read, uint8_t cmd_write) prepare_blkdebug_script(debug_path, "write_aio"); ahci = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 ", debug_path, - tmp_path); + tmp_path, imgfmt); /* Initialize and prepare */ port = ahci_port_select(ahci); @@ -1251,25 +1265,25 @@ static void ahci_migrate_halted_io(uint8_t cmd_read, uint8_t cmd_write) unsigned char *rx = g_malloc0(bufsize); uint64_t ptr; AHCICommand *cmd; - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); prepare_blkdebug_script(debug_path, "write_aio"); src = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 ", debug_path, - tmp_path); + tmp_path, imgfmt); dst 
= ahci_boot("-drive file=%s,if=none,id=drive0," - "format=qcow2,cache=writeback," + "format=%s,cache=writeback," "rerror=stop,werror=stop " "-M q35 " "-device ide-hd,drive=drive0 " "-incoming %s", - tmp_path, uri); + tmp_path, imgfmt, uri); set_context(src->parent); @@ -1301,6 +1315,7 @@ static void ahci_migrate_halted_io(uint8_t cmd_read, uint8_t cmd_write) ahci_shutdown(dst); g_free(rx); g_free(tx); + g_free(uri); } static void test_migrate_halted_dma(void) @@ -1322,20 +1337,22 @@ static void test_flush_migrate(void) AHCICommand *cmd; uint8_t px; const char *s; - const char *uri = "tcp:127.0.0.1:1234"; + char *uri = g_strdup_printf("unix:%s", mig_socket); prepare_blkdebug_script(debug_path, "flush_to_disk"); src = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0," - "cache=writeback,rerror=stop,werror=stop " + "cache=writeback,rerror=stop,werror=stop," + "format=%s " "-M q35 " "-device ide-hd,drive=drive0 ", - debug_path, tmp_path); + debug_path, tmp_path, imgfmt); dst = ahci_boot("-drive file=%s,if=none,id=drive0," - "cache=writeback,rerror=stop,werror=stop " + "cache=writeback,rerror=stop,werror=stop," + "format=%s " "-M q35 " "-device ide-hd,drive=drive0 " - "-incoming %s", tmp_path, uri); + "-incoming %s", tmp_path, imgfmt, uri); set_context(src->parent); @@ -1360,6 +1377,7 @@ static void test_flush_migrate(void) ahci_command_free(cmd); ahci_shutdown(src); ahci_shutdown(dst); + g_free(uri); } static void test_max(void) @@ -1476,7 +1494,7 @@ static uint64_t offset_sector(enum OffsetType ofst, return 1; case OFFSET_HIGH: ceil = (addr_type == ADDR_MODE_LBA28) ? 
0xfffffff : 0xffffffffffff; - ceil = MIN(ceil, TEST_IMAGE_SECTORS - 1); + ceil = MIN(ceil, mb_to_sectors(test_image_size_mb) - 1); nsectors = buffsize / AHCI_SECTOR_SIZE; return ceil - nsectors + 1; default: @@ -1558,8 +1576,9 @@ static void create_ahci_io_test(enum IOMode type, enum AddrMode addr, enum BuffLen len, enum OffsetType offset) { char *name; - AHCIIOTestOptions *opts = g_malloc(sizeof(AHCIIOTestOptions)); + AHCIIOTestOptions *opts; + opts = g_malloc(sizeof(AHCIIOTestOptions)); opts->length = len; opts->address_type = addr; opts->io_type = type; @@ -1571,6 +1590,13 @@ static void create_ahci_io_test(enum IOMode type, enum AddrMode addr, buff_len_str[len], offset_str[offset]); + if ((addr == ADDR_MODE_LBA48) && (offset == OFFSET_HIGH) && + (mb_to_sectors(test_image_size_mb) <= 0xFFFFFFF)) { + g_test_message("%s: skipped; test image too small", name); + g_free(name); + return; + } + qtest_add_data_func(name, opts, test_io_interface); g_free(name); } @@ -1617,15 +1643,33 @@ int main(int argc, char **argv) return 0; } - /* Create a temporary qcow2 image */ - close(mkstemp(tmp_path)); - mkqcow2(tmp_path, TEST_IMAGE_SIZE_MB); + /* Create a temporary image */ + fd = mkstemp(tmp_path); + g_assert(fd >= 0); + if (have_qemu_img()) { + imgfmt = "qcow2"; + test_image_size_mb = TEST_IMAGE_SIZE_MB_LARGE; + mkqcow2(tmp_path, TEST_IMAGE_SIZE_MB_LARGE); + } else { + g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; " + "skipping LBA48 high-sector tests"); + imgfmt = "raw"; + test_image_size_mb = TEST_IMAGE_SIZE_MB_SMALL; + ret = ftruncate(fd, test_image_size_mb * 1024 * 1024); + g_assert(ret == 0); + } + close(fd); /* Create temporary blkdebug instructions */ fd = mkstemp(debug_path); g_assert(fd >= 0); close(fd); + /* Reserve a hollow file to use as a socket for migration tests */ + fd = mkstemp(mig_socket); + g_assert(fd >= 0); + close(fd); + /* Run the tests */ qtest_add_func("/ahci/sanity", test_sanity); qtest_add_func("/ahci/pci_spec", test_pci_spec); @@ 
-1668,6 +1712,7 @@ int main(int argc, char **argv) /* Cleanup */ unlink(tmp_path); unlink(debug_path); + unlink(mig_socket); return ret; } diff --git a/tests/crypto-tls-x509-helpers.c b/tests/crypto-tls-x509-helpers.c index c5de67baaf..47b4c7ba53 100644 --- a/tests/crypto-tls-x509-helpers.c +++ b/tests/crypto-tls-x509-helpers.c @@ -153,6 +153,7 @@ test_tls_get_ipaddr(const char *addrstr, *datalen = res->ai_addrlen; *data = g_new(char, *datalen); memcpy(*data, res->ai_addr, *datalen); + freeaddrinfo(res); } /* @@ -465,6 +466,7 @@ void test_tls_write_cert_chain(const char *filename, if (!g_file_set_contents(filename, buffer, offset, NULL)) { abort(); } + g_free(buffer); } diff --git a/tests/libqos/libqos.c b/tests/libqos/libqos.c index 8d7c5a9db8..2d1a802dbe 100644 --- a/tests/libqos/libqos.c +++ b/tests/libqos/libqos.c @@ -147,6 +147,23 @@ void migrate(QOSState *from, QOSState *to, const char *uri) set_context(to); } +bool have_qemu_img(void) +{ + char *rpath; + const char *path = getenv("QTEST_QEMU_IMG"); + if (!path) { + return false; + } + + rpath = realpath(path, NULL); + if (!rpath) { + return false; + } else { + free(rpath); + return true; + } +} + void mkimg(const char *file, const char *fmt, unsigned size_mb) { gchar *cli; @@ -155,13 +172,14 @@ void mkimg(const char *file, const char *fmt, unsigned size_mb) GError *err = NULL; char *qemu_img_path; gchar *out, *out2; - char *abs_path; + char *qemu_img_abs_path; qemu_img_path = getenv("QTEST_QEMU_IMG"); - abs_path = realpath(qemu_img_path, NULL); - assert(qemu_img_path); + g_assert(qemu_img_path); + qemu_img_abs_path = realpath(qemu_img_path, NULL); + g_assert(qemu_img_abs_path); - cli = g_strdup_printf("%s create -f %s %s %uM", abs_path, + cli = g_strdup_printf("%s create -f %s %s %uM", qemu_img_abs_path, fmt, file, size_mb); ret = g_spawn_command_line_sync(cli, &out, &out2, &rc, &err); if (err) { @@ -183,7 +201,7 @@ void mkimg(const char *file, const char *fmt, unsigned size_mb) g_free(out); g_free(out2); 
g_free(cli); - free(abs_path); + free(qemu_img_abs_path); } void mkqcow2(const char *file, unsigned size_mb) diff --git a/tests/libqos/libqos.h b/tests/libqos/libqos.h index 492a651f5b..ca14d2e9fe 100644 --- a/tests/libqos/libqos.h +++ b/tests/libqos/libqos.h @@ -19,6 +19,7 @@ typedef struct QOSState { QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap); QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...); void qtest_shutdown(QOSState *qs); +bool have_qemu_img(void); void mkimg(const char *file, const char *fmt, unsigned size_mb); void mkqcow2(const char *file, unsigned size_mb); void set_context(QOSState *s); diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 952a524ec7..32469efd76 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -245,6 +245,7 @@ class TestEIO(TestErrors): while not completed: for event in self.vm.get_qmp_events(wait=True): if event['event'] == 'BLOCK_JOB_ERROR': + error = True self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/operation', 'read') @@ -257,9 +258,11 @@ class TestEIO(TestErrors): self.assert_qmp(result, 'return', {}) result = self.vm.qmp('query-block-jobs') + if result == {'return': []}: + # Race; likely already finished. Check. 
+ continue self.assert_qmp(result, 'return[0]/paused', False) self.assert_qmp(result, 'return[0]/io-status', 'ok') - error = True elif event['event'] == 'BLOCK_JOB_COMPLETED': self.assertTrue(error, 'job completed unexpectedly') self.assert_qmp(event, 'data/type', 'stream') diff --git a/tests/qemu-iotests/136 b/tests/qemu-iotests/136 index f574d83ff7..e8c6937fc9 100644 --- a/tests/qemu-iotests/136 +++ b/tests/qemu-iotests/136 @@ -69,7 +69,7 @@ sector = "%d" def setUp(self): drive_args = [] - drive_args.append("stats-intervals=%d" % interval_length) + drive_args.append("stats-intervals.0=%d" % interval_length) drive_args.append("stats-account-invalid=%s" % (self.account_invalid and "on" or "off")) drive_args.append("stats-account-failed=%s" % diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index 864f69e738..7bdfc98615 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -13,16 +13,22 @@ /* * TODO: * - main should get parameters from the command line. - * - implement all request handlers. + * - implement all request handlers. Still not implemented: + * vubr_get_queue_num_exec() + * vubr_send_rarp_exec() * - test for broken requests and virtqueue. * - implement features defined by Virtio 1.0 spec. * - support mergeable buffers and indirect descriptors. - * - implement RESET_DEVICE request. * - implement clean shutdown. * - implement non-blocking writes to UDP backend. * - implement polling strategy. + * - implement clean starting/stopping of vq processing + * - implement clean starting/stopping of used and buffers + * dirty page logging. 
*/ +#define _FILE_OFFSET_BITS 64 + #include <stddef.h> #include <assert.h> #include <stdio.h> @@ -166,6 +172,8 @@ typedef struct VubrVirtq { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; + uint64_t log_guest_addr; + int enable; } VubrVirtq; /* Based on qemu/hw/virtio/vhost-user.c */ @@ -173,6 +181,8 @@ typedef struct VubrVirtq { #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_LOG_PAGE 4096 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -220,6 +230,11 @@ typedef struct VhostUserMemory { VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; } VhostUserMemory; +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + typedef struct VhostUserMsg { VhostUserRequest request; @@ -234,6 +249,7 @@ typedef struct VhostUserMsg { struct vhost_vring_state state; struct vhost_vring_addr addr; VhostUserMemory memory; + VhostUserLog log; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; int fd_num; @@ -265,8 +281,13 @@ typedef struct VubrDev { uint32_t nregions; VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; VubrVirtq vq[MAX_NR_VIRTQUEUE]; + int log_call_fd; + uint64_t log_size; + uint8_t *log_table; int backend_udp_sock; struct sockaddr_in backend_udp_dest; + int ready; + uint64_t features; } VubrDev; static const char *vubr_request_str[] = { @@ -368,7 +389,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) rc = recvmsg(conn_fd, &msg, 0); - if (rc <= 0) { + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { vubr_die("recvmsg"); } @@ -395,7 +421,12 @@ vubr_message_read(int conn_fd, VhostUserMsg *vmsg) if (vmsg->size) { rc = read(conn_fd, &vmsg->payload, vmsg->size); - if (rc <= 0) { + if (rc == 0) { + vubr_die("recvmsg"); + fprintf(stderr, "Peer disconnected.\n"); + exit(1); + } + if (rc < 0) { vubr_die("recvmsg"); } @@ -455,6 +486,16 @@ 
vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); } +/* Kick the log_call_fd if required. */ +static void +vubr_log_kick(VubrDev *dev) +{ + if (dev->log_call_fd != -1) { + DPRINT("Kicking the QEMU's log...\n"); + eventfd_write(dev->log_call_fd, 1); + } +} + /* Kick the guest if necessary. */ static void vubr_virtqueue_kick(VubrVirtq *vq) @@ -466,11 +507,39 @@ vubr_virtqueue_kick(VubrVirtq *vq) } static void +vubr_log_page(uint8_t *log_table, uint64_t page) +{ + DPRINT("Logged dirty guest page: %"PRId64"\n", page); + atomic_or(&log_table[page / 8], 1 << (page % 8)); +} + +static void +vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length) +{ + uint64_t page; + + if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) || + !dev->log_table || !length) { + return; + } + + assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8)); + + page = address / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < address + length) { + vubr_log_page(dev->log_table, page); + page += 1; /* page is a bit index into the log, not a byte address: step one page at a time */ + } + vubr_log_kick(dev); +} + +static void vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) { - struct vring_desc *desc = vq->desc; + struct vring_desc *desc = vq->desc; struct vring_avail *avail = vq->avail; - struct vring_used *used = vq->used; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; unsigned int size = vq->size; @@ -510,6 +579,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) if (len <= chunk_len) { memcpy(chunk_start, buf, len); + vubr_log_write(dev, desc[i].addr, len); } else { fprintf(stderr, "Received too long packet from the backend. Dropping...\n"); @@ -519,11 +589,17 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) /* Add descriptor to the used ring.
*/ used->ring[u_index].id = d_index; used->ring[u_index].len = len; + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, ring[u_index]), + sizeof(used->ring[u_index])); vq->last_avail_index++; vq->last_used_index++; atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, idx), + sizeof(used->idx)); /* Kick the guest if necessary. */ vubr_virtqueue_kick(vq); @@ -532,9 +608,10 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) static int vubr_process_desc(VubrDev *dev, VubrVirtq *vq) { - struct vring_desc *desc = vq->desc; + struct vring_desc *desc = vq->desc; struct vring_avail *avail = vq->avail; - struct vring_used *used = vq->used; + struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; unsigned int size = vq->size; @@ -552,6 +629,8 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq) void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); uint32_t chunk_len = desc[i].len; + assert(!(desc[i].flags & VRING_DESC_F_WRITE)); + if (len + chunk_len < buf_size) { memcpy(buf + len, chunk_start, chunk_len); DPRINT("%d ", chunk_len); @@ -577,6 +656,9 @@ vubr_process_desc(VubrDev *dev, VubrVirtq *vq) /* Add descriptor to the used ring. 
*/ used->ring[u_index].id = d_index; used->ring[u_index].len = len; + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, ring[u_index]), + sizeof(used->ring[u_index])); vubr_consume_raw_packet(dev, buf, len); @@ -588,6 +670,7 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq) { struct vring_avail *avail = vq->avail; struct vring_used *used = vq->used; + uint64_t log_guest_addr = vq->log_guest_addr; while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { vubr_process_desc(dev, vq); @@ -596,6 +679,9 @@ vubr_process_avail(VubrDev *dev, VubrVirtq *vq) } atomic_mb_set(&used->idx, vq->last_used_index); + vubr_log_write(dev, + log_guest_addr + offsetof(struct vring_used, idx), + sizeof(used->idx)); } static void @@ -609,6 +695,10 @@ vubr_backend_recv_cb(int sock, void *ctx) int buflen = sizeof(buf); int len; + if (!dev->ready) { + return; + } + DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); @@ -656,14 +746,14 @@ vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { vmsg->payload.u64 = ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_NET_F_CTRL_VQ) | - (1ULL << VIRTIO_NET_F_CTRL_RX) | - (1ULL << VHOST_F_LOG_ALL)); + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); + vmsg->size = sizeof(vmsg->payload.u64); DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - /* reply */ + /* Reply */ return 1; } @@ -671,6 +761,7 @@ static int vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + dev->features = vmsg->payload.u64; return 0; } @@ -680,10 +771,28 @@ vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) return 0; } +static void +vubr_close_log(VubrDev *dev) +{ + if (dev->log_table) { + if (munmap(dev->log_table, dev->log_size) != 0) { + vubr_die("munmap()"); + } + + dev->log_table = 0; + } + if (dev->log_call_fd != -1) { + close(dev->log_call_fd); + dev->log_call_fd = -1; + } +} 
+ static int vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + vubr_close_log(dev); + dev->ready = 0; + dev->features = 0; return 0; } @@ -710,9 +819,9 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) DPRINT(" mmap_offset 0x%016"PRIx64"\n", msg_region->mmap_offset); - dev_region->gpa = msg_region->guest_phys_addr; - dev_region->size = msg_region->memory_size; - dev_region->qva = msg_region->userspace_addr; + dev_region->gpa = msg_region->guest_phys_addr; + dev_region->size = msg_region->memory_size; + dev_region->qva = msg_region->userspace_addr; dev_region->mmap_offset = msg_region->mmap_offset; /* We don't use offset argument of mmap() since the @@ -736,14 +845,38 @@ vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return 0; + int fd; + uint64_t log_mmap_size, log_mmap_offset; + void *rc; + + assert(vmsg->fd_num == 1); + fd = vmsg->fds[0]; + + assert(vmsg->size == sizeof(vmsg->payload.log)); + log_mmap_offset = vmsg->payload.log.mmap_offset; + log_mmap_size = vmsg->payload.log.mmap_size; + DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset); + DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size); + + rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, + log_mmap_offset); + if (rc == MAP_FAILED) { + vubr_die("mmap"); + } + dev->log_table = rc; + dev->log_size = log_mmap_size; + + vmsg->size = sizeof(vmsg->payload.u64); + /* Reply */ + return 1; } static int vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + assert(vmsg->fd_num == 1); + dev->log_call_fd = vmsg->fds[0]; + DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]); return 0; } @@ -777,6 +910,7 @@ vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) vq->desc = (struct vring_desc *)qva_to_va(dev, 
vra->desc_user_addr); vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr); vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr); + vq->log_guest_addr = vra->log_guest_addr; DPRINT("Setting virtq addresses:\n"); DPRINT(" vring_desc at %p\n", vq->desc); @@ -803,8 +937,18 @@ vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return 0; + unsigned int index = vmsg->payload.state.index; + + DPRINT("State.index: %d\n", index); + vmsg->payload.state.num = dev->vq[index].last_avail_index; + vmsg->size = sizeof(vmsg->payload.state); + /* FIXME: this is a work-around for a bug in QEMU enabling + * too early vrings. When protocol features are enabled, + * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */ + dev->ready = 0; + + /* Reply */ + return 1; } static int @@ -829,7 +973,17 @@ vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) DPRINT("Waiting for kicks on fd: %d for vq: %d\n", dev->vq[index].kick_fd, index); } + /* We temporarily use this hack to determine that both TX and RX + * queues are set up and ready for processing. + * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and + * actual kicks. 
*/ + if (dev->vq[0].kick_fd != -1 && + dev->vq[1].kick_fd != -1) { + dev->ready = 1; + DPRINT("vhost-user-bridge is ready for processing queues.\n"); + } return 0; + } static int @@ -858,9 +1012,12 @@ vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { - /* FIXME: unimplented */ + vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD; DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - return 0; + vmsg->size = sizeof(vmsg->payload.u64); + + /* Reply */ + return 1; } static int @@ -881,7 +1038,12 @@ vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) static int vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); + unsigned int index = vmsg->payload.state.index; + unsigned int enable = vmsg->payload.state.num; + + DPRINT("State.index: %d\n", index); + DPRINT("State.enable: %d\n", enable); + dev->vq[index].enable = enable; return 0; } @@ -987,7 +1149,7 @@ vubr_accept_cb(int sock, void *ctx) socklen_t len = sizeof(un); conn_fd = accept(sock, (struct sockaddr *) &un, &len); - if (conn_fd == -1) { + if (conn_fd == -1) { vubr_die("accept()"); } DPRINT("Got connection from remote peer on sock %d\n", conn_fd); @@ -1009,9 +1171,17 @@ vubr_new(const char *path) .size = 0, .last_avail_index = 0, .last_used_index = 0, .desc = 0, .avail = 0, .used = 0, + .enable = 0, }; } + /* Init log */ + dev->log_call_fd = -1; + dev->log_size = 0; + dev->log_table = 0; + dev->ready = 0; + dev->features = 0; + /* Get a UNIX socket. 
*/ dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); if (dev->sock == -1) { diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 01cfc7e25d..022223b2a7 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -70,6 +70,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ERR = 14, VHOST_USER_GET_PROTOCOL_FEATURES = 15, VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_MAX } VhostUserRequest; @@ -315,8 +316,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) g_cond_signal(&s->data_cond); break; - case VHOST_USER_RESET_OWNER: - s->fds_num = 0; + case VHOST_USER_SET_VRING_ENABLE: + if (!msg.payload.state.num) { + s->fds_num = 0; + } break; default: diff --git a/trace-events b/trace-events index ef6bc41a56..0b0ff02442 100644 --- a/trace-events +++ b/trace-events @@ -1419,6 +1419,12 @@ ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "ad prep_io_800_writeb(uint32_t addr, uint32_t val) "0x%08" PRIx32 " => 0x%02" PRIx32 prep_io_800_readb(uint32_t addr, uint32_t retval) "0x%08" PRIx32 " <= 0x%02" PRIx32 +# io/buffer.c +buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd" +buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" +buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" +buffer_free(const char *buf, size_t len) "%s: capacity %zd" + # util/hbitmap.c hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx" hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c index 9512b87183..aa21191ea2 100644 --- a/ui/vnc-jobs.c +++ b/ui/vnc-jobs.c @@ -29,6 +29,7 @@ #include "vnc.h" #include "vnc-jobs.h" #include "qemu/sockets.h" +#include "qemu/main-loop.h" #include "block/aio.h" /* @@ 
-54,7 +55,6 @@ struct VncJobQueue { QemuCond cond; QemuMutex mutex; QemuThread thread; - Buffer buffer; bool exit; QTAILQ_HEAD(, VncJob) jobs; }; @@ -166,8 +166,11 @@ void vnc_jobs_consume_buffer(VncState *vs) vnc_lock_output(vs); if (vs->jobs_buffer.offset) { - vnc_write(vs, vs->jobs_buffer.buffer, vs->jobs_buffer.offset); - buffer_reset(&vs->jobs_buffer); + if (vs->csock != -1 && buffer_empty(&vs->output)) { + qemu_set_fd_handler(vs->csock, vnc_client_read, + vnc_client_write, vs); + } + buffer_move(&vs->output, &vs->jobs_buffer); } flush = vs->csock != -1 && vs->abort != true; vnc_unlock_output(vs); @@ -182,6 +185,9 @@ void vnc_jobs_consume_buffer(VncState *vs) */ static void vnc_async_encoding_start(VncState *orig, VncState *local) { + buffer_init(&local->output, "vnc-worker-output"); + local->csock = -1; /* Don't do any network work on this thread */ + local->vnc_encoding = orig->vnc_encoding; local->features = orig->features; local->vd = orig->vd; @@ -193,10 +199,6 @@ static void vnc_async_encoding_start(VncState *orig, VncState *local) local->zlib = orig->zlib; local->hextile = orig->hextile; local->zrle = orig->zrle; - local->output = queue->buffer; - local->csock = -1; /* Don't do any network work on this thread */ - - buffer_reset(&local->output); } static void vnc_async_encoding_end(VncState *orig, VncState *local) @@ -206,15 +208,13 @@ static void vnc_async_encoding_end(VncState *orig, VncState *local) orig->hextile = local->hextile; orig->zrle = local->zrle; orig->lossy_rect = local->lossy_rect; - - queue->buffer = local->output; } static int vnc_worker_thread_loop(VncJobQueue *queue) { VncJob *job; VncRectEntry *entry, *tmp; - VncState vs; + VncState vs = {}; int n_rectangles; int saved_offset; @@ -235,6 +235,14 @@ static int vnc_worker_thread_loop(VncJobQueue *queue) vnc_unlock_output(job->vs); goto disconnected; } + if (buffer_empty(&job->vs->output)) { + /* + * Looks like a NOP as it obviously moves no data. 
But it + * moves the empty buffer, so we don't have to malloc a new + * one for vs.output + */ + buffer_move_empty(&vs.output, &job->vs->output); + } vnc_unlock_output(job->vs); /* Make a local copy of vs and switch output buffers */ @@ -274,14 +282,13 @@ static int vnc_worker_thread_loop(VncJobQueue *queue) vnc_lock_output(job->vs); if (job->vs->csock != -1) { - buffer_reserve(&job->vs->jobs_buffer, vs.output.offset); - buffer_append(&job->vs->jobs_buffer, vs.output.buffer, - vs.output.offset); + buffer_move(&job->vs->jobs_buffer, &vs.output); /* Copy persistent encoding data */ vnc_async_encoding_end(job->vs, &vs); qemu_bh_schedule(job->vs->bh); } else { + buffer_reset(&vs.output); /* Copy persistent encoding data */ vnc_async_encoding_end(job->vs, &vs); } @@ -310,7 +317,6 @@ static void vnc_queue_clear(VncJobQueue *q) { qemu_cond_destroy(&queue->cond); qemu_mutex_destroy(&queue->mutex); - buffer_free(&queue->buffer); g_free(q); queue = NULL; /* Unset global queue */ } @@ -615,10 +615,25 @@ static void framebuffer_update_request(VncState *vs, int incremental, static void vnc_refresh(DisplayChangeListener *dcl); static int vnc_refresh_server_surface(VncDisplay *vd); +static int vnc_width(VncDisplay *vd) +{ + return MIN(VNC_MAX_WIDTH, ROUND_UP(surface_width(vd->ds), + VNC_DIRTY_PIXELS_PER_BIT)); +} + +static int vnc_height(VncDisplay *vd) +{ + return MIN(VNC_MAX_HEIGHT, surface_height(vd->ds)); +} + static void vnc_set_area_dirty(DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT), - int width, int height, - int x, int y, int w, int h) { + VncDisplay *vd, + int x, int y, int w, int h) +{ + int width = vnc_width(vd); + int height = vnc_height(vd); + /* this is needed this to ensure we updated all affected * blocks if x % VNC_DIRTY_PIXELS_PER_BIT != 0 */ w += (x % VNC_DIRTY_PIXELS_PER_BIT); @@ -640,10 +655,8 @@ static void vnc_dpy_update(DisplayChangeListener *dcl, { VncDisplay *vd = container_of(dcl, VncDisplay, dcl); struct VncSurface *s 
= &vd->guest; - int width = pixman_image_get_width(vd->server); - int height = pixman_image_get_height(vd->server); - vnc_set_area_dirty(s->dirty, width, height, x, y, w, h); + vnc_set_area_dirty(s->dirty, vd, x, y, w, h); } void vnc_framebuffer_update(VncState *vs, int x, int y, int w, int h, @@ -713,6 +726,21 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y) return ptr; } +static void vnc_update_server_surface(VncDisplay *vd) +{ + qemu_pixman_image_unref(vd->server); + vd->server = NULL; + + if (QTAILQ_EMPTY(&vd->clients)) { + return; + } + + vd->server = pixman_image_create_bits(VNC_SERVER_FB_FORMAT, + vnc_width(vd), + vnc_height(vd), + NULL, 0); +} + static void vnc_dpy_switch(DisplayChangeListener *dcl, DisplaySurface *surface) { @@ -721,26 +749,19 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, int width, height; vnc_abort_display_jobs(vd); + vd->ds = surface; /* server surface */ - qemu_pixman_image_unref(vd->server); - vd->ds = surface; - width = MIN(VNC_MAX_WIDTH, ROUND_UP(surface_width(vd->ds), - VNC_DIRTY_PIXELS_PER_BIT)); - height = MIN(VNC_MAX_HEIGHT, surface_height(vd->ds)); - vd->server = pixman_image_create_bits(VNC_SERVER_FB_FORMAT, - width, height, NULL, 0); + vnc_update_server_surface(vd); /* guest surface */ -#if 0 /* FIXME */ - if (ds_get_bytes_per_pixel(ds) != vd->guest.ds->pf.bytes_per_pixel) - console_color_init(ds); -#endif qemu_pixman_image_unref(vd->guest.fb); vd->guest.fb = pixman_image_ref(surface->image); vd->guest.format = surface->format; + width = vnc_width(vd); + height = vnc_height(vd); memset(vd->guest.dirty, 0x00, sizeof(vd->guest.dirty)); - vnc_set_area_dirty(vd->guest.dirty, width, height, 0, 0, + vnc_set_area_dirty(vd->guest.dirty, vd, 0, 0, width, height); QTAILQ_FOREACH(vs, &vd->clients, next) { @@ -750,7 +771,7 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, vnc_cursor_define(vs); } memset(vs->dirty, 0x00, sizeof(vs->dirty)); - vnc_set_area_dirty(vs->dirty, width, height, 0, 0, + 
vnc_set_area_dirty(vs->dirty, vd, 0, 0, width, height); } } @@ -1224,6 +1245,10 @@ void vnc_disconnect_finish(VncState *vs) if (vs->initialized) { QTAILQ_REMOVE(&vs->vd->clients, vs, next); qemu_remove_mouse_mode_change_notifier(&vs->mouse_mode_notifier); + if (QTAILQ_EMPTY(&vs->vd->clients)) { + /* last client gone */ + vnc_update_server_surface(vs->vd); + } } if (vs->vd->lock_key_sync) @@ -2006,9 +2031,6 @@ static void ext_key_event(VncState *vs, int down, static void framebuffer_update_request(VncState *vs, int incremental, int x, int y, int w, int h) { - int width = pixman_image_get_width(vs->vd->server); - int height = pixman_image_get_height(vs->vd->server); - vs->need_update = 1; if (incremental) { @@ -2016,7 +2038,7 @@ static void framebuffer_update_request(VncState *vs, int incremental, } vs->force_update = 1; - vnc_set_area_dirty(vs->dirty, width, height, x, y, w, h); + vnc_set_area_dirty(vs->dirty, vs->vd, x, y, w, h); } static void send_ext_key_event_ack(VncState *vs) @@ -2988,6 +3010,26 @@ static void vnc_connect(VncDisplay *vd, int csock, vs->csock = csock; vs->vd = vd; + buffer_init(&vs->input, "vnc-input/%d", csock); + buffer_init(&vs->output, "vnc-output/%d", csock); + buffer_init(&vs->ws_input, "vnc-ws_input/%d", csock); + buffer_init(&vs->ws_output, "vnc-ws_output/%d", csock); + buffer_init(&vs->jobs_buffer, "vnc-jobs_buffer/%d", csock); + + buffer_init(&vs->tight.tight, "vnc-tight/%d", csock); + buffer_init(&vs->tight.zlib, "vnc-tight-zlib/%d", csock); + buffer_init(&vs->tight.gradient, "vnc-tight-gradient/%d", csock); +#ifdef CONFIG_VNC_JPEG + buffer_init(&vs->tight.jpeg, "vnc-tight-jpeg/%d", csock); +#endif +#ifdef CONFIG_VNC_PNG + buffer_init(&vs->tight.png, "vnc-tight-png/%d", csock); +#endif + buffer_init(&vs->zlib.zlib, "vnc-zlib/%d", csock); + buffer_init(&vs->zrle.zrle, "vnc-zrle/%d", csock); + buffer_init(&vs->zrle.fb, "vnc-zrle-fb/%d", csock); + buffer_init(&vs->zrle.zlib, "vnc-zrle-zlib/%d", csock); + if (skipauth) { vs->auth = 
VNC_AUTH_NONE; vs->subauth = VNC_AUTH_INVALID; @@ -3045,6 +3087,7 @@ void vnc_init_state(VncState *vs) { vs->initialized = true; VncDisplay *vd = vs->vd; + bool first_client = QTAILQ_EMPTY(&vd->clients); vs->last_x = -1; vs->last_y = -1; @@ -3058,6 +3101,9 @@ void vnc_init_state(VncState *vs) vs->bh = qemu_bh_new(vnc_jobs_bh, vs); QTAILQ_INSERT_TAIL(&vd->clients, vs, next); + if (first_client) { + vnc_update_server_surface(vd); + } graphic_hw_update(vd->dcl.con); @@ -3571,8 +3617,6 @@ void vnc_display_open(const char *id, Error **errp) if (to) { saddr->u.inet->has_to = true; - saddr->u.inet->to = to; - saddr->u.inet->has_to = true; saddr->u.inet->to = to + 5900; } saddr->u.inet->ipv4 = saddr->u.inet->has_ipv4 = has_ipv4; diff --git a/util/buffer.c b/util/buffer.c index cedd055680..8b27c08aac 100644 --- a/util/buffer.c +++ b/util/buffer.c @@ -19,12 +19,77 @@ */ #include "qemu/buffer.h" +#include "trace.h" + +#define BUFFER_MIN_INIT_SIZE 4096 +#define BUFFER_MIN_SHRINK_SIZE 65536 + +/* define the factor alpha for the exponential smoothing + * that is used in the average size calculation. a shift + * of 7 results in an alpha of 1/2^7. */ +#define BUFFER_AVG_SIZE_SHIFT 7 + +static size_t buffer_req_size(Buffer *buffer, size_t len) +{ + return MAX(BUFFER_MIN_INIT_SIZE, + pow2ceil(buffer->offset + len)); +} + +static void buffer_adj_size(Buffer *buffer, size_t len) +{ + size_t old = buffer->capacity; + buffer->capacity = buffer_req_size(buffer, len); + buffer->buffer = g_realloc(buffer->buffer, buffer->capacity); + trace_buffer_resize(buffer->name ?: "unnamed", + old, buffer->capacity); + + /* make it even harder for the buffer to shrink, reset average size + * to current capacity if it is larger than the average. */ + buffer->avg_size = MAX(buffer->avg_size, + buffer->capacity << BUFFER_AVG_SIZE_SHIFT); +} + +void buffer_init(Buffer *buffer, const char *name, ...) 
+{ + va_list ap; + + va_start(ap, name); + buffer->name = g_strdup_vprintf(name, ap); + va_end(ap); +} + +static uint64_t buffer_get_avg_size(Buffer *buffer) +{ + return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT; +} + +void buffer_shrink(Buffer *buffer) +{ + size_t new; + + /* Calculate the average size of the buffer as + * avg_size = avg_size * ( 1 - a ) + required_size * a + * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */ + buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1; + buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT; + buffer->avg_size += buffer_req_size(buffer, 0); + + /* And then only shrink if the average size of the buffer is much + * too big, to avoid bumping up & down the buffers all the time. + * realloc() isn't exactly cheap ... */ + new = buffer_req_size(buffer, buffer_get_avg_size(buffer)); + if (new < buffer->capacity >> 3 && + new >= BUFFER_MIN_SHRINK_SIZE) { + buffer_adj_size(buffer, buffer_get_avg_size(buffer)); + } + + buffer_adj_size(buffer, 0); +} void buffer_reserve(Buffer *buffer, size_t len) { if ((buffer->capacity - buffer->offset) < len) { - buffer->capacity += (len + 1024); - buffer->buffer = g_realloc(buffer->buffer, buffer->capacity); + buffer_adj_size(buffer, len); } } @@ -41,14 +106,18 @@ uint8_t *buffer_end(Buffer *buffer) void buffer_reset(Buffer *buffer) { buffer->offset = 0; + buffer_shrink(buffer); } void buffer_free(Buffer *buffer) { + trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity); g_free(buffer->buffer); + g_free(buffer->name); buffer->offset = 0; buffer->capacity = 0; buffer->buffer = NULL; + buffer->name = NULL; } void buffer_append(Buffer *buffer, const void *data, size_t len) @@ -62,4 +131,41 @@ void buffer_advance(Buffer *buffer, size_t len) memmove(buffer->buffer, buffer->buffer + len, (buffer->offset - len)); buffer->offset -= len; + buffer_shrink(buffer); +} + +void buffer_move_empty(Buffer *to, Buffer *from) +{ + trace_buffer_move_empty(to->name ?: "unnamed", + from->offset, + from->name ?: 
"unnamed"); + assert(to->offset == 0); + + g_free(to->buffer); + to->offset = from->offset; + to->capacity = from->capacity; + to->buffer = from->buffer; + + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; +} + +void buffer_move(Buffer *to, Buffer *from) +{ + if (to->offset == 0) { + buffer_move_empty(to, from); + return; + } + + trace_buffer_move(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + buffer_reserve(to, from->offset); + buffer_append(to, from->buffer, from->offset); + + g_free(from->buffer); + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; } @@ -4291,14 +4291,23 @@ int main(int argc, char **argv, char **envp) page_size_init(); socket_init(); - if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + if (qemu_opts_foreach(qemu_find_opts("chardev"), + chardev_init_func, NULL, NULL)) { exit(1); } - if (qemu_opts_foreach(qemu_find_opts("chardev"), - chardev_init_func, NULL, NULL)) { + if (qtest_chrdev) { + Error *local_err = NULL; + qtest_init(qtest_chrdev, qtest_log, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + } + + if (qemu_opts_foreach(qemu_find_opts("object"), + object_create, + object_create_initial, NULL)) { exit(1); } @@ -4328,15 +4337,6 @@ int main(int argc, char **argv, char **envp) configure_accelerator(current_machine); - if (qtest_chrdev) { - Error *local_err = NULL; - qtest_init(qtest_chrdev, qtest_log, &local_err); - if (local_err) { - error_report_err(local_err); - exit(1); - } - } - machine_opts = qemu_get_machine_opts(); kernel_filename = qemu_opt_get(machine_opts, "kernel"); initrd_filename = qemu_opt_get(machine_opts, "initrd"); |