diff options
108 files changed, 3816 insertions, 966 deletions
diff --git a/.gitignore b/.gitignore index c658613560..2286d0a109 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ /qapi-generated /qapi-types.[ch] /qapi-visit.[ch] +/qapi-event.[ch] /qmp-commands.h /qmp-marshal.c /qemu-doc.html @@ -248,7 +248,7 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py) qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \ $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \ - $(SRC_PATH)/qapi-event.json + $(SRC_PATH)/qapi/event.json qapi-types.c qapi-types.h :\ $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) @@ -831,7 +831,7 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) * Clear flags that are internal to the block layer before opening the * image. */ - open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); /* * Snapshots should be writable. @@ -928,6 +928,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bs->zero_beyond_eof = true; open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); + bs->growable = !!(flags & BDRV_O_PROTOCOL); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { error_setg(errp, @@ -1005,94 +1006,124 @@ free_and_fail: return ret; } +static QDict *parse_json_filename(const char *filename, Error **errp) +{ + QObject *options_obj; + QDict *options; + int ret; + + ret = strstart(filename, "json:", &filename); + assert(ret); + + options_obj = qobject_from_json(filename); + if (!options_obj) { + error_setg(errp, "Could not parse the JSON options"); + return NULL; + } + + if (qobject_type(options_obj) != QTYPE_QDICT) { + qobject_decref(options_obj); + error_setg(errp, "Invalid JSON object given"); + return NULL; + } + + options = qobject_to_qdict(options_obj); + qdict_flatten(options); + + return options; +} + /* - * Opens a file using a protocol (file, host_device, nbd, ...) - * - * options is an indirect pointer to a QDict of options to pass to the block - * drivers, or pointer to NULL for an empty set of options. If this function - * takes ownership of the QDict reference, it will set *options to NULL; - * otherwise, it will contain unused/unrecognized options after this function - * returns. Then, the caller is responsible for freeing it. If it intends to - * reuse the QDict, QINCREF() should be called beforehand. + * Fills in default options for opening images and converts the legacy + * filename/flags pair to option QDict entries. */ -static int bdrv_file_open(BlockDriverState *bs, const char *filename, - QDict **options, int flags, Error **errp) +static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, + BlockDriver *drv, Error **errp) { - BlockDriver *drv; + const char *filename = *pfilename; const char *drvname; + bool protocol = flags & BDRV_O_PROTOCOL; bool parse_filename = false; Error *local_err = NULL; - int ret; + + /* Parse json: pseudo-protocol */ + if (filename && g_str_has_prefix(filename, "json:")) { + QDict *json_options = parse_json_filename(filename, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + /* Options given in the filename have lower priority than options + * specified directly */ + qdict_join(*options, json_options, false); + QDECREF(json_options); + *pfilename = filename = NULL; + } /* Fetch the file name from the options QDict if necessary */ - if (!filename) { - filename = qdict_get_try_str(*options, "filename"); - } else if (filename && !qdict_haskey(*options, "filename")) { - qdict_put(*options, "filename", qstring_from_str(filename)); - parse_filename = true; - } else { - error_setg(errp, "Can't specify 'file' and 'filename' options at the " - "same time"); - ret = -EINVAL; - goto fail; + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put(*options, "filename", qstring_from_str(filename)); + parse_filename = true; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); + return -EINVAL; + } } /* Find the right block driver */ + filename = qdict_get_try_str(*options, "filename"); drvname = qdict_get_try_str(*options, "driver"); - if (drvname) { - drv = bdrv_find_format(drvname); - if (!drv) { - error_setg(errp, "Unknown driver '%s'", drvname); - } - qdict_del(*options, "driver"); - } else if (filename) { - drv = bdrv_find_protocol(filename, parse_filename); - if (!drv) { - error_setg(errp, "Unknown protocol"); + + if (drv) { + if (drvname) { + error_setg(errp, "Driver specified twice"); + return -EINVAL; } + drvname = drv->format_name; + qdict_put(*options, "driver", qstring_from_str(drvname)); } else { - error_setg(errp, "Must specify either driver or file"); - drv = NULL; - } + if (!drvname && protocol) { + if (filename) { + drv = bdrv_find_protocol(filename, parse_filename); + if (!drv) { + error_setg(errp, "Unknown protocol"); + return -EINVAL; + } - if (!drv) { - /* errp has been set already */ - ret = -ENOENT; - goto fail; + drvname = drv->format_name; + qdict_put(*options, "driver", qstring_from_str(drvname)); + } else { + error_setg(errp, "Must specify either driver or file"); + return -EINVAL; + } + } else if (drvname) { + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver '%s'", drvname); + return -ENOENT; + } + } } - /* Parse the filename and open it */ - if (drv->bdrv_parse_filename && parse_filename) { + assert(drv || !protocol); + + /* Driver-specific filename parsing */ + if (drv && drv->bdrv_parse_filename && parse_filename) { drv->bdrv_parse_filename(filename, *options, &local_err); if (local_err) { error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; + return -EINVAL; } if (!drv->bdrv_needs_filename) { qdict_del(*options, "filename"); - } else { - filename = qdict_get_str(*options, "filename"); } } - if (!drv->bdrv_file_open) { - ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err); - *options = NULL; - } else { - ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err); - } - if (ret < 0) { - error_propagate(errp, local_err); - goto fail; - } - - bs->growable = 1; return 0; - -fail: - return ret; } void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) @@ -1162,6 +1193,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX); } + if (!bs->drv || !bs->drv->supports_backing) { + ret = -EINVAL; + error_setg(errp, "Driver doesn't support backing files"); + QDECREF(options); + goto free_exit; + } + backing_hd = bdrv_new("", errp); if (bs->backing_format[0] != '\0') { @@ -1240,7 +1278,7 @@ done: return ret; } -void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) +int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) { /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char *tmp_filename = g_malloc0(PATH_MAX + 1); @@ -1258,6 +1296,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) /* Get the required size from the image */ total_size = bdrv_getlength(bs); if (total_size < 0) { + ret = total_size; error_setg_errno(errp, -total_size, "Could not get image size"); goto out; } @@ -1304,33 +1343,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) out: g_free(tmp_filename); -} - -static QDict *parse_json_filename(const char *filename, Error **errp) -{ - QObject *options_obj; - QDict *options; - int ret; - - ret = strstart(filename, "json:", &filename); - assert(ret); - - options_obj = qobject_from_json(filename); - if (!options_obj) { - error_setg(errp, "Could not parse the JSON options"); - return NULL; - } - - if (qobject_type(options_obj) != QTYPE_QDICT) { - qobject_decref(options_obj); - error_setg(errp, "Invalid JSON object given"); - return NULL; - } - - options = qobject_to_qdict(options_obj); - qdict_flatten(options); - - return options; + return ret; } /* @@ -1396,77 +1409,62 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } - if (filename && g_str_has_prefix(filename, "json:")) { - QDict *json_options = parse_json_filename(filename, &local_err); - if (local_err) { - ret = -EINVAL; - goto fail; - } - - /* Options given in the filename have lower priority than options - * specified directly */ - qdict_join(options, json_options, false); - QDECREF(json_options); - filename = NULL; - } - - bs->options = options; - options = qdict_clone_shallow(options); - - if (flags & BDRV_O_PROTOCOL) { - assert(!drv); - ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL, - &local_err); - if (!ret) { - drv = bs->drv; - goto done; - } else if (bs->drv) { - goto close_and_fail; - } else { - goto fail; - } - } - - /* Open image file without format layer */ - if (flags & BDRV_O_RDWR) { - flags |= BDRV_O_ALLOW_RDWR; - } - if (flags & BDRV_O_SNAPSHOT) { - snapshot_flags = bdrv_temp_snapshot_flags(flags); - flags = bdrv_backing_flags(flags); - } - - assert(file == NULL); - ret = bdrv_open_image(&file, filename, options, "file", - bdrv_inherited_flags(flags), - true, &local_err); - if (ret < 0) { + ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); + if (local_err) { goto fail; } /* Find the right image format driver */ + drv = NULL; drvname = qdict_get_try_str(options, "driver"); if (drvname) { drv = bdrv_find_format(drvname); qdict_del(options, "driver"); if (!drv) { - error_setg(errp, "Invalid driver: '%s'", drvname); + error_setg(errp, "Unknown driver: '%s'", drvname); ret = -EINVAL; goto fail; } } - if (!drv) { - if (file) { - ret = find_image_format(file, filename, &drv, &local_err); - } else { - error_setg(errp, "Must specify either driver or file"); - ret = -EINVAL; + assert(drvname || !(flags & BDRV_O_PROTOCOL)); + if (drv && !drv->bdrv_file_open) { + /* If the user explicitly wants a format driver here, we'll need to add + * another layer for the protocol in bs->file */ + flags &= ~BDRV_O_PROTOCOL; + } + + bs->options = options; + options = qdict_clone_shallow(options); + + /* Open image file without format layer */ + if ((flags & BDRV_O_PROTOCOL) == 0) { + if (flags & BDRV_O_RDWR) { + flags |= BDRV_O_ALLOW_RDWR; + } + if (flags & BDRV_O_SNAPSHOT) { + snapshot_flags = bdrv_temp_snapshot_flags(flags); + flags = bdrv_backing_flags(flags); + } + + assert(file == NULL); + ret = bdrv_open_image(&file, filename, options, "file", + bdrv_inherited_flags(flags), + true, &local_err); + if (ret < 0) { goto fail; } } - if (!drv) { + /* Image format probing */ + if (!drv && file) { + ret = find_image_format(file, filename, &drv, &local_err); + if (ret < 0) { + goto fail; + } + } else if (!drv) { + error_setg(errp, "Must specify either driver or file"); + ret = -EINVAL; goto fail; } @@ -1495,15 +1493,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, /* For snapshot=on, create a temporary qcow2 overlay. bs points to the * temporary snapshot afterwards. */ if (snapshot_flags) { - bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); + ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); if (local_err) { - error_propagate(errp, local_err); goto close_and_fail; } } - -done: /* Check if any unknown options were used */ if (options && (qdict_size(options) != 0)) { const QDictEntry *entry = qdict_first(options); @@ -3489,9 +3484,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOTSUP; if (bs->read_only) return -EACCES; - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { - return -EBUSY; - } + ret = drv->bdrv_truncate(bs, offset); if (ret == 0) { ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); @@ -5774,3 +5767,28 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) return false; } + +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) +{ + BlockDriverState *to_replace_bs = bdrv_find_node(node_name); + if (!to_replace_bs) { + error_setg(errp, "Node name '%s' not found", node_name); + return NULL; + } + + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { + return NULL; + } + + /* We don't want arbitrary node of the BDS chain to be replaced only the top + * most non filter in order to prevent data corruption. + * Another benefit is that this tests exclude backing files which are + * blocked by the backing blockers. + */ + if (!bdrv_is_first_non_filter(to_replace_bs)) { + error_setg(errp, "Only top most non filter can be replaced"); + return NULL; + } + + return to_replace_bs; +} diff --git a/block/cow.c b/block/cow.c index a05a92cada..8f81ee6d56 100644 --- a/block/cow.c +++ b/block/cow.c @@ -414,6 +414,7 @@ static BlockDriver bdrv_cow = { .bdrv_close = cow_close, .bdrv_create = cow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .supports_backing = true, .bdrv_read = cow_co_read, .bdrv_write = cow_co_write, diff --git a/block/mirror.c b/block/mirror.c index 301a04de8e..6c3ee7041c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob { RateLimit limit; BlockDriverState *target; BlockDriverState *base; + /* The name of the graph node to replace */ + char *replaces; + /* The BDS to replace */ + BlockDriverState *to_replace; + /* Used to block operations on the drive-mirror-replace target */ + Error *replace_blocker; bool is_none_mode; BlockdevOnError on_source_error, on_target_error; bool synced; @@ -324,9 +330,18 @@ static void coroutine_fn mirror_run(void *opaque) } s->common.len = bdrv_getlength(bs); - if (s->common.len <= 0) { + if (s->common.len < 0) { ret = s->common.len; goto immediate_exit; + } else if (s->common.len == 0) { + /* Report BLOCK_JOB_READY and wait for complete. */ + block_job_event_ready(&s->common); + s->synced = true; + while (!block_job_is_cancelled(&s->common) && !s->should_complete) { + block_job_yield(&s->common); + } + s->common.cancelled = false; + goto immediate_exit; } length = DIV_ROUND_UP(s->common.len, s->granularity); @@ -491,10 +506,14 @@ immediate_exit: bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { - if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { - bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + BlockDriverState *to_replace = s->common.bs; + if (s->to_replace) { + to_replace = s->to_replace; } - bdrv_swap(s->target, s->common.bs); + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); + } + bdrv_swap(s->target, to_replace); if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { /* drop the bs loop chain formed by the swap: break the loop then * trigger the unref from the top one */ @@ -503,6 +522,12 @@ immediate_exit: bdrv_unref(p); } } + if (s->to_replace) { + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } + g_free(s->replaces); bdrv_unref(s->target); block_job_completed(&s->common, ret); } @@ -541,6 +566,20 @@ static void mirror_complete(BlockJob *job, Error **errp) return; } + /* check the target bs is not blocked and block all operations on it */ + if (s->replaces) { + s->to_replace = check_to_replace_node(s->replaces, &local_err); + if (!s->to_replace) { + error_propagate(errp, local_err); + return; + } + + error_setg(&s->replace_blocker, + "block device is in use by block-job-complete"); + bdrv_op_block_all(s->to_replace, s->replace_blocker); + bdrv_ref(s->to_replace); + } + s->should_complete = true; block_job_resume(job); } @@ -563,14 +602,15 @@ static const BlockJobDriver commit_active_job_driver = { }; static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, int64_t granularity, - int64_t buf_size, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - BlockDriverCompletionFunc *cb, - void *opaque, Error **errp, - const BlockJobDriver *driver, - bool is_none_mode, BlockDriverState *base) + const char *replaces, + int64_t speed, int64_t granularity, + int64_t buf_size, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp, + const BlockJobDriver *driver, + bool is_none_mode, BlockDriverState *base) { MirrorBlockJob *s; @@ -601,6 +641,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, return; } + s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; s->on_target_error = on_target_error; s->target = target; @@ -622,6 +663,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, } void mirror_start(BlockDriverState *bs, BlockDriverState *target, + const char *replaces, int64_t speed, int64_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, @@ -633,7 +675,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL; - mirror_start_job(bs, target, speed, granularity, buf_size, + mirror_start_job(bs, target, replaces, + speed, granularity, buf_size, on_source_error, on_target_error, cb, opaque, errp, &mirror_job_driver, is_none_mode, base); } @@ -681,7 +724,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, } bdrv_ref(base); - mirror_start_job(bs, base, speed, 0, 0, + mirror_start_job(bs, base, NULL, speed, 0, 0, on_error, on_error, cb, opaque, &local_err, &commit_active_job_driver, false, base); if (local_err) { diff --git a/block/nfs.c b/block/nfs.c index ec43201817..8439e0d389 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -304,17 +304,27 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, qp = query_params_parse(uri->query); for (i = 0; i < qp->n; i++) { + unsigned long long val; if (!qp->p[i].value) { error_setg(errp, "Value for NFS parameter expected: %s", qp->p[i].name); goto fail; } - if (!strncmp(qp->p[i].name, "uid", 3)) { - nfs_set_uid(client->context, atoi(qp->p[i].value)); - } else if (!strncmp(qp->p[i].name, "gid", 3)) { - nfs_set_gid(client->context, atoi(qp->p[i].value)); - } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) { - nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value)); + if (parse_uint_full(qp->p[i].value, &val, 0)) { + error_setg(errp, "Illegal value for NFS parameter: %s", + qp->p[i].name); + goto fail; + } + if (!strcmp(qp->p[i].name, "uid")) { + nfs_set_uid(client->context, val); + } else if (!strcmp(qp->p[i].name, "gid")) { + nfs_set_gid(client->context, val); + } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) { + nfs_set_tcp_syncnt(client->context, val); +#ifdef LIBNFS_FEATURE_READAHEAD + } else if (!strcmp(qp->p[i].name, "readahead")) { + nfs_set_readahead(client->context, val); +#endif } else { error_setg(errp, "Unknown NFS parameter name: %s", qp->p[i].name); diff --git a/block/qapi.c b/block/qapi.c index 97e16418ef..f44f6b4012 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -293,7 +293,7 @@ void bdrv_query_info(BlockDriverState *bs, qapi_free_BlockInfo(info); } -BlockStats *bdrv_query_stats(const BlockDriverState *bs) +static BlockStats *bdrv_query_stats(const BlockDriverState *bs) { BlockStats *s; @@ -360,7 +360,11 @@ BlockStatsList *qmp_query_blockstats(Error **errp) while ((bs = bdrv_next(bs))) { BlockStatsList *info = g_malloc0(sizeof(*info)); + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); info->value = bdrv_query_stats(bs); + aio_context_release(ctx); *p_next = info; p_next = &info->next; diff --git a/block/qcow.c b/block/qcow.c index 1f2bac8a5f..a874056cf3 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -941,6 +941,7 @@ static BlockDriver bdrv_qcow = { .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .supports_backing = true, .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, diff --git a/block/qcow2.c b/block/qcow2.c index b9d2fa6632..67e55c9667 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2418,6 +2418,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_save_vmstate = qcow2_save_vmstate, .bdrv_load_vmstate = qcow2_load_vmstate, + .supports_backing = true, .bdrv_change_backing_file = qcow2_change_backing_file, .bdrv_refresh_limits = qcow2_refresh_limits, diff --git a/block/qed.c b/block/qed.c index 092e6fb1d2..eddae929eb 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1652,6 +1652,7 @@ static BlockDriver bdrv_qed = { .format_name = "qed", .instance_size = sizeof(BDRVQEDState), .create_opts = &qed_create_opts, + .supports_backing = true, .bdrv_probe = bdrv_qed_probe, .bdrv_rebind = bdrv_qed_rebind, diff --git a/block/quorum.c b/block/quorum.c index 86802d306b..d5ee9c0059 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -23,6 +23,7 @@ #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold" #define QUORUM_OPT_BLKVERIFY "blkverify" +#define QUORUM_OPT_REWRITE "rewrite-corrupted" /* This union holds a vote hash value */ typedef union QuorumVoteValue { @@ -70,6 +71,9 @@ typedef struct BDRVQuorumState { * It is useful to debug other block drivers by * comparing them with a reference one. */ + bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted + * block if Quorum is reached. + */ } BDRVQuorumState; typedef struct QuorumAIOCB QuorumAIOCB; @@ -105,13 +109,17 @@ struct QuorumAIOCB { int count; /* number of completed AIOCB */ int success_count; /* number of successfully completed AIOCB */ + int rewrite_count; /* number of replica to rewrite: count down to + * zero once writes are fired + */ + QuorumVotes votes; bool is_read; int vote_ret; }; -static void quorum_vote(QuorumAIOCB *acb); +static bool quorum_vote(QuorumAIOCB *acb); static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) { @@ -183,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, acb->qcrs = g_new0(QuorumChildRequest, s->num_children); acb->count = 0; acb->success_count = 0; + acb->rewrite_count = 0; acb->votes.compare = quorum_sha256_compare; QLIST_INIT(&acb->votes.vote_list); acb->is_read = false; @@ -232,11 +241,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) return false; } +static void quorum_rewrite_aio_cb(void *opaque, int ret) +{ + QuorumAIOCB *acb = opaque; + + /* one less rewrite to do */ + acb->rewrite_count--; + + /* wait until all rewrite callbacks have completed */ + if (acb->rewrite_count) { + return; + } + + quorum_aio_finalize(acb); +} + static void quorum_aio_cb(void *opaque, int ret) { QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; BDRVQuorumState *s = acb->common.bs->opaque; + bool rewrite = false; sacb->ret = ret; acb->count++; @@ -253,12 +278,15 @@ static void quorum_aio_cb(void *opaque, int ret) /* Do the vote on read */ if (acb->is_read) { - quorum_vote(acb); + rewrite = quorum_vote(acb); } else { quorum_has_too_much_io_failed(acb); } - quorum_aio_finalize(acb); + /* if no rewrite is done the code will finish right away */ + if (!rewrite) { + quorum_aio_finalize(acb); + } } static void quorum_report_bad_versions(BDRVQuorumState *s, @@ -278,6 +306,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, } } +static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, + QuorumVoteValue *value) +{ + QuorumVoteVersion *version; + QuorumVoteItem *item; + int count = 0; + + /* first count the number of bad versions: done first to avoid concurrency + * issues. + */ + QLIST_FOREACH(version, &acb->votes.vote_list, next) { + if (acb->votes.compare(&version->value, value)) { + continue; + } + QLIST_FOREACH(item, &version->items, next) { + count++; + } + } + + /* quorum_rewrite_aio_cb will count down this to zero */ + acb->rewrite_count = count; + + /* now fire the correcting rewrites */ + QLIST_FOREACH(version, &acb->votes.vote_list, next) { + if (acb->votes.compare(&version->value, value)) { + continue; + } + QLIST_FOREACH(item, &version->items, next) { + bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov, + acb->nb_sectors, quorum_rewrite_aio_cb, acb); + } + } + + /* return true if any rewrite is done else false */ + return count; +} + static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) { int i; @@ -468,16 +533,17 @@ static int quorum_vote_error(QuorumAIOCB *acb) return ret; } -static void quorum_vote(QuorumAIOCB *acb) +static bool quorum_vote(QuorumAIOCB *acb) { bool quorum = true; + bool rewrite = false; int i, j, ret; QuorumVoteValue hash; BDRVQuorumState *s = acb->common.bs->opaque; QuorumVoteVersion *winner; if (quorum_has_too_much_io_failed(acb)) { - return; + return false; } /* get the index of the first successful read */ @@ -505,7 +571,7 @@ static void quorum_vote(QuorumAIOCB *acb) /* Every successful read agrees */ if (quorum) { quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); - return; + return false; } /* compute hashes for each successful read, also store indexes */ @@ -538,9 +604,15 @@ static void quorum_vote(QuorumAIOCB *acb) /* some versions are bad print them */ quorum_report_bad_versions(s, acb, &winner->value); + /* corruption correction is enabled */ + if (s->rewrite_corrupted) { + rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value); + } + free_exit: /* free lists */ quorum_free_vote_list(&acb->votes); + return rewrite; } static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs, @@ -705,6 +777,11 @@ static QemuOptsList quorum_runtime_opts = { .type = QEMU_OPT_BOOL, .help = "Trigger block verify mode if set", }, + { + .name = QUORUM_OPT_REWRITE, + .type = QEMU_OPT_BOOL, + .help = "Rewrite corrupted block on read quorum", + }, { /* end of list */ } }, }; @@ -766,6 +843,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, "and using two files with vote_threshold=2\n"); } + s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false); + if (s->rewrite_corrupted && s->is_blkverify) { + error_setg(&local_err, + "rewrite-corrupted=on cannot be used with blkverify=on"); + ret = -EINVAL; + goto exit; + } + /* allocate the children BlockDriverState array */ s->bs = g_new0(BlockDriverState *, s->num_children); opened = g_new0(bool, s->num_children); diff --git a/block/vmdk.c b/block/vmdk.c index 83dd6fe4fb..d0de0193fc 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2180,6 +2180,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_detach_aio_context = vmdk_detach_aio_context, .bdrv_attach_aio_context = vmdk_attach_aio_context, + .supports_backing = true, .create_opts = &vmdk_create_opts, }; diff --git a/blockdev.c b/blockdev.c index 03ab153d01..69b7c2a8c5 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1819,6 +1819,11 @@ void qmp_block_resize(bool has_device, const char *device, return; } + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + } + /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); @@ -2094,6 +2099,8 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, + bool has_node_name, const char *node_name, + bool has_replaces, const char *replaces, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, bool has_speed, int64_t speed, @@ -2107,6 +2114,7 @@ void qmp_drive_mirror(const char *device, const char *target, BlockDriverState *source, *target_bs; BlockDriver *drv = NULL; Error *local_err = NULL; + QDict *options = NULL; int flags; int64_t size; int ret; @@ -2180,6 +2188,29 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + if (has_replaces) { + BlockDriverState *to_replace_bs; + + if (!has_node_name) { + error_setg(errp, "a node-name must be provided when replacing a" + " named node of the graph"); + return; + } + + to_replace_bs = check_to_replace_node(replaces, &local_err); + + if (!to_replace_bs) { + error_propagate(errp, local_err); + return; + } + + if (size != bdrv_getlength(to_replace_bs)) { + error_setg(errp, "cannot replace image with a mirror image of " + "different size"); + return; + } + } + if ((sync == MIRROR_SYNC_MODE_FULL || !source) && mode != NEW_IMAGE_MODE_EXISTING) { @@ -2208,18 +2239,28 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + if (has_node_name) { + options = qdict_new(); + qdict_put(options, "node-name", qstring_from_str(node_name)); + } + /* Mirroring takes care of copy-on-write using the source's backing * file. */ target_bs = NULL; - ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING, - drv, &local_err); + ret = bdrv_open(&target_bs, target, NULL, options, + flags | BDRV_O_NO_BACKING, drv, &local_err); if (ret < 0) { error_propagate(errp, local_err); return; } - mirror_start(bs, target_bs, speed, granularity, buf_size, sync, + /* pass the node name to replace to mirror start since it's loose coupling + * and will allow to check whether the node still exist at mirror completion + */ + mirror_start(bs, target_bs, + has_replaces ? replaces : NULL, + speed, granularity, buf_size, sync, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { diff --git a/blockjob.c b/blockjob.c index 4da86cdfcd..67a64ea318 100644 --- a/blockjob.c +++ b/blockjob.c @@ -210,6 +210,20 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) job->busy = true; } +void block_job_yield(BlockJob *job) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + qemu_coroutine_yield(); + job->busy = true; +} + BlockJobInfo *block_job_query(BlockJob *job) { BlockJobInfo *info = g_new0(BlockJobInfo, 1); @@ -256,7 +270,11 @@ void block_job_event_completed(BlockJob *job, const char *msg) void block_job_event_ready(BlockJob *job) { - qapi_event_send_block_job_ready(bdrv_get_device_name(job->bs), &error_abort); + qapi_event_send_block_job_ready(job->driver->job_type, + bdrv_get_device_name(job->bs), + job->len, + job->offset, + job->speed, &error_abort); } BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, @@ -282,7 +300,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, default: abort(); } - qapi_event_send_block_job_error(bdrv_get_device_name(bs), + qapi_event_send_block_job_error(bdrv_get_device_name(job->bs), is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE, action, &error_abort); @@ -5273,6 +5273,18 @@ if test "$docs" = "yes" ; then mkdir -p QMP fi +# set up qemu-iotests in this build directory +iotests_common_env="tests/qemu-iotests/common.env" +iotests_check="tests/qemu-iotests/check" + +echo "# Automatically generated by configure - do not modify" > "$iotests_common_env" +echo >> "$iotests_common_env" +echo "export PYTHON='$python'" >> "$iotests_common_env" + +if [ ! -e "$iotests_check" ]; then + symlink "$source_path/$iotests_check" "$iotests_check" +fi + # Save the configure command line for later reuse. cat <<EOD >config.status #!/bin/sh diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt index 3a0c99e1da..a6197a9133 100644 --- a/docs/qapi-code-gen.txt +++ b/docs/qapi-code-gen.txt @@ -218,10 +218,10 @@ An example command is: === Events === Events are defined with the keyword 'event'. When 'data' is also specified, -additional info will be carried on. Finally there will be C API generated -in qapi-event.h; when called by QEMU code, a message with timestamp will -be emitted on the wire. If timestamp is -1, it means failure to retrieve host -time. +additional info will be included in the event. Finally there will be C API +generated in qapi-event.h; when called by QEMU code, a message with timestamp +will be emitted on the wire. If timestamp is -1, it means failure to retrieve +host time. An example event is: diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt new file mode 100644 index 0000000000..44be891261 --- /dev/null +++ b/docs/qmp/qmp-events.txt @@ -0,0 +1,559 @@ + QEMU Machine Protocol Events + ============================ + +ACPI_DEVICE_OST +--------------- + +Emitted when guest executes ACPI _OST method. + + - data: ACPIOSTInfo type as described in qapi-schema.json + +{ "event": "ACPI_DEVICE_OST", + "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } } + +BALLOON_CHANGE +-------------- + +Emitted when the guest changes the actual BALLOON level. This +value is equivalent to the 'actual' field return by the +'query-balloon' command + +Data: + +- "actual": actual level of the guest memory balloon in bytes (json-number) + +Example: + +{ "event": "BALLOON_CHANGE", + "data": { "actual": 944766976 }, + "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } + +BLOCK_IMAGE_CORRUPTED +--------------------- + +Emitted when a disk image is being marked corrupt. + +Data: + +- "device": Device name (json-string) +- "msg": Informative message (e.g., reason for the corruption) (json-string) +- "offset": If the corruption resulted from an image access, this is the access + offset into the image (json-int) +- "size": If the corruption resulted from an image access, this is the access + size (json-int) + +Example: + +{ "event": "BLOCK_IMAGE_CORRUPTED", + "data": { "device": "ide0-hd0", + "msg": "Prevented active L1 table overwrite", "offset": 196608, + "size": 65536 }, + "timestamp": { "seconds": 1378126126, "microseconds": 966463 } } + +BLOCK_IO_ERROR +-------------- + +Emitted when a disk I/O error occurs. + +Data: + +- "device": device name (json-string) +- "operation": I/O operation (json-string, "read" or "write") +- "action": action that has been taken, it's one of the following (json-string): + "ignore": error has been ignored + "report": error has been reported to the device + "stop": the VM is going to stop because of the error + +Example: + +{ "event": "BLOCK_IO_ERROR", + "data": { "device": "ide0-hd1", + "operation": "write", + "action": "stop" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +Note: If action is "stop", a STOP event will eventually follow the +BLOCK_IO_ERROR event. + +BLOCK_JOB_CANCELLED +------------------- + +Emitted when a block job has been cancelled. + +Data: + +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) +- "device": Device name (json-string) +- "len": Maximum progress value (json-int) +- "offset": Current progress value (json-int) + On success this is equal to len. + On failure this is less than len. +- "speed": Rate limit, bytes per second (json-int) + +Example: + +{ "event": "BLOCK_JOB_CANCELLED", + "data": { "type": "stream", "device": "virtio-disk0", + "len": 10737418240, "offset": 134217728, + "speed": 0 }, + "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } + +BLOCK_JOB_COMPLETED +------------------- + +Emitted when a block job has completed. + +Data: + +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) +- "device": Device name (json-string) +- "len": Maximum progress value (json-int) +- "offset": Current progress value (json-int) + On success this is equal to len. + On failure this is less than len. +- "speed": Rate limit, bytes per second (json-int) +- "error": Error message (json-string, optional) + Only present on failure. This field contains a human-readable + error message. There are no semantics other than that streaming + has failed and clients should not try to interpret the error + string. + +Example: + +{ "event": "BLOCK_JOB_COMPLETED", + "data": { "type": "stream", "device": "virtio-disk0", + "len": 10737418240, "offset": 10737418240, + "speed": 0 }, + "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } + +BLOCK_JOB_ERROR +--------------- + +Emitted when a block job encounters an error. + +Data: + +- "device": device name (json-string) +- "operation": I/O operation (json-string, "read" or "write") +- "action": action that has been taken, it's one of the following (json-string): + "ignore": error has been ignored, the job may fail later + "report": error will be reported and the job canceled + "stop": error caused job to be paused + +Example: + +{ "event": "BLOCK_JOB_ERROR", + "data": { "device": "ide0-hd1", + "operation": "write", + "action": "stop" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +BLOCK_JOB_READY +--------------- + +Emitted when a block job is ready to complete. + +Data: + +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) +- "device": Device name (json-string) +- "len": Maximum progress value (json-int) +- "offset": Current progress value (json-int) + On success this is equal to len. + On failure this is less than len. +- "speed": Rate limit, bytes per second (json-int) + +Example: + +{ "event": "BLOCK_JOB_READY", + "data": { "device": "drive0", "type": "mirror", "speed": 0, + "len": 2097152, "offset": 2097152 } + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR +event. + +DEVICE_DELETED +-------------- + +Emitted whenever the device removal completion is acknowledged +by the guest. +At this point, it's safe to reuse the specified device ID. +Device removal can be initiated by the guest or by HMP/QMP commands. + +Data: + +- "device": device name (json-string, optional) +- "path": device path (json-string) + +{ "event": "DEVICE_DELETED", + "data": { "device": "virtio-net-pci-0", + "path": "/machine/peripheral/virtio-net-pci-0" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +DEVICE_TRAY_MOVED +----------------- + +It's emitted whenever the tray of a removable device is moved by the guest +or by HMP/QMP commands. + +Data: + +- "device": device name (json-string) +- "tray-open": true if the tray has been opened or false if it has been closed + (json-bool) + +{ "event": "DEVICE_TRAY_MOVED", + "data": { "device": "ide1-cd0", + "tray-open": true + }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +GUEST_PANICKED +-------------- + +Emitted when guest OS panic is detected. + +Data: + +- "action": Action that has been taken (json-string, currently always "pause"). + +Example: + +{ "event": "GUEST_PANICKED", + "data": { "action": "pause" } } + +NIC_RX_FILTER_CHANGED +--------------------- + +The event is emitted once until the query command is executed, +the first event will always be emitted. + +Data: + +- "name": net client name (json-string) +- "path": device path (json-string) + +{ "event": "NIC_RX_FILTER_CHANGED", + "data": { "name": "vnet0", + "path": "/machine/peripheral/vnet0/virtio-backend" }, + "timestamp": { "seconds": 1368697518, "microseconds": 326866 } } +} + +QUORUM_FAILURE +-------------- + +Emitted by the Quorum block driver if it fails to establish a quorum. + +Data: + +- "reference": device name if defined else node name. +- "sector-num": Number of the first sector of the failed read operation. +- "sector-count": Failed read operation sector count. + +Example: + +{ "event": "QUORUM_FAILURE", + "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 }, + "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } + +QUORUM_REPORT_BAD +----------------- + +Emitted to report a corruption of a Quorum file. + +Data: + +- "error": Error message (json-string, optional) + Only present on failure. This field contains a human-readable + error message. There are no semantics other than that the + block layer reported an error and clients should not try to + interpret the error string. +- "node-name": The graph node name of the block driver state. +- "sector-num": Number of the first sector of the failed read operation. +- "sector-count": Failed read operation sector count. + +Example: + +{ "event": "QUORUM_REPORT_BAD", + "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 }, + "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } + +RESET +----- + +Emitted when the Virtual Machine is reseted. + +Data: None. + +Example: + +{ "event": "RESET", + "timestamp": { "seconds": 1267041653, "microseconds": 9518 } } + +RESUME +------ + +Emitted when the Virtual Machine resumes execution. + +Data: None. + +Example: + +{ "event": "RESUME", + "timestamp": { "seconds": 1271770767, "microseconds": 582542 } } + +RTC_CHANGE +---------- + +Emitted when the guest changes the RTC time. + +Data: + +- "offset": Offset between base RTC clock (as specified by -rtc base), and +new RTC clock value (json-number) + +Example: + +{ "event": "RTC_CHANGE", + "data": { "offset": 78 }, + "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } + +SHUTDOWN +-------- + +Emitted when the Virtual Machine is powered down. + +Data: None. + +Example: + +{ "event": "SHUTDOWN", + "timestamp": { "seconds": 1267040730, "microseconds": 682951 } } + +Note: If the command-line option "-no-shutdown" has been specified, a STOP +event will eventually follow the SHUTDOWN event. + +SPICE_CONNECTED, SPICE_DISCONNECTED +----------------------------------- + +Emitted when a SPICE client connects or disconnects. + +Data: + +- "server": Server information (json-object) + - "host": IP address (json-string) + - "port": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") +- "client": Client information (json-object) + - "host": IP address (json-string) + - "port": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + +Example: + +{ "timestamp": {"seconds": 1290688046, "microseconds": 388707}, + "event": "SPICE_CONNECTED", + "data": { + "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"}, + "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"} +}} + +SPICE_INITIALIZED +----------------- + +Emitted after initial handshake and authentication takes place (if any) +and the SPICE channel is up'n'running + +Data: + +- "server": Server information (json-object) + - "host": IP address (json-string) + - "port": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "auth": authentication method (json-string, optional) +- "client": Client information (json-object) + - "host": IP address (json-string) + - "port": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "connection-id": spice connection id. All channels with the same id + belong to the same spice session (json-int) + - "channel-type": channel type. "1" is the main control channel, filter for + this one if you want track spice sessions only (json-int) + - "channel-id": channel id. Usually "0", might be different needed when + multiple channels of the same type exist, such as multiple + display channels in a multihead setup (json-int) + - "tls": whevener the channel is encrypted (json-bool) + +Example: + +{ "timestamp": {"seconds": 1290688046, "microseconds": 417172}, + "event": "SPICE_INITIALIZED", + "data": {"server": {"auth": "spice", "port": "5921", + "family": "ipv4", "host": "127.0.0.1"}, + "client": {"port": "49004", "family": "ipv4", "channel-type": 3, + "connection-id": 1804289383, "host": "127.0.0.1", + "channel-id": 0, "tls": true} +}} + +STOP +---- + +Emitted when the Virtual Machine is stopped. + +Data: None. + +Example: + +{ "event": "STOP", + "timestamp": { "seconds": 1267041730, "microseconds": 281295 } } + +SUSPEND +------- + +Emitted when guest enters S3 state. + +Data: None. + +Example: + +{ "event": "SUSPEND", + "timestamp": { "seconds": 1344456160, "microseconds": 309119 } } + +SUSPEND_DISK +------------ + +Emitted when the guest makes a request to enter S4 state. + +Data: None. + +Example: + +{ "event": "SUSPEND_DISK", + "timestamp": { "seconds": 1344456160, "microseconds": 309119 } } + +Note: QEMU shuts down when entering S4 state. + +VNC_CONNECTED +------------- + +Emitted when a VNC client establishes a connection. + +Data: + +- "server": Server information (json-object) + - "host": IP address (json-string) + - "service": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "auth": authentication method (json-string, optional) +- "client": Client information (json-object) + - "host": IP address (json-string) + - "service": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + +Example: + +{ "event": "VNC_CONNECTED", + "data": { + "server": { "auth": "sasl", "family": "ipv4", + "service": "5901", "host": "0.0.0.0" }, + "client": { "family": "ipv4", "service": "58425", + "host": "127.0.0.1" } }, + "timestamp": { "seconds": 1262976601, "microseconds": 975795 } } + + +Note: This event is emitted before any authentication takes place, thus +the authentication ID is not provided. + +VNC_DISCONNECTED +---------------- + +Emitted when the connection is closed. + +Data: + +- "server": Server information (json-object) + - "host": IP address (json-string) + - "service": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "auth": authentication method (json-string, optional) +- "client": Client information (json-object) + - "host": IP address (json-string) + - "service": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "x509_dname": TLS dname (json-string, optional) + - "sasl_username": SASL username (json-string, optional) + +Example: + +{ "event": "VNC_DISCONNECTED", + "data": { + "server": { "auth": "sasl", "family": "ipv4", + "service": "5901", "host": "0.0.0.0" }, + "client": { "family": "ipv4", "service": "58425", + "host": "127.0.0.1", "sasl_username": "luiz" } }, + "timestamp": { "seconds": 1262976601, "microseconds": 975795 } } + +VNC_INITIALIZED +--------------- + +Emitted after authentication takes place (if any) and the VNC session is +made active. + +Data: + +- "server": Server information (json-object) + - "host": IP address (json-string) + - "service": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "auth": authentication method (json-string, optional) +- "client": Client information (json-object) + - "host": IP address (json-string) + - "service": port number (json-string) + - "family": address family (json-string, "ipv4" or "ipv6") + - "x509_dname": TLS dname (json-string, optional) + - "sasl_username": SASL username (json-string, optional) + +Example: + +{ "event": "VNC_INITIALIZED", + "data": { + "server": { "auth": "sasl", "family": "ipv4", + "service": "5901", "host": "0.0.0.0"}, + "client": { "family": "ipv4", "service": "46089", + "host": "127.0.0.1", "sasl_username": "luiz" } }, + "timestamp": { "seconds": 1263475302, "microseconds": 150772 } } + +WAKEUP +------ + +Emitted when the guest has woken up from S3 and is running. + +Data: None. + +Example: + +{ "event": "WAKEUP", + "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } + +WATCHDOG +-------- + +Emitted when the watchdog device's timer is expired. + +Data: + +- "action": Action that has been taken, it's one of the following (json-string): + "reset", "shutdown", "poweroff", "pause", "debug", or "none" + +Example: + +{ "event": "WATCHDOG", + "data": { "action": "reset" }, + "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } + +Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is +followed respectively by the RESET, SHUTDOWN, or STOP events. @@ -933,6 +933,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) } qmp_drive_mirror(device, filename, !!format, format, + false, NULL, false, NULL, full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, true, mode, false, 0, false, 0, false, 0, false, 0, false, 0, &err); diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index c10b7b70fb..09bd2c70ab 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -24,16 +24,6 @@ #include "hw/virtio/virtio-bus.h" #include "qom/object_interfaces.h" -typedef struct { - VirtIOBlockDataPlane *s; - QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */ - VirtQueueElement *elem; /* saved data from the virtqueue */ - QEMUIOVector qiov; /* original request iovecs */ - struct iovec bounce_iov; /* used if guest buffers are unaligned */ - QEMUIOVector bounce_qiov; /* bounce buffer iovecs */ - bool read; /* read or write? */ -} VirtIOBlockRequest; - struct VirtIOBlockDataPlane { bool started; bool starting; @@ -57,6 +47,8 @@ struct VirtIOBlockDataPlane { /* Operation blocker on BDS */ Error *blocker; + void (*saved_complete_request)(struct VirtIOBlockReq *req, + unsigned char status); }; /* Raise an interrupt to signal guest, if necessary */ @@ -69,215 +61,14 @@ static void notify_guest(VirtIOBlockDataPlane *s) event_notifier_set(s->guest_notifier); } -static void complete_rdwr(void *opaque, int ret) -{ - VirtIOBlockRequest *req = opaque; - struct virtio_blk_inhdr hdr; - int len; - - if (likely(ret == 0)) { - hdr.status = VIRTIO_BLK_S_OK; - len = req->qiov.size; - } else { - hdr.status = VIRTIO_BLK_S_IOERR; - len = 0; - } - - trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret); - - if (req->read && req->bounce_iov.iov_base) { - qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len); - } - - if (req->bounce_iov.iov_base) { - qemu_vfree(req->bounce_iov.iov_base); - } - - qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr)); - qemu_iovec_destroy(req->inhdr); - g_slice_free(QEMUIOVector, req->inhdr); - - /* According to the virtio specification len should be the number of bytes - * written to, but for virtio-blk it seems to be the number of bytes - * transferred plus the status bytes. - */ - vring_push(&req->s->vring, req->elem, len + sizeof(hdr)); - notify_guest(req->s); - g_slice_free(VirtIOBlockRequest, req); -} - -static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem, - QEMUIOVector *inhdr, unsigned char status) -{ - struct virtio_blk_inhdr hdr = { - .status = status, - }; - - qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr)); - qemu_iovec_destroy(inhdr); - g_slice_free(QEMUIOVector, inhdr); - - vring_push(&s->vring, elem, sizeof(hdr)); - notify_guest(s); -} - -/* Get disk serial number */ -static void do_get_id_cmd(VirtIOBlockDataPlane *s, - struct iovec *iov, unsigned int iov_cnt, - VirtQueueElement *elem, QEMUIOVector *inhdr) -{ - char id[VIRTIO_BLK_ID_BYTES]; - - /* Serial number not NUL-terminated when longer than buffer */ - strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id)); - iov_from_buf(iov, iov_cnt, 0, id, sizeof(id)); - complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK); -} - -static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read, - struct iovec *iov, unsigned iov_cnt, - int64_t sector_num, VirtQueueElement *elem, - QEMUIOVector *inhdr) -{ - VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest); - QEMUIOVector *qiov; - int nb_sectors; - - /* Fill in virtio block metadata needed for completion */ - req->s = s; - req->elem = elem; - req->inhdr = inhdr; - req->read = read; - qemu_iovec_init_external(&req->qiov, iov, iov_cnt); - - qiov = &req->qiov; - - if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) { - void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size); - - /* Populate bounce buffer with data for writes */ - if (!read) { - qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size); - } - - /* Redirect I/O to aligned bounce buffer */ - req->bounce_iov.iov_base = bounce_buffer; - req->bounce_iov.iov_len = qiov->size; - qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1); - qiov = &req->bounce_qiov; - } - - nb_sectors = qiov->size / BDRV_SECTOR_SIZE; - - if (read) { - bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors, - complete_rdwr, req); - } else { - bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors, - complete_rdwr, req); - } -} - -static void complete_flush(void *opaque, int ret) -{ - VirtIOBlockRequest *req = opaque; - unsigned char status; - - if (ret == 0) { - status = VIRTIO_BLK_S_OK; - } else { - status = VIRTIO_BLK_S_IOERR; - } - - complete_request_early(req->s, req->elem, req->inhdr, status); - g_slice_free(VirtIOBlockRequest, req); -} - -static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem, - QEMUIOVector *inhdr) +static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) { - VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest); - req->s = s; - req->elem = elem; - req->inhdr = inhdr; + stb_p(&req->in->status, status); - bdrv_aio_flush(s->blk->conf.bs, complete_flush, req); -} - -static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem, - QEMUIOVector *inhdr) -{ - int status; - - status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem); - complete_request_early(s, elem, inhdr, status); -} - -static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem) -{ - struct iovec *iov = elem->out_sg; - struct iovec *in_iov = elem->in_sg; - unsigned out_num = elem->out_num; - unsigned in_num = elem->in_num; - struct virtio_blk_outhdr outhdr; - QEMUIOVector *inhdr; - size_t in_size; - - /* Copy in outhdr */ - if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr, - sizeof(outhdr)) != sizeof(outhdr))) { - error_report("virtio-blk request outhdr too short"); - return -EFAULT; - } - iov_discard_front(&iov, &out_num, sizeof(outhdr)); - - /* Grab inhdr for later */ - in_size = iov_size(in_iov, in_num); - if (in_size < sizeof(struct virtio_blk_inhdr)) { - error_report("virtio_blk request inhdr too short"); - return -EFAULT; - } - inhdr = g_slice_new(QEMUIOVector); - qemu_iovec_init(inhdr, 1); - qemu_iovec_concat_iov(inhdr, in_iov, in_num, - in_size - sizeof(struct virtio_blk_inhdr), - sizeof(struct virtio_blk_inhdr)); - iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); - - /* TODO Linux sets the barrier bit even when not advertised! */ - outhdr.type &= ~VIRTIO_BLK_T_BARRIER; - - switch (outhdr.type) { - case VIRTIO_BLK_T_IN: - do_rdwr_cmd(s, true, in_iov, in_num, - outhdr.sector * 512 / BDRV_SECTOR_SIZE, - elem, inhdr); - return 0; - - case VIRTIO_BLK_T_OUT: - do_rdwr_cmd(s, false, iov, out_num, - outhdr.sector * 512 / BDRV_SECTOR_SIZE, - elem, inhdr); - return 0; - - case VIRTIO_BLK_T_SCSI_CMD: - do_scsi_cmd(s, elem, inhdr); - return 0; - - case VIRTIO_BLK_T_FLUSH: - do_flush_cmd(s, elem, inhdr); - return 0; - - case VIRTIO_BLK_T_GET_ID: - do_get_id_cmd(s, in_iov, in_num, elem, inhdr); - return 0; - - default: - error_report("virtio-blk unsupported request type %#x", outhdr.type); - qemu_iovec_destroy(inhdr); - g_slice_free(QEMUIOVector, inhdr); - return -EFAULT; - } + vring_push(&req->dev->dataplane->vring, req->elem, + req->qiov.size + sizeof(*req->in)); + notify_guest(req->dev->dataplane); + g_slice_free(VirtIOBlockReq, req); } static void handle_notify(EventNotifier *e) @@ -286,7 +77,11 @@ static void handle_notify(EventNotifier *e) host_notifier); VirtQueueElement *elem; + VirtIOBlockReq *req; int ret; + MultiReqBuffer mrb = { + .num_writes = 0, + }; event_notifier_test_and_clear(&s->host_notifier); for (;;) { @@ -303,14 +98,14 @@ static void handle_notify(EventNotifier *e) trace_virtio_blk_data_plane_process_request(s, elem->out_num, elem->in_num, elem->index); - if (process_request(s, elem) < 0) { - vring_set_broken(&s->vring); - vring_free_element(elem); - ret = -EFAULT; - break; - } + req = g_slice_new(VirtIOBlockReq); + req->dev = VIRTIO_BLK(s->vdev); + req->elem = elem; + virtio_blk_handle_request(req, &mrb); } + virtio_submit_multiwrite(s->blk->conf.bs, &mrb); + if (likely(ret == -EAGAIN)) { /* vring emptied */ /* Re-enable guest->host notifies and stop processing the vring. * But if the guest has snuck in more descriptors, keep processing. @@ -330,6 +125,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, Error **errp) { VirtIOBlockDataPlane *s; + VirtIOBlock *vblk = VIRTIO_BLK(vdev); Error *local_err = NULL; *dataplane = NULL; @@ -372,6 +168,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk, bdrv_op_block_all(blk->conf.bs, s->blocker); *dataplane = s; + s->saved_complete_request = vblk->complete_request; + vblk->complete_request = complete_request_vring; } /* Context: QEMU global mutex held */ @@ -446,10 +244,12 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); if (!s->started || s->stopping) { return; } s->stopping = true; + vblk->complete_request = s->saved_complete_request; trace_virtio_blk_data_plane_stop(s); aio_context_acquire(s->ctx); diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 08562ea390..a222e3f9a4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -12,6 +12,7 @@ */ #include "qemu-common.h" +#include "qemu/iov.h" #include "qemu/error-report.h" #include "trace.h" #include "hw/block/block.h" @@ -27,18 +28,24 @@ #endif #include "hw/virtio/virtio-bus.h" -typedef struct VirtIOBlockReq +static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) +{ + VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq); + req->dev = s; + req->elem = g_slice_new0(VirtQueueElement); + return req; +} + +static void virtio_blk_free_request(VirtIOBlockReq *req) { - VirtIOBlock *dev; - VirtQueueElement elem; - struct virtio_blk_inhdr *in; - struct virtio_blk_outhdr *out; - QEMUIOVector qiov; - struct VirtIOBlockReq *next; - BlockAcctCookie acct; -} VirtIOBlockReq; + if (req) { + g_slice_free(VirtQueueElement, req->elem); + g_slice_free(VirtIOBlockReq, req); + } +} -static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) +static void virtio_blk_complete_request(VirtIOBlockReq *req, + unsigned char status) { VirtIOBlock *s = req->dev; VirtIODevice *vdev = VIRTIO_DEVICE(s); @@ -46,10 +53,15 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) trace_virtio_blk_req_complete(req, status); stb_p(&req->in->status, status); - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); + virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in)); virtio_notify(vdev, s->vq); } +static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) +{ + req->dev->complete_request(req, status); +} + static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, bool is_read) { @@ -62,7 +74,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, } else if (action == BLOCK_ERROR_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); bdrv_acct_done(s->bs, &req->acct); - g_free(req); + virtio_blk_free_request(req); } bdrv_error_action(s->bs, action, is_read, error); @@ -76,14 +88,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) trace_virtio_blk_rw_complete(req, ret); if (ret) { - bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); + bool is_read = !(ldl_p(&req->out.type) & VIRTIO_BLK_T_OUT); if (virtio_blk_handle_rw_error(req, -ret, is_read)) return; } virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); bdrv_acct_done(req->dev->bs, &req->acct); - g_free(req); + virtio_blk_free_request(req); } static void virtio_blk_flush_complete(void *opaque, int ret) @@ -98,27 +110,16 @@ static void virtio_blk_flush_complete(void *opaque, int ret) virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); bdrv_acct_done(req->dev->bs, &req->acct); - g_free(req); -} - -static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) -{ - VirtIOBlockReq *req = g_malloc(sizeof(*req)); - req->dev = s; - req->qiov.size = 0; - req->next = NULL; - return req; + virtio_blk_free_request(req); } static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) { VirtIOBlockReq *req = virtio_blk_alloc_request(s); - if (req != NULL) { - if (!virtqueue_pop(s->vq, &req->elem)) { - g_free(req); - return NULL; - } + if (!virtqueue_pop(s->vq, req->elem)) { + virtio_blk_free_request(req); + return NULL; } return req; @@ -247,17 +248,12 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req) { int status; - status = virtio_blk_handle_scsi_req(req->dev, &req->elem); + status = virtio_blk_handle_scsi_req(req->dev, req->elem); virtio_blk_req_complete(req, status); - g_free(req); + virtio_blk_free_request(req); } -typedef struct MultiReqBuffer { - BlockRequest blkreq[32]; - unsigned int num_writes; -} MultiReqBuffer; - -static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb) +void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb) { int i, ret; @@ -293,7 +289,7 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb) BlockRequest *blkreq; uint64_t sector; - sector = ldq_p(&req->out->sector); + sector = ldq_p(&req->out.sector); bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE); @@ -327,7 +323,7 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req) { uint64_t sector; - sector = ldq_p(&req->out->sector); + sector = ldq_p(&req->out.sector); bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ); @@ -346,26 +342,39 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req) virtio_blk_rw_complete, req); } -static void virtio_blk_handle_request(VirtIOBlockReq *req, - MultiReqBuffer *mrb) +void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) { uint32_t type; + struct iovec *in_iov = req->elem->in_sg; + struct iovec *iov = req->elem->out_sg; + unsigned in_num = req->elem->in_num; + unsigned out_num = req->elem->out_num; - if (req->elem.out_num < 1 || req->elem.in_num < 1) { + if (req->elem->out_num < 1 || req->elem->in_num < 1) { error_report("virtio-blk missing headers"); exit(1); } - if (req->elem.out_sg[0].iov_len < sizeof(*req->out) || - req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) { - error_report("virtio-blk header not in correct element"); + if (unlikely(iov_to_buf(iov, out_num, 0, &req->out, + sizeof(req->out)) != sizeof(req->out))) { + error_report("virtio-blk request outhdr too short"); + exit(1); + } + + iov_discard_front(&iov, &out_num, sizeof(req->out)); + + if (in_num < 1 || + in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { + error_report("virtio-blk request inhdr too short"); exit(1); } - req->out = (void *)req->elem.out_sg[0].iov_base; - req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base; + req->in = (void *)in_iov[in_num - 1].iov_base + + in_iov[in_num - 1].iov_len + - sizeof(struct virtio_blk_inhdr); + iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); - type = ldl_p(&req->out->type); + type = ldl_p(&req->out.type); if (type & VIRTIO_BLK_T_FLUSH) { virtio_blk_handle_flush(req, mrb); @@ -378,23 +387,23 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req, * NB: per existing s/n string convention the string is * terminated by '\0' only when shorter than buffer. */ - strncpy(req->elem.in_sg[0].iov_base, + strncpy(req->elem->in_sg[0].iov_base, s->blk.serial ? s->blk.serial : "", - MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES)); + MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES)); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); - g_free(req); + virtio_blk_free_request(req); } else if (type & VIRTIO_BLK_T_OUT) { - qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1], - req->elem.out_num - 1); + qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1], + req->elem->out_num - 1); virtio_blk_handle_write(req, mrb); } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) { /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */ - qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0], - req->elem.in_num - 1); + qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0], + req->elem->in_num - 1); virtio_blk_handle_read(req); } else { virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); - g_free(req); + virtio_blk_free_request(req); } } @@ -460,7 +469,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running, } if (!s->bh) { - s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s); + s->bh = aio_bh_new(bdrv_get_aio_context(s->blk.conf.bs), + virtio_blk_dma_restart_bh, s); qemu_bh_schedule(s->bh); } } @@ -609,7 +619,8 @@ static void virtio_blk_save(QEMUFile *f, void *opaque) while (req) { qemu_put_sbyte(f, 1); - qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); + qemu_put_buffer(f, (unsigned char *)req->elem, + sizeof(VirtQueueElement)); req = req->next; } qemu_put_sbyte(f, 0); @@ -631,14 +642,15 @@ static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) while (qemu_get_sbyte(f)) { VirtIOBlockReq *req = virtio_blk_alloc_request(s); - qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem)); + qemu_get_buffer(f, (unsigned char *)req->elem, + sizeof(VirtQueueElement)); req->next = s->rq; s->rq = req; - virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, - req->elem.in_num, 1); - virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, - req->elem.out_num, 0); + virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr, + req->elem->in_num, 1); + virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr, + req->elem->out_num, 0); } return 0; @@ -729,6 +741,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1; s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output); + s->complete_request = virtio_blk_complete_request; #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err); if (err != NULL) { diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c index 6c8be0fe26..54eb15f3af 100644 --- a/hw/char/virtio-console.c +++ b/hw/char/virtio-console.c @@ -14,6 +14,7 @@ #include "qemu/error-report.h" #include "trace.h" #include "hw/virtio/virtio-serial.h" +#include "qapi-event.h" #define TYPE_VIRTIO_CONSOLE_SERIAL_PORT "virtserialport" #define VIRTIO_CONSOLE(obj) \ @@ -81,11 +82,16 @@ static ssize_t flush_buf(VirtIOSerialPort *port, static void set_guest_connected(VirtIOSerialPort *port, int guest_connected) { VirtConsole *vcon = VIRTIO_CONSOLE(port); + DeviceState *dev = DEVICE(port); - if (!vcon->chr) { - return; + if (vcon->chr) { + qemu_chr_fe_set_open(vcon->chr, guest_connected); + } + + if (dev->id) { + qapi_event_send_vserport_change(dev->id, guest_connected, + &error_abort); } - qemu_chr_fe_set_open(vcon->chr, guest_connected); } /* Readiness of the guest to accept data on a port */ diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index de433b2e38..52c2f8afa5 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = { static int parse_netdev(DeviceState *dev, const char *str, void **ptr) { NICPeers *peers_ptr = (NICPeers *)ptr; - NICConf *conf = container_of(peers_ptr, NICConf, peers); NetClientState **ncs = peers_ptr->ncs; NetClientState *peers[MAX_QUEUE_NUM]; int queues, i = 0; @@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr) ncs[i]->queue_index = i; } - conf->queues = queues; + peers_ptr->queues = queues; return 0; diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 76dd6f5708..0fd2a84c7b 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val) { ICSState *ics = (ICSState *)opaque; - if (ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { set_irq_lsi(ics, srcno, val); } else { set_irq_msi(ics, srcno, val); @@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server, trace_xics_ics_write_xive(nr, srcno, server, priority); - if (ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { write_xive_lsi(ics, srcno); } else { write_xive_msi(ics, srcno); @@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics) for (i = 0; i < ics->nr_irqs; i++) { /* FIXME: filter by server#? */ - if (ics->islsi[i]) { + if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { resend_lsi(ics, i); } else { resend_msi(ics, i); @@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr) trace_xics_ics_eoi(nr); - if (ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { irq->status &= ~XICS_STATUS_SENT; } } @@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev) { ICSState *ics = ICS(dev); int i; + uint8_t flags[ics->nr_irqs]; + + for (i = 0; i < ics->nr_irqs; i++) { + flags[i] = ics->irqs[i].flags; + } memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs); + for (i = 0; i < ics->nr_irqs; i++) { ics->irqs[i].priority = 0xff; ics->irqs[i].saved_priority = 0xff; + ics->irqs[i].flags = flags[i]; } } @@ -563,13 +570,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id) static const VMStateDescription vmstate_ics_irq = { .name = "ics/irq", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(server, ICSIRQState), VMSTATE_UINT8(priority, ICSIRQState), VMSTATE_UINT8(saved_priority, ICSIRQState), VMSTATE_UINT8(status, ICSIRQState), + VMSTATE_UINT8(flags, ICSIRQState), VMSTATE_END_OF_LIST() }, }; @@ -606,7 +614,6 @@ static void ics_realize(DeviceState *dev, Error **errp) return; } ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); - ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool)); ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs); } @@ -633,21 +640,166 @@ static const TypeInfo ics_info = { /* * Exported functions */ +static int xics_find_source(XICSState *icp, int irq) +{ + int sources = 1; + int src; + + /* FIXME: implement multiple sources */ + for (src = 0; src < sources; ++src) { + ICSState *ics = &icp->ics[src]; + if (ics_valid_irq(ics, irq)) { + return src; + } + } + + return -1; +} qemu_irq xics_get_qirq(XICSState *icp, int irq) { - if (!ics_valid_irq(icp->ics, irq)) { - return NULL; + int src = xics_find_source(icp, irq); + + if (src >= 0) { + ICSState *ics = &icp->ics[src]; + return ics->qirqs[irq - ics->offset]; } - return icp->ics->qirqs[irq - icp->ics->offset]; + return NULL; +} + +static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) +{ + assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK)); + + ics->irqs[srcno].flags |= + lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI; } void xics_set_irq_type(XICSState *icp, int irq, bool lsi) { - assert(ics_valid_irq(icp->ics, irq)); + int src = xics_find_source(icp, irq); + ICSState *ics; - icp->ics->islsi[irq - icp->ics->offset] = lsi; + assert(src >= 0); + + ics = &icp->ics[src]; + ics_set_irq_type(ics, irq - ics->offset, lsi); +} + +#define ICS_IRQ_FREE(ics, srcno) \ + (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) + +static int ics_find_free_block(ICSState *ics, int num, int alignnum) +{ + int first, i; + + for (first = 0; first < ics->nr_irqs; first += alignnum) { + if (num > (ics->nr_irqs - first)) { + return -1; + } + for (i = first; i < first + num; ++i) { + if (!ICS_IRQ_FREE(ics, i)) { + break; + } + } + if (i == (first + num)) { + return first; + } + } + + return -1; +} + +int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi) +{ + ICSState *ics = &icp->ics[src]; + int irq; + + if (irq_hint) { + assert(src == xics_find_source(icp, irq_hint)); + if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { + trace_xics_alloc_failed_hint(src, irq_hint); + return -1; + } + irq = irq_hint; + } else { + irq = ics_find_free_block(ics, 1, 1); + if (irq < 0) { + trace_xics_alloc_failed_no_left(src); + return -1; + } + irq += ics->offset; + } + + ics_set_irq_type(ics, irq - ics->offset, lsi); + trace_xics_alloc(src, irq); + + return irq; +} + +/* + * Allocate block of consequtive IRQs, returns a number of the first. + * If align==true, aligns the first IRQ number to num. + */ +int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align) +{ + int i, first = -1; + ICSState *ics = &icp->ics[src]; + + assert(src == 0); + /* + * MSIMesage::data is used for storing VIRQ so + * it has to be aligned to num to support multiple + * MSI vectors. MSI-X is not affected by this. + * The hint is used for the first IRQ, the rest should + * be allocated continuously. + */ + if (align) { + assert((num == 1) || (num == 2) || (num == 4) || + (num == 8) || (num == 16) || (num == 32)); + first = ics_find_free_block(ics, num, num); + } else { + first = ics_find_free_block(ics, num, 1); + } + + if (first >= 0) { + for (i = first; i < first + num; ++i) { + ics_set_irq_type(ics, i, lsi); + } + } + first += ics->offset; + + trace_xics_alloc_block(src, first, num, lsi, align); + + return first; +} + +static void ics_free(ICSState *ics, int srcno, int num) +{ + int i; + + for (i = srcno; i < srcno + num; ++i) { + if (ICS_IRQ_FREE(ics, i)) { + trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset); + } + memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); + } +} + +void xics_free(XICSState *icp, int irq, int num) +{ + int src = xics_find_source(icp, irq); + + if (src >= 0) { + ICSState *ics = &icp->ics[src]; + + /* FIXME: implement multiple sources */ + assert(src == 0); + + trace_xics_ics_free(ics - icp->ics, irq, num); + ics_free(ics, irq - ics->offset, num); + } } /* @@ -866,10 +1018,10 @@ static void xics_realize(DeviceState *dev, Error **errp) } /* Registration of global state belongs into realize */ - spapr_rtas_register("ibm,set-xive", rtas_set_xive); - spapr_rtas_register("ibm,get-xive", rtas_get_xive); - spapr_rtas_register("ibm,int-off", rtas_int_off); - spapr_rtas_register("ibm,int-on", rtas_int_on); + spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); + spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive); + spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off); + spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on); spapr_register_hypercall(H_CPPR, h_cppr); spapr_register_hypercall(H_IPI, h_ipi); diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 09476ae34d..20b19e9d4f 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -38,10 +38,6 @@ typedef struct KVMXICSState { XICSState parent_obj; - uint32_t set_xive_token; - uint32_t get_xive_token; - uint32_t int_off_token; - uint32_t int_on_token; int kernel_xics_fd; } KVMXICSState; @@ -224,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id) state |= KVM_XICS_MASKED; } - if (ics->islsi[i]) { + if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { state |= KVM_XICS_LEVEL_SENSITIVE; if (irq->status & XICS_STATUS_ASSERTED) { state |= KVM_XICS_PENDING; @@ -253,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val) int rc; args.irq = srcno + ics->offset; - if (!ics->islsi[srcno]) { + if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) { if (!val) { return; } @@ -271,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev) { ICSState *ics = ICS(dev); int i; + uint8_t flags[ics->nr_irqs]; + + for (i = 0; i < ics->nr_irqs; i++) { + flags[i] = ics->irqs[i].flags; + } memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs); + for (i = 0; i < ics->nr_irqs; i++) { ics->irqs[i].priority = 0xff; ics->irqs[i].saved_priority = 0xff; + ics->irqs[i].flags = flags[i]; } ics_set_kvm_state(ics, 1); @@ -290,7 +293,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp) return; } ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); - ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool)); ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs); } @@ -392,32 +394,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) goto fail; } - icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy); - icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy); - icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy); - icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy); + spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy); + spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy); + spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy); + spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy); - rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token, - "ibm,set-xive"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive"); goto fail; } - rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token, - "ibm,get-xive"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive"); goto fail; } - rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on"); goto fail; } - rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off"); + rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off"); if (rc < 0) { error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off"); goto fail; diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c index 7437c2e3c3..7b279c4f05 100644 --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -39,6 +39,7 @@ #include "qemu/range.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" +#include "hw/misc/vfio.h" /* #define DEBUG_VFIO */ #ifdef DEBUG_VFIO @@ -3649,6 +3650,39 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) container->iommu_data.type1.initialized = true; + } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) { + ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); + if (ret) { + error_report("vfio: failed to set group container: %m"); + ret = -errno; + goto free_container_exit; + } + + ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU); + if (ret) { + error_report("vfio: failed to set iommu for container: %m"); + ret = -errno; + goto free_container_exit; + } + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_report("vfio: failed to enable container: %m"); + ret = -errno; + goto free_container_exit; + } + + container->iommu_data.type1.listener = vfio_memory_listener; + container->iommu_data.release = vfio_listener_release; + + memory_listener_register(&container->iommu_data.type1.listener, + container->space->as); + } else { error_report("vfio: No available IOMMU models"); ret = -EINVAL; @@ -4318,3 +4352,47 @@ static void register_vfio_pci_dev_type(void) } type_init(register_vfio_pci_dev_type) + +static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param) +{ + VFIOGroup *group; + VFIOContainer *container; + int ret = -1; + + group = vfio_get_group(groupid, as); + if (!group) { + error_report("vfio: group %d not registered", groupid); + return ret; + } + + container = group->container; + if (group->container) { + ret = ioctl(container->fd, req, param); + if (ret < 0) { + error_report("vfio: failed to ioctl container: ret=%d, %s", + ret, strerror(errno)); + } + } + + vfio_put_group(group); + + return ret; +} + +int vfio_container_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param) +{ + /* We allow only certain ioctls to the container */ + switch (req) { + case VFIO_CHECK_EXTENSION: + case VFIO_IOMMU_SPAPR_TCE_GET_INFO: + break; + default: + /* Return an error on unknown requests */ + error_report("vfio: unsupported ioctl %X", req); + return -1; + } + + return vfio_container_do_ioctl(as, groupid, req, param); +} diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index aaa3ff2360..3263e3fe90 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1217,7 +1217,6 @@ static void eepro100_write_mdi(EEPRO100State *s) break; case 1: /* Status Register */ missing("not writable"); - data = s->mdimem[reg]; break; case 2: /* PHY Identification Register (Word 1) */ case 3: /* PHY Identification Register (Word 2) */ @@ -1230,7 +1229,8 @@ static void eepro100_write_mdi(EEPRO100State *s) default: missing("not implemented"); } - s->mdimem[reg] = data; + s->mdimem[reg] &= eepro100_mdi_mask[reg]; + s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg]; } else if (opcode == 2) { /* MDI read */ switch (reg) { diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 00b5e07ddd..e51d753cee 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1542,7 +1542,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); - n->max_queues = MAX(n->nic_conf.queues, 1); + n->max_queues = MAX(n->nic_conf.peers.queues, 1); n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues); n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); n->curr_queues = 1; diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index af49c4667d..6a72ef4814 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -153,8 +153,8 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev) return -1; } - spapr_rtas_register("nvram-fetch", rtas_nvram_fetch); - spapr_rtas_register("nvram-store", rtas_nvram_store); + spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch); + spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store); return 0; } diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c index e72fe2a70b..21f805f314 100644 --- a/hw/pci-host/uninorth.c +++ b/hw/pci-host/uninorth.c @@ -230,7 +230,7 @@ PCIBus *pci_pmac_init(qemu_irq *pic, d = UNI_NORTH_PCI_HOST_BRIDGE(dev); memory_region_init(&d->pci_mmio, OBJECT(d), "pci-mmio", 0x100000000ULL); memory_region_init_alias(&d->pci_hole, OBJECT(d), "pci-hole", &d->pci_mmio, - 0x80000000ULL, 0x70000000ULL); + 0x80000000ULL, 0x10000000ULL); memory_region_add_subregion(address_space_mem, 0x80000000ULL, &d->pci_hole); diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index ea747f0a20..edd44d03e7 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o +ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) +obj-y += spapr_pci_vfio.o +endif # PowerPC 4xx boards obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o obj-y += ppc4xx_pci.o diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index a973c18d88..bb2e75fe1b 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -316,10 +316,15 @@ static int ppce500_load_device_tree(MachineState *machine, * device it finds in the dt as serial output device. And we generate * devices in reverse order to the dt. */ - dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, - soc, mpic, "serial1", 1, false); - dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, - soc, mpic, "serial0", 0, true); + if (serial_hds[1]) { + dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, + soc, mpic, "serial1", 1, false); + } + + if (serial_hds[0]) { + dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, + soc, mpic, "serial0", 0, true); + } snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc, MPC8544_UTIL_OFFSET); diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index e493dc1b4b..89d3cadf19 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -373,18 +373,11 @@ static void ppc_core99_init(MachineState *machine) machine_arch = ARCH_MAC99; } /* init basic PC hardware */ - pci_vga_init(pci_bus); - escc_mem = escc_init(0, pic[0x25], pic[0x24], serial_hds[0], serial_hds[1], ESCC_CLOCK, 4); memory_region_init_alias(escc_bar, NULL, "escc-bar", escc_mem, 0, memory_region_size(escc_mem)); - for(i = 0; i < nb_nics; i++) - pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); - - ide_drive_get(hd, MAX_IDE_BUS); - macio = pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO); dev = DEVICE(macio); qdev_connect_gpio_out(dev, 0, pic[0x19]); /* CUDA */ @@ -395,6 +388,8 @@ static void ppc_core99_init(MachineState *machine) macio_init(macio, pic_mem, escc_bar); /* We only emulate 2 out of 3 IDE controllers for now */ + ide_drive_get(hd, MAX_IDE_BUS); + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), "ide[0]")); macio_ide_init_drives(macio_ide, hd); @@ -420,8 +415,15 @@ static void ppc_core99_init(MachineState *machine) } } - if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) + pci_vga_init(pci_bus); + + if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) { graphic_depth = 15; + } + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); + } /* The NewWorld NVRAM is not located in the MacIO device */ dev = qdev_create(NULL, TYPE_MACIO_NVRAM); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 82f183f173..a8ba916970 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -85,16 +85,16 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) +typedef struct sPAPRMachineState sPAPRMachineState; -typedef struct SPAPRMachine SPAPRMachine; #define TYPE_SPAPR_MACHINE "spapr-machine" #define SPAPR_MACHINE(obj) \ - OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE) + OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) /** - * SPAPRMachine: + * sPAPRMachineState: */ -struct SPAPRMachine { +struct sPAPRMachineState { /*< private >*/ MachineState parent_obj; @@ -102,76 +102,8 @@ struct SPAPRMachine { char *kvm_type; }; - sPAPREnvironment *spapr; -int spapr_allocate_irq(int hint, bool lsi) -{ - int irq; - - if (hint) { - irq = hint; - if (hint >= spapr->next_irq) { - spapr->next_irq = hint + 1; - } - /* FIXME: we should probably check for collisions somehow */ - } else { - irq = spapr->next_irq++; - } - - /* Configure irq type */ - if (!xics_get_qirq(spapr->icp, irq)) { - return 0; - } - - xics_set_irq_type(spapr->icp, irq, lsi); - - return irq; -} - -/* - * Allocate block of consequtive IRQs, returns a number of the first. - * If msi==true, aligns the first IRQ number to num. - */ -int spapr_allocate_irq_block(int num, bool lsi, bool msi) -{ - int first = -1; - int i, hint = 0; - - /* - * MSIMesage::data is used for storing VIRQ so - * it has to be aligned to num to support multiple - * MSI vectors. MSI-X is not affected by this. - * The hint is used for the first IRQ, the rest should - * be allocated continuously. - */ - if (msi) { - assert((num == 1) || (num == 2) || (num == 4) || - (num == 8) || (num == 16) || (num == 32)); - hint = (spapr->next_irq + num - 1) & ~(num - 1); - } - - for (i = 0; i < num; ++i) { - int irq; - - irq = spapr_allocate_irq(hint, lsi); - if (!irq) { - return -1; - } - - if (0 == i) { - first = irq; - hint = 0; - } - - /* If the above doesn't create a consecutive block then that's - * an internal bug */ - assert(irq == (first + i)); - } - - return first; -} - static XICSState *try_create_xics(const char *type, int nr_servers, int nr_irqs) { @@ -442,6 +374,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, if (boot_device) { _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); } + if (boot_menu) { + _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu))); + } _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width))); _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height))); _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth))); @@ -956,7 +891,7 @@ static const VMStateDescription vmstate_spapr = { .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32(next_irq, sPAPREnvironment), + VMSTATE_UNUSED(4), /* used to be @next_irq */ /* RTC offset */ VMSTATE_UINT64(rtc_offset, sPAPREnvironment), @@ -1360,7 +1295,6 @@ static void ppc_spapr_init(MachineState *machine) /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads, XICS_IRQS); - spapr->next_irq = XICS_IRQ_BASE; /* init CPUs */ if (cpu_model == NULL) { @@ -1619,14 +1553,14 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, static char *spapr_get_kvm_type(Object *obj, Error **errp) { - SPAPRMachine *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *sm = SPAPR_MACHINE(obj); return g_strdup(sm->kvm_type); } static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) { - SPAPRMachine *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *sm = SPAPR_MACHINE(obj); g_free(sm->kvm_type); sm->kvm_type = g_strdup(value); @@ -1660,7 +1594,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) static const TypeInfo spapr_machine_info = { .name = TYPE_SPAPR_MACHINE, .parent = TYPE_MACHINE, - .instance_size = sizeof(SPAPRMachine), + .instance_size = sizeof(sPAPRMachineState), .instance_init = spapr_machine_initfn, .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { @@ -1669,9 +1603,25 @@ static const TypeInfo spapr_machine_info = { }, }; +static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->name = "pseries-2.1"; + mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1"; + mc->is_default = 0; +} + +static const TypeInfo spapr_machine_2_1_info = { + .name = TYPE_SPAPR_MACHINE "2.1", + .parent = TYPE_SPAPR_MACHINE, + .class_init = spapr_machine_2_1_class_init, +}; + static void spapr_machine_register_types(void) { type_register_static(&spapr_machine_info); + type_register_static(&spapr_machine_2_1_info); } type_init(spapr_machine_register_types) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 16fa49e886..1b6157dec4 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -314,8 +314,9 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, void spapr_events_init(sPAPREnvironment *spapr) { - spapr->epow_irq = spapr_allocate_msi(0); + spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false); spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); - spapr_rtas_register("check-exception", check_exception); + spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", + check_exception); } diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 9e49ec4a5c..698ae60953 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -118,7 +118,8 @@ static int spapr_tce_table_realize(DeviceState *dev) tcet->table = kvmppc_create_spapr_tce(tcet->liobn, tcet->nb_table << tcet->page_shift, - &tcet->fd); + &tcet->fd, + tcet->vfio_accel); } if (!tcet->table) { @@ -142,7 +143,8 @@ static int spapr_tce_table_realize(DeviceState *dev) sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, - uint32_t nb_table) + uint32_t nb_table, + bool vfio_accel) { sPAPRTCETable *tcet; @@ -161,6 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, tcet->bus_offset = bus_offset; tcet->page_shift = page_shift; tcet->nb_table = nb_table; + tcet->vfio_accel = vfio_accel; object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 988f8cb858..9ed39a93b7 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, } /* - * Find an entry with config_addr or returns the empty one if not found AND - * alloc_new is set. - * At the moment the msi_table entries are never released so there is - * no point to look till the end of the list if we need to find the free entry. - */ -static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr, - bool alloc_new) -{ - int i; - - for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) { - if (!phb->msi_table[i].nvec) { - break; - } - if (phb->msi_table[i].config_addr == config_addr) { - return i; - } - } - if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) { - trace_spapr_pci_msi("Allocating new MSI config", i, config_addr); - return i; - } - - return -1; -} - -/* * Set MSI/MSIX message data. * This is required for msi_notify()/msix_notify() which * will write at the addresses via spapr_msi_write(). + * + * If hwaddr == 0, all entries will have .data == first_irq i.e. + * table will be reset. */ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, unsigned first_irq, unsigned req_num) @@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, return; } - for (i = 0; i < req_num; ++i, ++msg.data) { + for (i = 0; i < req_num; ++i) { msix_set_message(pdev, i, msg); trace_spapr_pci_msi_setup(pdev->name, i, msg.address); + if (addr) { + ++msg.data; + } } } @@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */ unsigned int seq_num = rtas_ld(args, 5); unsigned int ret_intr_type; - int ndev, irq, max_irqs = 0; + unsigned int irq, max_irqs = 0, num = 0; sPAPRPHBState *phb = NULL; PCIDevice *pdev = NULL; + bool msix = false; + spapr_pci_msi *msi; + int *config_addr_key; switch (func) { case RTAS_CHANGE_MSI_FN: @@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, /* Releasing MSIs */ if (!req_num) { - ndev = spapr_msicfg_find(phb, config_addr, false); - if (ndev < 0) { - trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr); + msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + if (!msi) { + trace_spapr_pci_msi("Releasing wrong config", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); return; } - trace_spapr_pci_msi("Released MSIs", ndev, config_addr); + + xics_free(spapr->icp, msi->first_irq, msi->num); + spapr_msi_setmsg(pdev, 0, msix, 0, num); + g_hash_table_remove(phb->msi, &config_addr); + + trace_spapr_pci_msi("Released MSIs", config_addr); rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, 0); return; @@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, /* Enabling MSI */ - /* Find a device number in the map to add or reuse the existing one */ - ndev = spapr_msicfg_find(phb, config_addr, true); - if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) { - error_report("No free entry for a new MSI device"); - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); - return; - } - trace_spapr_pci_msi("Configuring MSI", ndev, config_addr); - /* Check if the device supports as many IRQs as requested */ if (ret_intr_type == RTAS_TYPE_MSI) { max_irqs = msi_nr_vectors_allocated(pdev); @@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, max_irqs = pdev->msix_entries_nr; } if (!max_irqs) { - error_report("Requested interrupt type %d is not enabled for device#%d", - ret_intr_type, ndev); + error_report("Requested interrupt type %d is not enabled for device %x", + ret_intr_type, config_addr); rtas_st(rets, 0, -1); /* Hardware error */ return; } /* Correct the number if the guest asked for too many */ if (req_num > max_irqs) { + trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs); req_num = max_irqs; + irq = 0; /* to avoid misleading trace */ + goto out; } - /* Check if there is an old config and MSI number has not changed */ - if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) { - /* Unexpected behaviour */ - error_report("Cannot reuse MSI config for device#%d", ndev); + /* Allocate MSIs */ + irq = xics_alloc_block(spapr->icp, 0, req_num, false, + ret_intr_type == RTAS_TYPE_MSI); + if (!irq) { + error_report("Cannot allocate MSIs for device %x", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); return; } - /* There is no cached config, allocate MSIs */ - if (!phb->msi_table[ndev].nvec) { - irq = spapr_allocate_irq_block(req_num, false, - ret_intr_type == RTAS_TYPE_MSI); - if (irq < 0) { - error_report("Cannot allocate MSIs for device#%d", ndev); - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); - return; - } - phb->msi_table[ndev].irq = irq; - phb->msi_table[ndev].nvec = req_num; - phb->msi_table[ndev].config_addr = config_addr; - } - /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX, - phb->msi_table[ndev].irq, req_num); + irq, req_num); + /* Add MSI device to cache */ + msi = g_new(spapr_pci_msi, 1); + msi->first_irq = irq; + msi->num = req_num; + config_addr_key = g_new(int, 1); + *config_addr_key = config_addr; + g_hash_table_insert(phb->msi, config_addr_key, msi); + +out: rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, req_num); rtas_st(rets, 2, ++seq_num); rtas_st(rets, 3, ret_intr_type); - trace_spapr_pci_rtas_ibm_change_msi(func, req_num); + trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq); } static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, @@ -395,25 +372,28 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, uint32_t config_addr = rtas_ld(args, 0); uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); - int ndev; sPAPRPHBState *phb = NULL; + PCIDevice *pdev = NULL; + spapr_pci_msi *msi; - /* Fins sPAPRPHBState */ + /* Find sPAPRPHBState */ phb = find_phb(spapr, buid); - if (!phb) { + if (phb) { + pdev = find_dev(spapr, buid, config_addr); + } + if (!phb || !pdev) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } /* Find device descriptor and start IRQ */ - ndev = spapr_msicfg_find(phb, config_addr, false); - if (ndev < 0) { - trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr); + msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) { + trace_spapr_pci_msi("Failed to return vector", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); return; } - - intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num; + intr_src_num = msi->first_irq + ioa_intr_num; trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num, intr_src_num); @@ -634,7 +614,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) for (i = 0; i < PCI_NUM_PINS; i++) { uint32_t irq; - irq = spapr_allocate_lsi(0); + irq = xics_alloc_block(spapr->icp, 0, 1, true, false); if (!irq) { error_setg(errp, "spapr_allocate_lsi failed"); return; @@ -649,6 +629,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } info->finish_realize(sphb, errp); + + sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); } static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) @@ -658,7 +640,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, 0, SPAPR_TCE_PAGE_SHIFT, - 0x40000000 >> SPAPR_TCE_PAGE_SHIFT); + 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false); if (!tcet) { error_setg(errp, "Unable to create TCE table for %s", sphb->dtbusname); @@ -712,22 +694,69 @@ static const VMStateDescription vmstate_spapr_pci_lsi = { }; static const VMStateDescription vmstate_spapr_pci_msi = { - .name = "spapr_pci/lsi", + .name = "spapr_pci/msi", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(config_addr, struct spapr_pci_msi), - VMSTATE_UINT32(irq, struct spapr_pci_msi), - VMSTATE_UINT32(nvec, struct spapr_pci_msi), - + .fields = (VMStateField []) { + VMSTATE_UINT32(key, spapr_pci_msi_mig), + VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig), + VMSTATE_UINT32(value.num, spapr_pci_msi_mig), VMSTATE_END_OF_LIST() }, }; +static void spapr_pci_pre_save(void *opaque) +{ + sPAPRPHBState *sphb = opaque; + GHashTableIter iter; + gpointer key, value; + int i; + + if (sphb->msi_devs) { + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + } + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { + return; + } + sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig)); + + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + } +} + +static int spapr_pci_post_load(void *opaque, int version_id) +{ + sPAPRPHBState *sphb = opaque; + gpointer key, value; + int i; + + for (i = 0; i < sphb->msi_devs_num; ++i) { + key = g_memdup(&sphb->msi_devs[i].key, + sizeof(sphb->msi_devs[i].key)); + value = g_memdup(&sphb->msi_devs[i].value, + sizeof(sphb->msi_devs[i].value)); + g_hash_table_insert(sphb->msi, key, value); + } + if (sphb->msi_devs) { + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + } + sphb->msi_devs_num = 0; + + return 0; +} + static const VMStateDescription vmstate_spapr_pci = { .name = "spapr_pci", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .pre_save = spapr_pci_pre_save, + .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState), VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState), @@ -737,9 +766,9 @@ static const VMStateDescription vmstate_spapr_pci = { VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState), VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0, vmstate_spapr_pci_lsi, struct spapr_pci_lsi), - VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0, - vmstate_spapr_pci_msi, struct spapr_pci_msi), - + VMSTATE_INT32(msi_devs_num, sPAPRPHBState), + VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0, + vmstate_spapr_pci_msi, spapr_pci_msi_mig), VMSTATE_END_OF_LIST() }, }; @@ -909,14 +938,20 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, void spapr_pci_rtas_init(void) { - spapr_rtas_register("read-pci-config", rtas_read_pci_config); - spapr_rtas_register("write-pci-config", rtas_write_pci_config); - spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config); - spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config); + spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config", + rtas_read_pci_config); + spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config", + rtas_write_pci_config); + spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config", + rtas_ibm_read_pci_config); + spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config", + rtas_ibm_write_pci_config); if (msi_supported) { - spapr_rtas_register("ibm,query-interrupt-source-number", + spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER, + "ibm,query-interrupt-source-number", rtas_ibm_query_interrupt_source_number); - spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi); + spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi", + rtas_ibm_change_msi); } } diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c new file mode 100644 index 0000000000..d3bddf2887 --- /dev/null +++ b/hw/ppc/spapr_pci_vfio.c @@ -0,0 +1,102 @@ +/* + * QEMU sPAPR PCI host for VFIO + * + * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "linux/vfio.h" +#include "hw/misc/vfio.h" + +static Property spapr_phb_vfio_properties[] = { + DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) +{ + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); + struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) }; + int ret; + sPAPRTCETable *tcet; + uint32_t liobn = svphb->phb.dma_liobn; + + if (svphb->iommugroupid == -1) { + error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); + return; + } + + ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + VFIO_CHECK_EXTENSION, + (void *) VFIO_SPAPR_TCE_IOMMU); + if (ret != 1) { + error_setg_errno(errp, -ret, + "spapr-vfio: SPAPR extension is not supported"); + return; + } + + ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, -ret, + "spapr-vfio: get info from container failed"); + return; + } + + tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start, + SPAPR_TCE_PAGE_SHIFT, + info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT, + true); + if (!tcet) { + error_setg(errp, "spapr-vfio: failed to create VFIO TCE table"); + return; + } + + /* Register default 32bit DMA window */ + memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset, + spapr_tce_get_iommu(tcet)); +} + +static void spapr_phb_vfio_reset(DeviceState *qdev) +{ + /* Do nothing */ +} + +static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); + + dc->props = spapr_phb_vfio_properties; + dc->reset = spapr_phb_vfio_reset; + spc->finish_realize = spapr_phb_vfio_finish_realize; +} + +static const TypeInfo spapr_phb_vfio_info = { + .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE, + .parent = TYPE_SPAPR_PCI_HOST_BRIDGE, + .instance_size = sizeof(sPAPRPHBVFIOState), + .class_init = spapr_phb_vfio_class_init, + .class_size = sizeof(sPAPRPHBClass), +}; + +static void spapr_pci_vfio_register_types(void) +{ + type_register_static(&spapr_phb_vfio_info); +} + +type_init(spapr_pci_vfio_register_types) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 8d08539baa..9ba1ba69f9 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -36,9 +36,6 @@ #include <libfdt.h> -#define TOKEN_BASE 0x2000 -#define TOKEN_MAX 0x100 - static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -225,8 +222,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, env->msr = 0; } -#define DIAGNOSTICS_RUN_MODE 42 - static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, @@ -236,16 +231,28 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, target_ulong parameter = rtas_ld(args, 0); target_ulong buffer = rtas_ld(args, 1); target_ulong length = rtas_ld(args, 2); - target_ulong ret = RTAS_OUT_NOT_SUPPORTED; + target_ulong ret = RTAS_OUT_SUCCESS; switch (parameter) { - case DIAGNOSTICS_RUN_MODE: - if (length == 1) { - rtas_st(buffer, 0, 0); - ret = RTAS_OUT_SUCCESS; - } + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: { + char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d", + max_cpus, smp_cpus); + rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val)); + g_free(param_val); break; } + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: { + uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED; + + rtas_st_buffer(buffer, length, ¶m_val, sizeof(param_val)); + break; + } + case RTAS_SYSPARM_UUID: + rtas_st_buffer(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0)); + break; + default: + ret = RTAS_OUT_NOT_SUPPORTED; + } rtas_st(rets, 0, ret); } @@ -260,7 +267,9 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, target_ulong ret = RTAS_OUT_NOT_SUPPORTED; switch (parameter) { - case DIAGNOSTICS_RUN_MODE: + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: + case RTAS_SYSPARM_UUID: ret = RTAS_OUT_NOT_AUTHORIZED; break; } @@ -271,17 +280,14 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, static struct rtas_call { const char *name; spapr_rtas_fn fn; -} rtas_table[TOKEN_MAX]; - -static struct rtas_call *rtas_next = rtas_table; +} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - if ((token >= TOKEN_BASE) - && ((token - TOKEN_BASE) < TOKEN_MAX)) { - struct rtas_call *call = rtas_table + (token - TOKEN_BASE); + if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) { + struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE); if (call->fn) { call->fn(cpu, spapr, token, nargs, args, nret, rets); @@ -303,23 +309,22 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -int spapr_rtas_register(const char *name, spapr_rtas_fn fn) +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) { - int i; - - for (i = 0; i < (rtas_next - rtas_table); i++) { - if (strcmp(name, rtas_table[i].name) == 0) { - fprintf(stderr, "RTAS call \"%s\" registered twice\n", name); - exit(1); - } + if (!((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX))) { + fprintf(stderr, "RTAS invalid token 0x%x\n", token); + exit(1); } - assert(rtas_next < (rtas_table + TOKEN_MAX)); - - rtas_next->name = name; - rtas_next->fn = fn; + token -= RTAS_TOKEN_BASE; + if (rtas_table[token].name) { + fprintf(stderr, "RTAS call \"%s\" is registered already as 0x%x\n", + rtas_table[token].name, token); + exit(1); + } - return (rtas_next++ - rtas_table) + TOKEN_BASE; + rtas_table[token].name = name; + rtas_table[token].fn = fn; } int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, @@ -359,7 +364,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, return ret; } - for (i = 0; i < TOKEN_MAX; i++) { + for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { struct rtas_call *call = &rtas_table[i]; if (!call->name) { @@ -367,7 +372,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, } ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name, - i + TOKEN_BASE); + i + RTAS_TOKEN_BASE); if (ret < 0) { fprintf(stderr, "Couldn't add rtas token for %s: %s\n", call->name, fdt_strerror(ret)); @@ -380,18 +385,24 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, static void core_rtas_register_types(void) { - spapr_rtas_register("display-character", rtas_display_character); - spapr_rtas_register("get-time-of-day", rtas_get_time_of_day); - spapr_rtas_register("set-time-of-day", rtas_set_time_of_day); - spapr_rtas_register("power-off", rtas_power_off); - spapr_rtas_register("system-reboot", rtas_system_reboot); - spapr_rtas_register("query-cpu-stopped-state", + spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", + rtas_display_character); + spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day", + rtas_get_time_of_day); + spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day", + rtas_set_time_of_day); + spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off); + spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot", + rtas_system_reboot); + spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state", rtas_query_cpu_stopped_state); - spapr_rtas_register("start-cpu", rtas_start_cpu); - spapr_rtas_register("stop-self", rtas_stop_self); - spapr_rtas_register("ibm,get-system-parameter", + spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu); + spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self); + spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER, + "ibm,get-system-parameter", rtas_ibm_get_system_parameter); - spapr_rtas_register("ibm,set-system-parameter", + spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER, + "ibm,set-system-parameter", rtas_ibm_set_system_parameter); } diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 04e16ae04d..dc9e46a7b1 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -449,7 +449,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) dev->qdev.id = id; } - dev->irq = spapr_allocate_msi(dev->irq); + dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false); if (!dev->irq) { return -1; } @@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) 0, SPAPR_TCE_PAGE_SHIFT, pc->rtce_window_size >> - SPAPR_TCE_PAGE_SHIFT); + SPAPR_TCE_PAGE_SHIFT, false); address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id); } @@ -517,8 +517,9 @@ VIOsPAPRBus *spapr_vio_bus_init(void) spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq); /* RTAS calls */ - spapr_rtas_register("ibm,set-tce-bypass", rtas_set_tce_bypass); - spapr_rtas_register("quiesce", rtas_quiesce); + spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass", + rtas_set_tce_bypass); + spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce); return bus; } diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 4aebd34924..9f607d42bb 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -106,7 +106,7 @@ int select_watchdog_action(const char *p) */ void watchdog_perform_action(void) { - switch(watchdog_action) { + switch (watchdog_action) { case WDT_RESET: /* same as 'system_reset' in monitor */ qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort); qemu_system_reset_request(); diff --git a/hw/xtensa/Makefile.objs b/hw/xtensa/Makefile.objs index 6ead7820c4..cb77dc3793 100644 --- a/hw/xtensa/Makefile.objs +++ b/hw/xtensa/Makefile.objs @@ -1,3 +1,3 @@ obj-y += pic_cpu.o -obj-y += xtensa_sim.o -obj-y += xtensa_lx60.o +obj-y += sim.o +obj-y += xtfpga.o diff --git a/hw/xtensa/bootparam.h b/hw/xtensa/bootparam.h new file mode 100644 index 0000000000..955f4e86e3 --- /dev/null +++ b/hw/xtensa/bootparam.h @@ -0,0 +1,49 @@ +#ifndef HW_XTENSA_BOOTPARAM +#define HW_XTENSA_BOOTPARAM + +#define BP_TAG_COMMAND_LINE 0x1001 /* command line (0-terminated string)*/ +#define BP_TAG_INITRD 0x1002 /* ramdisk addr and size (bp_meminfo) */ +#define BP_TAG_MEMORY 0x1003 /* memory addr and size (bp_meminfo) */ +#define BP_TAG_SERIAL_BAUDRATE 0x1004 /* baud rate of current console. */ +#define BP_TAG_SERIAL_PORT 0x1005 /* serial device of current console */ +#define BP_TAG_FDT 0x1006 /* flat device tree addr */ + +#define BP_TAG_FIRST 0x7B0B /* first tag with a version number */ +#define BP_TAG_LAST 0x7E0B /* last tag */ + +typedef struct BpTag { + uint16_t tag; + uint16_t size; +} BpTag; + +typedef struct BpMemInfo { + uint32_t type; + uint32_t start; + uint32_t end; +} BpMemInfo; + +#define MEMORY_TYPE_CONVENTIONAL 0x1000 +#define MEMORY_TYPE_NONE 0x2000 + +static inline size_t get_tag_size(size_t data_size) +{ + return data_size + sizeof(BpTag) + 4; +} + +static inline ram_addr_t put_tag(ram_addr_t addr, uint16_t tag, + size_t size, const void *data) +{ + BpTag bp_tag = { + .tag = tswap16(tag), + .size = tswap16((size + 3) & ~3), + }; + + cpu_physical_memory_write(addr, &bp_tag, sizeof(bp_tag)); + addr += sizeof(bp_tag); + cpu_physical_memory_write(addr, data, size); + addr += (size + 3) & ~3; + + return addr; +} + +#endif diff --git a/hw/xtensa/xtensa_sim.c b/hw/xtensa/sim.c index 89da43c160..9642bf54c7 100644 --- a/hw/xtensa/xtensa_sim.c +++ b/hw/xtensa/sim.c @@ -31,6 +31,7 @@ #include "elf.h" #include "exec/memory.h" #include "exec/address-spaces.h" +#include "qemu/error-report.h" static uint64_t translate_phys_addr(void *opaque, uint64_t addr) { @@ -63,8 +64,9 @@ static void xtensa_sim_init(MachineState *machine) for (n = 0; n < smp_cpus; n++) { cpu = cpu_xtensa_init(cpu_model); if (cpu == NULL) { - fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + error_report("unable to find CPU definition '%s'\n", + cpu_model); + exit(EXIT_FAILURE); } env = &cpu->env; diff --git a/hw/xtensa/xtensa_bootparam.h b/hw/xtensa/xtensa_bootparam.h deleted file mode 100644 index 38ef32bdb6..0000000000 --- a/hw/xtensa/xtensa_bootparam.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef HW_XTENSA_BOOTPARAM -#define HW_XTENSA_BOOTPARAM - -typedef struct BpTag { - uint16_t tag; - uint16_t size; -} BpTag; - -static inline ram_addr_t put_tag(ram_addr_t addr, uint16_t tag, - size_t size, const void *data) -{ - BpTag bp_tag = { - .tag = tswap16(tag), - .size = tswap16((size + 3) & ~3), - }; - - cpu_physical_memory_write(addr, &bp_tag, sizeof(bp_tag)); - addr += sizeof(bp_tag); - cpu_physical_memory_write(addr, data, size); - addr += (size + 3) & ~3; - - return addr; -} - -#endif diff --git a/hw/xtensa/xtensa_lx60.c b/hw/xtensa/xtfpga.c index 507dd88452..a2dff5a13e 100644 --- a/hw/xtensa/xtensa_lx60.c +++ b/hw/xtensa/xtfpga.c @@ -37,11 +37,14 @@ #include "hw/block/flash.h" #include "sysemu/blockdev.h" #include "sysemu/char.h" -#include "xtensa_bootparam.h" +#include "sysemu/device_tree.h" +#include "qemu/error-report.h" +#include "bootparam.h" typedef struct LxBoardDesc { hwaddr flash_base; size_t flash_size; + size_t flash_boot_base; size_t flash_sector_size; size_t sram_size; } LxBoardDesc; @@ -172,9 +175,12 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine) MemoryRegion *ram, *rom, *system_io; DriveInfo *dinfo; pflash_t *flash = NULL; + QemuOpts *machine_opts = qemu_get_machine_opts(); const char *cpu_model = machine->cpu_model; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; + const char *kernel_filename = qemu_opt_get(machine_opts, "kernel"); + const char *kernel_cmdline = qemu_opt_get(machine_opts, "append"); + const char *dtb_filename = qemu_opt_get(machine_opts, "dtb"); + const char *initrd_filename = qemu_opt_get(machine_opts, "initrd"); int n; if (!cpu_model) { @@ -184,8 +190,9 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine) for (n = 0; n < smp_cpus; n++) { cpu = cpu_xtensa_init(cpu_model); if (cpu == NULL) { - fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + error_report("unable to find CPU definition '%s'\n", + cpu_model); + exit(EXIT_FAILURE); } env = &cpu->env; @@ -226,39 +233,117 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine) board->flash_size / board->flash_sector_size, 4, 0x0000, 0x0000, 0x0000, 0x0000, be); if (flash == NULL) { - fprintf(stderr, "Unable to mount pflash\n"); - exit(1); + error_report("unable to mount pflash\n"); + exit(EXIT_FAILURE); } } /* Use presence of kernel file name as 'boot from SRAM' switch. */ if (kernel_filename) { + uint32_t entry_point = env->pc; + size_t bp_size = 3 * get_tag_size(0); /* first/last and memory tags */ + uint32_t tagptr = 0xfe000000 + board->sram_size; + uint32_t cur_tagptr; + BpMemInfo memory_location = { + .type = tswap32(MEMORY_TYPE_CONVENTIONAL), + .start = tswap32(0), + .end = tswap32(machine->ram_size), + }; + uint32_t lowmem_end = machine->ram_size < 0x08000000 ? + machine->ram_size : 0x08000000; + uint32_t cur_lowmem = QEMU_ALIGN_UP(lowmem_end / 2, 4096); + rom = g_malloc(sizeof(*rom)); memory_region_init_ram(rom, NULL, "lx60.sram", board->sram_size); vmstate_register_ram_global(rom); memory_region_add_subregion(system_memory, 0xfe000000, rom); - /* Put kernel bootparameters to the end of that SRAM */ if (kernel_cmdline) { - size_t cmdline_size = strlen(kernel_cmdline) + 1; - size_t bp_size = sizeof(BpTag[4]) + cmdline_size; - uint32_t tagptr = (0xfe000000 + board->sram_size - bp_size) & ~0xff; + bp_size += get_tag_size(strlen(kernel_cmdline) + 1); + } + if (dtb_filename) { + bp_size += get_tag_size(sizeof(uint32_t)); + } + if (initrd_filename) { + bp_size += get_tag_size(sizeof(BpMemInfo)); + } - env->regs[2] = tagptr; + /* Put kernel bootparameters to the end of that SRAM */ + tagptr = (tagptr - bp_size) & ~0xff; + cur_tagptr = put_tag(tagptr, BP_TAG_FIRST, 0, NULL); + cur_tagptr = put_tag(cur_tagptr, BP_TAG_MEMORY, + sizeof(memory_location), &memory_location); + + if (kernel_cmdline) { + cur_tagptr = put_tag(cur_tagptr, BP_TAG_COMMAND_LINE, + strlen(kernel_cmdline) + 1, kernel_cmdline); + } + if (dtb_filename) { + int fdt_size; + void *fdt = load_device_tree(dtb_filename, &fdt_size); + uint32_t dtb_addr = tswap32(cur_lowmem); + + if (!fdt) { + error_report("could not load DTB '%s'\n", dtb_filename); + exit(EXIT_FAILURE); + } - tagptr = put_tag(tagptr, 0x7b0b, 0, NULL); - if (cmdline_size > 1) { - tagptr = put_tag(tagptr, 0x1001, - cmdline_size, kernel_cmdline); + cpu_physical_memory_write(cur_lowmem, fdt, fdt_size); + cur_tagptr = put_tag(cur_tagptr, BP_TAG_FDT, + sizeof(dtb_addr), &dtb_addr); + cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + fdt_size, 4096); + } + if (initrd_filename) { + BpMemInfo initrd_location = { 0 }; + int initrd_size = load_ramdisk(initrd_filename, cur_lowmem, + lowmem_end - cur_lowmem); + + if (initrd_size < 0) { + initrd_size = load_image_targphys(initrd_filename, + cur_lowmem, + lowmem_end - cur_lowmem); + } + if (initrd_size < 0) { + error_report("could not load initrd '%s'\n", initrd_filename); + exit(EXIT_FAILURE); } - tagptr = put_tag(tagptr, 0x7e0b, 0, NULL); + initrd_location.start = tswap32(cur_lowmem); + initrd_location.end = tswap32(cur_lowmem + initrd_size); + cur_tagptr = put_tag(cur_tagptr, BP_TAG_INITRD, + sizeof(initrd_location), &initrd_location); + cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + initrd_size, 4096); } + cur_tagptr = put_tag(cur_tagptr, BP_TAG_LAST, 0, NULL); + env->regs[2] = tagptr; + uint64_t elf_entry; uint64_t elf_lowaddr; int success = load_elf(kernel_filename, translate_phys_addr, cpu, &elf_entry, &elf_lowaddr, NULL, be, ELF_MACHINE, 0); if (success > 0) { - env->pc = elf_entry; + entry_point = elf_entry; + } else { + hwaddr ep; + int is_linux; + success = load_uimage(kernel_filename, &ep, NULL, &is_linux); + if (success > 0 && is_linux) { + entry_point = ep; + } else { + error_report("could not load kernel '%s'\n", + kernel_filename); + exit(EXIT_FAILURE); + } + } + if (entry_point != env->pc) { + static const uint8_t jx_a0[] = { +#ifdef TARGET_WORDS_BIGENDIAN + 0x0a, 0, 0, +#else + 0xa0, 0, 0, +#endif + }; + env->regs[0] = entry_point; + cpu_physical_memory_write(env->pc, jx_a0, sizeof(jx_a0)); } } else { if (flash) { @@ -266,9 +351,9 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine) MemoryRegion *flash_io = g_malloc(sizeof(*flash_io)); memory_region_init_alias(flash_io, NULL, "lx60.flash", - flash_mr, 0, - board->flash_size < 0x02000000 ? - board->flash_size : 0x02000000); + flash_mr, board->flash_boot_base, + board->flash_size - board->flash_boot_base < 0x02000000 ? + board->flash_size - board->flash_boot_base : 0x02000000); memory_region_add_subregion(system_memory, 0xfe000000, flash_io); } @@ -313,6 +398,7 @@ static void xtensa_kc705_init(MachineState *machine) static const LxBoardDesc kc705_board = { .flash_base = 0xf0000000, .flash_size = 0x08000000, + .flash_boot_base = 0x06000000, .flash_sector_size = 0x20000, .sram_size = 0x2000000, }; diff --git a/include/block/block.h b/include/block/block.h index d0baf4fb83..7e92f549fb 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -175,6 +175,7 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MIRROR, BLOCK_OP_TYPE_RESIZE, BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, BLOCK_OP_TYPE_MAX, } BlockOpType; @@ -213,7 +214,7 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); -void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); +int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); int bdrv_open(BlockDriverState **pbs, const char *filename, const char *reference, QDict *options, int flags, BlockDriver *drv, Error **errp); @@ -314,6 +315,9 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, BlockDriverState *candidate); bool bdrv_is_first_non_filter(BlockDriverState *candidate); +/* check if a named node can be replaced when doing drive-mirror */ +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp); + /* async block I/O */ typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, int sector_num); diff --git a/include/block/block_int.h b/include/block/block_int.h index 715c761fad..53e77cf11e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -100,6 +100,9 @@ struct BlockDriver { */ bool bdrv_needs_filename; + /* Set if a driver can support backing files */ + bool supports_backing; + /* For handling image reopen for split or non-split files */ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); @@ -486,6 +489,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * mirror_start: * @bs: Block device to operate on. * @target: Block device to write to. + * @replaces: Block graph node name to replace once the mirror is done. Can + * only be used when full mirroring is selected. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @granularity: The chosen granularity for the dirty bitmap. * @buf_size: The amount of data that can be in flight at one time. @@ -502,6 +507,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * @bs will be switched to read from @target. */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, + const char *replaces, int64_t speed, int64_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, diff --git a/include/block/blockjob.h b/include/block/blockjob.h index e443987ea8..f3cf63f9f5 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -147,6 +147,14 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); /** + * block_job_yield: + * @job: The job that calls the function. + * + * Yield the block job coroutine. + */ +void block_job_yield(BlockJob *job); + +/** * block_job_completed: * @job: The job being completed. * @ret: The status code. @@ -217,7 +225,7 @@ void block_job_pause(BlockJob *job); void block_job_resume(BlockJob *job); /** - * block_job_event_cancle: + * block_job_event_cancelled: * @job: The job whose information is requested. * * Send a BLOCK_JOB_CANCELLED event for the specified job. diff --git a/include/block/qapi.h b/include/block/qapi.h index e92c00daf6..03745464d6 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -39,7 +39,6 @@ void bdrv_query_image_info(BlockDriverState *bs, void bdrv_query_info(BlockDriverState *bs, BlockInfo **p_info, Error **errp); -BlockStats *bdrv_query_stats(const BlockDriverState *bs); void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f, QEMUSnapshotInfo *sn); diff --git a/include/hw/misc/vfio.h b/include/hw/misc/vfio.h new file mode 100644 index 0000000000..0b26cd8e11 --- /dev/null +++ b/include/hw/misc/vfio.h @@ -0,0 +1,9 @@ +#ifndef VFIO_API_H +#define VFIO_API_H + +#include "qemu/typedefs.h" + +extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, + int req, void *param); + +#endif diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 0934518bbd..32f0aa7d10 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -27,13 +27,15 @@ #include "hw/pci/pci_host.h" #include "hw/ppc/xics.h" -#define SPAPR_MSIX_MAX_DEVS 32 - #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge" +#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" #define SPAPR_PCI_HOST_BRIDGE(obj) \ OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE) +#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \ + OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE) + #define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \ OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE) #define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \ @@ -41,6 +43,7 @@ typedef struct sPAPRPHBClass sPAPRPHBClass; typedef struct sPAPRPHBState sPAPRPHBState; +typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; struct sPAPRPHBClass { PCIHostBridgeClass parent_class; @@ -48,6 +51,16 @@ struct sPAPRPHBClass { void (*finish_realize)(sPAPRPHBState *sphb, Error **errp); }; +typedef struct spapr_pci_msi { + uint32_t first_irq; + uint32_t num; +} spapr_pci_msi; + +typedef struct spapr_pci_msi_mig { + uint32_t key; + spapr_pci_msi value; +} spapr_pci_msi_mig; + struct sPAPRPHBState { PCIHostState parent_obj; @@ -67,15 +80,20 @@ struct sPAPRPHBState { uint32_t irq; } lsi_table[PCI_NUM_PINS]; - struct spapr_pci_msi { - uint32_t config_addr; - uint32_t irq; - uint32_t nvec; - } msi_table[SPAPR_MSIX_MAX_DEVS]; + GHashTable *msi; + /* Temporary cache for migration purposes */ + int32_t msi_devs_num; + spapr_pci_msi_mig *msi_devs; QLIST_ENTRY(sPAPRPHBState) list; }; +struct sPAPRPHBVFIOState { + sPAPRPHBState phb; + + int32_t iommugroupid; +}; + #define SPAPR_PCI_BASE_BUID 0x800000020000000ULL #define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 08c301f38d..bbba51a703 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -27,7 +27,6 @@ typedef struct sPAPREnvironment { long rtas_size; void *fdt_skel; target_ulong entry_point; - uint32_t next_irq; uint64_t rtc_offset; struct PPCTimebase tb; bool has_graphics; @@ -339,16 +338,6 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, int spapr_allocate_irq(int hint, bool lsi); int spapr_allocate_irq_block(int num, bool lsi, bool msi); -static inline int spapr_allocate_msi(int hint) -{ - return spapr_allocate_irq(hint, false); -} - -static inline int spapr_allocate_lsi(int hint) -{ - return spapr_allocate_irq(hint, true); -} - /* RTAS return codes */ #define RTAS_OUT_SUCCESS 0 #define RTAS_OUT_NO_ERRORS_FOUND 1 @@ -358,6 +347,58 @@ static inline int spapr_allocate_lsi(int hint) #define RTAS_OUT_NOT_SUPPORTED -3 #define RTAS_OUT_NOT_AUTHORIZED -9002 +/* RTAS tokens */ +#define RTAS_TOKEN_BASE 0x2000 + +#define RTAS_DISPLAY_CHARACTER (RTAS_TOKEN_BASE + 0x00) +#define RTAS_GET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x01) +#define RTAS_SET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x02) +#define RTAS_POWER_OFF (RTAS_TOKEN_BASE + 0x03) +#define RTAS_SYSTEM_REBOOT (RTAS_TOKEN_BASE + 0x04) +#define RTAS_QUERY_CPU_STOPPED_STATE (RTAS_TOKEN_BASE + 0x05) +#define RTAS_START_CPU (RTAS_TOKEN_BASE + 0x06) +#define RTAS_STOP_SELF (RTAS_TOKEN_BASE + 0x07) +#define RTAS_IBM_GET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x08) +#define RTAS_IBM_SET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x09) +#define RTAS_IBM_SET_XIVE (RTAS_TOKEN_BASE + 0x0A) +#define RTAS_IBM_GET_XIVE (RTAS_TOKEN_BASE + 0x0B) +#define RTAS_IBM_INT_OFF (RTAS_TOKEN_BASE + 0x0C) +#define RTAS_IBM_INT_ON (RTAS_TOKEN_BASE + 0x0D) +#define RTAS_CHECK_EXCEPTION (RTAS_TOKEN_BASE + 0x0E) +#define RTAS_EVENT_SCAN (RTAS_TOKEN_BASE + 0x0F) +#define RTAS_IBM_SET_TCE_BYPASS (RTAS_TOKEN_BASE + 0x10) +#define RTAS_QUIESCE (RTAS_TOKEN_BASE + 0x11) +#define RTAS_NVRAM_FETCH (RTAS_TOKEN_BASE + 0x12) +#define RTAS_NVRAM_STORE (RTAS_TOKEN_BASE + 0x13) +#define RTAS_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x14) +#define RTAS_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x15) +#define RTAS_IBM_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x16) +#define RTAS_IBM_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x17) +#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER (RTAS_TOKEN_BASE + 0x18) +#define RTAS_IBM_CHANGE_MSI (RTAS_TOKEN_BASE + 0x19) +#define RTAS_SET_INDICATOR (RTAS_TOKEN_BASE + 0x1A) +#define RTAS_SET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1B) +#define RTAS_GET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1C) +#define RTAS_GET_SENSOR_STATE (RTAS_TOKEN_BASE + 0x1D) +#define RTAS_IBM_CONFIGURE_CONNECTOR (RTAS_TOKEN_BASE + 0x1E) +#define RTAS_IBM_OS_TERM (RTAS_TOKEN_BASE + 0x1F) +#define RTAS_IBM_EXTENDED_OS_TERM (RTAS_TOKEN_BASE + 0x20) + +#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x21) + +/* RTAS ibm,get-system-parameter token values */ +#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20 +#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42 +#define RTAS_SYSPARM_UUID 48 + +/* Possible values for the platform-processor-diagnostics-run-mode parameter + * of the RTAS ibm,get-system-parameter call. + */ +#define DIAGNOSTICS_RUN_MODE_DISABLED 0 +#define DIAGNOSTICS_RUN_MODE_STAGGERED 1 +#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2 +#define DIAGNOSTICS_RUN_MODE_PERIODIC 3 + static inline uint64_t ppc64_phys_to_real(uint64_t addr) { return addr & ~0xF000000000000000ULL; @@ -373,11 +414,24 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val) stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val); } + +static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len, + uint8_t *buffer, uint16_t buffer_len) +{ + if (phys_len < 2) { + return; + } + stw_be_phys(&address_space_memory, + ppc64_phys_to_real(phys), buffer_len); + cpu_physical_memory_write(ppc64_phys_to_real(phys + 2), + buffer, MIN(buffer_len, phys_len - 2)); +} + typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); -int spapr_rtas_register(const char *name, spapr_rtas_fn fn); +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn); target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); @@ -407,6 +461,7 @@ struct sPAPRTCETable { uint32_t page_shift; uint64_t *table; bool bypass; + bool vfio_accel; int fd; MemoryRegion iommu; QLIST_ENTRY(sPAPRTCETable) list; @@ -418,7 +473,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, - uint32_t nb_table); + uint32_t nb_table, + bool vfio_accel); MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet); void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass); int spapr_dma_dt(void *fdt, int node_off, const char *propname, diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 85e4c8a3dd..a214dd7f28 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -136,7 +136,6 @@ struct ICSState { uint32_t nr_irqs; uint32_t offset; qemu_irq *qirqs; - bool *islsi; ICSIRQState *irqs; XICSState *icp; }; @@ -150,12 +149,20 @@ struct ICSIRQState { #define XICS_STATUS_REJECTED 0x4 #define XICS_STATUS_MASKED_PENDING 0x8 uint8_t status; +/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */ +#define XICS_FLAGS_IRQ_LSI 0x1 +#define XICS_FLAGS_IRQ_MSI 0x2 +#define XICS_FLAGS_IRQ_MASK 0x3 + uint8_t flags; }; #define XICS_IRQS 1024 qemu_irq xics_get_qirq(XICSState *icp, int irq); void xics_set_irq_type(XICSState *icp, int irq, bool lsi); +int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi); +int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align); +void xics_free(XICSState *icp, int irq, int num); void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 4bc9b549ad..d0fb26f963 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -17,6 +17,7 @@ #include "hw/virtio/virtio.h" #include "hw/block/block.h" #include "sysemu/iothread.h" +#include "block/block.h" #define TYPE_VIRTIO_BLK "virtio-blk-device" #define VIRTIO_BLK(obj) \ @@ -116,6 +117,7 @@ struct VirtIOBlkConf struct VirtIOBlockDataPlane; +struct VirtIOBlockReq; typedef struct VirtIOBlock { VirtIODevice parent_obj; BlockDriverState *bs; @@ -127,12 +129,29 @@ typedef struct VirtIOBlock { unsigned short sector_mask; bool original_wce; VMChangeStateEntry *change; + /* Function to push to vq and notify guest */ + void (*complete_request)(struct VirtIOBlockReq *req, unsigned char status); #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE Notifier migration_state_notifier; struct VirtIOBlockDataPlane *dataplane; #endif } VirtIOBlock; +typedef struct MultiReqBuffer { + BlockRequest blkreq[32]; + unsigned int num_writes; +} MultiReqBuffer; + +typedef struct VirtIOBlockReq { + VirtIOBlock *dev; + VirtQueueElement *elem; + struct virtio_blk_inhdr *in; + struct virtio_blk_outhdr out; + QEMUIOVector qiov; + struct VirtIOBlockReq *next; + BlockAcctCookie acct; +} VirtIOBlockReq; + #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \ DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) @@ -158,4 +177,8 @@ void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk); int virtio_blk_handle_scsi_req(VirtIOBlock *blk, VirtQueueElement *elem); +void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb); + +void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb); + #endif diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 71a8a95641..9a001bd28f 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -101,6 +101,7 @@ enum VMStateFlags { VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/ VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/ VMS_MUST_EXIST = 0x1000, /* Field must exist in input */ + VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */ }; typedef struct { @@ -429,6 +430,16 @@ extern const VMStateInfo vmstate_info_bitmap; .offset = offsetof(_state, _field), \ } +#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + #define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \ .name = (stringify(_field)), \ .version_id = (_version), \ diff --git a/include/net/net.h b/include/net/net.h index 8b189da5ee..ed594f9bdb 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -24,13 +24,13 @@ struct MACAddr { typedef struct NICPeers { NetClientState *ncs[MAX_QUEUE_NUM]; + int32_t queues; } NICPeers; typedef struct NICConf { MACAddr macaddr; NICPeers peers; int32_t bootindex; - int32_t queues; } NICConf; #define DEFINE_NIC_PROPERTIES(_state, _conf) \ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 1248eda272..60777fecf6 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -736,6 +736,14 @@ enum { QEMU_PPC_FEATURE_TRUE_LE = 0x00000002, QEMU_PPC_FEATURE_PPC_LE = 0x00000001, + + /* Feature definitions in AT_HWCAP2. */ + QEMU_PPC_FEATURE2_ARCH_2_07 = 0x80000000, /* ISA 2.07 */ + QEMU_PPC_FEATURE2_HAS_HTM = 0x40000000, /* Hardware Transactional Memory */ + QEMU_PPC_FEATURE2_HAS_DSCR = 0x20000000, /* Data Stream Control Register */ + QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */ + QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */ + QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */ }; #define ELF_HWCAP get_elf_hwcap() @@ -749,6 +757,8 @@ static uint32_t get_elf_hwcap(void) Altivec/FP/SPE support. Anything else is just a bonus. */ #define GET_FEATURE(flag, feature) \ do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0) +#define GET_FEATURE2(flag, feature) \ + do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0) GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64); GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU); GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC); @@ -757,7 +767,36 @@ static uint32_t get_elf_hwcap(void) GET_FEATURE(PPC_SPE_DOUBLE, QEMU_PPC_FEATURE_HAS_EFP_DOUBLE); GET_FEATURE(PPC_BOOKE, QEMU_PPC_FEATURE_BOOKE); GET_FEATURE(PPC_405_MAC, QEMU_PPC_FEATURE_HAS_4xxMAC); + GET_FEATURE2(PPC2_DFP, QEMU_PPC_FEATURE_HAS_DFP); + GET_FEATURE2(PPC2_VSX, QEMU_PPC_FEATURE_HAS_VSX); + GET_FEATURE2((PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | + PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206), + QEMU_PPC_FEATURE_ARCH_2_06); +#undef GET_FEATURE +#undef GET_FEATURE2 + + return features; +} + +#define ELF_HWCAP2 get_elf_hwcap2() + +static uint32_t get_elf_hwcap2(void) +{ + PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); + uint32_t features = 0; + +#define GET_FEATURE(flag, feature) \ + do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0) +#define GET_FEATURE2(flag, feature) \ + do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0) + + GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL); + GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR); + GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | + PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07); + #undef GET_FEATURE +#undef GET_FEATURE2 return features; } @@ -774,8 +813,9 @@ static uint32_t get_elf_hwcap(void) #define DLINFO_ARCH_ITEMS 5 #define ARCH_DLINFO \ do { \ - NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20); \ - NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20); \ + PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); \ + NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \ + NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \ NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ /* \ * Now handle glibc compatibility. \ @@ -570,6 +570,7 @@ monitor_qapi_event_throttle(QAPIEvent event, int64_t rate) trace_monitor_protocol_event_throttle(event, rate); evstate->event = event; + assert(rate * SCALE_MS <= INT64_MAX); evstate->rate = rate * SCALE_MS; evstate->last = 0; evstate->data = NULL; @@ -585,9 +586,9 @@ static void monitor_qapi_event_init(void) monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000); monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000); monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000); - /* limit the rate of quorum events to avoid hammering the management */ monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000); monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000); + monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000); qmp_event_set_func_emit(monitor_qapi_event_queue); } diff --git a/net/Makefile.objs b/net/Makefile.objs index 301f6b6b51..a06ba59dad 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o common-obj-y += socket.o common-obj-y += dump.o common-obj-y += eth.o +common-obj-$(CONFIG_LINUX) += l2tpv3.o common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o common-obj-$(CONFIG_LINUX) += tap-linux.o common-obj-$(CONFIG_WIN32) += tap-win32.o diff --git a/net/clients.h b/net/clients.h index 7f3d4ae9f3..2e8fedad8d 100644 --- a/net/clients.h +++ b/net/clients.h @@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, int net_init_bridge(const NetClientOptions *opts, const char *name, NetClientState *peer); +int net_init_l2tpv3(const NetClientOptions *opts, const char *name, + NetClientState *peer); #ifdef CONFIG_VDE int net_init_vde(const NetClientOptions *opts, const char *name, NetClientState *peer); diff --git a/net/l2tpv3.c b/net/l2tpv3.c new file mode 100644 index 0000000000..528d95b641 --- /dev/null +++ b/net/l2tpv3.c @@ -0,0 +1,757 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2012-2014 Cisco Systems + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <linux/ip.h> +#include <netdb.h> +#include "config-host.h" +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" + + +/* The buffer size needs to be investigated for optimum numbers and + * optimum means of paging in on different systems. This size is + * chosen to be sufficient to accommodate one packet with some headers + */ + +#define BUFFER_ALIGN sysconf(_SC_PAGESIZE) +#define BUFFER_SIZE 2048 +#define IOVSIZE 2 +#define MAX_L2TPV3_MSGCNT 64 +#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE) + +/* Header set to 0x30000 signifies a data packet */ + +#define L2TPV3_DATA_PACKET 0x30000 + +/* IANA-assigned IP protocol ID for L2TPv3 */ + +#ifndef IPPROTO_L2TP +#define IPPROTO_L2TP 0x73 +#endif + +typedef struct NetL2TPV3State { + NetClientState nc; + int fd; + + /* + * these are used for xmit - that happens packet a time + * and for first sign of life packet (easier to parse that once) + */ + + uint8_t *header_buf; + struct iovec *vec; + + /* + * these are used for receive - try to "eat" up to 32 packets at a time + */ + + struct mmsghdr *msgvec; + + /* + * peer address + */ + + struct sockaddr_storage *dgram_dst; + uint32_t dst_size; + + /* + * L2TPv3 parameters + */ + + uint64_t rx_cookie; + uint64_t tx_cookie; + uint32_t rx_session; + uint32_t tx_session; + uint32_t header_size; + uint32_t counter; + + /* + * DOS avoidance in error handling + */ + + bool header_mismatch; + + /* + * Ring buffer handling + */ + + int queue_head; + int queue_tail; + int queue_depth; + + /* + * Precomputed offsets + */ + + uint32_t offset; + uint32_t cookie_offset; + uint32_t counter_offset; + uint32_t session_offset; + + /* Poll Control */ + + bool read_poll; + bool write_poll; + + /* Flags */ + + bool ipv6; + bool udp; + bool has_counter; + bool pin_counter; + bool cookie; + bool cookie_is_64; + +} NetL2TPV3State; + +static int l2tpv3_can_send(void *opaque); +static void net_l2tpv3_send(void *opaque); +static void l2tpv3_writable(void *opaque); + +static void l2tpv3_update_fd_handler(NetL2TPV3State *s) +{ + qemu_set_fd_handler2(s->fd, + s->read_poll ? l2tpv3_can_send : NULL, + s->read_poll ? net_l2tpv3_send : NULL, + s->write_poll ? l2tpv3_writable : NULL, + s); +} + +static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable) +{ + if (s->read_poll != enable) { + s->read_poll = enable; + l2tpv3_update_fd_handler(s); + } +} + +static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable) +{ + if (s->write_poll != enable) { + s->write_poll = enable; + l2tpv3_update_fd_handler(s); + } +} + +static void l2tpv3_writable(void *opaque) +{ + NetL2TPV3State *s = opaque; + l2tpv3_write_poll(s, false); + qemu_flush_queued_packets(&s->nc); +} + +static int l2tpv3_can_send(void *opaque) +{ + NetL2TPV3State *s = opaque; + + return qemu_can_send_packet(&s->nc); +} + +static void l2tpv3_send_completed(NetClientState *nc, ssize_t len) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + l2tpv3_read_poll(s, true); +} + +static void l2tpv3_poll(NetClientState *nc, bool enable) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + l2tpv3_write_poll(s, enable); + l2tpv3_read_poll(s, enable); +} + +static void l2tpv3_form_header(NetL2TPV3State *s) +{ + uint32_t *counter; + + if (s->udp) { + stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET); + } + stl_be_p( + (uint32_t *) (s->header_buf + s->session_offset), + s->tx_session + ); + if (s->cookie) { + if (s->cookie_is_64) { + stq_be_p( + (uint64_t *)(s->header_buf + s->cookie_offset), + s->tx_cookie + ); + } else { + stl_be_p( + (uint32_t *) (s->header_buf + s->cookie_offset), + s->tx_cookie + ); + } + } + if (s->has_counter) { + counter = (uint32_t *)(s->header_buf + s->counter_offset); + if (s->pin_counter) { + *counter = 0; + } else { + stl_be_p(counter, ++s->counter); + } + } +} + +static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc, + const struct iovec *iov, + int iovcnt) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + + struct msghdr message; + int ret; + + if (iovcnt > MAX_L2TPV3_IOVCNT - 1) { + error_report( + "iovec too long %d > %d, change l2tpv3.h", + iovcnt, MAX_L2TPV3_IOVCNT + ); + return -1; + } + l2tpv3_form_header(s); + memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec)); + s->vec->iov_base = s->header_buf; + s->vec->iov_len = s->offset; + message.msg_name = s->dgram_dst; + message.msg_namelen = s->dst_size; + message.msg_iov = s->vec; + message.msg_iovlen = iovcnt + 1; + message.msg_control = NULL; + message.msg_controllen = 0; + message.msg_flags = 0; + do { + ret = sendmsg(s->fd, &message, 0); + } while ((ret == -1) && (errno == EINTR)); + if (ret > 0) { + ret -= s->offset; + } else if (ret == 0) { + /* belt and braces - should not occur on DGRAM + * we should get an error and never a 0 send + */ + ret = iov_size(iov, iovcnt); + } else { + /* signal upper layer that socket buffer is full */ + ret = -errno; + if (ret == -EAGAIN || ret == -ENOBUFS) { + l2tpv3_write_poll(s, true); + ret = 0; + } + } + return ret; +} + +static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, + const uint8_t *buf, + size_t size) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + + struct iovec *vec; + struct msghdr message; + ssize_t ret = 0; + + l2tpv3_form_header(s); + vec = s->vec; + vec->iov_base = s->header_buf; + vec->iov_len = s->offset; + vec++; + vec->iov_base = (void *) buf; + vec->iov_len = size; + message.msg_name = s->dgram_dst; + message.msg_namelen = s->dst_size; + message.msg_iov = s->vec; + message.msg_iovlen = 2; + message.msg_control = NULL; + message.msg_controllen = 0; + message.msg_flags = 0; + do { + ret = sendmsg(s->fd, &message, 0); + } while ((ret == -1) && (errno == EINTR)); + if (ret > 0) { + ret -= s->offset; + } else if (ret == 0) { + /* belt and braces - should not occur on DGRAM + * we should get an error and never a 0 send + */ + ret = size; + } else { + ret = -errno; + if (ret == -EAGAIN || ret == -ENOBUFS) { + /* signal upper layer that socket buffer is full */ + l2tpv3_write_poll(s, true); + ret = 0; + } + } + return ret; +} + +static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) +{ + + uint32_t *session; + uint64_t cookie; + + if ((!s->udp) && (!s->ipv6)) { + buf += sizeof(struct iphdr) /* fix for ipv4 raw */; + } + + /* we do not do a strict check for "data" packets as per + * the RFC spec because the pure IP spec does not have + * that anyway. + */ + + if (s->cookie) { + if (s->cookie_is_64) { + cookie = ldq_be_p(buf + s->cookie_offset); + } else { + cookie = ldl_be_p(buf + s->cookie_offset); + } + if (cookie != s->rx_cookie) { + if (!s->header_mismatch) { + error_report("unknown cookie id"); + } + return -1; + } + } + session = (uint32_t *) (buf + s->session_offset); + if (ldl_be_p(session) != s->rx_session) { + if (!s->header_mismatch) { + error_report("session mismatch"); + } + return -1; + } + return 0; +} + +static void net_l2tpv3_process_queue(NetL2TPV3State *s) +{ + int size = 0; + struct iovec *vec; + bool bad_read; + int data_size; + struct mmsghdr *msgvec; + + /* go into ring mode only if there is a "pending" tail */ + if (s->queue_depth > 0) { + do { + msgvec = s->msgvec + s->queue_tail; + if (msgvec->msg_len > 0) { + data_size = msgvec->msg_len - s->header_size; + vec = msgvec->msg_hdr.msg_iov; + if ((data_size > 0) && + (l2tpv3_verify_header(s, vec->iov_base) == 0)) { + vec++; + /* Use the legacy delivery for now, we will + * switch to using our own ring as a queueing mechanism + * at a later date + */ + size = qemu_send_packet_async( + &s->nc, + vec->iov_base, + data_size, + l2tpv3_send_completed + ); + if (size == 0) { + l2tpv3_read_poll(s, false); + } + bad_read = false; + } else { + bad_read = true; + if (!s->header_mismatch) { + /* report error only once */ + error_report("l2tpv3 header verification failed"); + s->header_mismatch = true; + } + } + } else { + bad_read = true; + } + s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT; + s->queue_depth--; + } while ( + (s->queue_depth > 0) && + qemu_can_send_packet(&s->nc) && + ((size > 0) || bad_read) + ); + } +} + +static void net_l2tpv3_send(void *opaque) +{ + NetL2TPV3State *s = opaque; + int target_count, count; + struct mmsghdr *msgvec; + + /* go into ring mode only if there is a "pending" tail */ + + if (s->queue_depth) { + + /* The ring buffer we use has variable intake + * count of how much we can read varies - adjust accordingly + */ + + target_count = MAX_L2TPV3_MSGCNT - s->queue_depth; + + /* Ensure we do not overrun the ring when we have + * a lot of enqueued packets + */ + + if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) { + target_count = MAX_L2TPV3_MSGCNT - s->queue_head; + } + } else { + + /* we do not have any pending packets - we can use + * the whole message vector linearly instead of using + * it as a ring + */ + + s->queue_head = 0; + s->queue_tail = 0; + target_count = MAX_L2TPV3_MSGCNT; + } + + msgvec = s->msgvec + s->queue_head; + if (target_count > 0) { + do { + count = recvmmsg( + s->fd, + msgvec, + target_count, MSG_DONTWAIT, NULL); + } while ((count == -1) && (errno == EINTR)); + if (count < 0) { + /* Recv error - we still need to flush packets here, + * (re)set queue head to current position + */ + count = 0; + } + s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT; + s->queue_depth += count; + } + net_l2tpv3_process_queue(s); +} + +static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount) +{ + int i, j; + struct iovec *iov; + struct mmsghdr *cleanup = msgvec; + if (cleanup) { + for (i = 0; i < count; i++) { + if (cleanup->msg_hdr.msg_iov) { + iov = cleanup->msg_hdr.msg_iov; + for (j = 0; j < iovcount; j++) { + g_free(iov->iov_base); + iov++; + } + g_free(cleanup->msg_hdr.msg_iov); + } + cleanup++; + } + g_free(msgvec); + } +} + +static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count) +{ + int i; + struct iovec *iov; + struct mmsghdr *msgvec, *result; + + msgvec = g_malloc(sizeof(struct mmsghdr) * count); + result = msgvec; + for (i = 0; i < count ; i++) { + msgvec->msg_hdr.msg_name = NULL; + msgvec->msg_hdr.msg_namelen = 0; + iov = g_malloc(sizeof(struct iovec) * IOVSIZE); + msgvec->msg_hdr.msg_iov = iov; + iov->iov_base = g_malloc(s->header_size); + iov->iov_len = s->header_size; + iov++ ; + iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE); + iov->iov_len = BUFFER_SIZE; + msgvec->msg_hdr.msg_iovlen = 2; + msgvec->msg_hdr.msg_control = NULL; + msgvec->msg_hdr.msg_controllen = 0; + msgvec->msg_hdr.msg_flags = 0; + msgvec++; + } + return result; +} + +static void net_l2tpv3_cleanup(NetClientState *nc) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + qemu_purge_queued_packets(nc); + l2tpv3_read_poll(s, false); + l2tpv3_write_poll(s, false); + if (s->fd > 0) { + close(s->fd); + } + destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE); + g_free(s->vec); + g_free(s->header_buf); + g_free(s->dgram_dst); +} + +static NetClientInfo net_l2tpv3_info = { + .type = NET_CLIENT_OPTIONS_KIND_L2TPV3, + .size = sizeof(NetL2TPV3State), + .receive = net_l2tpv3_receive_dgram, + .receive_iov = net_l2tpv3_receive_dgram_iov, + .poll = l2tpv3_poll, + .cleanup = net_l2tpv3_cleanup, +}; + +int net_init_l2tpv3(const NetClientOptions *opts, + const char *name, + NetClientState *peer) +{ + + + const NetdevL2TPv3Options *l2tpv3; + NetL2TPV3State *s; + NetClientState *nc; + int fd = -1, gairet; + struct addrinfo hints; + struct addrinfo *result = NULL; + char *srcport, *dstport; + + nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name); + + s = DO_UPCAST(NetL2TPV3State, nc, nc); + + s->queue_head = 0; + s->queue_tail = 0; + s->header_mismatch = false; + + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); + l2tpv3 = opts->l2tpv3; + + if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { + s->ipv6 = l2tpv3->ipv6; + } else { + s->ipv6 = false; + } + + if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) { + error_report("l2tpv3_open : offset must be less than 256 bytes"); + goto outerr; + } + + if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) { + if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) { + s->cookie = true; + } else { + goto outerr; + } + } else { + s->cookie = false; + } + + if (l2tpv3->has_cookie64 || l2tpv3->cookie64) { + s->cookie_is_64 = true; + } else { + s->cookie_is_64 = false; + } + + if (l2tpv3->has_udp && l2tpv3->udp) { + s->udp = true; + if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) { + error_report("l2tpv3_open : need both src and dst port for udp"); + goto outerr; + } else { + srcport = l2tpv3->srcport; + dstport = l2tpv3->dstport; + } + } else { + s->udp = false; + srcport = NULL; + dstport = NULL; + } + + + s->offset = 4; + s->session_offset = 0; + s->cookie_offset = 4; + s->counter_offset = 4; + + s->tx_session = l2tpv3->txsession; + if (l2tpv3->has_rxsession) { + s->rx_session = l2tpv3->rxsession; + } else { + s->rx_session = s->tx_session; + } + + if (s->cookie) { + s->rx_cookie = l2tpv3->rxcookie; + s->tx_cookie = l2tpv3->txcookie; + if (s->cookie_is_64 == true) { + /* 64 bit cookie */ + s->offset += 8; + s->counter_offset += 8; + } else { + /* 32 bit cookie */ + s->offset += 4; + s->counter_offset += 4; + } + } + + memset(&hints, 0, sizeof(hints)); + + if (s->ipv6) { + hints.ai_family = AF_INET6; + } else { + hints.ai_family = AF_INET; + } + if (s->udp) { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = 0; + s->offset += 4; + s->counter_offset += 4; + s->session_offset += 4; + s->cookie_offset += 4; + } else { + hints.ai_socktype = SOCK_RAW; + hints.ai_protocol = IPPROTO_L2TP; + } + + gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result); + + if ((gairet != 0) || (result == NULL)) { + error_report( + "l2tpv3_open : could not resolve src, errno = %s", + gai_strerror(gairet) + ); + goto outerr; + } + fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol); + if (fd == -1) { + fd = -errno; + error_report("l2tpv3_open : socket creation failed, errno = %d", -fd); + freeaddrinfo(result); + goto outerr; + } + if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) { + error_report("l2tpv3_open : could not bind socket err=%i", errno); + goto outerr; + } + if (result) { + freeaddrinfo(result); + } + + memset(&hints, 0, sizeof(hints)); + + if (s->ipv6) { + hints.ai_family = AF_INET6; + } else { + hints.ai_family = AF_INET; + } + if (s->udp) { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = 0; + } else { + hints.ai_socktype = SOCK_RAW; + hints.ai_protocol = IPPROTO_L2TP; + } + + result = NULL; + gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result); + if ((gairet != 0) || (result == NULL)) { + error_report( + "l2tpv3_open : could not resolve dst, error = %s", + gai_strerror(gairet) + ); + goto outerr; + } + + s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage)); + memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage)); + memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); + s->dst_size = result->ai_addrlen; + + if (result) { + freeaddrinfo(result); + } + + if (l2tpv3->has_counter && l2tpv3->counter) { + s->has_counter = true; + s->offset += 4; + } else { + s->has_counter = false; + } + + if (l2tpv3->has_pincounter && l2tpv3->pincounter) { + s->has_counter = true; /* pin counter implies that there is counter */ + s->pin_counter = true; + } else { + s->pin_counter = false; + } + + if (l2tpv3->has_offset) { + /* extra offset */ + s->offset += l2tpv3->offset; + } + + if ((s->ipv6) || (s->udp)) { + s->header_size = s->offset; + } else { + s->header_size = s->offset + sizeof(struct iphdr); + } + + s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT); + s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT); + s->header_buf = g_malloc(s->header_size); + + qemu_set_nonblock(fd); + + s->fd = fd; + s->counter = 0; + + l2tpv3_read_poll(s, true); + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "l2tpv3: connected"); + return 0; +outerr: + qemu_del_net_client(nc); + if (fd > 0) { + close(fd); + } + if (result) { + freeaddrinfo(result); + } + return -1; +} + @@ -250,7 +250,7 @@ NICState *qemu_new_nic(NetClientInfo *info, { NetClientState **peers = conf->peers.ncs; NICState *nic; - int i, queues = MAX(1, conf->queues); + int i, queues = MAX(1, conf->peers.queues); assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC); assert(info->size >= sizeof(NICState)); @@ -363,7 +363,7 @@ void qemu_del_net_client(NetClientState *nc) void qemu_del_nic(NICState *nic) { - int i, queues = MAX(nic->conf->queues, 1); + int i, queues = MAX(nic->conf->peers.queues, 1); /* If this is a peer NIC and peer has already been deleted, free it now. */ if (nic->peer_deleted) { @@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( #ifdef CONFIG_VHOST_NET_USED [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user, #endif +#ifdef CONFIG_LINUX + [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3, +#endif }; @@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) #ifdef CONFIG_VHOST_NET_USED case NET_CLIENT_OPTIONS_KIND_VHOST_USER: #endif +#ifdef CONFIG_LINUX + case NET_CLIENT_OPTIONS_KIND_L2TPV3: +#endif break; default: diff --git a/qapi-schema.json b/qapi-schema.json index e7727a1153..a83befc05b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -8,6 +8,9 @@ # QAPI block definitions { 'include': 'qapi/block.json' } +# QAPI event definitions +{ 'include': 'qapi/event.json' } + ## # LostTickPolicy: # @@ -211,12 +214,18 @@ # # @filename: the filename of the character device # +# @frontend-open: shows whether the frontend device attached to this backend +# (eg. with the chardev=... option) is in open or closed state +# (since 2.1) +# # Notes: @filename is encoded using the QEMU command line character device # encoding. See the QEMU man page for details. # # Since: 0.14.0 ## -{ 'type': 'ChardevInfo', 'data': {'label': 'str', 'filename': 'str'} } +{ 'type': 'ChardevInfo', 'data': {'label': 'str', + 'filename': 'str', + 'frontend-open': 'bool'} } ## # @query-chardev: @@ -654,8 +663,9 @@ # # @host: IP address # -# @service: The service name of vnc port. This may depend on the host system's -# service database so symbolic names should not be relied on. +# @service: The service name of the vnc port. This may depend on the host +# system's service database so symbolic names should not be relied +# on. # # @family: address family # @@ -694,7 +704,7 @@ ## { 'type': 'VncClientInfo', 'base': 'VncBasicInfo', - 'data': { '*x509_dname' : 'str', '*sasl_username': 'str' } } + 'data': { '*x509_dname': 'str', '*sasl_username': 'str' } } ## # @VncInfo: @@ -2040,6 +2050,62 @@ '*udp': 'str' } } ## +# @NetdevL2TPv3Options +# +# Connect the VLAN to Ethernet over L2TPv3 Static tunnel +# +# @src: source address +# +# @dst: destination address +# +# @srcport: #optional source port - mandatory for udp, optional for ip +# +# @dstport: #optional destination port - mandatory for udp, optional for ip +# +# @ipv6: #optional - force the use of ipv6 +# +# @udp: #optional - use the udp version of l2tpv3 encapsulation +# +# @cookie64: #optional - use 64 bit coookies +# +# @counter: #optional have sequence counter +# +# @pincounter: #optional pin sequence counter to zero - +# workaround for buggy implementations or +# networks with packet reorder +# +# @txcookie: #optional 32 or 64 bit transmit cookie +# +# @rxcookie: #optional 32 or 64 bit receive cookie +# +# @txsession: 32 bit transmit session +# +# @rxsession: #optional 32 bit receive session - if not specified +# set to the same value as transmit +# +# @offset: #optional additional offset - allows the insertion of +# additional application-specific data before the packet payload +# +# Since 2.1 +## +{ 'type': 'NetdevL2TPv3Options', + 'data': { + 'src': 'str', + 'dst': 'str', + '*srcport': 'str', + '*dstport': 'str', + '*ipv6': 'bool', + '*udp': 'bool', + '*cookie64': 'bool', + '*counter': 'bool', + '*pincounter': 'bool', + '*txcookie': 'uint64', + '*rxcookie': 'uint64', + 'txsession': 'uint32', + '*rxsession': 'uint32', + '*offset': 'uint32' } } + +## # @NetdevVdeOptions # # Connect the VLAN to a vde switch running on the host. @@ -2150,6 +2216,9 @@ # A discriminated record of network device traits. # # Since 1.2 +# +# 'l2tpv3' - since 2.1 +# ## { 'union': 'NetClientOptions', 'data': { @@ -2157,6 +2226,7 @@ 'nic': 'NetLegacyNicOptions', 'user': 'NetdevUserOptions', 'tap': 'NetdevTapOptions', + 'l2tpv3': 'NetdevL2TPv3Options', 'socket': 'NetdevSocketOptions', 'vde': 'NetdevVdeOptions', 'dump': 'NetdevDumpOptions', @@ -3398,5 +3468,3 @@ ## { 'enum': 'GuestPanicAction', 'data': [ 'pause' ] } - -{ 'include': 'qapi-event.json' } diff --git a/qapi/block-core.json b/qapi/block-core.json index af6b436540..faf394cc76 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -765,6 +765,13 @@ # @format: #optional the format of the new destination, default is to # probe if @mode is 'existing', else the format of the source # +# @node-name: #optional the new block driver state node name in the graph +# (Since 2.1) +# +# @replaces: #optional with sync=full graph node name to be replaced by the new +# image when a whole image copy is done. This can be used to repair +# broken Quorum files. (Since 2.1) +# # @mode: #optional whether and how QEMU should create a new image, default is # 'absolute-paths'. # @@ -797,6 +804,7 @@ ## { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + '*node-name': 'str', '*replaces': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', @@ -1329,12 +1337,15 @@ # # @vote-threshold: the vote limit under which a read will fail # +# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached +# (Since 2.1) +# # Since: 2.0 ## { 'type': 'BlockdevOptionsQuorum', 'data': { '*blkverify': 'bool', 'children': [ 'BlockdevRef' ], - 'vote-threshold': 'int' } } + 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } } ## # @BlockdevOptions @@ -1436,7 +1447,8 @@ # @device: device name # # @msg: informative message for human consumption, such as the kind of -# corruption being detected +# corruption being detected. It should not be parsed by machine as it is +# not guaranteed to be stable # # @offset: #optional, if the corruption resulted from an image access, this is # the access offset into the image @@ -1544,19 +1556,32 @@ { 'event': 'BLOCK_JOB_ERROR', 'data': { 'device' : 'str', 'operation': 'IoOperationType', - 'action' : 'BlockdevOnError' } } + 'action' : 'BlockErrorAction' } } ## # @BLOCK_JOB_READY # # Emitted when a block job is ready to complete # +# @type: job type +# # @device: device name # +# @len: maximum progress value +# +# @offset: current progress value. On success this is equal to len. +# On failure this is less than len +# +# @speed: rate limit, bytes per second +# # Note: The "ready to complete" status is always reset by a @BLOCK_JOB_ERROR # event # # Since: 1.3 ## { 'event': 'BLOCK_JOB_READY', - 'data': { 'device': 'str' } } + 'data': { 'type' : 'BlockJobType', + 'device': 'str', + 'len' : 'int', + 'offset': 'int', + 'speed' : 'int' } } diff --git a/qapi-event.json b/qapi/event.json index e7a47f9927..ff97aeb377 100644 --- a/qapi-event.json +++ b/qapi/event.json @@ -1,8 +1,8 @@ ## # @SHUTDOWN # -# Emitted when the virtual machine has shutdown, possibly indicating that QEMU -# is about about to exit. +# Emitted when the virtual machine has shut down, indicating that qemu is +# about to exit. # # Note: If the command-line option "-no-shutdown" has been specified, qemu will # not exit, and a STOP event will eventually follow the SHUTDOWN event @@ -316,3 +316,17 @@ { 'event': 'QUORUM_REPORT_BAD', 'data': { '*error': 'str', 'node-name': 'str', 'sector-num': 'int', 'sector-count': 'int' } } + +## +# @VSERPORT_CHANGE +# +# Emitted when the guest opens or closes a virtio-serial port. +# +# @id: device identifier of the virtio-serial port +# +# @open: true if the guest has opened the virtio-serial port +# +# Since: 2.1 +## +{ 'event': 'VSERPORT_CHANGE', + 'data': { 'id': 'str', 'open': 'bool' } } diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c index 6a0974eb48..36eb3bcfd6 100644 --- a/qemu-bridge-helper.c +++ b/qemu-bridge-helper.c @@ -229,7 +229,7 @@ int main(int argc, char **argv) unsigned long ifargs[4]; #endif int ifindex; - int fd, ctlfd, unixfd = -1; + int fd = -1, ctlfd = -1, unixfd = -1; int use_vnet = 0; int mtu; const char *bridge = NULL; @@ -436,7 +436,12 @@ int main(int argc, char **argv) /* profit! */ cleanup: - + if (fd >= 0) { + close(fd); + } + if (ctlfd >= 0) { + close(ctlfd); + } while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&acl_list, entry); g_free(acl_rule); diff --git a/qemu-char.c b/qemu-char.c index cbd6b9a025..51917de462 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3705,6 +3705,7 @@ ChardevInfoList *qmp_query_chardev(Error **errp) info->value = g_malloc0(sizeof(*info->value)); info->value->label = g_strdup(chr->label); info->value->filename = g_strdup(chr->filename); + info->value->frontend_open = chr->fe_open; info->next = chr_list; chr_list = info; diff --git a/qemu-options.hx b/qemu-options.hx index ff76ad4830..9e5468678b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, " (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n" " (default=" DEFAULT_BRIDGE_HELPER ")\n" #endif +#ifdef __linux__ + "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n" + " connect the VLAN to an Ethernet over L2TPv3 pseudowire\n" + " Linux kernel 3.3+ as well as most routers can talk\n" + " L2TPv3. This transport allows to connect a VM to a VM,\n" + " VM to a router and even VM to Host. It is a nearly-universal\n" + " standard (RFC3391). Note - this implementation uses static\n" + " pre-configured tunnels (same as the Linux kernel).\n" + " use 'src=' to specify source address\n" + " use 'dst=' to specify destination address\n" + " use 'udp=on' to specify udp encapsulation\n" + " use 'dstport=' to specify destination udp port\n" + " use 'dstport=' to specify destination udp port\n" + " use 'ipv6=on' to force v6\n" + " L2TPv3 uses cookies to prevent misconfiguration as\n" + " well as a weak security measure\n" + " use 'rxcookie=0x012345678' to specify a rxcookie\n" + " use 'txcookie=0x012345678' to specify a txcookie\n" + " use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n" + " use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n" + " use 'pincounter=on' to work around broken counter handling in peer\n" + " use 'offset=X' to add an extra offset between header and data\n" +#endif "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n" " connect the vlan 'n' to another VLAN using a socket connection\n" "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n" @@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \ -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 @end example +@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] +@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] +Connect VLAN @var{n} to L2TPv3 pseudowire. L2TPv3 (RFC3391) is a popular +protocol to transport Ethernet (and other Layer 2) data frames between +two systems. It is present in routers, firewalls and the Linux kernel +(from version 3.3 onwards). + +This transport allows a VM to communicate to another VM, router or firewall directly. + +@item src=@var{srcaddr} + source address (mandatory) +@item dst=@var{dstaddr} + destination address (mandatory) +@item udp + select udp encapsulation (default is ip). +@item srcport=@var{srcport} + source udp port. +@item dstport=@var{dstport} + destination udp port. +@item ipv6 + force v6, otherwise defaults to v4. +@item rxcookie=@var{rxcookie} +@item txcookie=@var{txcookie} + Cookies are a weak form of security in the l2tpv3 specification. +Their function is mostly to prevent misconfiguration. By default they are 32 +bit. +@item cookie64 + Set cookie size to 64 bit instead of the default 32 +@item counter=off + Force a 'cut-down' L2TPv3 with no counter as in +draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00 +@item pincounter=on + Work around broken counter handling in peer. This may also help on +networks which have packet reorder. +@item offset=@var{offset} + Add an extra offset between header and data + +For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan +on the remote Linux host 1.2.3.4: +@example +# Setup tunnel on linux host using raw ip as encapsulation +# on 1.2.3.4 +ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \ + encap udp udp_sport 16384 udp_dport 16384 +ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \ + 0xFFFFFFFF peer_session_id 0xFFFFFFFF +ifconfig vmtunnel0 mtu 1500 +ifconfig vmtunnel0 up +brctl addif br-lan vmtunnel0 + + +# on 4.3.2.1 +# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter + +qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter + + +@end example + @item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and diff --git a/qmp-commands.hx b/qmp-commands.hx index e4a1c80434..65218bc147 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1293,6 +1293,7 @@ EQMP { .name = "drive-mirror", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," + "node-name:s?,replaces:s?," "on-source-error:s?,on-target-error:s?," "granularity:i?,buf-size:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, @@ -1314,6 +1315,10 @@ Arguments: - "device": device name to operate on (json-string) - "target": name of new image file (json-string) - "format": format of new image (json-string, optional) +- "node-name": the name of the new block driver state in the node graph + (json-string, optional) +- "replaces": the block driver node name to replace when finished + (json-string, optional) - "mode": how an image file should be created into the target file/device (NewImageMode, optional, default 'absolute-paths') - "speed": maximum speed of the streaming job, in bytes per second @@ -1921,19 +1926,28 @@ Each json-object contain the following: - "label": device's label (json-string) - "filename": device's file (json-string) +- "frontend-open": open/closed state of the frontend device attached to this + backend (json-bool) Example: -> { "execute": "query-chardev" } <- { - "return":[ + "return": [ + { + "label": "charchannel0", + "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server", + "frontend-open": false + }, { - "label":"monitor", - "filename":"stdio" + "label": "charmonitor", + "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server", + "frontend-open": true }, { - "label":"serial0", - "filename":"vc" + "label": "charserial0", + "filename": "pty:/dev/pts/2", + "frontend-open": true } ] } diff --git a/scripts/qapi-event.py b/scripts/qapi-event.py index 3a1cd61914..601e3076ab 100644 --- a/scripts/qapi-event.py +++ b/scripts/qapi-event.py @@ -26,9 +26,8 @@ def _generate_event_api_name(event_name, params): api_name += "bool has_%s,\n" % c_var(argname) api_name += "".ljust(l) - if argentry == "str": - api_name += "const " - api_name += "%s %s,\n" % (c_type(argentry), c_var(argname)) + api_name += "%s %s,\n" % (c_type(argentry, is_param=True), + c_var(argname)) api_name += "".ljust(l) api_name += "Error **errp)" diff --git a/scripts/qapi.py b/scripts/qapi.py index 54b97cb48e..f2c6d1f840 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -255,7 +255,7 @@ def check_event(expr, expr_info): if structured: raise QAPIExprError(expr_info, "Nested structure define in event is not " - "supported now, event '%s', argname '%s'" + "supported, event '%s', argname '%s'" % (expr['event'], argname)) def check_union(expr, expr_info): diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c index 97a81d8660..9a91af9dbf 100644 --- a/target-ppc/cpu-models.c +++ b/target-ppc/cpu-models.c @@ -1138,6 +1138,8 @@ "POWER7 v2.3") POWERPC_DEF("POWER7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7P, "POWER7+ v2.1") + POWERPC_DEF("POWER8E_v1.0", CPU_POWERPC_POWER8E_v10, POWER8E, + "POWER8E v1.0") POWERPC_DEF("POWER8_v1.0", CPU_POWERPC_POWER8_v10, POWER8, "POWER8 v1.0") POWERPC_DEF("970", CPU_POWERPC_970, 970, @@ -1386,6 +1388,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "POWER5gs", "POWER5+" }, { "POWER7", "POWER7_v2.3" }, { "POWER7+", "POWER7+_v2.1" }, + { "POWER8E", "POWER8E_v1.0" }, { "POWER8", "POWER8_v1.0" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h index db75896012..c39d03a504 100644 --- a/target-ppc/cpu-models.h +++ b/target-ppc/cpu-models.h @@ -559,9 +559,12 @@ enum { CPU_POWERPC_POWER7P_BASE = 0x004A0000, CPU_POWERPC_POWER7P_MASK = 0xFFFF0000, CPU_POWERPC_POWER7P_v21 = 0x004A0201, - CPU_POWERPC_POWER8_BASE = 0x004B0000, + CPU_POWERPC_POWER8E_BASE = 0x004B0000, + CPU_POWERPC_POWER8E_MASK = 0xFFFF0000, + CPU_POWERPC_POWER8E_v10 = 0x004B0100, + CPU_POWERPC_POWER8_BASE = 0x004D0000, CPU_POWERPC_POWER8_MASK = 0xFFFF0000, - CPU_POWERPC_POWER8_v10 = 0x004B0100, + CPU_POWERPC_POWER8_v10 = 0x004D0100, CPU_POWERPC_970 = 0x00390202, CPU_POWERPC_970FX_v10 = 0x00391100, CPU_POWERPC_970FX_v20 = 0x003C0200, diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h index 13c7031265..f1f0a52998 100644 --- a/target-ppc/cpu-qom.h +++ b/target-ppc/cpu-qom.h @@ -119,7 +119,9 @@ void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf, int flags); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); +int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg); int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg); int ppc64_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, void *opaque); int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 74407ee209..08ae527cb6 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -2012,7 +2012,7 @@ enum { PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \ PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \ PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \ - PPC2_ALTIVEC_207 | PPC2_ISA207S) + PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP) }; /*****************************************************************************/ diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c index 381a3c7e31..694d303e19 100644 --- a/target-ppc/gdbstub.c +++ b/target-ppc/gdbstub.c @@ -21,6 +21,31 @@ #include "qemu-common.h" #include "exec/gdbstub.h" +static int ppc_gdb_register_len_apple(int n) +{ + switch (n) { + case 0 ... 31: + /* gprs */ + return 8; + case 32 ... 63: + /* fprs */ + return 8; + case 64 ... 95: + return 16; + case 64+32: /* nip */ + case 65+32: /* msr */ + case 67+32: /* lr */ + case 68+32: /* ctr */ + case 69+32: /* xer */ + case 70+32: /* fpscr */ + return 8; + case 66+32: /* cr */ + return 4; + default: + return 0; + } +} + static int ppc_gdb_register_len(int n) { switch (n) { @@ -132,6 +157,65 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return r; } +int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + int r = ppc_gdb_register_len_apple(n); + + if (!r) { + return r; + } + + if (n < 32) { + /* gprs */ + gdb_get_reg64(mem_buf, env->gpr[n]); + } else if (n < 64) { + /* fprs */ + stfq_p(mem_buf, env->fpr[n-32]); + } else if (n < 96) { + /* Altivec */ + stq_p(mem_buf, n - 64); + stq_p(mem_buf + 8, 0); + } else { + switch (n) { + case 64 + 32: + gdb_get_reg64(mem_buf, env->nip); + break; + case 65 + 32: + gdb_get_reg64(mem_buf, env->msr); + break; + case 66 + 32: + { + uint32_t cr = 0; + int i; + for (i = 0; i < 8; i++) { + cr |= env->crf[i] << (32 - ((i + 1) * 4)); + } + gdb_get_reg32(mem_buf, cr); + break; + } + case 67 + 32: + gdb_get_reg64(mem_buf, env->lr); + break; + case 68 + 32: + gdb_get_reg64(mem_buf, env->ctr); + break; + case 69 + 32: + gdb_get_reg64(mem_buf, env->xer); + break; + case 70 + 32: + gdb_get_reg64(mem_buf, env->fpscr); + break; + } + } + if (msr_le) { + /* If cpu is in LE mode, convert memory contents to LE. */ + ppc_gdb_swap_register(mem_buf, n, r); + } + return r; +} + int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -185,3 +269,56 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) } return r; } +int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + int r = ppc_gdb_register_len_apple(n); + + if (!r) { + return r; + } + if (msr_le) { + /* If cpu is in LE mode, convert memory contents to LE. */ + ppc_gdb_swap_register(mem_buf, n, r); + } + if (n < 32) { + /* gprs */ + env->gpr[n] = ldq_p(mem_buf); + } else if (n < 64) { + /* fprs */ + env->fpr[n-32] = ldfq_p(mem_buf); + } else { + switch (n) { + case 64 + 32: + env->nip = ldq_p(mem_buf); + break; + case 65 + 32: + ppc_store_msr(env, ldq_p(mem_buf)); + break; + case 66 + 32: + { + uint32_t cr = ldl_p(mem_buf); + int i; + for (i = 0; i < 8; i++) { + env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF; + } + break; + } + case 67 + 32: + env->lr = ldq_p(mem_buf); + break; + case 68 + 32: + env->ctr = ldq_p(mem_buf); + break; + case 69 + 32: + env->xer = ldq_p(mem_buf); + break; + case 70 + 32: + /* fpscr */ + store_fpscr(env, ldq_p(mem_buf), 0xffffffff); + break; + } + } + return r; +} diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 561f8ccf2f..2d87108d8b 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -63,6 +63,7 @@ static int cap_ppc_smt; static int cap_ppc_rma; static int cap_spapr_tce; static int cap_spapr_multitce; +static int cap_spapr_vfio; static int cap_hior; static int cap_one_reg; static int cap_epr; @@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s) cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); + cap_spapr_vfio = false; cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); @@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void) return cap_spapr_multitce; } -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, + bool vfio_accel) { struct kvm_create_spapr_tce args = { .liobn = liobn, @@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) * destroying the table, which the upper layers -will- do */ *pfd = -1; - if (!cap_spapr_tce) { + if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) { return NULL; } diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 412cc7f3c1..1118122d89 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); #ifndef CONFIG_USER_ONLY off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem); bool kvmppc_spapr_use_multitce(void); -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd); +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd, + bool vfio_accel); int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); int kvmppc_reset_htab(int shift_hint); uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); @@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void) } static inline void *kvmppc_create_spapr_tce(uint32_t liobn, - uint32_t window_size, int *fd) + uint32_t window_size, int *fd, + bool vfio_accel) { return NULL; } diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 48017219a4..b23933f7bd 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -426,7 +426,6 @@ static inline uint32_t SPR(uint32_t opcode) return ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5); } /*** Get constants ***/ -EXTRACT_HELPER(IMM, 12, 8); /* 16 bits signed immediate value */ EXTRACT_SHELPER(SIMM, 0, 16); /* 16 bits unsigned immediate value */ @@ -459,8 +458,6 @@ EXTRACT_HELPER(FPFLM, 17, 8); EXTRACT_HELPER(FPW, 16, 1); /*** Jump target decoding ***/ -/* Displacement */ -EXTRACT_SHELPER(d, 0, 16); /* Immediate address */ static inline target_ulong LI(uint32_t opcode) { @@ -2665,11 +2662,6 @@ static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2) tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx); } -static inline void gen_qemu_ld8s(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - tcg_gen_qemu_ld8s(arg1, arg2, ctx->mem_idx); -} - static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2) { TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask; @@ -4123,8 +4115,9 @@ static void gen_mcrxr(DisasContext *ctx) tcg_gen_trunc_tl_i32(t0, cpu_so); tcg_gen_trunc_tl_i32(t1, cpu_ov); tcg_gen_trunc_tl_i32(dst, cpu_ca); - tcg_gen_shri_i32(t0, t0, 2); - tcg_gen_shri_i32(t1, t1, 1); + tcg_gen_shli_i32(t0, t0, 3); + tcg_gen_shli_i32(t1, t1, 2); + tcg_gen_shli_i32(dst, dst, 1); tcg_gen_or_i32(dst, dst, t0); tcg_gen_or_i32(dst, dst, t1); tcg_temp_free_i32(t0); diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 85581c9537..a3bb336e16 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -34,6 +34,7 @@ //#define PPC_DUMP_CPU //#define PPC_DEBUG_SPR //#define PPC_DUMP_SPR_ACCESSES +/* #define USE_APPLE_GDB */ /* For user-mode emulation, we don't emulate any IRQ controller */ #if defined(CONFIG_USER_ONLY) @@ -8188,16 +8189,16 @@ static void init_proc_POWER8(CPUPPCState *env) init_proc_book3s_64(env, BOOK3S_CPU_POWER8); } -POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) +POWERPC_FAMILY(POWER8E)(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); dc->fw_name = "PowerPC,POWER8"; - dc->desc = "POWER8"; + dc->desc = "POWER8E"; dc->props = powerpc_servercpu_properties; - pcc->pvr = CPU_POWERPC_POWER8_BASE; - pcc->pvr_mask = CPU_POWERPC_POWER8_MASK; + pcc->pvr = CPU_POWERPC_POWER8E_BASE; + pcc->pvr_mask = CPU_POWERPC_POWER8E_MASK; pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06; pcc->init_proc = init_proc_POWER8; pcc->check_pow = check_pow_nocheck; @@ -8251,6 +8252,18 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; } + +POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + ppc_POWER8E_cpu_family_class_init(oc, data); + + dc->desc = "POWER8"; + pcc->pvr = CPU_POWERPC_POWER8_BASE; + pcc->pvr_mask = CPU_POWERPC_POWER8_MASK; +} #endif /* defined (TARGET_PPC64) */ @@ -9667,6 +9680,13 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) #endif cc->gdb_num_core_regs = 71; + +#ifdef USE_APPLE_GDB + cc->gdb_read_register = ppc_cpu_gdb_read_register_apple; + cc->gdb_write_register = ppc_cpu_gdb_write_register_apple; + cc->gdb_num_core_regs = 71 + 32; +#endif + #if defined(TARGET_PPC64) cc->gdb_core_xml_file = "power64-core.xml"; #else diff --git a/tests/qapi-schema/event-nest-struct.err b/tests/qapi-schema/event-nest-struct.err index e4a0faac9c..91bde1c967 100644 --- a/tests/qapi-schema/event-nest-struct.err +++ b/tests/qapi-schema/event-nest-struct.err @@ -1 +1 @@ -tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported now, event 'EVENT_A', argname 'a' +tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported, event 'EVENT_A', argname 'a' diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031 index 1d920ea87a..2a77ba8cbb 100755 --- a/tests/qemu-iotests/031 +++ b/tests/qemu-iotests/031 @@ -56,22 +56,22 @@ for IMGOPTS in "compat=0.10" "compat=1.1"; do echo === Create image with unknown header extension === echo _make_test_img 64M - ./qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension" - ./qcow2.py "$TEST_IMG" dump-header + $PYTHON qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension" + $PYTHON qcow2.py "$TEST_IMG" dump-header _check_test_img echo echo === Rewrite header with no backing file === echo $QEMU_IMG rebase -u -b "" "$TEST_IMG" - ./qcow2.py "$TEST_IMG" dump-header + $PYTHON qcow2.py "$TEST_IMG" dump-header _check_test_img echo echo === Add a backing file and format === echo $QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device "$TEST_IMG" - ./qcow2.py "$TEST_IMG" dump-header + $PYTHON qcow2.py "$TEST_IMG" dump-header done # success, all done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 03b6aa9de7..a77365347d 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -53,15 +53,15 @@ IMGOPTS="compat=1.1" echo === Create image with unknown autoclear feature bit === echo _make_test_img 64M -./qcow2.py "$TEST_IMG" set-feature-bit autoclear 63 -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 63 +$PYTHON qcow2.py "$TEST_IMG" dump-header echo echo === Repair image === echo _check_test_img -r all -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header # success, all done echo "*** done" diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index 27fe4bdacc..84c9167660 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -63,7 +63,7 @@ _make_test_img $size $QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io # The dirty bit must not be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features _check_test_img echo @@ -75,7 +75,7 @@ _make_test_img $size _no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io # The dirty bit must be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features _check_test_img echo @@ -84,7 +84,7 @@ echo "== Read-only access must still work ==" $QEMU_IO -r -c "read -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io # The dirty bit must be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features echo echo "== Repairing the image file must succeed ==" @@ -92,7 +92,7 @@ echo "== Repairing the image file must succeed ==" _check_test_img -r all # The dirty bit must not be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features echo echo "== Data should still be accessible after repair ==" @@ -108,12 +108,12 @@ _make_test_img $size _no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io # The dirty bit must be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features $QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io # The dirty bit must not be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features echo echo "== Creating an image file with lazy_refcounts=off ==" @@ -124,7 +124,7 @@ _make_test_img $size _no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io # The dirty bit must not be set since lazy_refcounts=off -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features _check_test_img echo @@ -140,8 +140,8 @@ $QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG commit "$TEST_IMG" # The dirty bit must not be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features -./qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features _check_test_img TEST_IMG="$TEST_IMG".base _check_test_img diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 734b6a6bb4..d1668109dd 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -35,12 +35,13 @@ test_img = os.path.join(iotests.test_dir, 'test.img') class ImageCommitTestCase(iotests.QMPTestCase): '''Abstract base class for image commit test cases''' - def run_commit_test(self, top, base): + def run_commit_test(self, top, base, need_ready=False): self.assert_no_active_block_jobs() result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) self.assert_qmp(result, 'return', {}) completed = False + ready = False while not completed: for event in self.vm.get_qmp_events(wait=True): if event['event'] == 'BLOCK_JOB_COMPLETED': @@ -48,8 +49,11 @@ class ImageCommitTestCase(iotests.QMPTestCase): self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/offset', self.image_len) self.assert_qmp(event, 'data/len', self.image_len) + if need_ready: + self.assertTrue(ready, "Expecting BLOCK_JOB_COMPLETED event") completed = True elif event['event'] == 'BLOCK_JOB_READY': + ready = True self.assert_qmp(event, 'data/type', 'commit') self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/len', self.image_len) @@ -63,7 +67,7 @@ class TestSingleDrive(ImageCommitTestCase): test_len = 1 * 1024 * 256 def setUp(self): - iotests.create_image(backing_img, TestSingleDrive.image_len) + iotests.create_image(backing_img, self.image_len) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) qemu_io('-c', 'write -P 0xab 0 524288', backing_img) @@ -105,7 +109,7 @@ class TestSingleDrive(ImageCommitTestCase): self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') def test_top_is_active(self): - self.run_commit_test(test_img, backing_img) + self.run_commit_test(test_img, backing_img, need_ready=True) self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) @@ -238,6 +242,8 @@ class TestSetSpeed(ImageCommitTestCase): self.cancel_and_wait(resume=True) +class TestActiveZeroLengthImage(TestSingleDrive): + image_len = 0 if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out index b6f257674e..42314e9c00 100644 --- a/tests/qemu-iotests/040.out +++ b/tests/qemu-iotests/040.out @@ -1,5 +1,5 @@ -................ +........................ ---------------------------------------------------------------------- -Ran 16 tests +Ran 24 tests OK diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index ec470b2007..0815e19274 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -28,6 +28,12 @@ target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img') test_img = os.path.join(iotests.test_dir, 'test.img') target_img = os.path.join(iotests.test_dir, 'target.img') +quorum_img1 = os.path.join(iotests.test_dir, 'quorum1.img') +quorum_img2 = os.path.join(iotests.test_dir, 'quorum2.img') +quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img') +quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img') +quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img') + class ImageMirroringTestCase(iotests.QMPTestCase): '''Abstract base class for image mirroring test cases''' @@ -42,8 +48,8 @@ class ImageMirroringTestCase(iotests.QMPTestCase): ready = True def wait_ready_and_cancel(self, drive='drive0'): - self.wait_ready(drive) - event = self.cancel_and_wait() + self.wait_ready(drive=drive) + event = self.cancel_and_wait(drive=drive) self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED') self.assert_qmp(event, 'data/type', 'mirror') self.assert_qmp(event, 'data/offset', self.image_len) @@ -52,19 +58,19 @@ class ImageMirroringTestCase(iotests.QMPTestCase): def complete_and_wait(self, drive='drive0', wait_ready=True): '''Complete a block job and wait for it to finish''' if wait_ready: - self.wait_ready() + self.wait_ready(drive=drive) result = self.vm.qmp('block-job-complete', device=drive) self.assert_qmp(result, 'return', {}) - event = self.wait_until_completed() + event = self.wait_until_completed(drive=drive) self.assert_qmp(event, 'data/type', 'mirror') class TestSingleDrive(ImageMirroringTestCase): image_len = 1 * 1024 * 1024 # MB def setUp(self): - iotests.create_image(backing_img, TestSingleDrive.image_len) + iotests.create_image(backing_img, self.image_len) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) self.vm = iotests.VM().add_drive(test_img) self.vm.launch() @@ -163,7 +169,7 @@ class TestSingleDrive(ImageMirroringTestCase): self.assert_no_active_block_jobs() qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d' - % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img) + % (self.image_len, self.image_len), target_img) result = self.vm.qmp('drive-mirror', device='drive0', sync='full', buf_size=65536, mode='existing', target=target_img) self.assert_qmp(result, 'return', {}) @@ -179,7 +185,7 @@ class TestSingleDrive(ImageMirroringTestCase): self.assert_no_active_block_jobs() qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s' - % (TestSingleDrive.image_len, backing_img), target_img) + % (self.image_len, backing_img), target_img) result = self.vm.qmp('drive-mirror', device='drive0', sync='full', mode='existing', target=target_img) self.assert_qmp(result, 'return', {}) @@ -206,6 +212,11 @@ class TestSingleDrive(ImageMirroringTestCase): target=target_img) self.assert_qmp(result, 'error/class', 'DeviceNotFound') +class TestSingleDriveZeroLength(TestSingleDrive): + image_len = 0 + test_small_buffer2 = None + test_large_cluster = None + class TestMirrorNoBacking(ImageMirroringTestCase): image_len = 2 * 1024 * 1024 # MB @@ -718,5 +729,187 @@ class TestUnbackedSource(ImageMirroringTestCase): self.complete_and_wait() self.assert_no_active_block_jobs() +class TestRepairQuorum(ImageMirroringTestCase): + """ This class test quorum file repair using drive-mirror. + It's mostly a fork of TestSingleDrive """ + image_len = 1 * 1024 * 1024 # MB + IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ] + + def setUp(self): + self.vm = iotests.VM() + + # Add each individual quorum images + for i in self.IMAGES: + qemu_img('create', '-f', iotests.imgfmt, i, + str(TestSingleDrive.image_len)) + # Assign a node name to each quorum image in order to manipulate + # them + opts = "node-name=img%i" % self.IMAGES.index(i) + self.vm = self.vm.add_drive(i, opts) + + self.vm.launch() + + #assemble the quorum block device from the individual files + args = { "options" : { "driver": "quorum", "id": "quorum0", + "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } } + result = self.vm.qmp("blockdev-add", **args) + self.assert_qmp(result, 'return', {}) + + + def tearDown(self): + self.vm.shutdown() + for i in self.IMAGES + [ quorum_repair_img ]: + # Do a try/except because the test may have deleted some images + try: + os.remove(i) + except OSError: + pass + + def test_complete(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name="repair0", + replaces="img1", + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait(drive="quorum0") + result = self.vm.qmp('query-named-block-nodes') + self.assert_qmp(result, 'return[0]/file', quorum_repair_img) + # TODO: a better test requiring some QEMU infrastructure will be added + # to check that this file is really driven by quorum + self.vm.shutdown() + self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img), + 'target image does not match source after mirroring') + + def test_cancel(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name="repair0", + replaces="img1", + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait(drive="quorum0", force=True) + # here we check that the last registered quorum file has not been + # swapped out and unref + result = self.vm.qmp('query-named-block-nodes') + self.assert_qmp(result, 'return[0]/file', quorum_img3) + self.vm.shutdown() + + def test_cancel_after_ready(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name="repair0", + replaces="img1", + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.wait_ready_and_cancel(drive="quorum0") + result = self.vm.qmp('query-named-block-nodes') + # here we check that the last registered quorum file has not been + # swapped out and unref + self.assert_qmp(result, 'return[0]/file', quorum_img3) + self.vm.shutdown() + self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img), + 'target image does not match source after mirroring') + + def test_pause(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name="repair0", + replaces="img1", + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='quorum0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='quorum0') + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait(drive="quorum0") + self.vm.shutdown() + self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img), + 'target image does not match source after mirroring') + + def test_medium_not_found(self): + result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', + node_name='repair0', + replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_image_not_found(self): + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name='repair0', + replaces='img1', + mode='existing', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_device_not_found(self): + result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full', + node_name='repair0', + replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_wrong_sync_mode(self): + result = self.vm.qmp('drive-mirror', device='quorum0', + node_name='repair0', + replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_no_node_name(self): + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_unexistant_replaces(self): + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name='repair0', + replaces='img77', + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_after_a_quorum_snapshot(self): + result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1', + snapshot_file=quorum_snapshot_file, + snapshot_node_name="snap1"); + + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name='repair0', + replaces="img1", + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', + node_name='repair0', + replaces="snap1", + target=quorum_repair_img, format=iotests.imgfmt) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait(drive="quorum0") + result = self.vm.qmp('query-named-block-nodes') + self.assert_qmp(result, 'return[0]/file', quorum_repair_img) + # TODO: a better test requiring some QEMU infrastructure will be added + # to check that this file is really driven by quorum + self.vm.shutdown() + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index 6d9bee1a4b..42147c0b58 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -........................... +.............................................. ---------------------------------------------------------------------- -Ran 27 tests +Ran 46 tests OK diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index c4af131a66..a41334e022 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -92,6 +92,7 @@ echo run_qemu -drive file="$TEST_IMG",format=foo run_qemu -drive file="$TEST_IMG",driver=foo +run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2 echo echo === Overriding backing file === @@ -99,6 +100,11 @@ echo echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults +# Drivers that don't support backing files +run_qemu -drive file="$TEST_IMG",driver=raw,backing.file.filename="$TEST_IMG.orig" +run_qemu -drive file="$TEST_IMG",file.backing.driver=file,file.backing.filename="$TEST_IMG.orig" +run_qemu -drive file="$TEST_IMG",file.backing.driver=qcow2,file.backing.file.filename="$TEST_IMG.orig" + echo echo === Enable and disable lazy refcounting on the command line, plus some invalid values === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 31e329e893..d7b0f503af 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -38,7 +38,10 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=foo QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: 'foo' invalid format Testing: -drive file=TEST_DIR/t.qcow2,driver=foo -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Invalid driver: 'foo' +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Unknown driver 'foo' + +Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2: could not open disk image TEST_DIR/t.qcow2: Driver specified twice === Overriding backing file === @@ -50,6 +53,15 @@ ide0-hd0: TEST_DIR/t.qcow2 (qcow2) Backing file: TEST_DIR/t.qcow2.orig (chain depth: 1) (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K +Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files + +Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files + +Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files + === Enable and disable lazy refcounting on the command line, plus some invalid values === diff --git a/tests/qemu-iotests/054 b/tests/qemu-iotests/054 index c8b7082b4e..bd94153d66 100755 --- a/tests/qemu-iotests/054 +++ b/tests/qemu-iotests/054 @@ -49,7 +49,7 @@ _make_test_img $((1024*1024))T echo echo "creating too large image (1 EB) using qcow2.py" _make_test_img 4G -./qcow2.py "$TEST_IMG" set-header size $((1024 ** 6)) +$PYTHON qcow2.py "$TEST_IMG" set-header size $((1024 ** 6)) _check_test_img # success, all done diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index f0116aab1d..3cffc12fec 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -68,13 +68,13 @@ poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00" _check_test_img # The corrupt bit should not be set anyway -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features # Try to write something, thereby forcing the corrupt bit to be set $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io # The corrupt bit must now be set -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features # Try to open the image R/W (which should fail) $QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \ @@ -99,19 +99,19 @@ poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01" # Redirect new data cluster onto refcount block poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00" _check_test_img -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features # Try to fix it _check_test_img -r all # The corrupt bit should be cleared -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features # Look if it's really really fixed $QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features echo echo "=== Testing cluster data reference into inactive L2 table ===" @@ -124,13 +124,13 @@ $QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \ "\x80\x00\x00\x00\x00\x04\x00\x00" _check_test_img -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features $QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features _check_test_img -r all -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features $QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features +$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features # Check data $QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index d3a6b388b5..ab98def6d4 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -48,9 +48,9 @@ echo "=== Testing version downgrade with zero expansion ===" echo IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io _check_test_img @@ -59,9 +59,9 @@ echo "=== Testing dirty version downgrade ===" echo IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io _check_test_img @@ -69,11 +69,11 @@ echo echo "=== Testing version downgrade with unknown compat/autoclear flags ===" echo IMGOPTS="compat=1.1" _make_test_img 64M -./qcow2.py "$TEST_IMG" set-feature-bit compatible 42 -./qcow2.py "$TEST_IMG" set-feature-bit autoclear 42 -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" set-feature-bit compatible 42 +$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 42 +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header _check_test_img echo @@ -81,9 +81,9 @@ echo "=== Testing version upgrade and resize ===" echo IMGOPTS="compat=0.10" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "compat=1.1,lazy_refcounts=on,size=128M" "$TEST_IMG" -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IO -c "read -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io _check_test_img @@ -92,9 +92,9 @@ echo "=== Testing dirty lazy_refcounts=off ===" echo IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IMG amend -o "lazy_refcounts=off" "$TEST_IMG" -./qcow2.py "$TEST_IMG" dump-header +$PYTHON qcow2.py "$TEST_IMG" dump-header $QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io _check_test_img diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 index ab5445f62d..e89b61d70b 100755 --- a/tests/qemu-iotests/065 +++ b/tests/qemu-iotests/065 @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # # Test for additional information emitted by qemu-img info on qcow2 # images diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081 index b512d00cc8..7ae4be2053 100755 --- a/tests/qemu-iotests/081 +++ b/tests/qemu-iotests/081 @@ -134,15 +134,28 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF EOF echo +echo "== using quorum rewrite corrupted mode ==" + +quorum="$quorum,file.rewrite-corrupted=on" + +$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io + +echo +echo "== checking that quorum has corrected the corrupted file ==" + +$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io + +echo echo "== breaking quorum ==" $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io +$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io + echo echo "== checking that quorum is broken ==" $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io - # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out index 2241cec148..073515ea50 100644 --- a/tests/qemu-iotests/081.out +++ b/tests/qemu-iotests/081.out @@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} +== using quorum rewrite corrupted mode == +read 10485760/10485760 bytes at offset 0 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== checking that quorum has corrected the corrupted file == +read 10485760/10485760 bytes at offset 0 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + == breaking quorum == wrote 10485760/10485760 bytes at offset 0 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 10485760/10485760 bytes at offset 0 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == checking that quorum is broken == qemu-io: can't open: Could not read image for determining its format: Input/output error diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083 index f764534782..991a9d91db 100755 --- a/tests/qemu-iotests/083 +++ b/tests/qemu-iotests/083 @@ -44,7 +44,7 @@ choose_tcp_port() { wait_for_tcp_port() { while ! (netstat --tcp --listening --numeric | \ - grep "$1.*0.0.0.0:\*.*LISTEN") 2>&1 >/dev/null; do + grep "$1.*0\\.0\\.0\\.0:\\*.*LISTEN") 2>&1 >/dev/null; do sleep 0.1 done } @@ -55,8 +55,8 @@ filter_nbd() { # callbacks sometimes, making them unreliable. # # Filter out the TCP port number since this changes between runs. - sed -e 's#^nbd.c:.*##g' \ - -e 's#nbd:127.0.0.1:[^:]*:#nbd:127.0.0.1:PORT:#g' + sed -e 's#^.*nbd\.c:.*##g' \ + -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g' } check_disconnect() { @@ -81,8 +81,8 @@ EOF nbd_url="nbd:127.0.0.1:$port:exportname=foo" fi - ./nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null & - wait_for_tcp_port "127.0.0.1:$port" + $PYTHON nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null & + wait_for_tcp_port "127\\.0\\.0\\.1:$port" $QEMU_IO -c "read 0 512" "$nbd_url" 2>&1 | _filter_qemu_io | filter_nbd echo diff --git a/tests/qemu-iotests/095 b/tests/qemu-iotests/095 new file mode 100755 index 0000000000..acc7dbf182 --- /dev/null +++ b/tests/qemu-iotests/095 @@ -0,0 +1,86 @@ +#!/bin/bash +# +# Test for commit of larger active layer +# +# This tests live snapshots of images on a running QEMU instance, using +# QMP commands. Both single disk snapshots, and transactional group +# snapshots are performed. +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + rm -f "${TEST_IMG}.base" "${TEST_IMG}.snp1" + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +size_smaller=5M +size_larger=100M + +_make_test_img $size_smaller +mv "${TEST_IMG}" "${TEST_IMG}.base" + +_make_test_img -b "${TEST_IMG}.base" $size_larger +mv "${TEST_IMG}" "${TEST_IMG}.snp1" + +_make_test_img -b "${TEST_IMG}.snp1" $size_larger + +echo +echo "=== Base image info before commit and resize ===" +$QEMU_IMG info "${TEST_IMG}.base" | _filter_testdir + +echo +echo === Running QEMU Live Commit Test === +echo + +qemu_comm_method="qmp" +_launch_qemu -drive file="${TEST_IMG}",if=virtio,id=test +h=$QEMU_HANDLE + +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" "return" + +_send_qemu_cmd $h "{ 'execute': 'block-commit', + 'arguments': { 'device': 'test', + 'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED" + +echo +echo "=== Base image info after commit and resize ===" +$QEMU_IMG info "${TEST_IMG}.base" | _filter_testdir + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/095.out b/tests/qemu-iotests/095.out new file mode 100644 index 0000000000..5864ddac2b --- /dev/null +++ b/tests/qemu-iotests/095.out @@ -0,0 +1,31 @@ +QA output created by 095 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=5242880 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=104857600 backing_file='TEST_DIR/t.IMGFMT.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=104857600 backing_file='TEST_DIR/t.IMGFMT.snp1' + +=== Base image info before commit and resize === +image: TEST_DIR/t.qcow2.base +file format: qcow2 +virtual size: 5.0M (5242880 bytes) +disk size: 196K +cluster_size: 65536 +Format specific information: + compat: 1.1 + lazy refcounts: false + +=== Running QEMU Live Commit Test === + +{"return": {}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "test", "len": 104857600, "offset": 104857600, "speed": 0, "type": "commit"}} + +=== Base image info after commit and resize === +image: TEST_DIR/t.qcow2.base +file format: qcow2 +virtual size: 100M (104857600 bytes) +disk size: 196K +cluster_size: 65536 +Format specific information: + compat: 1.1 + lazy refcounts: false +*** done diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index e2ed5a95f8..8ca40116d7 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -34,22 +34,95 @@ timestamp=${TIMESTAMP:=false} # generic initialization iam=check +_init_error() +{ + echo "$iam: $1" >&2 + exit 1 +} + +if [ -L "$0" ] +then + # called from the build tree + source_iotests=$(dirname "$(readlink "$0")") + if [ -z "$source_iotests" ] + then + _init_error "failed to obtain source tree name from check symlink" + fi + source_iotests=$(cd "$source_iotests"; pwd) || _init_error "failed to enter source tree" + build_iotests=$PWD +else + # called from the source tree + source_iotests=$PWD + # this may be an in-tree build (note that in the following code we may not + # assume that it truly is and have to test whether the build results + # actually exist) + build_iotests=$PWD +fi + +build_root="$build_iotests/../.." + +if [ -x "$build_iotests/socket_scm_helper" ] +then + export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper" +fi + +# if ./qemu exists, it should be prioritized and will be chosen by common.config +if [[ -z "$QEMU_PROG" && ! -x './qemu' ]] +then + arch=$(uname -m 2> /dev/null) + + if [[ -n $arch && -x "$build_root/$arch-softmmu/qemu-system-$arch" ]] + then + export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch" + else + pushd "$build_root" > /dev/null + for binary in *-softmmu/qemu-system-* + do + if [ -x "$binary" ] + then + export QEMU_PROG="$build_root/$binary" + break + fi + done + popd > /dev/null + fi +fi + +if [[ -z $QEMU_IMG_PROG && -x "$build_root/qemu-img" && ! -x './qemu-img' ]] +then + export QEMU_IMG_PROG="$build_root/qemu-img" +fi + +if [[ -z $QEMU_IO_PROG && -x "$build_root/qemu-io" && ! -x './qemu-io' ]] +then + export QEMU_IO_PROG="$build_root/qemu-io" +fi + +if [[ -z $QEMU_NBD_PROG && -x "$build_root/qemu-nbd" && ! -x './qemu-nbd' ]] +then + export QEMU_NBD_PROG="$build_root/qemu-nbd" +fi + +# we need common.env +if ! . "$build_iotests/common.env" +then + _init_error "failed to source common.env (make sure the qemu-iotests are run from tests/qemu-iotests in the build tree)" +fi + # we need common.config -if ! . ./common.config +if ! . "$source_iotests/common.config" then - echo "$iam: failed to source common.config" - exit 1 + _init_error "failed to source common.config" fi # we need common.rc -if ! . ./common.rc +if ! . "$source_iotests/common.rc" then - echo "check: failed to source common.rc" - exit 1 + _init_error "failed to source common.rc" fi # we need common -. ./common +. "$source_iotests/common" #if [ `id -u` -ne 0 ] #then @@ -194,7 +267,7 @@ do echo " - expunged" rm -f $seq.out.bad echo "/^$seq\$/d" >>$tmp.expunged - elif [ ! -f $seq ] + elif [ ! -f "$source_iotests/$seq" ] then echo " - no such test?" echo "/^$seq\$/d" >>$tmp.expunged @@ -215,9 +288,16 @@ do start=`_wallclock` $timestamp && echo -n " ["`date "+%T"`"]" - [ ! -x $seq ] && chmod u+x $seq # ensure we can run it + + if [ "$(head -n 1 "$source_iotests/$seq")" == "#!/usr/bin/env python" ]; then + run_command="$PYTHON $seq" + else + run_command="./$seq" + fi + export OUTPUT_DIR=$PWD + (cd "$source_iotests"; MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \ - ./$seq >$tmp.out 2>&1 + $run_command >$tmp.out 2>&1) sts=$? $timestamp && _timestamp stop=`_wallclock` @@ -242,17 +322,17 @@ do err=true fi - reference=$seq.out + reference="$source_iotests/$seq.out" if [ "$CACHEMODE" = "none" ]; then - [ -f $seq.out.nocache ] && reference=$seq.out.nocache + [ -f "$source_iotests/$seq.out.nocache" ] && reference="$source_iotests/$seq.out.nocache" fi - if [ ! -f $reference ] + if [ ! -f "$reference" ] then echo " - no qualified output" err=true else - if diff -w $reference $tmp.out >/dev/null 2>&1 + if diff -w "$reference" $tmp.out >/dev/null 2>&1 then echo "" if $err @@ -264,7 +344,7 @@ do else echo " - output mismatch (see $seq.out.bad)" mv $tmp.out $seq.out.bad - $diff -w $reference $seq.out.bad + $diff -w "$reference" $seq.out.bad err=true fi fi diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 0aaf84d015..e4083f4212 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -25,8 +25,7 @@ _setenvironment() export MSGVERB } -here=`pwd` -rm -f $here/$iam.out +rm -f "$OUTPUT_DIR/$iam.out" _setenvironment check=${check-true} @@ -59,7 +58,7 @@ do if $group then # arg after -g - group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ + group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ s/ .*//p }'` if [ -z "$group_list" ] @@ -84,7 +83,7 @@ s/ .*//p then # arg after -x [ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null - group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ + group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{ s/ .*//p }'` if [ -z "$group_list" ] @@ -366,7 +365,7 @@ testlist options BEGIN { for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \ | while read id do - if grep -s "^$id " group >/dev/null + if grep -s "^$id " "$source_iotests/group" >/dev/null then # in group file ... OK echo $id >>$tmp.list @@ -402,7 +401,7 @@ else touch $tmp.list else # no test numbers, do everything from group file - sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <group >$tmp.list + sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <"$source_iotests/group" >$tmp.list fi fi diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index d90a8bca8b..bd6790be63 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -126,7 +126,7 @@ fi export TEST_DIR if [ -z "$SAMPLE_IMG_DIR" ]; then - SAMPLE_IMG_DIR=`pwd`/sample_images + SAMPLE_IMG_DIR="$source_iotests/sample_images" fi if [ ! -d "$SAMPLE_IMG_DIR" ]; then diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 195c5646aa..e0ea7e3a7d 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -318,9 +318,9 @@ _do() status=1; exit fi - (eval "echo '---' \"$_cmd\"") >>$here/$seq.full + (eval "echo '---' \"$_cmd\"") >>"$OUTPUT_DIR/$seq.full" (eval "$_cmd") >$tmp._out 2>&1; ret=$? - cat $tmp._out >>$here/$seq.full + cat $tmp._out >>"$OUTPUT_DIR/$seq.full" if [ $# -eq 2 ]; then if [ $ret -eq 0 ]; then echo "done" @@ -344,7 +344,7 @@ _do() # _notrun() { - echo "$*" >$seq.notrun + echo "$*" >"$OUTPUT_DIR/$seq.notrun" echo "$seq not run: $*" status=0 exit @@ -354,7 +354,7 @@ _notrun() # _fail() { - echo "$*" | tee -a $here/$seq.full + echo "$*" | tee -a "$OUTPUT_DIR/$seq.full" echo "(see $seq.full for details)" status=1 exit 1 diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 0f074403ae..e3dc4e8754 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -99,3 +99,4 @@ 090 rw auto quick 091 rw auto 092 rw auto quick +095 rw auto diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index f6c437c0c3..39a4cfcf4d 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -37,6 +37,7 @@ qemu_args = os.environ.get('QEMU', 'qemu').strip().split(' ') imgfmt = os.environ.get('IMGFMT', 'raw') imgproto = os.environ.get('IMGPROTO', 'file') test_dir = os.environ.get('TEST_DIR', '/var/tmp') +output_dir = os.environ.get('OUTPUT_DIR', '.') cachemode = os.environ.get('CACHEMODE') socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper') @@ -278,7 +279,7 @@ def notrun(reason): # Each test in qemu-iotests has a number ("seq") seq = os.path.basename(sys.argv[0]) - open('%s.notrun' % seq, 'wb').write(reason + '\n') + open('%s/%s.notrun' % (output_dir, seq), 'wb').write(reason + '\n') print '%s not run: %s' % (seq, reason) sys.exit(0) diff --git a/trace-events b/trace-events index ba01ad52cf..d071b97dd7 100644 --- a/trace-events +++ b/trace-events @@ -1169,12 +1169,13 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride, qxl_render_update_area_done(void *cookie) "%p" # hw/ppc/spapr_pci.c -spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)" +spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)" spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64 -spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u" +spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u" spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u" spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u" spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u" +spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u" # hw/intc/xics.c xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x" @@ -1188,6 +1189,12 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" xics_ics_eoi(int nr) "ics_eoi: irq %#x" +xics_alloc(int src, int irq) "source#%d, irq %d" +xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use" +xics_alloc_failed_no_left(int src) "source#%d, no irq left" +xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d" +xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" +xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free" # hw/ppc/spapr.c spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes" diff --git a/util/qemu-option.c b/util/qemu-option.c index 43de3add29..6dc27ce04f 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -1111,6 +1111,7 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, size_t num_opts, num_dst_opts; QemuOptDesc *desc; bool need_init = false; + bool need_head_update; if (!list) { return dst; @@ -1121,6 +1122,12 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, */ if (!dst) { need_init = true; + need_head_update = true; + } else { + /* Moreover, even if dst is not NULL, the realloc may move it to a + * different address in which case we may get a stale tail pointer + * in dst->head. */ + need_head_update = QTAILQ_EMPTY(&dst->head); } num_opts = count_opts_list(dst); @@ -1131,9 +1138,11 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst, if (need_init) { dst->name = NULL; dst->implied_opt_name = NULL; - QTAILQ_INIT(&dst->head); dst->merge_lists = false; } + if (need_head_update) { + QTAILQ_INIT(&dst->head); + } dst->desc[num_dst_opts].name = NULL; /* append list->desc to dst->desc */ @@ -43,11 +43,18 @@ static int vmstate_size(void *opaque, VMStateField *field) return size; } -static void *vmstate_base_addr(void *opaque, VMStateField *field) +static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc) { void *base_addr = opaque + field->offset; if (field->flags & VMS_POINTER) { + if (alloc && (field->flags & VMS_ALLOC)) { + int n_elems = vmstate_n_elems(opaque, field); + if (n_elems) { + gsize size = n_elems * field->size; + *((void **)base_addr + field->start) = g_malloc(size); + } + } base_addr = *(void **)base_addr + field->start; } @@ -81,7 +88,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = vmstate_base_addr(opaque, field); + void *base_addr = vmstate_base_addr(opaque, field, true); int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); @@ -135,7 +142,7 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = vmstate_base_addr(opaque, field); + void *base_addr = vmstate_base_addr(opaque, field, false); int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); |