aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Makefile2
-rw-r--r--block.c316
-rw-r--r--block/cow.c1
-rw-r--r--block/mirror.c71
-rw-r--r--block/nfs.c22
-rw-r--r--block/qapi.c6
-rw-r--r--block/qcow.c1
-rw-r--r--block/qcow2.c1
-rw-r--r--block/qed.c1
-rw-r--r--block/quorum.c97
-rw-r--r--block/vmdk.c1
-rw-r--r--blockdev.c47
-rw-r--r--blockjob.c22
-rwxr-xr-xconfigure12
-rw-r--r--docs/qapi-code-gen.txt8
-rw-r--r--docs/qmp/qmp-events.txt559
-rw-r--r--hmp.c1
-rw-r--r--hw/block/dataplane/virtio-blk.c246
-rw-r--r--hw/block/virtio-blk.c139
-rw-r--r--hw/char/virtio-console.c12
-rw-r--r--hw/core/qdev-properties-system.c3
-rw-r--r--hw/intc/xics.c182
-rw-r--r--hw/intc/xics_kvm.c34
-rw-r--r--hw/misc/vfio.c78
-rw-r--r--hw/net/eepro100.c4
-rw-r--r--hw/net/virtio-net.c2
-rw-r--r--hw/nvram/spapr_nvram.c4
-rw-r--r--hw/pci-host/uninorth.c2
-rw-r--r--hw/ppc/Makefile.objs3
-rw-r--r--hw/ppc/e500.c13
-rw-r--r--hw/ppc/mac_newworld.c18
-rw-r--r--hw/ppc/spapr.c104
-rw-r--r--hw/ppc/spapr_events.c5
-rw-r--r--hw/ppc/spapr_iommu.c7
-rw-r--r--hw/ppc/spapr_pci.c217
-rw-r--r--hw/ppc/spapr_pci_vfio.c102
-rw-r--r--hw/ppc/spapr_rtas.c97
-rw-r--r--hw/ppc/spapr_vio.c9
-rw-r--r--hw/watchdog/watchdog.c2
-rw-r--r--hw/xtensa/Makefile.objs4
-rw-r--r--hw/xtensa/bootparam.h49
-rw-r--r--hw/xtensa/sim.c (renamed from hw/xtensa/xtensa_sim.c)6
-rw-r--r--hw/xtensa/xtensa_bootparam.h25
-rw-r--r--hw/xtensa/xtfpga.c (renamed from hw/xtensa/xtensa_lx60.c)128
-rw-r--r--include/block/block.h6
-rw-r--r--include/block/block_int.h6
-rw-r--r--include/block/blockjob.h10
-rw-r--r--include/block/qapi.h1
-rw-r--r--include/hw/misc/vfio.h9
-rw-r--r--include/hw/pci-host/spapr.h32
-rw-r--r--include/hw/ppc/spapr.h82
-rw-r--r--include/hw/ppc/xics.h9
-rw-r--r--include/hw/virtio/virtio-blk.h23
-rw-r--r--include/migration/vmstate.h11
-rw-r--r--include/net/net.h2
-rw-r--r--linux-user/elfload.c44
-rw-r--r--monitor.c3
-rw-r--r--net/Makefile.objs1
-rw-r--r--net/clients.h2
-rw-r--r--net/l2tpv3.c757
-rw-r--r--net/net.c10
-rw-r--r--qapi-schema.json80
-rw-r--r--qapi/block-core.json33
-rw-r--r--qapi/event.json (renamed from qapi-event.json)18
-rw-r--r--qemu-bridge-helper.c9
-rw-r--r--qemu-char.c1
-rw-r--r--qemu-options.hx82
-rw-r--r--qmp-commands.hx24
-rw-r--r--scripts/qapi-event.py5
-rw-r--r--scripts/qapi.py2
-rw-r--r--target-ppc/cpu-models.c3
-rw-r--r--target-ppc/cpu-models.h7
-rw-r--r--target-ppc/cpu-qom.h2
-rw-r--r--target-ppc/cpu.h2
-rw-r--r--target-ppc/gdbstub.c137
-rw-r--r--target-ppc/kvm.c7
-rw-r--r--target-ppc/kvm_ppc.h6
-rw-r--r--target-ppc/translate.c13
-rw-r--r--target-ppc/translate_init.c28
-rw-r--r--tests/qapi-schema/event-nest-struct.err2
-rwxr-xr-xtests/qemu-iotests/0318
-rwxr-xr-xtests/qemu-iotests/0366
-rwxr-xr-xtests/qemu-iotests/03918
-rwxr-xr-xtests/qemu-iotests/04012
-rw-r--r--tests/qemu-iotests/040.out4
-rwxr-xr-xtests/qemu-iotests/041207
-rw-r--r--tests/qemu-iotests/041.out4
-rwxr-xr-xtests/qemu-iotests/0516
-rw-r--r--tests/qemu-iotests/051.out14
-rwxr-xr-xtests/qemu-iotests/0542
-rwxr-xr-xtests/qemu-iotests/06020
-rwxr-xr-xtests/qemu-iotests/06124
-rwxr-xr-xtests/qemu-iotests/0652
-rwxr-xr-xtests/qemu-iotests/08115
-rw-r--r--tests/qemu-iotests/081.out10
-rwxr-xr-xtests/qemu-iotests/08310
-rwxr-xr-xtests/qemu-iotests/09586
-rw-r--r--tests/qemu-iotests/095.out31
-rwxr-xr-xtests/qemu-iotests/check110
-rw-r--r--tests/qemu-iotests/common11
-rw-r--r--tests/qemu-iotests/common.config2
-rw-r--r--tests/qemu-iotests/common.rc8
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/qemu-iotests/iotests.py3
-rw-r--r--trace-events11
-rw-r--r--util/qemu-option.c11
-rw-r--r--vmstate.c13
108 files changed, 3816 insertions, 966 deletions
diff --git a/.gitignore b/.gitignore
index c658613560..2286d0a109 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@
/qapi-generated
/qapi-types.[ch]
/qapi-visit.[ch]
+/qapi-event.[ch]
/qmp-commands.h
/qmp-marshal.c
/qemu-doc.html
diff --git a/Makefile b/Makefile
index 145adb68a2..1eea0c418f 100644
--- a/Makefile
+++ b/Makefile
@@ -248,7 +248,7 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
$(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
- $(SRC_PATH)/qapi-event.json
+ $(SRC_PATH)/qapi/event.json
qapi-types.c qapi-types.h :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
diff --git a/block.c b/block.c
index 217f523dd0..6856c18aa8 100644
--- a/block.c
+++ b/block.c
@@ -831,7 +831,7 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
* Clear flags that are internal to the block layer before opening the
* image.
*/
- open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+ open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
/*
* Snapshots should be writable.
@@ -928,6 +928,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
bs->zero_beyond_eof = true;
open_flags = bdrv_open_flags(bs, flags);
bs->read_only = !(open_flags & BDRV_O_RDWR);
+ bs->growable = !!(flags & BDRV_O_PROTOCOL);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
error_setg(errp,
@@ -1005,94 +1006,124 @@ free_and_fail:
return ret;
}
+static QDict *parse_json_filename(const char *filename, Error **errp)
+{
+ QObject *options_obj;
+ QDict *options;
+ int ret;
+
+ ret = strstart(filename, "json:", &filename);
+ assert(ret);
+
+ options_obj = qobject_from_json(filename);
+ if (!options_obj) {
+ error_setg(errp, "Could not parse the JSON options");
+ return NULL;
+ }
+
+ if (qobject_type(options_obj) != QTYPE_QDICT) {
+ qobject_decref(options_obj);
+ error_setg(errp, "Invalid JSON object given");
+ return NULL;
+ }
+
+ options = qobject_to_qdict(options_obj);
+ qdict_flatten(options);
+
+ return options;
+}
+
/*
- * Opens a file using a protocol (file, host_device, nbd, ...)
- *
- * options is an indirect pointer to a QDict of options to pass to the block
- * drivers, or pointer to NULL for an empty set of options. If this function
- * takes ownership of the QDict reference, it will set *options to NULL;
- * otherwise, it will contain unused/unrecognized options after this function
- * returns. Then, the caller is responsible for freeing it. If it intends to
- * reuse the QDict, QINCREF() should be called beforehand.
+ * Fills in default options for opening images and converts the legacy
+ * filename/flags pair to option QDict entries.
*/
-static int bdrv_file_open(BlockDriverState *bs, const char *filename,
- QDict **options, int flags, Error **errp)
+static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
+ BlockDriver *drv, Error **errp)
{
- BlockDriver *drv;
+ const char *filename = *pfilename;
const char *drvname;
+ bool protocol = flags & BDRV_O_PROTOCOL;
bool parse_filename = false;
Error *local_err = NULL;
- int ret;
+
+ /* Parse json: pseudo-protocol */
+ if (filename && g_str_has_prefix(filename, "json:")) {
+ QDict *json_options = parse_json_filename(filename, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
+ }
+
+ /* Options given in the filename have lower priority than options
+ * specified directly */
+ qdict_join(*options, json_options, false);
+ QDECREF(json_options);
+ *pfilename = filename = NULL;
+ }
/* Fetch the file name from the options QDict if necessary */
- if (!filename) {
- filename = qdict_get_try_str(*options, "filename");
- } else if (filename && !qdict_haskey(*options, "filename")) {
- qdict_put(*options, "filename", qstring_from_str(filename));
- parse_filename = true;
- } else {
- error_setg(errp, "Can't specify 'file' and 'filename' options at the "
- "same time");
- ret = -EINVAL;
- goto fail;
+ if (protocol && filename) {
+ if (!qdict_haskey(*options, "filename")) {
+ qdict_put(*options, "filename", qstring_from_str(filename));
+ parse_filename = true;
+ } else {
+ error_setg(errp, "Can't specify 'file' and 'filename' options at "
+ "the same time");
+ return -EINVAL;
+ }
}
/* Find the right block driver */
+ filename = qdict_get_try_str(*options, "filename");
drvname = qdict_get_try_str(*options, "driver");
- if (drvname) {
- drv = bdrv_find_format(drvname);
- if (!drv) {
- error_setg(errp, "Unknown driver '%s'", drvname);
- }
- qdict_del(*options, "driver");
- } else if (filename) {
- drv = bdrv_find_protocol(filename, parse_filename);
- if (!drv) {
- error_setg(errp, "Unknown protocol");
+
+ if (drv) {
+ if (drvname) {
+ error_setg(errp, "Driver specified twice");
+ return -EINVAL;
}
+ drvname = drv->format_name;
+ qdict_put(*options, "driver", qstring_from_str(drvname));
} else {
- error_setg(errp, "Must specify either driver or file");
- drv = NULL;
- }
+ if (!drvname && protocol) {
+ if (filename) {
+ drv = bdrv_find_protocol(filename, parse_filename);
+ if (!drv) {
+ error_setg(errp, "Unknown protocol");
+ return -EINVAL;
+ }
- if (!drv) {
- /* errp has been set already */
- ret = -ENOENT;
- goto fail;
+ drvname = drv->format_name;
+ qdict_put(*options, "driver", qstring_from_str(drvname));
+ } else {
+ error_setg(errp, "Must specify either driver or file");
+ return -EINVAL;
+ }
+ } else if (drvname) {
+ drv = bdrv_find_format(drvname);
+ if (!drv) {
+ error_setg(errp, "Unknown driver '%s'", drvname);
+ return -ENOENT;
+ }
+ }
}
- /* Parse the filename and open it */
- if (drv->bdrv_parse_filename && parse_filename) {
+ assert(drv || !protocol);
+
+ /* Driver-specific filename parsing */
+ if (drv && drv->bdrv_parse_filename && parse_filename) {
drv->bdrv_parse_filename(filename, *options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
+ return -EINVAL;
}
if (!drv->bdrv_needs_filename) {
qdict_del(*options, "filename");
- } else {
- filename = qdict_get_str(*options, "filename");
}
}
- if (!drv->bdrv_file_open) {
- ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
- *options = NULL;
- } else {
- ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
- }
- if (ret < 0) {
- error_propagate(errp, local_err);
- goto fail;
- }
-
- bs->growable = 1;
return 0;
-
-fail:
- return ret;
}
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
@@ -1162,6 +1193,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
}
+ if (!bs->drv || !bs->drv->supports_backing) {
+ ret = -EINVAL;
+ error_setg(errp, "Driver doesn't support backing files");
+ QDECREF(options);
+ goto free_exit;
+ }
+
backing_hd = bdrv_new("", errp);
if (bs->backing_format[0] != '\0') {
@@ -1240,7 +1278,7 @@ done:
return ret;
}
-void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char *tmp_filename = g_malloc0(PATH_MAX + 1);
@@ -1258,6 +1296,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
/* Get the required size from the image */
total_size = bdrv_getlength(bs);
if (total_size < 0) {
+ ret = total_size;
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
@@ -1304,33 +1343,7 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
out:
g_free(tmp_filename);
-}
-
-static QDict *parse_json_filename(const char *filename, Error **errp)
-{
- QObject *options_obj;
- QDict *options;
- int ret;
-
- ret = strstart(filename, "json:", &filename);
- assert(ret);
-
- options_obj = qobject_from_json(filename);
- if (!options_obj) {
- error_setg(errp, "Could not parse the JSON options");
- return NULL;
- }
-
- if (qobject_type(options_obj) != QTYPE_QDICT) {
- qobject_decref(options_obj);
- error_setg(errp, "Invalid JSON object given");
- return NULL;
- }
-
- options = qobject_to_qdict(options_obj);
- qdict_flatten(options);
-
- return options;
+ return ret;
}
/*
@@ -1396,77 +1409,62 @@ int bdrv_open(BlockDriverState **pbs, const char *filename,
options = qdict_new();
}
- if (filename && g_str_has_prefix(filename, "json:")) {
- QDict *json_options = parse_json_filename(filename, &local_err);
- if (local_err) {
- ret = -EINVAL;
- goto fail;
- }
-
- /* Options given in the filename have lower priority than options
- * specified directly */
- qdict_join(options, json_options, false);
- QDECREF(json_options);
- filename = NULL;
- }
-
- bs->options = options;
- options = qdict_clone_shallow(options);
-
- if (flags & BDRV_O_PROTOCOL) {
- assert(!drv);
- ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
- &local_err);
- if (!ret) {
- drv = bs->drv;
- goto done;
- } else if (bs->drv) {
- goto close_and_fail;
- } else {
- goto fail;
- }
- }
-
- /* Open image file without format layer */
- if (flags & BDRV_O_RDWR) {
- flags |= BDRV_O_ALLOW_RDWR;
- }
- if (flags & BDRV_O_SNAPSHOT) {
- snapshot_flags = bdrv_temp_snapshot_flags(flags);
- flags = bdrv_backing_flags(flags);
- }
-
- assert(file == NULL);
- ret = bdrv_open_image(&file, filename, options, "file",
- bdrv_inherited_flags(flags),
- true, &local_err);
- if (ret < 0) {
+ ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
+ if (local_err) {
goto fail;
}
/* Find the right image format driver */
+ drv = NULL;
drvname = qdict_get_try_str(options, "driver");
if (drvname) {
drv = bdrv_find_format(drvname);
qdict_del(options, "driver");
if (!drv) {
- error_setg(errp, "Invalid driver: '%s'", drvname);
+ error_setg(errp, "Unknown driver: '%s'", drvname);
ret = -EINVAL;
goto fail;
}
}
- if (!drv) {
- if (file) {
- ret = find_image_format(file, filename, &drv, &local_err);
- } else {
- error_setg(errp, "Must specify either driver or file");
- ret = -EINVAL;
+ assert(drvname || !(flags & BDRV_O_PROTOCOL));
+ if (drv && !drv->bdrv_file_open) {
+ /* If the user explicitly wants a format driver here, we'll need to add
+ * another layer for the protocol in bs->file */
+ flags &= ~BDRV_O_PROTOCOL;
+ }
+
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* Open image file without format layer */
+ if ((flags & BDRV_O_PROTOCOL) == 0) {
+ if (flags & BDRV_O_RDWR) {
+ flags |= BDRV_O_ALLOW_RDWR;
+ }
+ if (flags & BDRV_O_SNAPSHOT) {
+ snapshot_flags = bdrv_temp_snapshot_flags(flags);
+ flags = bdrv_backing_flags(flags);
+ }
+
+ assert(file == NULL);
+ ret = bdrv_open_image(&file, filename, options, "file",
+ bdrv_inherited_flags(flags),
+ true, &local_err);
+ if (ret < 0) {
goto fail;
}
}
- if (!drv) {
+ /* Image format probing */
+ if (!drv && file) {
+ ret = find_image_format(file, filename, &drv, &local_err);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else if (!drv) {
+ error_setg(errp, "Must specify either driver or file");
+ ret = -EINVAL;
goto fail;
}
@@ -1495,15 +1493,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename,
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
* temporary snapshot afterwards. */
if (snapshot_flags) {
- bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+ ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
if (local_err) {
- error_propagate(errp, local_err);
goto close_and_fail;
}
}
-
-done:
/* Check if any unknown options were used */
if (options && (qdict_size(options) != 0)) {
const QDictEntry *entry = qdict_first(options);
@@ -3489,9 +3484,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
return -ENOTSUP;
if (bs->read_only)
return -EACCES;
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
- return -EBUSY;
- }
+
ret = drv->bdrv_truncate(bs, offset);
if (ret == 0) {
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
@@ -5774,3 +5767,28 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate)
return false;
}
+
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
+{
+ BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+ if (!to_replace_bs) {
+ error_setg(errp, "Node name '%s' not found", node_name);
+ return NULL;
+ }
+
+ if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
+ return NULL;
+ }
+
+ /* We don't want arbitrary node of the BDS chain to be replaced only the top
+ * most non filter in order to prevent data corruption.
+ * Another benefit is that this tests exclude backing files which are
+ * blocked by the backing blockers.
+ */
+ if (!bdrv_is_first_non_filter(to_replace_bs)) {
+ error_setg(errp, "Only top most non filter can be replaced");
+ return NULL;
+ }
+
+ return to_replace_bs;
+}
diff --git a/block/cow.c b/block/cow.c
index a05a92cada..8f81ee6d56 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -414,6 +414,7 @@ static BlockDriver bdrv_cow = {
.bdrv_close = cow_close,
.bdrv_create = cow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .supports_backing = true,
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
diff --git a/block/mirror.c b/block/mirror.c
index 301a04de8e..6c3ee7041c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
RateLimit limit;
BlockDriverState *target;
BlockDriverState *base;
+ /* The name of the graph node to replace */
+ char *replaces;
+ /* The BDS to replace */
+ BlockDriverState *to_replace;
+ /* Used to block operations on the drive-mirror-replace target */
+ Error *replace_blocker;
bool is_none_mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
@@ -324,9 +330,18 @@ static void coroutine_fn mirror_run(void *opaque)
}
s->common.len = bdrv_getlength(bs);
- if (s->common.len <= 0) {
+ if (s->common.len < 0) {
ret = s->common.len;
goto immediate_exit;
+ } else if (s->common.len == 0) {
+ /* Report BLOCK_JOB_READY and wait for complete. */
+ block_job_event_ready(&s->common);
+ s->synced = true;
+ while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
+ block_job_yield(&s->common);
+ }
+ s->common.cancelled = false;
+ goto immediate_exit;
}
length = DIV_ROUND_UP(s->common.len, s->granularity);
@@ -491,10 +506,14 @@ immediate_exit:
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
- if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
- bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+ BlockDriverState *to_replace = s->common.bs;
+ if (s->to_replace) {
+ to_replace = s->to_replace;
}
- bdrv_swap(s->target, s->common.bs);
+ if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+ bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+ }
+ bdrv_swap(s->target, to_replace);
if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
/* drop the bs loop chain formed by the swap: break the loop then
* trigger the unref from the top one */
@@ -503,6 +522,12 @@ immediate_exit:
bdrv_unref(p);
}
}
+ if (s->to_replace) {
+ bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+ error_free(s->replace_blocker);
+ bdrv_unref(s->to_replace);
+ }
+ g_free(s->replaces);
bdrv_unref(s->target);
block_job_completed(&s->common, ret);
}
@@ -541,6 +566,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
return;
}
+ /* check the target bs is not blocked and block all operations on it */
+ if (s->replaces) {
+ s->to_replace = check_to_replace_node(s->replaces, &local_err);
+ if (!s->to_replace) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ error_setg(&s->replace_blocker,
+ "block device is in use by block-job-complete");
+ bdrv_op_block_all(s->to_replace, s->replace_blocker);
+ bdrv_ref(s->to_replace);
+ }
+
s->should_complete = true;
block_job_resume(job);
}
@@ -563,14 +602,15 @@ static const BlockJobDriver commit_active_job_driver = {
};
static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, int64_t granularity,
- int64_t buf_size,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp,
- const BlockJobDriver *driver,
- bool is_none_mode, BlockDriverState *base)
+ const char *replaces,
+ int64_t speed, int64_t granularity,
+ int64_t buf_size,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp,
+ const BlockJobDriver *driver,
+ bool is_none_mode, BlockDriverState *base)
{
MirrorBlockJob *s;
@@ -601,6 +641,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
return;
}
+ s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
s->target = target;
@@ -622,6 +663,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
}
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ const char *replaces,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
@@ -633,7 +675,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
- mirror_start_job(bs, target, speed, granularity, buf_size,
+ mirror_start_job(bs, target, replaces,
+ speed, granularity, buf_size,
on_source_error, on_target_error, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
@@ -681,7 +724,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
}
bdrv_ref(base);
- mirror_start_job(bs, base, speed, 0, 0,
+ mirror_start_job(bs, base, NULL, speed, 0, 0,
on_error, on_error, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (local_err) {
diff --git a/block/nfs.c b/block/nfs.c
index ec43201817..8439e0d389 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -304,17 +304,27 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
qp = query_params_parse(uri->query);
for (i = 0; i < qp->n; i++) {
+ unsigned long long val;
if (!qp->p[i].value) {
error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
goto fail;
}
- if (!strncmp(qp->p[i].name, "uid", 3)) {
- nfs_set_uid(client->context, atoi(qp->p[i].value));
- } else if (!strncmp(qp->p[i].name, "gid", 3)) {
- nfs_set_gid(client->context, atoi(qp->p[i].value));
- } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
- nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+ if (parse_uint_full(qp->p[i].value, &val, 0)) {
+ error_setg(errp, "Illegal value for NFS parameter: %s",
+ qp->p[i].name);
+ goto fail;
+ }
+ if (!strcmp(qp->p[i].name, "uid")) {
+ nfs_set_uid(client->context, val);
+ } else if (!strcmp(qp->p[i].name, "gid")) {
+ nfs_set_gid(client->context, val);
+ } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
+ nfs_set_tcp_syncnt(client->context, val);
+#ifdef LIBNFS_FEATURE_READAHEAD
+ } else if (!strcmp(qp->p[i].name, "readahead")) {
+ nfs_set_readahead(client->context, val);
+#endif
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
qp->p[i].name);
diff --git a/block/qapi.c b/block/qapi.c
index 97e16418ef..f44f6b4012 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -293,7 +293,7 @@ void bdrv_query_info(BlockDriverState *bs,
qapi_free_BlockInfo(info);
}
-BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
{
BlockStats *s;
@@ -360,7 +360,11 @@ BlockStatsList *qmp_query_blockstats(Error **errp)
while ((bs = bdrv_next(bs))) {
BlockStatsList *info = g_malloc0(sizeof(*info));
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
info->value = bdrv_query_stats(bs);
+ aio_context_release(ctx);
*p_next = info;
p_next = &info->next;
diff --git a/block/qcow.c b/block/qcow.c
index 1f2bac8a5f..a874056cf3 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -941,6 +941,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_reopen_prepare = qcow_reopen_prepare,
.bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .supports_backing = true,
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
diff --git a/block/qcow2.c b/block/qcow2.c
index b9d2fa6632..67e55c9667 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2418,6 +2418,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_save_vmstate = qcow2_save_vmstate,
.bdrv_load_vmstate = qcow2_load_vmstate,
+ .supports_backing = true,
.bdrv_change_backing_file = qcow2_change_backing_file,
.bdrv_refresh_limits = qcow2_refresh_limits,
diff --git a/block/qed.c b/block/qed.c
index 092e6fb1d2..eddae929eb 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1652,6 +1652,7 @@ static BlockDriver bdrv_qed = {
.format_name = "qed",
.instance_size = sizeof(BDRVQEDState),
.create_opts = &qed_create_opts,
+ .supports_backing = true,
.bdrv_probe = bdrv_qed_probe,
.bdrv_rebind = bdrv_qed_rebind,
diff --git a/block/quorum.c b/block/quorum.c
index 86802d306b..d5ee9c0059 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -23,6 +23,7 @@
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
+#define QUORUM_OPT_REWRITE "rewrite-corrupted"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
@@ -70,6 +71,9 @@ typedef struct BDRVQuorumState {
* It is useful to debug other block drivers by
* comparing them with a reference one.
*/
+ bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+ * block if Quorum is reached.
+ */
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
@@ -105,13 +109,17 @@ struct QuorumAIOCB {
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
+ int rewrite_count; /* number of replica to rewrite: count down to
+ * zero once writes are fired
+ */
+
QuorumVotes votes;
bool is_read;
int vote_ret;
};
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
@@ -183,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
acb->count = 0;
acb->success_count = 0;
+ acb->rewrite_count = 0;
acb->votes.compare = quorum_sha256_compare;
QLIST_INIT(&acb->votes.vote_list);
acb->is_read = false;
@@ -232,11 +241,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
return false;
}
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+ QuorumAIOCB *acb = opaque;
+
+ /* one less rewrite to do */
+ acb->rewrite_count--;
+
+ /* wait until all rewrite callbacks have completed */
+ if (acb->rewrite_count) {
+ return;
+ }
+
+ quorum_aio_finalize(acb);
+}
+
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
+ bool rewrite = false;
sacb->ret = ret;
acb->count++;
@@ -253,12 +278,15 @@ static void quorum_aio_cb(void *opaque, int ret)
/* Do the vote on read */
if (acb->is_read) {
- quorum_vote(acb);
+ rewrite = quorum_vote(acb);
} else {
quorum_has_too_much_io_failed(acb);
}
- quorum_aio_finalize(acb);
+ /* if no rewrite is done the code will finish right away */
+ if (!rewrite) {
+ quorum_aio_finalize(acb);
+ }
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -278,6 +306,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
}
}
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+ QuorumVoteValue *value)
+{
+ QuorumVoteVersion *version;
+ QuorumVoteItem *item;
+ int count = 0;
+
+ /* first count the number of bad versions: done first to avoid concurrency
+ * issues.
+ */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ count++;
+ }
+ }
+
+ /* quorum_rewrite_aio_cb will count down this to zero */
+ acb->rewrite_count = count;
+
+ /* now fire the correcting rewrites */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+ acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ }
+ }
+
+ /* return true if any rewrite is done else false */
+ return count;
+}
+
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
int i;
@@ -468,16 +533,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
return ret;
}
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
+ bool rewrite = false;
int i, j, ret;
QuorumVoteValue hash;
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
- return;
+ return false;
}
/* get the index of the first successful read */
@@ -505,7 +571,7 @@ static void quorum_vote(QuorumAIOCB *acb)
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
- return;
+ return false;
}
/* compute hashes for each successful read, also store indexes */
@@ -538,9 +604,15 @@ static void quorum_vote(QuorumAIOCB *acb)
/* some versions are bad print them */
quorum_report_bad_versions(s, acb, &winner->value);
+ /* corruption correction is enabled */
+ if (s->rewrite_corrupted) {
+ rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+ }
+
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
+ return rewrite;
}
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -705,6 +777,11 @@ static QemuOptsList quorum_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Trigger block verify mode if set",
},
+ {
+ .name = QUORUM_OPT_REWRITE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Rewrite corrupted block on read quorum",
+ },
{ /* end of list */ }
},
};
@@ -766,6 +843,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
"and using two files with vote_threshold=2\n");
}
+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+ if (s->rewrite_corrupted && s->is_blkverify) {
+ error_setg(&local_err,
+ "rewrite-corrupted=on cannot be used with blkverify=on");
+ ret = -EINVAL;
+ goto exit;
+ }
+
/* allocate the children BlockDriverState array */
s->bs = g_new0(BlockDriverState *, s->num_children);
opened = g_new0(bool, s->num_children);
diff --git a/block/vmdk.c b/block/vmdk.c
index 83dd6fe4fb..d0de0193fc 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2180,6 +2180,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_detach_aio_context = vmdk_detach_aio_context,
.bdrv_attach_aio_context = vmdk_attach_aio_context,
+ .supports_backing = true,
.create_opts = &vmdk_create_opts,
};
diff --git a/blockdev.c b/blockdev.c
index 03ab153d01..69b7c2a8c5 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1819,6 +1819,11 @@ void qmp_block_resize(bool has_device, const char *device,
return;
}
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
+ error_set(errp, QERR_DEVICE_IN_USE, device);
+ return;
+ }
+
/* complete all in-flight operations before resizing the device */
bdrv_drain_all();
@@ -2094,6 +2099,8 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
void qmp_drive_mirror(const char *device, const char *target,
bool has_format, const char *format,
+ bool has_node_name, const char *node_name,
+ bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
@@ -2107,6 +2114,7 @@ void qmp_drive_mirror(const char *device, const char *target,
BlockDriverState *source, *target_bs;
BlockDriver *drv = NULL;
Error *local_err = NULL;
+ QDict *options = NULL;
int flags;
int64_t size;
int ret;
@@ -2180,6 +2188,29 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
+ if (has_replaces) {
+ BlockDriverState *to_replace_bs;
+
+ if (!has_node_name) {
+ error_setg(errp, "a node-name must be provided when replacing a"
+ " named node of the graph");
+ return;
+ }
+
+ to_replace_bs = check_to_replace_node(replaces, &local_err);
+
+ if (!to_replace_bs) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ if (size != bdrv_getlength(to_replace_bs)) {
+ error_setg(errp, "cannot replace image with a mirror image of "
+ "different size");
+ return;
+ }
+ }
+
if ((sync == MIRROR_SYNC_MODE_FULL || !source)
&& mode != NEW_IMAGE_MODE_EXISTING)
{
@@ -2208,18 +2239,28 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
+ if (has_node_name) {
+ options = qdict_new();
+ qdict_put(options, "node-name", qstring_from_str(node_name));
+ }
+
/* Mirroring takes care of copy-on-write using the source's backing
* file.
*/
target_bs = NULL;
- ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING,
- drv, &local_err);
+ ret = bdrv_open(&target_bs, target, NULL, options,
+ flags | BDRV_O_NO_BACKING, drv, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return;
}
- mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+ /* pass the node name to replace to mirror start since it's loose coupling
+ * and will allow to check whether the node still exist at mirror completion
+ */
+ mirror_start(bs, target_bs,
+ has_replaces ? replaces : NULL,
+ speed, granularity, buf_size, sync,
on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
diff --git a/blockjob.c b/blockjob.c
index 4da86cdfcd..67a64ea318 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -210,6 +210,20 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
job->busy = true;
}
+void block_job_yield(BlockJob *job)
+{
+ assert(job->busy);
+
+ /* Check cancellation *before* setting busy = false, too! */
+ if (block_job_is_cancelled(job)) {
+ return;
+ }
+
+ job->busy = false;
+ qemu_coroutine_yield();
+ job->busy = true;
+}
+
BlockJobInfo *block_job_query(BlockJob *job)
{
BlockJobInfo *info = g_new0(BlockJobInfo, 1);
@@ -256,7 +270,11 @@ void block_job_event_completed(BlockJob *job, const char *msg)
void block_job_event_ready(BlockJob *job)
{
- qapi_event_send_block_job_ready(bdrv_get_device_name(job->bs), &error_abort);
+ qapi_event_send_block_job_ready(job->driver->job_type,
+ bdrv_get_device_name(job->bs),
+ job->len,
+ job->offset,
+ job->speed, &error_abort);
}
BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
@@ -282,7 +300,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
default:
abort();
}
- qapi_event_send_block_job_error(bdrv_get_device_name(bs),
+ qapi_event_send_block_job_error(bdrv_get_device_name(job->bs),
is_read ? IO_OPERATION_TYPE_READ :
IO_OPERATION_TYPE_WRITE,
action, &error_abort);
diff --git a/configure b/configure
index 71029643cc..23ecb37c43 100755
--- a/configure
+++ b/configure
@@ -5273,6 +5273,18 @@ if test "$docs" = "yes" ; then
mkdir -p QMP
fi
+# set up qemu-iotests in this build directory
+iotests_common_env="tests/qemu-iotests/common.env"
+iotests_check="tests/qemu-iotests/check"
+
+echo "# Automatically generated by configure - do not modify" > "$iotests_common_env"
+echo >> "$iotests_common_env"
+echo "export PYTHON='$python'" >> "$iotests_common_env"
+
+if [ ! -e "$iotests_check" ]; then
+ symlink "$source_path/$iotests_check" "$iotests_check"
+fi
+
# Save the configure command line for later reuse.
cat <<EOD >config.status
#!/bin/sh
diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt
index 3a0c99e1da..a6197a9133 100644
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -218,10 +218,10 @@ An example command is:
=== Events ===
Events are defined with the keyword 'event'. When 'data' is also specified,
-additional info will be carried on. Finally there will be C API generated
-in qapi-event.h; when called by QEMU code, a message with timestamp will
-be emitted on the wire. If timestamp is -1, it means failure to retrieve host
-time.
+additional info will be included in the event. Finally there will be C API
+generated in qapi-event.h; when called by QEMU code, a message with timestamp
+will be emitted on the wire. If timestamp is -1, it means failure to retrieve
+host time.
An example event is:
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
new file mode 100644
index 0000000000..44be891261
--- /dev/null
+++ b/docs/qmp/qmp-events.txt
@@ -0,0 +1,559 @@
+ QEMU Machine Protocol Events
+ ============================
+
+ACPI_DEVICE_OST
+---------------
+
+Emitted when guest executes ACPI _OST method.
+
+ - data: ACPIOSTInfo type as described in qapi-schema.json
+
+{ "event": "ACPI_DEVICE_OST",
+ "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } }
+
+BALLOON_CHANGE
+--------------
+
+Emitted when the guest changes the actual BALLOON level. This
+value is equivalent to the 'actual' field return by the
+'query-balloon' command
+
+Data:
+
+- "actual": actual level of the guest memory balloon in bytes (json-number)
+
+Example:
+
+{ "event": "BALLOON_CHANGE",
+ "data": { "actual": 944766976 },
+ "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+BLOCK_IMAGE_CORRUPTED
+---------------------
+
+Emitted when a disk image is being marked corrupt.
+
+Data:
+
+- "device": Device name (json-string)
+- "msg": Informative message (e.g., reason for the corruption) (json-string)
+- "offset": If the corruption resulted from an image access, this is the access
+ offset into the image (json-int)
+- "size": If the corruption resulted from an image access, this is the access
+ size (json-int)
+
+Example:
+
+{ "event": "BLOCK_IMAGE_CORRUPTED",
+ "data": { "device": "ide0-hd0",
+ "msg": "Prevented active L1 table overwrite", "offset": 196608,
+ "size": 65536 },
+ "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+
+BLOCK_IO_ERROR
+--------------
+
+Emitted when a disk I/O error occurs.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+ "ignore": error has been ignored
+ "report": error has been reported to the device
+ "stop": the VM is going to stop because of the error
+
+Example:
+
+{ "event": "BLOCK_IO_ERROR",
+ "data": { "device": "ide0-hd1",
+ "operation": "write",
+ "action": "stop" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: If action is "stop", a STOP event will eventually follow the
+BLOCK_IO_ERROR event.
+
+BLOCK_JOB_CANCELLED
+-------------------
+
+Emitted when a block job has been cancelled.
+
+Data:
+
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
+- "device": Device name (json-string)
+- "len": Maximum progress value (json-int)
+- "offset": Current progress value (json-int)
+ On success this is equal to len.
+ On failure this is less than len.
+- "speed": Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_CANCELLED",
+ "data": { "type": "stream", "device": "virtio-disk0",
+ "len": 10737418240, "offset": 134217728,
+ "speed": 0 },
+ "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_COMPLETED
+-------------------
+
+Emitted when a block job has completed.
+
+Data:
+
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
+- "device": Device name (json-string)
+- "len": Maximum progress value (json-int)
+- "offset": Current progress value (json-int)
+ On success this is equal to len.
+ On failure this is less than len.
+- "speed": Rate limit, bytes per second (json-int)
+- "error": Error message (json-string, optional)
+ Only present on failure. This field contains a human-readable
+ error message. There are no semantics other than that streaming
+ has failed and clients should not try to interpret the error
+ string.
+
+Example:
+
+{ "event": "BLOCK_JOB_COMPLETED",
+ "data": { "type": "stream", "device": "virtio-disk0",
+ "len": 10737418240, "offset": 10737418240,
+ "speed": 0 },
+ "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+ "ignore": error has been ignored, the job may fail later
+ "report": error will be reported and the job canceled
+ "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+ "data": { "device": "ide0-hd1",
+ "operation": "write",
+ "action": "stop" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+BLOCK_JOB_READY
+---------------
+
+Emitted when a block job is ready to complete.
+
+Data:
+
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
+- "device": Device name (json-string)
+- "len": Maximum progress value (json-int)
+- "offset": Current progress value (json-int)
+ On success this is equal to len.
+ On failure this is less than len.
+- "speed": Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_READY",
+ "data": { "device": "drive0", "type": "mirror", "speed": 0,
+ "len": 2097152, "offset": 2097152 }
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
+event.
+
+DEVICE_DELETED
+--------------
+
+Emitted whenever the device removal completion is acknowledged
+by the guest.
+At this point, it's safe to reuse the specified device ID.
+Device removal can be initiated by the guest or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string, optional)
+- "path": device path (json-string)
+
+{ "event": "DEVICE_DELETED",
+ "data": { "device": "virtio-net-pci-0",
+ "path": "/machine/peripheral/virtio-net-pci-0" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+DEVICE_TRAY_MOVED
+-----------------
+
+It's emitted whenever the tray of a removable device is moved by the guest
+or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string)
+- "tray-open": true if the tray has been opened or false if it has been closed
+ (json-bool)
+
+{ "event": "DEVICE_TRAY_MOVED",
+ "data": { "device": "ide1-cd0",
+ "tray-open": true
+ },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+GUEST_PANICKED
+--------------
+
+Emitted when guest OS panic is detected.
+
+Data:
+
+- "action": Action that has been taken (json-string, currently always "pause").
+
+Example:
+
+{ "event": "GUEST_PANICKED",
+ "data": { "action": "pause" } }
+
+NIC_RX_FILTER_CHANGED
+---------------------
+
+The event is emitted once until the query command is executed,
+the first event will always be emitted.
+
+Data:
+
+- "name": net client name (json-string)
+- "path": device path (json-string)
+
+{ "event": "NIC_RX_FILTER_CHANGED",
+ "data": { "name": "vnet0",
+ "path": "/machine/peripheral/vnet0/virtio-backend" },
+ "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
+}
+
+QUORUM_FAILURE
+--------------
+
+Emitted by the Quorum block driver if it fails to establish a quorum.
+
+Data:
+
+- "reference": device name if defined else node name.
+- "sector-num": Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_FAILURE",
+ "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
+ "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+QUORUM_REPORT_BAD
+-----------------
+
+Emitted to report a corruption of a Quorum file.
+
+Data:
+
+- "error": Error message (json-string, optional)
+ Only present on failure. This field contains a human-readable
+ error message. There are no semantics other than that the
+ block layer reported an error and clients should not try to
+ interpret the error string.
+- "node-name": The graph node name of the block driver state.
+- "sector-num": Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_REPORT_BAD",
+ "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
+ "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+RESET
+-----
+
+Emitted when the Virtual Machine is reseted.
+
+Data: None.
+
+Example:
+
+{ "event": "RESET",
+ "timestamp": { "seconds": 1267041653, "microseconds": 9518 } }
+
+RESUME
+------
+
+Emitted when the Virtual Machine resumes execution.
+
+Data: None.
+
+Example:
+
+{ "event": "RESUME",
+ "timestamp": { "seconds": 1271770767, "microseconds": 582542 } }
+
+RTC_CHANGE
+----------
+
+Emitted when the guest changes the RTC time.
+
+Data:
+
+- "offset": Offset between base RTC clock (as specified by -rtc base), and
+new RTC clock value (json-number)
+
+Example:
+
+{ "event": "RTC_CHANGE",
+ "data": { "offset": 78 },
+ "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+SHUTDOWN
+--------
+
+Emitted when the Virtual Machine is powered down.
+
+Data: None.
+
+Example:
+
+{ "event": "SHUTDOWN",
+ "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+
+Note: If the command-line option "-no-shutdown" has been specified, a STOP
+event will eventually follow the SHUTDOWN event.
+
+SPICE_CONNECTED, SPICE_DISCONNECTED
+-----------------------------------
+
+Emitted when a SPICE client connects or disconnects.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+ "event": "SPICE_CONNECTED",
+ "data": {
+ "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+ "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+}}
+
+SPICE_INITIALIZED
+-----------------
+
+Emitted after initial handshake and authentication takes place (if any)
+and the SPICE channel is up'n'running
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "connection-id": spice connection id. All channels with the same id
+ belong to the same spice session (json-int)
+ - "channel-type": channel type. "1" is the main control channel, filter for
+ this one if you want track spice sessions only (json-int)
+ - "channel-id": channel id. Usually "0", might be different needed when
+ multiple channels of the same type exist, such as multiple
+ display channels in a multihead setup (json-int)
+ - "tls": whevener the channel is encrypted (json-bool)
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+ "event": "SPICE_INITIALIZED",
+ "data": {"server": {"auth": "spice", "port": "5921",
+ "family": "ipv4", "host": "127.0.0.1"},
+ "client": {"port": "49004", "family": "ipv4", "channel-type": 3,
+ "connection-id": 1804289383, "host": "127.0.0.1",
+ "channel-id": 0, "tls": true}
+}}
+
+STOP
+----
+
+Emitted when the Virtual Machine is stopped.
+
+Data: None.
+
+Example:
+
+{ "event": "STOP",
+ "timestamp": { "seconds": 1267041730, "microseconds": 281295 } }
+
+SUSPEND
+-------
+
+Emitted when guest enters S3 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND",
+ "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+SUSPEND_DISK
+------------
+
+Emitted when the guest makes a request to enter S4 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND_DISK",
+ "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+Note: QEMU shuts down when entering S4 state.
+
+VNC_CONNECTED
+-------------
+
+Emitted when a VNC client establishes a connection.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "event": "VNC_CONNECTED",
+ "data": {
+ "server": { "auth": "sasl", "family": "ipv4",
+ "service": "5901", "host": "0.0.0.0" },
+ "client": { "family": "ipv4", "service": "58425",
+ "host": "127.0.0.1" } },
+ "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+
+Note: This event is emitted before any authentication takes place, thus
+the authentication ID is not provided.
+
+VNC_DISCONNECTED
+----------------
+
+Emitted when the connection is closed.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "x509_dname": TLS dname (json-string, optional)
+ - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_DISCONNECTED",
+ "data": {
+ "server": { "auth": "sasl", "family": "ipv4",
+ "service": "5901", "host": "0.0.0.0" },
+ "client": { "family": "ipv4", "service": "58425",
+ "host": "127.0.0.1", "sasl_username": "luiz" } },
+ "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+VNC_INITIALIZED
+---------------
+
+Emitted after authentication takes place (if any) and the VNC session is
+made active.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "x509_dname": TLS dname (json-string, optional)
+ - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_INITIALIZED",
+ "data": {
+ "server": { "auth": "sasl", "family": "ipv4",
+ "service": "5901", "host": "0.0.0.0"},
+ "client": { "family": "ipv4", "service": "46089",
+ "host": "127.0.0.1", "sasl_username": "luiz" } },
+ "timestamp": { "seconds": 1263475302, "microseconds": 150772 } }
+
+WAKEUP
+------
+
+Emitted when the guest has woken up from S3 and is running.
+
+Data: None.
+
+Example:
+
+{ "event": "WAKEUP",
+ "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+WATCHDOG
+--------
+
+Emitted when the watchdog device's timer is expired.
+
+Data:
+
+- "action": Action that has been taken, it's one of the following (json-string):
+ "reset", "shutdown", "poweroff", "pause", "debug", or "none"
+
+Example:
+
+{ "event": "WATCHDOG",
+ "data": { "action": "reset" },
+ "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
+followed respectively by the RESET, SHUTDOWN, or STOP events.
diff --git a/hmp.c b/hmp.c
index dc3d2795d2..6429e6b447 100644
--- a/hmp.c
+++ b/hmp.c
@@ -933,6 +933,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
}
qmp_drive_mirror(device, filename, !!format, format,
+ false, NULL, false, NULL,
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
true, mode, false, 0, false, 0, false, 0,
false, 0, false, 0, &err);
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c10b7b70fb..09bd2c70ab 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -24,16 +24,6 @@
#include "hw/virtio/virtio-bus.h"
#include "qom/object_interfaces.h"
-typedef struct {
- VirtIOBlockDataPlane *s;
- QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */
- VirtQueueElement *elem; /* saved data from the virtqueue */
- QEMUIOVector qiov; /* original request iovecs */
- struct iovec bounce_iov; /* used if guest buffers are unaligned */
- QEMUIOVector bounce_qiov; /* bounce buffer iovecs */
- bool read; /* read or write? */
-} VirtIOBlockRequest;
-
struct VirtIOBlockDataPlane {
bool started;
bool starting;
@@ -57,6 +47,8 @@ struct VirtIOBlockDataPlane {
/* Operation blocker on BDS */
Error *blocker;
+ void (*saved_complete_request)(struct VirtIOBlockReq *req,
+ unsigned char status);
};
/* Raise an interrupt to signal guest, if necessary */
@@ -69,215 +61,14 @@ static void notify_guest(VirtIOBlockDataPlane *s)
event_notifier_set(s->guest_notifier);
}
-static void complete_rdwr(void *opaque, int ret)
-{
- VirtIOBlockRequest *req = opaque;
- struct virtio_blk_inhdr hdr;
- int len;
-
- if (likely(ret == 0)) {
- hdr.status = VIRTIO_BLK_S_OK;
- len = req->qiov.size;
- } else {
- hdr.status = VIRTIO_BLK_S_IOERR;
- len = 0;
- }
-
- trace_virtio_blk_data_plane_complete_request(req->s, req->elem->index, ret);
-
- if (req->read && req->bounce_iov.iov_base) {
- qemu_iovec_from_buf(&req->qiov, 0, req->bounce_iov.iov_base, len);
- }
-
- if (req->bounce_iov.iov_base) {
- qemu_vfree(req->bounce_iov.iov_base);
- }
-
- qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
- qemu_iovec_destroy(req->inhdr);
- g_slice_free(QEMUIOVector, req->inhdr);
-
- /* According to the virtio specification len should be the number of bytes
- * written to, but for virtio-blk it seems to be the number of bytes
- * transferred plus the status bytes.
- */
- vring_push(&req->s->vring, req->elem, len + sizeof(hdr));
- notify_guest(req->s);
- g_slice_free(VirtIOBlockRequest, req);
-}
-
-static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
- QEMUIOVector *inhdr, unsigned char status)
-{
- struct virtio_blk_inhdr hdr = {
- .status = status,
- };
-
- qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr));
- qemu_iovec_destroy(inhdr);
- g_slice_free(QEMUIOVector, inhdr);
-
- vring_push(&s->vring, elem, sizeof(hdr));
- notify_guest(s);
-}
-
-/* Get disk serial number */
-static void do_get_id_cmd(VirtIOBlockDataPlane *s,
- struct iovec *iov, unsigned int iov_cnt,
- VirtQueueElement *elem, QEMUIOVector *inhdr)
-{
- char id[VIRTIO_BLK_ID_BYTES];
-
- /* Serial number not NUL-terminated when longer than buffer */
- strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
- iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
- complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
-}
-
-static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
- struct iovec *iov, unsigned iov_cnt,
- int64_t sector_num, VirtQueueElement *elem,
- QEMUIOVector *inhdr)
-{
- VirtIOBlockRequest *req = g_slice_new0(VirtIOBlockRequest);
- QEMUIOVector *qiov;
- int nb_sectors;
-
- /* Fill in virtio block metadata needed for completion */
- req->s = s;
- req->elem = elem;
- req->inhdr = inhdr;
- req->read = read;
- qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
-
- qiov = &req->qiov;
-
- if (!bdrv_qiov_is_aligned(s->blk->conf.bs, qiov)) {
- void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov->size);
-
- /* Populate bounce buffer with data for writes */
- if (!read) {
- qemu_iovec_to_buf(qiov, 0, bounce_buffer, qiov->size);
- }
-
- /* Redirect I/O to aligned bounce buffer */
- req->bounce_iov.iov_base = bounce_buffer;
- req->bounce_iov.iov_len = qiov->size;
- qemu_iovec_init_external(&req->bounce_qiov, &req->bounce_iov, 1);
- qiov = &req->bounce_qiov;
- }
-
- nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
-
- if (read) {
- bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
- complete_rdwr, req);
- } else {
- bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
- complete_rdwr, req);
- }
-}
-
-static void complete_flush(void *opaque, int ret)
-{
- VirtIOBlockRequest *req = opaque;
- unsigned char status;
-
- if (ret == 0) {
- status = VIRTIO_BLK_S_OK;
- } else {
- status = VIRTIO_BLK_S_IOERR;
- }
-
- complete_request_early(req->s, req->elem, req->inhdr, status);
- g_slice_free(VirtIOBlockRequest, req);
-}
-
-static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
- QEMUIOVector *inhdr)
+static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
{
- VirtIOBlockRequest *req = g_slice_new(VirtIOBlockRequest);
- req->s = s;
- req->elem = elem;
- req->inhdr = inhdr;
+ stb_p(&req->in->status, status);
- bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
-}
-
-static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
- QEMUIOVector *inhdr)
-{
- int status;
-
- status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
- complete_request_early(s, elem, inhdr, status);
-}
-
-static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
-{
- struct iovec *iov = elem->out_sg;
- struct iovec *in_iov = elem->in_sg;
- unsigned out_num = elem->out_num;
- unsigned in_num = elem->in_num;
- struct virtio_blk_outhdr outhdr;
- QEMUIOVector *inhdr;
- size_t in_size;
-
- /* Copy in outhdr */
- if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
- sizeof(outhdr)) != sizeof(outhdr))) {
- error_report("virtio-blk request outhdr too short");
- return -EFAULT;
- }
- iov_discard_front(&iov, &out_num, sizeof(outhdr));
-
- /* Grab inhdr for later */
- in_size = iov_size(in_iov, in_num);
- if (in_size < sizeof(struct virtio_blk_inhdr)) {
- error_report("virtio_blk request inhdr too short");
- return -EFAULT;
- }
- inhdr = g_slice_new(QEMUIOVector);
- qemu_iovec_init(inhdr, 1);
- qemu_iovec_concat_iov(inhdr, in_iov, in_num,
- in_size - sizeof(struct virtio_blk_inhdr),
- sizeof(struct virtio_blk_inhdr));
- iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
-
- /* TODO Linux sets the barrier bit even when not advertised! */
- outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
-
- switch (outhdr.type) {
- case VIRTIO_BLK_T_IN:
- do_rdwr_cmd(s, true, in_iov, in_num,
- outhdr.sector * 512 / BDRV_SECTOR_SIZE,
- elem, inhdr);
- return 0;
-
- case VIRTIO_BLK_T_OUT:
- do_rdwr_cmd(s, false, iov, out_num,
- outhdr.sector * 512 / BDRV_SECTOR_SIZE,
- elem, inhdr);
- return 0;
-
- case VIRTIO_BLK_T_SCSI_CMD:
- do_scsi_cmd(s, elem, inhdr);
- return 0;
-
- case VIRTIO_BLK_T_FLUSH:
- do_flush_cmd(s, elem, inhdr);
- return 0;
-
- case VIRTIO_BLK_T_GET_ID:
- do_get_id_cmd(s, in_iov, in_num, elem, inhdr);
- return 0;
-
- default:
- error_report("virtio-blk unsupported request type %#x", outhdr.type);
- qemu_iovec_destroy(inhdr);
- g_slice_free(QEMUIOVector, inhdr);
- return -EFAULT;
- }
+ vring_push(&req->dev->dataplane->vring, req->elem,
+ req->qiov.size + sizeof(*req->in));
+ notify_guest(req->dev->dataplane);
+ g_slice_free(VirtIOBlockReq, req);
}
static void handle_notify(EventNotifier *e)
@@ -286,7 +77,11 @@ static void handle_notify(EventNotifier *e)
host_notifier);
VirtQueueElement *elem;
+ VirtIOBlockReq *req;
int ret;
+ MultiReqBuffer mrb = {
+ .num_writes = 0,
+ };
event_notifier_test_and_clear(&s->host_notifier);
for (;;) {
@@ -303,14 +98,14 @@ static void handle_notify(EventNotifier *e)
trace_virtio_blk_data_plane_process_request(s, elem->out_num,
elem->in_num, elem->index);
- if (process_request(s, elem) < 0) {
- vring_set_broken(&s->vring);
- vring_free_element(elem);
- ret = -EFAULT;
- break;
- }
+ req = g_slice_new(VirtIOBlockReq);
+ req->dev = VIRTIO_BLK(s->vdev);
+ req->elem = elem;
+ virtio_blk_handle_request(req, &mrb);
}
+ virtio_submit_multiwrite(s->blk->conf.bs, &mrb);
+
if (likely(ret == -EAGAIN)) { /* vring emptied */
/* Re-enable guest->host notifies and stop processing the vring.
* But if the guest has snuck in more descriptors, keep processing.
@@ -330,6 +125,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
Error **errp)
{
VirtIOBlockDataPlane *s;
+ VirtIOBlock *vblk = VIRTIO_BLK(vdev);
Error *local_err = NULL;
*dataplane = NULL;
@@ -372,6 +168,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
bdrv_op_block_all(blk->conf.bs, s->blocker);
*dataplane = s;
+ s->saved_complete_request = vblk->complete_request;
+ vblk->complete_request = complete_request_vring;
}
/* Context: QEMU global mutex held */
@@ -446,10 +244,12 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+ VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
if (!s->started || s->stopping) {
return;
}
s->stopping = true;
+ vblk->complete_request = s->saved_complete_request;
trace_virtio_blk_data_plane_stop(s);
aio_context_acquire(s->ctx);
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 08562ea390..a222e3f9a4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -12,6 +12,7 @@
*/
#include "qemu-common.h"
+#include "qemu/iov.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/block/block.h"
@@ -27,18 +28,24 @@
#endif
#include "hw/virtio/virtio-bus.h"
-typedef struct VirtIOBlockReq
+static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
+{
+ VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq);
+ req->dev = s;
+ req->elem = g_slice_new0(VirtQueueElement);
+ return req;
+}
+
+static void virtio_blk_free_request(VirtIOBlockReq *req)
{
- VirtIOBlock *dev;
- VirtQueueElement elem;
- struct virtio_blk_inhdr *in;
- struct virtio_blk_outhdr *out;
- QEMUIOVector qiov;
- struct VirtIOBlockReq *next;
- BlockAcctCookie acct;
-} VirtIOBlockReq;
+ if (req) {
+ g_slice_free(VirtQueueElement, req->elem);
+ g_slice_free(VirtIOBlockReq, req);
+ }
+}
-static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
+static void virtio_blk_complete_request(VirtIOBlockReq *req,
+ unsigned char status)
{
VirtIOBlock *s = req->dev;
VirtIODevice *vdev = VIRTIO_DEVICE(s);
@@ -46,10 +53,15 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status);
- virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+ virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in));
virtio_notify(vdev, s->vq);
}
+static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
+{
+ req->dev->complete_request(req, status);
+}
+
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
bool is_read)
{
@@ -62,7 +74,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
} else if (action == BLOCK_ERROR_ACTION_REPORT) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
bdrv_acct_done(s->bs, &req->acct);
- g_free(req);
+ virtio_blk_free_request(req);
}
bdrv_error_action(s->bs, action, is_read, error);
@@ -76,14 +88,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
trace_virtio_blk_rw_complete(req, ret);
if (ret) {
- bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+ bool is_read = !(ldl_p(&req->out.type) & VIRTIO_BLK_T_OUT);
if (virtio_blk_handle_rw_error(req, -ret, is_read))
return;
}
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
bdrv_acct_done(req->dev->bs, &req->acct);
- g_free(req);
+ virtio_blk_free_request(req);
}
static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -98,27 +110,16 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
bdrv_acct_done(req->dev->bs, &req->acct);
- g_free(req);
-}
-
-static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
-{
- VirtIOBlockReq *req = g_malloc(sizeof(*req));
- req->dev = s;
- req->qiov.size = 0;
- req->next = NULL;
- return req;
+ virtio_blk_free_request(req);
}
static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
VirtIOBlockReq *req = virtio_blk_alloc_request(s);
- if (req != NULL) {
- if (!virtqueue_pop(s->vq, &req->elem)) {
- g_free(req);
- return NULL;
- }
+ if (!virtqueue_pop(s->vq, req->elem)) {
+ virtio_blk_free_request(req);
+ return NULL;
}
return req;
@@ -247,17 +248,12 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
{
int status;
- status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
+ status = virtio_blk_handle_scsi_req(req->dev, req->elem);
virtio_blk_req_complete(req, status);
- g_free(req);
+ virtio_blk_free_request(req);
}
-typedef struct MultiReqBuffer {
- BlockRequest blkreq[32];
- unsigned int num_writes;
-} MultiReqBuffer;
-
-static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
+void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
{
int i, ret;
@@ -293,7 +289,7 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
BlockRequest *blkreq;
uint64_t sector;
- sector = ldq_p(&req->out->sector);
+ sector = ldq_p(&req->out.sector);
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
@@ -327,7 +323,7 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
{
uint64_t sector;
- sector = ldq_p(&req->out->sector);
+ sector = ldq_p(&req->out.sector);
bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
@@ -346,26 +342,39 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
virtio_blk_rw_complete, req);
}
-static void virtio_blk_handle_request(VirtIOBlockReq *req,
- MultiReqBuffer *mrb)
+void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
+ struct iovec *in_iov = req->elem->in_sg;
+ struct iovec *iov = req->elem->out_sg;
+ unsigned in_num = req->elem->in_num;
+ unsigned out_num = req->elem->out_num;
- if (req->elem.out_num < 1 || req->elem.in_num < 1) {
+ if (req->elem->out_num < 1 || req->elem->in_num < 1) {
error_report("virtio-blk missing headers");
exit(1);
}
- if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
- req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
- error_report("virtio-blk header not in correct element");
+ if (unlikely(iov_to_buf(iov, out_num, 0, &req->out,
+ sizeof(req->out)) != sizeof(req->out))) {
+ error_report("virtio-blk request outhdr too short");
+ exit(1);
+ }
+
+ iov_discard_front(&iov, &out_num, sizeof(req->out));
+
+ if (in_num < 1 ||
+ in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+ error_report("virtio-blk request inhdr too short");
exit(1);
}
- req->out = (void *)req->elem.out_sg[0].iov_base;
- req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
+ req->in = (void *)in_iov[in_num - 1].iov_base
+ + in_iov[in_num - 1].iov_len
+ - sizeof(struct virtio_blk_inhdr);
+ iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
- type = ldl_p(&req->out->type);
+ type = ldl_p(&req->out.type);
if (type & VIRTIO_BLK_T_FLUSH) {
virtio_blk_handle_flush(req, mrb);
@@ -378,23 +387,23 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
* NB: per existing s/n string convention the string is
* terminated by '\0' only when shorter than buffer.
*/
- strncpy(req->elem.in_sg[0].iov_base,
+ strncpy(req->elem->in_sg[0].iov_base,
s->blk.serial ? s->blk.serial : "",
- MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
+ MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- g_free(req);
+ virtio_blk_free_request(req);
} else if (type & VIRTIO_BLK_T_OUT) {
- qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
- req->elem.out_num - 1);
+ qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1],
+ req->elem->out_num - 1);
virtio_blk_handle_write(req, mrb);
} else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
/* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
- qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
- req->elem.in_num - 1);
+ qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0],
+ req->elem->in_num - 1);
virtio_blk_handle_read(req);
} else {
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
- g_free(req);
+ virtio_blk_free_request(req);
}
}
@@ -460,7 +469,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
}
if (!s->bh) {
- s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
+ s->bh = aio_bh_new(bdrv_get_aio_context(s->blk.conf.bs),
+ virtio_blk_dma_restart_bh, s);
qemu_bh_schedule(s->bh);
}
}
@@ -609,7 +619,8 @@ static void virtio_blk_save(QEMUFile *f, void *opaque)
while (req) {
qemu_put_sbyte(f, 1);
- qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+ qemu_put_buffer(f, (unsigned char *)req->elem,
+ sizeof(VirtQueueElement));
req = req->next;
}
qemu_put_sbyte(f, 0);
@@ -631,14 +642,15 @@ static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
while (qemu_get_sbyte(f)) {
VirtIOBlockReq *req = virtio_blk_alloc_request(s);
- qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+ qemu_get_buffer(f, (unsigned char *)req->elem,
+ sizeof(VirtQueueElement));
req->next = s->rq;
s->rq = req;
- virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
- req->elem.in_num, 1);
- virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
- req->elem.out_num, 0);
+ virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr,
+ req->elem->in_num, 1);
+ virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr,
+ req->elem->out_num, 0);
}
return 0;
@@ -729,6 +741,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+ s->complete_request = virtio_blk_complete_request;
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err);
if (err != NULL) {
diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c
index 6c8be0fe26..54eb15f3af 100644
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -14,6 +14,7 @@
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/virtio/virtio-serial.h"
+#include "qapi-event.h"
#define TYPE_VIRTIO_CONSOLE_SERIAL_PORT "virtserialport"
#define VIRTIO_CONSOLE(obj) \
@@ -81,11 +82,16 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
static void set_guest_connected(VirtIOSerialPort *port, int guest_connected)
{
VirtConsole *vcon = VIRTIO_CONSOLE(port);
+ DeviceState *dev = DEVICE(port);
- if (!vcon->chr) {
- return;
+ if (vcon->chr) {
+ qemu_chr_fe_set_open(vcon->chr, guest_connected);
+ }
+
+ if (dev->id) {
+ qapi_event_send_vserport_change(dev->id, guest_connected,
+ &error_abort);
}
- qemu_chr_fe_set_open(vcon->chr, guest_connected);
}
/* Readiness of the guest to accept data on a port */
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index de433b2e38..52c2f8afa5 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = {
static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
{
NICPeers *peers_ptr = (NICPeers *)ptr;
- NICConf *conf = container_of(peers_ptr, NICConf, peers);
NetClientState **ncs = peers_ptr->ncs;
NetClientState *peers[MAX_QUEUE_NUM];
int queues, i = 0;
@@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
ncs[i]->queue_index = i;
}
- conf->queues = queues;
+ peers_ptr->queues = queues;
return 0;
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 76dd6f5708..0fd2a84c7b 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
{
ICSState *ics = (ICSState *)opaque;
- if (ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
set_irq_lsi(ics, srcno, val);
} else {
set_irq_msi(ics, srcno, val);
@@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server,
trace_xics_ics_write_xive(nr, srcno, server, priority);
- if (ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
write_xive_lsi(ics, srcno);
} else {
write_xive_msi(ics, srcno);
@@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics)
for (i = 0; i < ics->nr_irqs; i++) {
/* FIXME: filter by server#? */
- if (ics->islsi[i]) {
+ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
resend_lsi(ics, i);
} else {
resend_msi(ics, i);
@@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr)
trace_xics_ics_eoi(nr);
- if (ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
irq->status &= ~XICS_STATUS_SENT;
}
}
@@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev)
{
ICSState *ics = ICS(dev);
int i;
+ uint8_t flags[ics->nr_irqs];
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ flags[i] = ics->irqs[i].flags;
+ }
memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
for (i = 0; i < ics->nr_irqs; i++) {
ics->irqs[i].priority = 0xff;
ics->irqs[i].saved_priority = 0xff;
+ ics->irqs[i].flags = flags[i];
}
}
@@ -563,13 +570,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
static const VMStateDescription vmstate_ics_irq = {
.name = "ics/irq",
- .version_id = 1,
+ .version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(server, ICSIRQState),
VMSTATE_UINT8(priority, ICSIRQState),
VMSTATE_UINT8(saved_priority, ICSIRQState),
VMSTATE_UINT8(status, ICSIRQState),
+ VMSTATE_UINT8(flags, ICSIRQState),
VMSTATE_END_OF_LIST()
},
};
@@ -606,7 +614,6 @@ static void ics_realize(DeviceState *dev, Error **errp)
return;
}
ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
- ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
}
@@ -633,21 +640,166 @@ static const TypeInfo ics_info = {
/*
* Exported functions
*/
+static int xics_find_source(XICSState *icp, int irq)
+{
+ int sources = 1;
+ int src;
+
+ /* FIXME: implement multiple sources */
+ for (src = 0; src < sources; ++src) {
+ ICSState *ics = &icp->ics[src];
+ if (ics_valid_irq(ics, irq)) {
+ return src;
+ }
+ }
+
+ return -1;
+}
qemu_irq xics_get_qirq(XICSState *icp, int irq)
{
- if (!ics_valid_irq(icp->ics, irq)) {
- return NULL;
+ int src = xics_find_source(icp, irq);
+
+ if (src >= 0) {
+ ICSState *ics = &icp->ics[src];
+ return ics->qirqs[irq - ics->offset];
}
- return icp->ics->qirqs[irq - icp->ics->offset];
+ return NULL;
+}
+
+static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+{
+ assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
+
+ ics->irqs[srcno].flags |=
+ lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
}
void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
{
- assert(ics_valid_irq(icp->ics, irq));
+ int src = xics_find_source(icp, irq);
+ ICSState *ics;
- icp->ics->islsi[irq - icp->ics->offset] = lsi;
+ assert(src >= 0);
+
+ ics = &icp->ics[src];
+ ics_set_irq_type(ics, irq - ics->offset, lsi);
+}
+
+#define ICS_IRQ_FREE(ics, srcno) \
+ (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
+
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+ int first, i;
+
+ for (first = 0; first < ics->nr_irqs; first += alignnum) {
+ if (num > (ics->nr_irqs - first)) {
+ return -1;
+ }
+ for (i = first; i < first + num; ++i) {
+ if (!ICS_IRQ_FREE(ics, i)) {
+ break;
+ }
+ }
+ if (i == (first + num)) {
+ return first;
+ }
+ }
+
+ return -1;
+}
+
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+{
+ ICSState *ics = &icp->ics[src];
+ int irq;
+
+ if (irq_hint) {
+ assert(src == xics_find_source(icp, irq_hint));
+ if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+ trace_xics_alloc_failed_hint(src, irq_hint);
+ return -1;
+ }
+ irq = irq_hint;
+ } else {
+ irq = ics_find_free_block(ics, 1, 1);
+ if (irq < 0) {
+ trace_xics_alloc_failed_no_left(src);
+ return -1;
+ }
+ irq += ics->offset;
+ }
+
+ ics_set_irq_type(ics, irq - ics->offset, lsi);
+ trace_xics_alloc(src, irq);
+
+ return irq;
+}
+
+/*
+ * Allocate block of consequtive IRQs, returns a number of the first.
+ * If align==true, aligns the first IRQ number to num.
+ */
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+{
+ int i, first = -1;
+ ICSState *ics = &icp->ics[src];
+
+ assert(src == 0);
+ /*
+ * MSIMesage::data is used for storing VIRQ so
+ * it has to be aligned to num to support multiple
+ * MSI vectors. MSI-X is not affected by this.
+ * The hint is used for the first IRQ, the rest should
+ * be allocated continuously.
+ */
+ if (align) {
+ assert((num == 1) || (num == 2) || (num == 4) ||
+ (num == 8) || (num == 16) || (num == 32));
+ first = ics_find_free_block(ics, num, num);
+ } else {
+ first = ics_find_free_block(ics, num, 1);
+ }
+
+ if (first >= 0) {
+ for (i = first; i < first + num; ++i) {
+ ics_set_irq_type(ics, i, lsi);
+ }
+ }
+ first += ics->offset;
+
+ trace_xics_alloc_block(src, first, num, lsi, align);
+
+ return first;
+}
+
+static void ics_free(ICSState *ics, int srcno, int num)
+{
+ int i;
+
+ for (i = srcno; i < srcno + num; ++i) {
+ if (ICS_IRQ_FREE(ics, i)) {
+ trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+ }
+ memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+ }
+}
+
+void xics_free(XICSState *icp, int irq, int num)
+{
+ int src = xics_find_source(icp, irq);
+
+ if (src >= 0) {
+ ICSState *ics = &icp->ics[src];
+
+ /* FIXME: implement multiple sources */
+ assert(src == 0);
+
+ trace_xics_ics_free(ics - icp->ics, irq, num);
+ ics_free(ics, irq - ics->offset, num);
+ }
}
/*
@@ -866,10 +1018,10 @@ static void xics_realize(DeviceState *dev, Error **errp)
}
/* Registration of global state belongs into realize */
- spapr_rtas_register("ibm,set-xive", rtas_set_xive);
- spapr_rtas_register("ibm,get-xive", rtas_get_xive);
- spapr_rtas_register("ibm,int-off", rtas_int_off);
- spapr_rtas_register("ibm,int-on", rtas_int_on);
+ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
+ spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
+ spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
+ spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
spapr_register_hypercall(H_CPPR, h_cppr);
spapr_register_hypercall(H_IPI, h_ipi);
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 09476ae34d..20b19e9d4f 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -38,10 +38,6 @@
typedef struct KVMXICSState {
XICSState parent_obj;
- uint32_t set_xive_token;
- uint32_t get_xive_token;
- uint32_t int_off_token;
- uint32_t int_on_token;
int kernel_xics_fd;
} KVMXICSState;
@@ -224,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
state |= KVM_XICS_MASKED;
}
- if (ics->islsi[i]) {
+ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
state |= KVM_XICS_LEVEL_SENSITIVE;
if (irq->status & XICS_STATUS_ASSERTED) {
state |= KVM_XICS_PENDING;
@@ -253,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val)
int rc;
args.irq = srcno + ics->offset;
- if (!ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
if (!val) {
return;
}
@@ -271,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev)
{
ICSState *ics = ICS(dev);
int i;
+ uint8_t flags[ics->nr_irqs];
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ flags[i] = ics->irqs[i].flags;
+ }
memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
for (i = 0; i < ics->nr_irqs; i++) {
ics->irqs[i].priority = 0xff;
ics->irqs[i].saved_priority = 0xff;
+ ics->irqs[i].flags = flags[i];
}
ics_set_kvm_state(ics, 1);
@@ -290,7 +293,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp)
return;
}
ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
- ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
}
@@ -392,32 +394,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
goto fail;
}
- icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
- icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
- icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
- icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy);
- rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token,
- "ibm,set-xive");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive");
goto fail;
}
- rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token,
- "ibm,get-xive");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive");
goto fail;
}
- rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on");
goto fail;
}
- rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off");
goto fail;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 7437c2e3c3..7b279c4f05 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -39,6 +39,7 @@
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
+#include "hw/misc/vfio.h"
/* #define DEBUG_VFIO */
#ifdef DEBUG_VFIO
@@ -3649,6 +3650,39 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
container->iommu_data.type1.initialized = true;
+ } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
+ ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+ if (ret) {
+ error_report("vfio: failed to set group container: %m");
+ ret = -errno;
+ goto free_container_exit;
+ }
+
+ ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
+ if (ret) {
+ error_report("vfio: failed to set iommu for container: %m");
+ ret = -errno;
+ goto free_container_exit;
+ }
+
+ /*
+ * The host kernel code implementing VFIO_IOMMU_DISABLE is called
+ * when container fd is closed so we do not call it explicitly
+ * in this file.
+ */
+ ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+ if (ret) {
+ error_report("vfio: failed to enable container: %m");
+ ret = -errno;
+ goto free_container_exit;
+ }
+
+ container->iommu_data.type1.listener = vfio_memory_listener;
+ container->iommu_data.release = vfio_listener_release;
+
+ memory_listener_register(&container->iommu_data.type1.listener,
+ container->space->as);
+
} else {
error_report("vfio: No available IOMMU models");
ret = -EINVAL;
@@ -4318,3 +4352,47 @@ static void register_vfio_pci_dev_type(void)
}
type_init(register_vfio_pci_dev_type)
+
+static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
+ int req, void *param)
+{
+ VFIOGroup *group;
+ VFIOContainer *container;
+ int ret = -1;
+
+ group = vfio_get_group(groupid, as);
+ if (!group) {
+ error_report("vfio: group %d not registered", groupid);
+ return ret;
+ }
+
+ container = group->container;
+ if (group->container) {
+ ret = ioctl(container->fd, req, param);
+ if (ret < 0) {
+ error_report("vfio: failed to ioctl container: ret=%d, %s",
+ ret, strerror(errno));
+ }
+ }
+
+ vfio_put_group(group);
+
+ return ret;
+}
+
+int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+ int req, void *param)
+{
+ /* We allow only certain ioctls to the container */
+ switch (req) {
+ case VFIO_CHECK_EXTENSION:
+ case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
+ break;
+ default:
+ /* Return an error on unknown requests */
+ error_report("vfio: unsupported ioctl %X", req);
+ return -1;
+ }
+
+ return vfio_container_do_ioctl(as, groupid, req, param);
+}
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index aaa3ff2360..3263e3fe90 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -1217,7 +1217,6 @@ static void eepro100_write_mdi(EEPRO100State *s)
break;
case 1: /* Status Register */
missing("not writable");
- data = s->mdimem[reg];
break;
case 2: /* PHY Identification Register (Word 1) */
case 3: /* PHY Identification Register (Word 2) */
@@ -1230,7 +1229,8 @@ static void eepro100_write_mdi(EEPRO100State *s)
default:
missing("not implemented");
}
- s->mdimem[reg] = data;
+ s->mdimem[reg] &= eepro100_mdi_mask[reg];
+ s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg];
} else if (opcode == 2) {
/* MDI read */
switch (reg) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 00b5e07ddd..e51d753cee 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1542,7 +1542,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
- n->max_queues = MAX(n->nic_conf.queues, 1);
+ n->max_queues = MAX(n->nic_conf.peers.queues, 1);
n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
n->curr_queues = 1;
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index af49c4667d..6a72ef4814 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -153,8 +153,8 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev)
return -1;
}
- spapr_rtas_register("nvram-fetch", rtas_nvram_fetch);
- spapr_rtas_register("nvram-store", rtas_nvram_store);
+ spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch);
+ spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store);
return 0;
}
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index e72fe2a70b..21f805f314 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -230,7 +230,7 @@ PCIBus *pci_pmac_init(qemu_irq *pic,
d = UNI_NORTH_PCI_HOST_BRIDGE(dev);
memory_region_init(&d->pci_mmio, OBJECT(d), "pci-mmio", 0x100000000ULL);
memory_region_init_alias(&d->pci_hole, OBJECT(d), "pci-hole", &d->pci_mmio,
- 0x80000000ULL, 0x70000000ULL);
+ 0x80000000ULL, 0x10000000ULL);
memory_region_add_subregion(address_space_mem, 0x80000000ULL,
&d->pci_hole);
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index ea747f0a20..edd44d03e7 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o
obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
obj-$(CONFIG_PSERIES) += spapr_pci.o
+ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
+obj-y += spapr_pci_vfio.o
+endif
# PowerPC 4xx boards
obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index a973c18d88..bb2e75fe1b 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -316,10 +316,15 @@ static int ppce500_load_device_tree(MachineState *machine,
* device it finds in the dt as serial output device. And we generate
* devices in reverse order to the dt.
*/
- dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
- soc, mpic, "serial1", 1, false);
- dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
- soc, mpic, "serial0", 0, true);
+ if (serial_hds[1]) {
+ dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
+ soc, mpic, "serial1", 1, false);
+ }
+
+ if (serial_hds[0]) {
+ dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
+ soc, mpic, "serial0", 0, true);
+ }
snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc,
MPC8544_UTIL_OFFSET);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index e493dc1b4b..89d3cadf19 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -373,18 +373,11 @@ static void ppc_core99_init(MachineState *machine)
machine_arch = ARCH_MAC99;
}
/* init basic PC hardware */
- pci_vga_init(pci_bus);
-
escc_mem = escc_init(0, pic[0x25], pic[0x24],
serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
memory_region_init_alias(escc_bar, NULL, "escc-bar",
escc_mem, 0, memory_region_size(escc_mem));
- for(i = 0; i < nb_nics; i++)
- pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
-
- ide_drive_get(hd, MAX_IDE_BUS);
-
macio = pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO);
dev = DEVICE(macio);
qdev_connect_gpio_out(dev, 0, pic[0x19]); /* CUDA */
@@ -395,6 +388,8 @@ static void ppc_core99_init(MachineState *machine)
macio_init(macio, pic_mem, escc_bar);
/* We only emulate 2 out of 3 IDE controllers for now */
+ ide_drive_get(hd, MAX_IDE_BUS);
+
macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
"ide[0]"));
macio_ide_init_drives(macio_ide, hd);
@@ -420,8 +415,15 @@ static void ppc_core99_init(MachineState *machine)
}
}
- if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8)
+ pci_vga_init(pci_bus);
+
+ if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) {
graphic_depth = 15;
+ }
+
+ for (i = 0; i < nb_nics; i++) {
+ pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
+ }
/* The NewWorld NVRAM is not located in the MacIO device */
dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 82f183f173..a8ba916970 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -85,16 +85,16 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
+typedef struct sPAPRMachineState sPAPRMachineState;
-typedef struct SPAPRMachine SPAPRMachine;
#define TYPE_SPAPR_MACHINE "spapr-machine"
#define SPAPR_MACHINE(obj) \
- OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE)
+ OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
/**
- * SPAPRMachine:
+ * sPAPRMachineState:
*/
-struct SPAPRMachine {
+struct sPAPRMachineState {
/*< private >*/
MachineState parent_obj;
@@ -102,76 +102,8 @@ struct SPAPRMachine {
char *kvm_type;
};
-
sPAPREnvironment *spapr;
-int spapr_allocate_irq(int hint, bool lsi)
-{
- int irq;
-
- if (hint) {
- irq = hint;
- if (hint >= spapr->next_irq) {
- spapr->next_irq = hint + 1;
- }
- /* FIXME: we should probably check for collisions somehow */
- } else {
- irq = spapr->next_irq++;
- }
-
- /* Configure irq type */
- if (!xics_get_qirq(spapr->icp, irq)) {
- return 0;
- }
-
- xics_set_irq_type(spapr->icp, irq, lsi);
-
- return irq;
-}
-
-/*
- * Allocate block of consequtive IRQs, returns a number of the first.
- * If msi==true, aligns the first IRQ number to num.
- */
-int spapr_allocate_irq_block(int num, bool lsi, bool msi)
-{
- int first = -1;
- int i, hint = 0;
-
- /*
- * MSIMesage::data is used for storing VIRQ so
- * it has to be aligned to num to support multiple
- * MSI vectors. MSI-X is not affected by this.
- * The hint is used for the first IRQ, the rest should
- * be allocated continuously.
- */
- if (msi) {
- assert((num == 1) || (num == 2) || (num == 4) ||
- (num == 8) || (num == 16) || (num == 32));
- hint = (spapr->next_irq + num - 1) & ~(num - 1);
- }
-
- for (i = 0; i < num; ++i) {
- int irq;
-
- irq = spapr_allocate_irq(hint, lsi);
- if (!irq) {
- return -1;
- }
-
- if (0 == i) {
- first = irq;
- hint = 0;
- }
-
- /* If the above doesn't create a consecutive block then that's
- * an internal bug */
- assert(irq == (first + i));
- }
-
- return first;
-}
-
static XICSState *try_create_xics(const char *type, int nr_servers,
int nr_irqs)
{
@@ -442,6 +374,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
if (boot_device) {
_FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
}
+ if (boot_menu) {
+ _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
+ }
_FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
_FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
_FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
@@ -956,7 +891,7 @@ static const VMStateDescription vmstate_spapr = {
.version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_UINT32(next_irq, sPAPREnvironment),
+ VMSTATE_UNUSED(4), /* used to be @next_irq */
/* RTC offset */
VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
@@ -1360,7 +1295,6 @@ static void ppc_spapr_init(MachineState *machine)
/* Set up Interrupt Controller before we create the VCPUs */
spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
XICS_IRQS);
- spapr->next_irq = XICS_IRQ_BASE;
/* init CPUs */
if (cpu_model == NULL) {
@@ -1619,14 +1553,14 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
static char *spapr_get_kvm_type(Object *obj, Error **errp)
{
- SPAPRMachine *sm = SPAPR_MACHINE(obj);
+ sPAPRMachineState *sm = SPAPR_MACHINE(obj);
return g_strdup(sm->kvm_type);
}
static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
{
- SPAPRMachine *sm = SPAPR_MACHINE(obj);
+ sPAPRMachineState *sm = SPAPR_MACHINE(obj);
g_free(sm->kvm_type);
sm->kvm_type = g_strdup(value);
@@ -1660,7 +1594,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
static const TypeInfo spapr_machine_info = {
.name = TYPE_SPAPR_MACHINE,
.parent = TYPE_MACHINE,
- .instance_size = sizeof(SPAPRMachine),
+ .instance_size = sizeof(sPAPRMachineState),
.instance_init = spapr_machine_initfn,
.class_init = spapr_machine_class_init,
.interfaces = (InterfaceInfo[]) {
@@ -1669,9 +1603,25 @@ static const TypeInfo spapr_machine_info = {
},
};
+static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+
+ mc->name = "pseries-2.1";
+ mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1";
+ mc->is_default = 0;
+}
+
+static const TypeInfo spapr_machine_2_1_info = {
+ .name = TYPE_SPAPR_MACHINE "2.1",
+ .parent = TYPE_SPAPR_MACHINE,
+ .class_init = spapr_machine_2_1_class_init,
+};
+
static void spapr_machine_register_types(void)
{
type_register_static(&spapr_machine_info);
+ type_register_static(&spapr_machine_2_1_info);
}
type_init(spapr_machine_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 16fa49e886..1b6157dec4 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -314,8 +314,9 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
void spapr_events_init(sPAPREnvironment *spapr)
{
- spapr->epow_irq = spapr_allocate_msi(0);
+ spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false);
spapr->epow_notifier.notify = spapr_powerdown_req;
qemu_register_powerdown_notifier(&spapr->epow_notifier);
- spapr_rtas_register("check-exception", check_exception);
+ spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
+ check_exception);
}
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 9e49ec4a5c..698ae60953 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -118,7 +118,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
tcet->nb_table <<
tcet->page_shift,
- &tcet->fd);
+ &tcet->fd,
+ tcet->vfio_accel);
}
if (!tcet->table) {
@@ -142,7 +143,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
uint64_t bus_offset,
uint32_t page_shift,
- uint32_t nb_table)
+ uint32_t nb_table,
+ bool vfio_accel)
{
sPAPRTCETable *tcet;
@@ -161,6 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
tcet->bus_offset = bus_offset;
tcet->page_shift = page_shift;
tcet->nb_table = nb_table;
+ tcet->vfio_accel = vfio_accel;
object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 988f8cb858..9ed39a93b7 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
/*
- * Find an entry with config_addr or returns the empty one if not found AND
- * alloc_new is set.
- * At the moment the msi_table entries are never released so there is
- * no point to look till the end of the list if we need to find the free entry.
- */
-static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
- bool alloc_new)
-{
- int i;
-
- for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
- if (!phb->msi_table[i].nvec) {
- break;
- }
- if (phb->msi_table[i].config_addr == config_addr) {
- return i;
- }
- }
- if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
- trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
- return i;
- }
-
- return -1;
-}
-
-/*
* Set MSI/MSIX message data.
* This is required for msi_notify()/msix_notify() which
* will write at the addresses via spapr_msi_write().
+ *
+ * If hwaddr == 0, all entries will have .data == first_irq i.e.
+ * table will be reset.
*/
static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
unsigned first_irq, unsigned req_num)
@@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
return;
}
- for (i = 0; i < req_num; ++i, ++msg.data) {
+ for (i = 0; i < req_num; ++i) {
msix_set_message(pdev, i, msg);
trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+ if (addr) {
+ ++msg.data;
+ }
}
}
@@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
unsigned int seq_num = rtas_ld(args, 5);
unsigned int ret_intr_type;
- int ndev, irq, max_irqs = 0;
+ unsigned int irq, max_irqs = 0, num = 0;
sPAPRPHBState *phb = NULL;
PCIDevice *pdev = NULL;
+ bool msix = false;
+ spapr_pci_msi *msi;
+ int *config_addr_key;
switch (func) {
case RTAS_CHANGE_MSI_FN:
@@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
/* Releasing MSIs */
if (!req_num) {
- ndev = spapr_msicfg_find(phb, config_addr, false);
- if (ndev < 0) {
- trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+ msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+ if (!msi) {
+ trace_spapr_pci_msi("Releasing wrong config", config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
}
- trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
+
+ xics_free(spapr->icp, msi->first_irq, msi->num);
+ spapr_msi_setmsg(pdev, 0, msix, 0, num);
+ g_hash_table_remove(phb->msi, &config_addr);
+
+ trace_spapr_pci_msi("Released MSIs", config_addr);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, 0);
return;
@@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
/* Enabling MSI */
- /* Find a device number in the map to add or reuse the existing one */
- ndev = spapr_msicfg_find(phb, config_addr, true);
- if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
- error_report("No free entry for a new MSI device");
- rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
- return;
- }
- trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
-
/* Check if the device supports as many IRQs as requested */
if (ret_intr_type == RTAS_TYPE_MSI) {
max_irqs = msi_nr_vectors_allocated(pdev);
@@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
max_irqs = pdev->msix_entries_nr;
}
if (!max_irqs) {
- error_report("Requested interrupt type %d is not enabled for device#%d",
- ret_intr_type, ndev);
+ error_report("Requested interrupt type %d is not enabled for device %x",
+ ret_intr_type, config_addr);
rtas_st(rets, 0, -1); /* Hardware error */
return;
}
/* Correct the number if the guest asked for too many */
if (req_num > max_irqs) {
+ trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
req_num = max_irqs;
+ irq = 0; /* to avoid misleading trace */
+ goto out;
}
- /* Check if there is an old config and MSI number has not changed */
- if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
- /* Unexpected behaviour */
- error_report("Cannot reuse MSI config for device#%d", ndev);
+ /* Allocate MSIs */
+ irq = xics_alloc_block(spapr->icp, 0, req_num, false,
+ ret_intr_type == RTAS_TYPE_MSI);
+ if (!irq) {
+ error_report("Cannot allocate MSIs for device %x", config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
}
- /* There is no cached config, allocate MSIs */
- if (!phb->msi_table[ndev].nvec) {
- irq = spapr_allocate_irq_block(req_num, false,
- ret_intr_type == RTAS_TYPE_MSI);
- if (irq < 0) {
- error_report("Cannot allocate MSIs for device#%d", ndev);
- rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
- return;
- }
- phb->msi_table[ndev].irq = irq;
- phb->msi_table[ndev].nvec = req_num;
- phb->msi_table[ndev].config_addr = config_addr;
- }
-
/* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
- phb->msi_table[ndev].irq, req_num);
+ irq, req_num);
+ /* Add MSI device to cache */
+ msi = g_new(spapr_pci_msi, 1);
+ msi->first_irq = irq;
+ msi->num = req_num;
+ config_addr_key = g_new(int, 1);
+ *config_addr_key = config_addr;
+ g_hash_table_insert(phb->msi, config_addr_key, msi);
+
+out:
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, req_num);
rtas_st(rets, 2, ++seq_num);
rtas_st(rets, 3, ret_intr_type);
- trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
+ trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
}
static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
@@ -395,25 +372,28 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
uint32_t config_addr = rtas_ld(args, 0);
uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
- int ndev;
sPAPRPHBState *phb = NULL;
+ PCIDevice *pdev = NULL;
+ spapr_pci_msi *msi;
- /* Fins sPAPRPHBState */
+ /* Find sPAPRPHBState */
phb = find_phb(spapr, buid);
- if (!phb) {
+ if (phb) {
+ pdev = find_dev(spapr, buid, config_addr);
+ }
+ if (!phb || !pdev) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
/* Find device descriptor and start IRQ */
- ndev = spapr_msicfg_find(phb, config_addr, false);
- if (ndev < 0) {
- trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+ msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+ if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
+ trace_spapr_pci_msi("Failed to return vector", config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
}
-
- intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
+ intr_src_num = msi->first_irq + ioa_intr_num;
trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
intr_src_num);
@@ -634,7 +614,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
for (i = 0; i < PCI_NUM_PINS; i++) {
uint32_t irq;
- irq = spapr_allocate_lsi(0);
+ irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
if (!irq) {
error_setg(errp, "spapr_allocate_lsi failed");
return;
@@ -649,6 +629,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
}
info->finish_realize(sphb, errp);
+
+ sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
}
static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
@@ -658,7 +640,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
0,
SPAPR_TCE_PAGE_SHIFT,
- 0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+ 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
if (!tcet) {
error_setg(errp, "Unable to create TCE table for %s",
sphb->dtbusname);
@@ -712,22 +694,69 @@ static const VMStateDescription vmstate_spapr_pci_lsi = {
};
static const VMStateDescription vmstate_spapr_pci_msi = {
- .name = "spapr_pci/lsi",
+ .name = "spapr_pci/msi",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
- VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
- VMSTATE_UINT32(irq, struct spapr_pci_msi),
- VMSTATE_UINT32(nvec, struct spapr_pci_msi),
-
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(key, spapr_pci_msi_mig),
+ VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
+ VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
VMSTATE_END_OF_LIST()
},
};
+static void spapr_pci_pre_save(void *opaque)
+{
+ sPAPRPHBState *sphb = opaque;
+ GHashTableIter iter;
+ gpointer key, value;
+ int i;
+
+ if (sphb->msi_devs) {
+ g_free(sphb->msi_devs);
+ sphb->msi_devs = NULL;
+ }
+ sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+ if (!sphb->msi_devs_num) {
+ return;
+ }
+ sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
+
+ g_hash_table_iter_init(&iter, sphb->msi);
+ for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
+ sphb->msi_devs[i].key = *(uint32_t *) key;
+ sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
+ }
+}
+
+static int spapr_pci_post_load(void *opaque, int version_id)
+{
+ sPAPRPHBState *sphb = opaque;
+ gpointer key, value;
+ int i;
+
+ for (i = 0; i < sphb->msi_devs_num; ++i) {
+ key = g_memdup(&sphb->msi_devs[i].key,
+ sizeof(sphb->msi_devs[i].key));
+ value = g_memdup(&sphb->msi_devs[i].value,
+ sizeof(sphb->msi_devs[i].value));
+ g_hash_table_insert(sphb->msi, key, value);
+ }
+ if (sphb->msi_devs) {
+ g_free(sphb->msi_devs);
+ sphb->msi_devs = NULL;
+ }
+ sphb->msi_devs_num = 0;
+
+ return 0;
+}
+
static const VMStateDescription vmstate_spapr_pci = {
.name = "spapr_pci",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .pre_save = spapr_pci_pre_save,
+ .post_load = spapr_pci_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
@@ -737,9 +766,9 @@ static const VMStateDescription vmstate_spapr_pci = {
VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
- VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
- vmstate_spapr_pci_msi, struct spapr_pci_msi),
-
+ VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
+ VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
+ vmstate_spapr_pci_msi, spapr_pci_msi_mig),
VMSTATE_END_OF_LIST()
},
};
@@ -909,14 +938,20 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
void spapr_pci_rtas_init(void)
{
- spapr_rtas_register("read-pci-config", rtas_read_pci_config);
- spapr_rtas_register("write-pci-config", rtas_write_pci_config);
- spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
- spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+ spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
+ rtas_read_pci_config);
+ spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
+ rtas_write_pci_config);
+ spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
+ rtas_ibm_read_pci_config);
+ spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
+ rtas_ibm_write_pci_config);
if (msi_supported) {
- spapr_rtas_register("ibm,query-interrupt-source-number",
+ spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+ "ibm,query-interrupt-source-number",
rtas_ibm_query_interrupt_source_number);
- spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
+ spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
+ rtas_ibm_change_msi);
}
}
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 0000000000..d3bddf2887
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,102 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "linux/vfio.h"
+#include "hw/misc/vfio.h"
+
+static Property spapr_phb_vfio_properties[] = {
+ DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{
+ sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
+ struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+ int ret;
+ sPAPRTCETable *tcet;
+ uint32_t liobn = svphb->phb.dma_liobn;
+
+ if (svphb->iommugroupid == -1) {
+ error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
+ return;
+ }
+
+ ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+ VFIO_CHECK_EXTENSION,
+ (void *) VFIO_SPAPR_TCE_IOMMU);
+ if (ret != 1) {
+ error_setg_errno(errp, -ret,
+ "spapr-vfio: SPAPR extension is not supported");
+ return;
+ }
+
+ ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+ VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "spapr-vfio: get info from container failed");
+ return;
+ }
+
+ tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
+ SPAPR_TCE_PAGE_SHIFT,
+ info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
+ true);
+ if (!tcet) {
+ error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
+ return;
+ }
+
+ /* Register default 32bit DMA window */
+ memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+ spapr_tce_get_iommu(tcet));
+}
+
+static void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+ /* Do nothing */
+}
+
+static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
+
+ dc->props = spapr_phb_vfio_properties;
+ dc->reset = spapr_phb_vfio_reset;
+ spc->finish_realize = spapr_phb_vfio_finish_realize;
+}
+
+static const TypeInfo spapr_phb_vfio_info = {
+ .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
+ .parent = TYPE_SPAPR_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(sPAPRPHBVFIOState),
+ .class_init = spapr_phb_vfio_class_init,
+ .class_size = sizeof(sPAPRPHBClass),
+};
+
+static void spapr_pci_vfio_register_types(void)
+{
+ type_register_static(&spapr_phb_vfio_info);
+}
+
+type_init(spapr_pci_vfio_register_types)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 8d08539baa..9ba1ba69f9 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -36,9 +36,6 @@
#include <libfdt.h>
-#define TOKEN_BASE 0x2000
-#define TOKEN_MAX 0x100
-
static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
@@ -225,8 +222,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
env->msr = 0;
}
-#define DIAGNOSTICS_RUN_MODE 42
-
static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
@@ -236,16 +231,28 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
target_ulong parameter = rtas_ld(args, 0);
target_ulong buffer = rtas_ld(args, 1);
target_ulong length = rtas_ld(args, 2);
- target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
+ target_ulong ret = RTAS_OUT_SUCCESS;
switch (parameter) {
- case DIAGNOSTICS_RUN_MODE:
- if (length == 1) {
- rtas_st(buffer, 0, 0);
- ret = RTAS_OUT_SUCCESS;
- }
+ case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
+ char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d",
+ max_cpus, smp_cpus);
+ rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val));
+ g_free(param_val);
break;
}
+ case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: {
+ uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED;
+
+ rtas_st_buffer(buffer, length, &param_val, sizeof(param_val));
+ break;
+ }
+ case RTAS_SYSPARM_UUID:
+ rtas_st_buffer(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0));
+ break;
+ default:
+ ret = RTAS_OUT_NOT_SUPPORTED;
+ }
rtas_st(rets, 0, ret);
}
@@ -260,7 +267,9 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
switch (parameter) {
- case DIAGNOSTICS_RUN_MODE:
+ case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS:
+ case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE:
+ case RTAS_SYSPARM_UUID:
ret = RTAS_OUT_NOT_AUTHORIZED;
break;
}
@@ -271,17 +280,14 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
static struct rtas_call {
const char *name;
spapr_rtas_fn fn;
-} rtas_table[TOKEN_MAX];
-
-static struct rtas_call *rtas_next = rtas_table;
+} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- if ((token >= TOKEN_BASE)
- && ((token - TOKEN_BASE) < TOKEN_MAX)) {
- struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
+ if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) {
+ struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE);
if (call->fn) {
call->fn(cpu, spapr, token, nargs, args, nret, rets);
@@ -303,23 +309,22 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_PARAMETER;
}
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
{
- int i;
-
- for (i = 0; i < (rtas_next - rtas_table); i++) {
- if (strcmp(name, rtas_table[i].name) == 0) {
- fprintf(stderr, "RTAS call \"%s\" registered twice\n", name);
- exit(1);
- }
+ if (!((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX))) {
+ fprintf(stderr, "RTAS invalid token 0x%x\n", token);
+ exit(1);
}
- assert(rtas_next < (rtas_table + TOKEN_MAX));
-
- rtas_next->name = name;
- rtas_next->fn = fn;
+ token -= RTAS_TOKEN_BASE;
+ if (rtas_table[token].name) {
+ fprintf(stderr, "RTAS call \"%s\" is registered already as 0x%x\n",
+ rtas_table[token].name, token);
+ exit(1);
+ }
- return (rtas_next++ - rtas_table) + TOKEN_BASE;
+ rtas_table[token].name = name;
+ rtas_table[token].fn = fn;
}
int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -359,7 +364,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
return ret;
}
- for (i = 0; i < TOKEN_MAX; i++) {
+ for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) {
struct rtas_call *call = &rtas_table[i];
if (!call->name) {
@@ -367,7 +372,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
}
ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name,
- i + TOKEN_BASE);
+ i + RTAS_TOKEN_BASE);
if (ret < 0) {
fprintf(stderr, "Couldn't add rtas token for %s: %s\n",
call->name, fdt_strerror(ret));
@@ -380,18 +385,24 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
static void core_rtas_register_types(void)
{
- spapr_rtas_register("display-character", rtas_display_character);
- spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
- spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
- spapr_rtas_register("power-off", rtas_power_off);
- spapr_rtas_register("system-reboot", rtas_system_reboot);
- spapr_rtas_register("query-cpu-stopped-state",
+ spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
+ rtas_display_character);
+ spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day",
+ rtas_get_time_of_day);
+ spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day",
+ rtas_set_time_of_day);
+ spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off);
+ spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot",
+ rtas_system_reboot);
+ spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state",
rtas_query_cpu_stopped_state);
- spapr_rtas_register("start-cpu", rtas_start_cpu);
- spapr_rtas_register("stop-self", rtas_stop_self);
- spapr_rtas_register("ibm,get-system-parameter",
+ spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
+ spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
+ spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
+ "ibm,get-system-parameter",
rtas_ibm_get_system_parameter);
- spapr_rtas_register("ibm,set-system-parameter",
+ spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
+ "ibm,set-system-parameter",
rtas_ibm_set_system_parameter);
}
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 04e16ae04d..dc9e46a7b1 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -449,7 +449,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
dev->qdev.id = id;
}
- dev->irq = spapr_allocate_msi(dev->irq);
+ dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false);
if (!dev->irq) {
return -1;
}
@@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
0,
SPAPR_TCE_PAGE_SHIFT,
pc->rtce_window_size >>
- SPAPR_TCE_PAGE_SHIFT);
+ SPAPR_TCE_PAGE_SHIFT, false);
address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
}
@@ -517,8 +517,9 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
/* RTAS calls */
- spapr_rtas_register("ibm,set-tce-bypass", rtas_set_tce_bypass);
- spapr_rtas_register("quiesce", rtas_quiesce);
+ spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+ rtas_set_tce_bypass);
+ spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
return bus;
}
diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c
index 4aebd34924..9f607d42bb 100644
--- a/hw/watchdog/watchdog.c
+++ b/hw/watchdog/watchdog.c
@@ -106,7 +106,7 @@ int select_watchdog_action(const char *p)
*/
void watchdog_perform_action(void)
{
- switch(watchdog_action) {
+ switch (watchdog_action) {
case WDT_RESET: /* same as 'system_reset' in monitor */
qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort);
qemu_system_reset_request();
diff --git a/hw/xtensa/Makefile.objs b/hw/xtensa/Makefile.objs
index 6ead7820c4..cb77dc3793 100644
--- a/hw/xtensa/Makefile.objs
+++ b/hw/xtensa/Makefile.objs
@@ -1,3 +1,3 @@
obj-y += pic_cpu.o
-obj-y += xtensa_sim.o
-obj-y += xtensa_lx60.o
+obj-y += sim.o
+obj-y += xtfpga.o
diff --git a/hw/xtensa/bootparam.h b/hw/xtensa/bootparam.h
new file mode 100644
index 0000000000..955f4e86e3
--- /dev/null
+++ b/hw/xtensa/bootparam.h
@@ -0,0 +1,49 @@
+#ifndef HW_XTENSA_BOOTPARAM
+#define HW_XTENSA_BOOTPARAM
+
+#define BP_TAG_COMMAND_LINE 0x1001 /* command line (0-terminated string)*/
+#define BP_TAG_INITRD 0x1002 /* ramdisk addr and size (bp_meminfo) */
+#define BP_TAG_MEMORY 0x1003 /* memory addr and size (bp_meminfo) */
+#define BP_TAG_SERIAL_BAUDRATE 0x1004 /* baud rate of current console. */
+#define BP_TAG_SERIAL_PORT 0x1005 /* serial device of current console */
+#define BP_TAG_FDT 0x1006 /* flat device tree addr */
+
+#define BP_TAG_FIRST 0x7B0B /* first tag with a version number */
+#define BP_TAG_LAST 0x7E0B /* last tag */
+
+typedef struct BpTag {
+ uint16_t tag;
+ uint16_t size;
+} BpTag;
+
+typedef struct BpMemInfo {
+ uint32_t type;
+ uint32_t start;
+ uint32_t end;
+} BpMemInfo;
+
+#define MEMORY_TYPE_CONVENTIONAL 0x1000
+#define MEMORY_TYPE_NONE 0x2000
+
+static inline size_t get_tag_size(size_t data_size)
+{
+ return data_size + sizeof(BpTag) + 4;
+}
+
+static inline ram_addr_t put_tag(ram_addr_t addr, uint16_t tag,
+ size_t size, const void *data)
+{
+ BpTag bp_tag = {
+ .tag = tswap16(tag),
+ .size = tswap16((size + 3) & ~3),
+ };
+
+ cpu_physical_memory_write(addr, &bp_tag, sizeof(bp_tag));
+ addr += sizeof(bp_tag);
+ cpu_physical_memory_write(addr, data, size);
+ addr += (size + 3) & ~3;
+
+ return addr;
+}
+
+#endif
diff --git a/hw/xtensa/xtensa_sim.c b/hw/xtensa/sim.c
index 89da43c160..9642bf54c7 100644
--- a/hw/xtensa/xtensa_sim.c
+++ b/hw/xtensa/sim.c
@@ -31,6 +31,7 @@
#include "elf.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
+#include "qemu/error-report.h"
static uint64_t translate_phys_addr(void *opaque, uint64_t addr)
{
@@ -63,8 +64,9 @@ static void xtensa_sim_init(MachineState *machine)
for (n = 0; n < smp_cpus; n++) {
cpu = cpu_xtensa_init(cpu_model);
if (cpu == NULL) {
- fprintf(stderr, "Unable to find CPU definition\n");
- exit(1);
+ error_report("unable to find CPU definition '%s'\n",
+ cpu_model);
+ exit(EXIT_FAILURE);
}
env = &cpu->env;
diff --git a/hw/xtensa/xtensa_bootparam.h b/hw/xtensa/xtensa_bootparam.h
deleted file mode 100644
index 38ef32bdb6..0000000000
--- a/hw/xtensa/xtensa_bootparam.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef HW_XTENSA_BOOTPARAM
-#define HW_XTENSA_BOOTPARAM
-
-typedef struct BpTag {
- uint16_t tag;
- uint16_t size;
-} BpTag;
-
-static inline ram_addr_t put_tag(ram_addr_t addr, uint16_t tag,
- size_t size, const void *data)
-{
- BpTag bp_tag = {
- .tag = tswap16(tag),
- .size = tswap16((size + 3) & ~3),
- };
-
- cpu_physical_memory_write(addr, &bp_tag, sizeof(bp_tag));
- addr += sizeof(bp_tag);
- cpu_physical_memory_write(addr, data, size);
- addr += (size + 3) & ~3;
-
- return addr;
-}
-
-#endif
diff --git a/hw/xtensa/xtensa_lx60.c b/hw/xtensa/xtfpga.c
index 507dd88452..a2dff5a13e 100644
--- a/hw/xtensa/xtensa_lx60.c
+++ b/hw/xtensa/xtfpga.c
@@ -37,11 +37,14 @@
#include "hw/block/flash.h"
#include "sysemu/blockdev.h"
#include "sysemu/char.h"
-#include "xtensa_bootparam.h"
+#include "sysemu/device_tree.h"
+#include "qemu/error-report.h"
+#include "bootparam.h"
typedef struct LxBoardDesc {
hwaddr flash_base;
size_t flash_size;
+ size_t flash_boot_base;
size_t flash_sector_size;
size_t sram_size;
} LxBoardDesc;
@@ -172,9 +175,12 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
MemoryRegion *ram, *rom, *system_io;
DriveInfo *dinfo;
pflash_t *flash = NULL;
+ QemuOpts *machine_opts = qemu_get_machine_opts();
const char *cpu_model = machine->cpu_model;
- const char *kernel_filename = machine->kernel_filename;
- const char *kernel_cmdline = machine->kernel_cmdline;
+ const char *kernel_filename = qemu_opt_get(machine_opts, "kernel");
+ const char *kernel_cmdline = qemu_opt_get(machine_opts, "append");
+ const char *dtb_filename = qemu_opt_get(machine_opts, "dtb");
+ const char *initrd_filename = qemu_opt_get(machine_opts, "initrd");
int n;
if (!cpu_model) {
@@ -184,8 +190,9 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
for (n = 0; n < smp_cpus; n++) {
cpu = cpu_xtensa_init(cpu_model);
if (cpu == NULL) {
- fprintf(stderr, "Unable to find CPU definition\n");
- exit(1);
+ error_report("unable to find CPU definition '%s'\n",
+ cpu_model);
+ exit(EXIT_FAILURE);
}
env = &cpu->env;
@@ -226,39 +233,117 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
board->flash_size / board->flash_sector_size,
4, 0x0000, 0x0000, 0x0000, 0x0000, be);
if (flash == NULL) {
- fprintf(stderr, "Unable to mount pflash\n");
- exit(1);
+ error_report("unable to mount pflash\n");
+ exit(EXIT_FAILURE);
}
}
/* Use presence of kernel file name as 'boot from SRAM' switch. */
if (kernel_filename) {
+ uint32_t entry_point = env->pc;
+ size_t bp_size = 3 * get_tag_size(0); /* first/last and memory tags */
+ uint32_t tagptr = 0xfe000000 + board->sram_size;
+ uint32_t cur_tagptr;
+ BpMemInfo memory_location = {
+ .type = tswap32(MEMORY_TYPE_CONVENTIONAL),
+ .start = tswap32(0),
+ .end = tswap32(machine->ram_size),
+ };
+ uint32_t lowmem_end = machine->ram_size < 0x08000000 ?
+ machine->ram_size : 0x08000000;
+ uint32_t cur_lowmem = QEMU_ALIGN_UP(lowmem_end / 2, 4096);
+
rom = g_malloc(sizeof(*rom));
memory_region_init_ram(rom, NULL, "lx60.sram", board->sram_size);
vmstate_register_ram_global(rom);
memory_region_add_subregion(system_memory, 0xfe000000, rom);
- /* Put kernel bootparameters to the end of that SRAM */
if (kernel_cmdline) {
- size_t cmdline_size = strlen(kernel_cmdline) + 1;
- size_t bp_size = sizeof(BpTag[4]) + cmdline_size;
- uint32_t tagptr = (0xfe000000 + board->sram_size - bp_size) & ~0xff;
+ bp_size += get_tag_size(strlen(kernel_cmdline) + 1);
+ }
+ if (dtb_filename) {
+ bp_size += get_tag_size(sizeof(uint32_t));
+ }
+ if (initrd_filename) {
+ bp_size += get_tag_size(sizeof(BpMemInfo));
+ }
- env->regs[2] = tagptr;
+ /* Put kernel bootparameters to the end of that SRAM */
+ tagptr = (tagptr - bp_size) & ~0xff;
+ cur_tagptr = put_tag(tagptr, BP_TAG_FIRST, 0, NULL);
+ cur_tagptr = put_tag(cur_tagptr, BP_TAG_MEMORY,
+ sizeof(memory_location), &memory_location);
+
+ if (kernel_cmdline) {
+ cur_tagptr = put_tag(cur_tagptr, BP_TAG_COMMAND_LINE,
+ strlen(kernel_cmdline) + 1, kernel_cmdline);
+ }
+ if (dtb_filename) {
+ int fdt_size;
+ void *fdt = load_device_tree(dtb_filename, &fdt_size);
+ uint32_t dtb_addr = tswap32(cur_lowmem);
+
+ if (!fdt) {
+ error_report("could not load DTB '%s'\n", dtb_filename);
+ exit(EXIT_FAILURE);
+ }
- tagptr = put_tag(tagptr, 0x7b0b, 0, NULL);
- if (cmdline_size > 1) {
- tagptr = put_tag(tagptr, 0x1001,
- cmdline_size, kernel_cmdline);
+ cpu_physical_memory_write(cur_lowmem, fdt, fdt_size);
+ cur_tagptr = put_tag(cur_tagptr, BP_TAG_FDT,
+ sizeof(dtb_addr), &dtb_addr);
+ cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + fdt_size, 4096);
+ }
+ if (initrd_filename) {
+ BpMemInfo initrd_location = { 0 };
+ int initrd_size = load_ramdisk(initrd_filename, cur_lowmem,
+ lowmem_end - cur_lowmem);
+
+ if (initrd_size < 0) {
+ initrd_size = load_image_targphys(initrd_filename,
+ cur_lowmem,
+ lowmem_end - cur_lowmem);
+ }
+ if (initrd_size < 0) {
+ error_report("could not load initrd '%s'\n", initrd_filename);
+ exit(EXIT_FAILURE);
}
- tagptr = put_tag(tagptr, 0x7e0b, 0, NULL);
+ initrd_location.start = tswap32(cur_lowmem);
+ initrd_location.end = tswap32(cur_lowmem + initrd_size);
+ cur_tagptr = put_tag(cur_tagptr, BP_TAG_INITRD,
+ sizeof(initrd_location), &initrd_location);
+ cur_lowmem = QEMU_ALIGN_UP(cur_lowmem + initrd_size, 4096);
}
+ cur_tagptr = put_tag(cur_tagptr, BP_TAG_LAST, 0, NULL);
+ env->regs[2] = tagptr;
+
uint64_t elf_entry;
uint64_t elf_lowaddr;
int success = load_elf(kernel_filename, translate_phys_addr, cpu,
&elf_entry, &elf_lowaddr, NULL, be, ELF_MACHINE, 0);
if (success > 0) {
- env->pc = elf_entry;
+ entry_point = elf_entry;
+ } else {
+ hwaddr ep;
+ int is_linux;
+ success = load_uimage(kernel_filename, &ep, NULL, &is_linux);
+ if (success > 0 && is_linux) {
+ entry_point = ep;
+ } else {
+ error_report("could not load kernel '%s'\n",
+ kernel_filename);
+ exit(EXIT_FAILURE);
+ }
+ }
+ if (entry_point != env->pc) {
+ static const uint8_t jx_a0[] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ 0x0a, 0, 0,
+#else
+ 0xa0, 0, 0,
+#endif
+ };
+ env->regs[0] = entry_point;
+ cpu_physical_memory_write(env->pc, jx_a0, sizeof(jx_a0));
}
} else {
if (flash) {
@@ -266,9 +351,9 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine)
MemoryRegion *flash_io = g_malloc(sizeof(*flash_io));
memory_region_init_alias(flash_io, NULL, "lx60.flash",
- flash_mr, 0,
- board->flash_size < 0x02000000 ?
- board->flash_size : 0x02000000);
+ flash_mr, board->flash_boot_base,
+ board->flash_size - board->flash_boot_base < 0x02000000 ?
+ board->flash_size - board->flash_boot_base : 0x02000000);
memory_region_add_subregion(system_memory, 0xfe000000,
flash_io);
}
@@ -313,6 +398,7 @@ static void xtensa_kc705_init(MachineState *machine)
static const LxBoardDesc kc705_board = {
.flash_base = 0xf0000000,
.flash_size = 0x08000000,
+ .flash_boot_base = 0x06000000,
.flash_sector_size = 0x20000,
.sram_size = 0x2000000,
};
diff --git a/include/block/block.h b/include/block/block.h
index d0baf4fb83..7e92f549fb 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -175,6 +175,7 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_MIRROR,
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
+ BLOCK_OP_TYPE_REPLACE,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
@@ -213,7 +214,7 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
bool allow_none, Error **errp);
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
-void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
int bdrv_open(BlockDriverState **pbs, const char *filename,
const char *reference, QDict *options, int flags,
BlockDriver *drv, Error **errp);
@@ -314,6 +315,9 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
BlockDriverState *candidate);
bool bdrv_is_first_non_filter(BlockDriverState *candidate);
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp);
+
/* async block I/O */
typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
int sector_num);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 715c761fad..53e77cf11e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -100,6 +100,9 @@ struct BlockDriver {
*/
bool bdrv_needs_filename;
+ /* Set if a driver can support backing files */
+ bool supports_backing;
+
/* For handling image reopen for split or non-split files */
int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp);
@@ -486,6 +489,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
* mirror_start:
* @bs: Block device to operate on.
* @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ * only be used when full mirroring is selected.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @granularity: The chosen granularity for the dirty bitmap.
* @buf_size: The amount of data that can be in flight at one time.
@@ -502,6 +507,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
* @bs will be switched to read from @target.
*/
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ const char *replaces,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index e443987ea8..f3cf63f9f5 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -147,6 +147,14 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);
/**
+ * block_job_yield:
+ * @job: The job that calls the function.
+ *
+ * Yield the block job coroutine.
+ */
+void block_job_yield(BlockJob *job);
+
+/**
* block_job_completed:
* @job: The job being completed.
* @ret: The status code.
@@ -217,7 +225,7 @@ void block_job_pause(BlockJob *job);
void block_job_resume(BlockJob *job);
/**
- * block_job_event_cancle:
+ * block_job_event_cancelled:
* @job: The job whose information is requested.
*
* Send a BLOCK_JOB_CANCELLED event for the specified job.
diff --git a/include/block/qapi.h b/include/block/qapi.h
index e92c00daf6..03745464d6 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -39,7 +39,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
void bdrv_query_info(BlockDriverState *bs,
BlockInfo **p_info,
Error **errp);
-BlockStats *bdrv_query_stats(const BlockDriverState *bs);
void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
QEMUSnapshotInfo *sn);
diff --git a/include/hw/misc/vfio.h b/include/hw/misc/vfio.h
new file mode 100644
index 0000000000..0b26cd8e11
--- /dev/null
+++ b/include/hw/misc/vfio.h
@@ -0,0 +1,9 @@
+#ifndef VFIO_API_H
+#define VFIO_API_H
+
+#include "qemu/typedefs.h"
+
+extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+ int req, void *param);
+
+#endif
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0934518bbd..32f0aa7d10 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -27,13 +27,15 @@
#include "hw/pci/pci_host.h"
#include "hw/ppc/xics.h"
-#define SPAPR_MSIX_MAX_DEVS 32
-
#define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
#define SPAPR_PCI_HOST_BRIDGE(obj) \
OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+ OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
+
#define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
#define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
@@ -41,6 +43,7 @@
typedef struct sPAPRPHBClass sPAPRPHBClass;
typedef struct sPAPRPHBState sPAPRPHBState;
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
struct sPAPRPHBClass {
PCIHostBridgeClass parent_class;
@@ -48,6 +51,16 @@ struct sPAPRPHBClass {
void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
};
+typedef struct spapr_pci_msi {
+ uint32_t first_irq;
+ uint32_t num;
+} spapr_pci_msi;
+
+typedef struct spapr_pci_msi_mig {
+ uint32_t key;
+ spapr_pci_msi value;
+} spapr_pci_msi_mig;
+
struct sPAPRPHBState {
PCIHostState parent_obj;
@@ -67,15 +80,20 @@ struct sPAPRPHBState {
uint32_t irq;
} lsi_table[PCI_NUM_PINS];
- struct spapr_pci_msi {
- uint32_t config_addr;
- uint32_t irq;
- uint32_t nvec;
- } msi_table[SPAPR_MSIX_MAX_DEVS];
+ GHashTable *msi;
+ /* Temporary cache for migration purposes */
+ int32_t msi_devs_num;
+ spapr_pci_msi_mig *msi_devs;
QLIST_ENTRY(sPAPRPHBState) list;
};
+struct sPAPRPHBVFIOState {
+ sPAPRPHBState phb;
+
+ int32_t iommugroupid;
+};
+
#define SPAPR_PCI_BASE_BUID 0x800000020000000ULL
#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 08c301f38d..bbba51a703 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -27,7 +27,6 @@ typedef struct sPAPREnvironment {
long rtas_size;
void *fdt_skel;
target_ulong entry_point;
- uint32_t next_irq;
uint64_t rtc_offset;
struct PPCTimebase tb;
bool has_graphics;
@@ -339,16 +338,6 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
int spapr_allocate_irq(int hint, bool lsi);
int spapr_allocate_irq_block(int num, bool lsi, bool msi);
-static inline int spapr_allocate_msi(int hint)
-{
- return spapr_allocate_irq(hint, false);
-}
-
-static inline int spapr_allocate_lsi(int hint)
-{
- return spapr_allocate_irq(hint, true);
-}
-
/* RTAS return codes */
#define RTAS_OUT_SUCCESS 0
#define RTAS_OUT_NO_ERRORS_FOUND 1
@@ -358,6 +347,58 @@ static inline int spapr_allocate_lsi(int hint)
#define RTAS_OUT_NOT_SUPPORTED -3
#define RTAS_OUT_NOT_AUTHORIZED -9002
+/* RTAS tokens */
+#define RTAS_TOKEN_BASE 0x2000
+
+#define RTAS_DISPLAY_CHARACTER (RTAS_TOKEN_BASE + 0x00)
+#define RTAS_GET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x01)
+#define RTAS_SET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x02)
+#define RTAS_POWER_OFF (RTAS_TOKEN_BASE + 0x03)
+#define RTAS_SYSTEM_REBOOT (RTAS_TOKEN_BASE + 0x04)
+#define RTAS_QUERY_CPU_STOPPED_STATE (RTAS_TOKEN_BASE + 0x05)
+#define RTAS_START_CPU (RTAS_TOKEN_BASE + 0x06)
+#define RTAS_STOP_SELF (RTAS_TOKEN_BASE + 0x07)
+#define RTAS_IBM_GET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x08)
+#define RTAS_IBM_SET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x09)
+#define RTAS_IBM_SET_XIVE (RTAS_TOKEN_BASE + 0x0A)
+#define RTAS_IBM_GET_XIVE (RTAS_TOKEN_BASE + 0x0B)
+#define RTAS_IBM_INT_OFF (RTAS_TOKEN_BASE + 0x0C)
+#define RTAS_IBM_INT_ON (RTAS_TOKEN_BASE + 0x0D)
+#define RTAS_CHECK_EXCEPTION (RTAS_TOKEN_BASE + 0x0E)
+#define RTAS_EVENT_SCAN (RTAS_TOKEN_BASE + 0x0F)
+#define RTAS_IBM_SET_TCE_BYPASS (RTAS_TOKEN_BASE + 0x10)
+#define RTAS_QUIESCE (RTAS_TOKEN_BASE + 0x11)
+#define RTAS_NVRAM_FETCH (RTAS_TOKEN_BASE + 0x12)
+#define RTAS_NVRAM_STORE (RTAS_TOKEN_BASE + 0x13)
+#define RTAS_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x14)
+#define RTAS_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x15)
+#define RTAS_IBM_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x16)
+#define RTAS_IBM_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x17)
+#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER (RTAS_TOKEN_BASE + 0x18)
+#define RTAS_IBM_CHANGE_MSI (RTAS_TOKEN_BASE + 0x19)
+#define RTAS_SET_INDICATOR (RTAS_TOKEN_BASE + 0x1A)
+#define RTAS_SET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1B)
+#define RTAS_GET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1C)
+#define RTAS_GET_SENSOR_STATE (RTAS_TOKEN_BASE + 0x1D)
+#define RTAS_IBM_CONFIGURE_CONNECTOR (RTAS_TOKEN_BASE + 0x1E)
+#define RTAS_IBM_OS_TERM (RTAS_TOKEN_BASE + 0x1F)
+#define RTAS_IBM_EXTENDED_OS_TERM (RTAS_TOKEN_BASE + 0x20)
+
+#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x21)
+
+/* RTAS ibm,get-system-parameter token values */
+#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20
+#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42
+#define RTAS_SYSPARM_UUID 48
+
+/* Possible values for the platform-processor-diagnostics-run-mode parameter
+ * of the RTAS ibm,get-system-parameter call.
+ */
+#define DIAGNOSTICS_RUN_MODE_DISABLED 0
+#define DIAGNOSTICS_RUN_MODE_STAGGERED 1
+#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2
+#define DIAGNOSTICS_RUN_MODE_PERIODIC 3
+
static inline uint64_t ppc64_phys_to_real(uint64_t addr)
{
return addr & ~0xF000000000000000ULL;
@@ -373,11 +414,24 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val)
stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val);
}
+
+static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len,
+ uint8_t *buffer, uint16_t buffer_len)
+{
+ if (phys_len < 2) {
+ return;
+ }
+ stw_be_phys(&address_space_memory,
+ ppc64_phys_to_real(phys), buffer_len);
+ cpu_physical_memory_write(ppc64_phys_to_real(phys + 2),
+ buffer, MIN(buffer_len, phys_len - 2));
+}
+
typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
@@ -407,6 +461,7 @@ struct sPAPRTCETable {
uint32_t page_shift;
uint64_t *table;
bool bypass;
+ bool vfio_accel;
int fd;
MemoryRegion iommu;
QLIST_ENTRY(sPAPRTCETable) list;
@@ -418,7 +473,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
uint64_t bus_offset,
uint32_t page_shift,
- uint32_t nb_table);
+ uint32_t nb_table,
+ bool vfio_accel);
MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
int spapr_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 85e4c8a3dd..a214dd7f28 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -136,7 +136,6 @@ struct ICSState {
uint32_t nr_irqs;
uint32_t offset;
qemu_irq *qirqs;
- bool *islsi;
ICSIRQState *irqs;
XICSState *icp;
};
@@ -150,12 +149,20 @@ struct ICSIRQState {
#define XICS_STATUS_REJECTED 0x4
#define XICS_STATUS_MASKED_PENDING 0x8
uint8_t status;
+/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */
+#define XICS_FLAGS_IRQ_LSI 0x1
+#define XICS_FLAGS_IRQ_MSI 0x2
+#define XICS_FLAGS_IRQ_MASK 0x3
+ uint8_t flags;
};
#define XICS_IRQS 1024
qemu_irq xics_get_qirq(XICSState *icp, int irq);
void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
+void xics_free(XICSState *icp, int irq, int num);
void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 4bc9b549ad..d0fb26f963 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -17,6 +17,7 @@
#include "hw/virtio/virtio.h"
#include "hw/block/block.h"
#include "sysemu/iothread.h"
+#include "block/block.h"
#define TYPE_VIRTIO_BLK "virtio-blk-device"
#define VIRTIO_BLK(obj) \
@@ -116,6 +117,7 @@ struct VirtIOBlkConf
struct VirtIOBlockDataPlane;
+struct VirtIOBlockReq;
typedef struct VirtIOBlock {
VirtIODevice parent_obj;
BlockDriverState *bs;
@@ -127,12 +129,29 @@ typedef struct VirtIOBlock {
unsigned short sector_mask;
bool original_wce;
VMChangeStateEntry *change;
+ /* Function to push to vq and notify guest */
+ void (*complete_request)(struct VirtIOBlockReq *req, unsigned char status);
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
Notifier migration_state_notifier;
struct VirtIOBlockDataPlane *dataplane;
#endif
} VirtIOBlock;
+typedef struct MultiReqBuffer {
+ BlockRequest blkreq[32];
+ unsigned int num_writes;
+} MultiReqBuffer;
+
+typedef struct VirtIOBlockReq {
+ VirtIOBlock *dev;
+ VirtQueueElement *elem;
+ struct virtio_blk_inhdr *in;
+ struct virtio_blk_outhdr out;
+ QEMUIOVector qiov;
+ struct VirtIOBlockReq *next;
+ BlockAcctCookie acct;
+} VirtIOBlockReq;
+
#define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
DEFINE_VIRTIO_COMMON_FEATURES(_state, _field)
@@ -158,4 +177,8 @@ void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk);
int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
VirtQueueElement *elem);
+void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb);
+
+void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb);
+
#endif
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 71a8a95641..9a001bd28f 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -101,6 +101,7 @@ enum VMStateFlags {
VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/
VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/
VMS_MUST_EXIST = 0x1000, /* Field must exist in input */
+ VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */
};
typedef struct {
@@ -429,6 +430,16 @@ extern const VMStateInfo vmstate_info_bitmap;
.offset = offsetof(_state, _field), \
}
+#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
diff --git a/include/net/net.h b/include/net/net.h
index 8b189da5ee..ed594f9bdb 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -24,13 +24,13 @@ struct MACAddr {
typedef struct NICPeers {
NetClientState *ncs[MAX_QUEUE_NUM];
+ int32_t queues;
} NICPeers;
typedef struct NICConf {
MACAddr macaddr;
NICPeers peers;
int32_t bootindex;
- int32_t queues;
} NICConf;
#define DEFINE_NIC_PROPERTIES(_state, _conf) \
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 1248eda272..60777fecf6 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -736,6 +736,14 @@ enum {
QEMU_PPC_FEATURE_TRUE_LE = 0x00000002,
QEMU_PPC_FEATURE_PPC_LE = 0x00000001,
+
+ /* Feature definitions in AT_HWCAP2. */
+ QEMU_PPC_FEATURE2_ARCH_2_07 = 0x80000000, /* ISA 2.07 */
+ QEMU_PPC_FEATURE2_HAS_HTM = 0x40000000, /* Hardware Transactional Memory */
+ QEMU_PPC_FEATURE2_HAS_DSCR = 0x20000000, /* Data Stream Control Register */
+ QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */
+ QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */
+ QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */
};
#define ELF_HWCAP get_elf_hwcap()
@@ -749,6 +757,8 @@ static uint32_t get_elf_hwcap(void)
Altivec/FP/SPE support. Anything else is just a bonus. */
#define GET_FEATURE(flag, feature) \
do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature) \
+ do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64);
GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU);
GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC);
@@ -757,7 +767,36 @@ static uint32_t get_elf_hwcap(void)
GET_FEATURE(PPC_SPE_DOUBLE, QEMU_PPC_FEATURE_HAS_EFP_DOUBLE);
GET_FEATURE(PPC_BOOKE, QEMU_PPC_FEATURE_BOOKE);
GET_FEATURE(PPC_405_MAC, QEMU_PPC_FEATURE_HAS_4xxMAC);
+ GET_FEATURE2(PPC2_DFP, QEMU_PPC_FEATURE_HAS_DFP);
+ GET_FEATURE2(PPC2_VSX, QEMU_PPC_FEATURE_HAS_VSX);
+ GET_FEATURE2((PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 |
+ PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206),
+ QEMU_PPC_FEATURE_ARCH_2_06);
+#undef GET_FEATURE
+#undef GET_FEATURE2
+
+ return features;
+}
+
+#define ELF_HWCAP2 get_elf_hwcap2()
+
+static uint32_t get_elf_hwcap2(void)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);
+ uint32_t features = 0;
+
+#define GET_FEATURE(flag, feature) \
+ do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature) \
+ do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
+
+ GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL);
+ GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR);
+ GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
+ PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07);
+
#undef GET_FEATURE
+#undef GET_FEATURE2
return features;
}
@@ -774,8 +813,9 @@ static uint32_t get_elf_hwcap(void)
#define DLINFO_ARCH_ITEMS 5
#define ARCH_DLINFO \
do { \
- NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20); \
- NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20); \
+ PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); \
+ NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \
+ NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \
NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \
/* \
* Now handle glibc compatibility. \
diff --git a/monitor.c b/monitor.c
index a8ab600c88..5718d0b60a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -570,6 +570,7 @@ monitor_qapi_event_throttle(QAPIEvent event, int64_t rate)
trace_monitor_protocol_event_throttle(event, rate);
evstate->event = event;
+ assert(rate * SCALE_MS <= INT64_MAX);
evstate->rate = rate * SCALE_MS;
evstate->last = 0;
evstate->data = NULL;
@@ -585,9 +586,9 @@ static void monitor_qapi_event_init(void)
monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000);
monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000);
monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000);
- /* limit the rate of quorum events to avoid hammering the management */
monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000);
monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000);
+ monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000);
qmp_event_set_func_emit(monitor_qapi_event_queue);
}
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 301f6b6b51..a06ba59dad 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
common-obj-y += socket.o
common-obj-y += dump.o
common-obj-y += eth.o
+common-obj-$(CONFIG_LINUX) += l2tpv3.o
common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
common-obj-$(CONFIG_LINUX) += tap-linux.o
common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7f3d4ae9f3..2e8fedad8d 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
int net_init_bridge(const NetClientOptions *opts, const char *name,
NetClientState *peer);
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+ NetClientState *peer);
#ifdef CONFIG_VDE
int net_init_vde(const NetClientOptions *opts, const char *name,
NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000000..528d95b641
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,757 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <netdb.h>
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+/* The buffer size needs to be investigated for optimum numbers and
+ * optimum means of paging in on different systems. This size is
+ * chosen to be sufficient to accommodate one packet with some headers
+ */
+
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+#define BUFFER_SIZE 2048
+#define IOVSIZE 2
+#define MAX_L2TPV3_MSGCNT 64
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+/* Header set to 0x30000 signifies a data packet */
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+typedef struct NetL2TPV3State {
+ NetClientState nc;
+ int fd;
+
+ /*
+ * these are used for xmit - that happens packet a time
+ * and for first sign of life packet (easier to parse that once)
+ */
+
+ uint8_t *header_buf;
+ struct iovec *vec;
+
+ /*
+ * these are used for receive - try to "eat" up to 32 packets at a time
+ */
+
+ struct mmsghdr *msgvec;
+
+ /*
+ * peer address
+ */
+
+ struct sockaddr_storage *dgram_dst;
+ uint32_t dst_size;
+
+ /*
+ * L2TPv3 parameters
+ */
+
+ uint64_t rx_cookie;
+ uint64_t tx_cookie;
+ uint32_t rx_session;
+ uint32_t tx_session;
+ uint32_t header_size;
+ uint32_t counter;
+
+ /*
+ * DOS avoidance in error handling
+ */
+
+ bool header_mismatch;
+
+ /*
+ * Ring buffer handling
+ */
+
+ int queue_head;
+ int queue_tail;
+ int queue_depth;
+
+ /*
+ * Precomputed offsets
+ */
+
+ uint32_t offset;
+ uint32_t cookie_offset;
+ uint32_t counter_offset;
+ uint32_t session_offset;
+
+ /* Poll Control */
+
+ bool read_poll;
+ bool write_poll;
+
+ /* Flags */
+
+ bool ipv6;
+ bool udp;
+ bool has_counter;
+ bool pin_counter;
+ bool cookie;
+ bool cookie_is_64;
+
+} NetL2TPV3State;
+
+static int l2tpv3_can_send(void *opaque);
+static void net_l2tpv3_send(void *opaque);
+static void l2tpv3_writable(void *opaque);
+
+static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
+{
+ qemu_set_fd_handler2(s->fd,
+ s->read_poll ? l2tpv3_can_send : NULL,
+ s->read_poll ? net_l2tpv3_send : NULL,
+ s->write_poll ? l2tpv3_writable : NULL,
+ s);
+}
+
+static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
+{
+ if (s->read_poll != enable) {
+ s->read_poll = enable;
+ l2tpv3_update_fd_handler(s);
+ }
+}
+
+static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
+{
+ if (s->write_poll != enable) {
+ s->write_poll = enable;
+ l2tpv3_update_fd_handler(s);
+ }
+}
+
+static void l2tpv3_writable(void *opaque)
+{
+ NetL2TPV3State *s = opaque;
+ l2tpv3_write_poll(s, false);
+ qemu_flush_queued_packets(&s->nc);
+}
+
+static int l2tpv3_can_send(void *opaque)
+{
+ NetL2TPV3State *s = opaque;
+
+ return qemu_can_send_packet(&s->nc);
+}
+
+static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+ l2tpv3_read_poll(s, true);
+}
+
+static void l2tpv3_poll(NetClientState *nc, bool enable)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+ l2tpv3_write_poll(s, enable);
+ l2tpv3_read_poll(s, enable);
+}
+
+static void l2tpv3_form_header(NetL2TPV3State *s)
+{
+ uint32_t *counter;
+
+ if (s->udp) {
+ stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
+ }
+ stl_be_p(
+ (uint32_t *) (s->header_buf + s->session_offset),
+ s->tx_session
+ );
+ if (s->cookie) {
+ if (s->cookie_is_64) {
+ stq_be_p(
+ (uint64_t *)(s->header_buf + s->cookie_offset),
+ s->tx_cookie
+ );
+ } else {
+ stl_be_p(
+ (uint32_t *) (s->header_buf + s->cookie_offset),
+ s->tx_cookie
+ );
+ }
+ }
+ if (s->has_counter) {
+ counter = (uint32_t *)(s->header_buf + s->counter_offset);
+ if (s->pin_counter) {
+ *counter = 0;
+ } else {
+ stl_be_p(counter, ++s->counter);
+ }
+ }
+}
+
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+ const struct iovec *iov,
+ int iovcnt)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+ struct msghdr message;
+ int ret;
+
+ if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+ error_report(
+ "iovec too long %d > %d, change l2tpv3.h",
+ iovcnt, MAX_L2TPV3_IOVCNT
+ );
+ return -1;
+ }
+ l2tpv3_form_header(s);
+ memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+ s->vec->iov_base = s->header_buf;
+ s->vec->iov_len = s->offset;
+ message.msg_name = s->dgram_dst;
+ message.msg_namelen = s->dst_size;
+ message.msg_iov = s->vec;
+ message.msg_iovlen = iovcnt + 1;
+ message.msg_control = NULL;
+ message.msg_controllen = 0;
+ message.msg_flags = 0;
+ do {
+ ret = sendmsg(s->fd, &message, 0);
+ } while ((ret == -1) && (errno == EINTR));
+ if (ret > 0) {
+ ret -= s->offset;
+ } else if (ret == 0) {
+ /* belt and braces - should not occur on DGRAM
+ * we should get an error and never a 0 send
+ */
+ ret = iov_size(iov, iovcnt);
+ } else {
+ /* signal upper layer that socket buffer is full */
+ ret = -errno;
+ if (ret == -EAGAIN || ret == -ENOBUFS) {
+ l2tpv3_write_poll(s, true);
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+ const uint8_t *buf,
+ size_t size)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+ struct iovec *vec;
+ struct msghdr message;
+ ssize_t ret = 0;
+
+ l2tpv3_form_header(s);
+ vec = s->vec;
+ vec->iov_base = s->header_buf;
+ vec->iov_len = s->offset;
+ vec++;
+ vec->iov_base = (void *) buf;
+ vec->iov_len = size;
+ message.msg_name = s->dgram_dst;
+ message.msg_namelen = s->dst_size;
+ message.msg_iov = s->vec;
+ message.msg_iovlen = 2;
+ message.msg_control = NULL;
+ message.msg_controllen = 0;
+ message.msg_flags = 0;
+ do {
+ ret = sendmsg(s->fd, &message, 0);
+ } while ((ret == -1) && (errno == EINTR));
+ if (ret > 0) {
+ ret -= s->offset;
+ } else if (ret == 0) {
+ /* belt and braces - should not occur on DGRAM
+ * we should get an error and never a 0 send
+ */
+ ret = size;
+ } else {
+ ret = -errno;
+ if (ret == -EAGAIN || ret == -ENOBUFS) {
+ /* signal upper layer that socket buffer is full */
+ l2tpv3_write_poll(s, true);
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+
+ uint32_t *session;
+ uint64_t cookie;
+
+ if ((!s->udp) && (!s->ipv6)) {
+ buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+ }
+
+ /* we do not do a strict check for "data" packets as per
+ * the RFC spec because the pure IP spec does not have
+ * that anyway.
+ */
+
+ if (s->cookie) {
+ if (s->cookie_is_64) {
+ cookie = ldq_be_p(buf + s->cookie_offset);
+ } else {
+ cookie = ldl_be_p(buf + s->cookie_offset);
+ }
+ if (cookie != s->rx_cookie) {
+ if (!s->header_mismatch) {
+ error_report("unknown cookie id");
+ }
+ return -1;
+ }
+ }
+ session = (uint32_t *) (buf + s->session_offset);
+ if (ldl_be_p(session) != s->rx_session) {
+ if (!s->header_mismatch) {
+ error_report("session mismatch");
+ }
+ return -1;
+ }
+ return 0;
+}
+
+static void net_l2tpv3_process_queue(NetL2TPV3State *s)
+{
+ int size = 0;
+ struct iovec *vec;
+ bool bad_read;
+ int data_size;
+ struct mmsghdr *msgvec;
+
+ /* go into ring mode only if there is a "pending" tail */
+ if (s->queue_depth > 0) {
+ do {
+ msgvec = s->msgvec + s->queue_tail;
+ if (msgvec->msg_len > 0) {
+ data_size = msgvec->msg_len - s->header_size;
+ vec = msgvec->msg_hdr.msg_iov;
+ if ((data_size > 0) &&
+ (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
+ vec++;
+ /* Use the legacy delivery for now, we will
+ * switch to using our own ring as a queueing mechanism
+ * at a later date
+ */
+ size = qemu_send_packet_async(
+ &s->nc,
+ vec->iov_base,
+ data_size,
+ l2tpv3_send_completed
+ );
+ if (size == 0) {
+ l2tpv3_read_poll(s, false);
+ }
+ bad_read = false;
+ } else {
+ bad_read = true;
+ if (!s->header_mismatch) {
+ /* report error only once */
+ error_report("l2tpv3 header verification failed");
+ s->header_mismatch = true;
+ }
+ }
+ } else {
+ bad_read = true;
+ }
+ s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
+ s->queue_depth--;
+ } while (
+ (s->queue_depth > 0) &&
+ qemu_can_send_packet(&s->nc) &&
+ ((size > 0) || bad_read)
+ );
+ }
+}
+
+static void net_l2tpv3_send(void *opaque)
+{
+ NetL2TPV3State *s = opaque;
+ int target_count, count;
+ struct mmsghdr *msgvec;
+
+ /* go into ring mode only if there is a "pending" tail */
+
+ if (s->queue_depth) {
+
+ /* The ring buffer we use has variable intake
+ * count of how much we can read varies - adjust accordingly
+ */
+
+ target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
+
+ /* Ensure we do not overrun the ring when we have
+ * a lot of enqueued packets
+ */
+
+ if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
+ target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
+ }
+ } else {
+
+ /* we do not have any pending packets - we can use
+ * the whole message vector linearly instead of using
+ * it as a ring
+ */
+
+ s->queue_head = 0;
+ s->queue_tail = 0;
+ target_count = MAX_L2TPV3_MSGCNT;
+ }
+
+ msgvec = s->msgvec + s->queue_head;
+ if (target_count > 0) {
+ do {
+ count = recvmmsg(
+ s->fd,
+ msgvec,
+ target_count, MSG_DONTWAIT, NULL);
+ } while ((count == -1) && (errno == EINTR));
+ if (count < 0) {
+ /* Recv error - we still need to flush packets here,
+ * (re)set queue head to current position
+ */
+ count = 0;
+ }
+ s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
+ s->queue_depth += count;
+ }
+ net_l2tpv3_process_queue(s);
+}
+
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+ int i, j;
+ struct iovec *iov;
+ struct mmsghdr *cleanup = msgvec;
+ if (cleanup) {
+ for (i = 0; i < count; i++) {
+ if (cleanup->msg_hdr.msg_iov) {
+ iov = cleanup->msg_hdr.msg_iov;
+ for (j = 0; j < iovcount; j++) {
+ g_free(iov->iov_base);
+ iov++;
+ }
+ g_free(cleanup->msg_hdr.msg_iov);
+ }
+ cleanup++;
+ }
+ g_free(msgvec);
+ }
+}
+
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+ int i;
+ struct iovec *iov;
+ struct mmsghdr *msgvec, *result;
+
+ msgvec = g_malloc(sizeof(struct mmsghdr) * count);
+ result = msgvec;
+ for (i = 0; i < count ; i++) {
+ msgvec->msg_hdr.msg_name = NULL;
+ msgvec->msg_hdr.msg_namelen = 0;
+ iov = g_malloc(sizeof(struct iovec) * IOVSIZE);
+ msgvec->msg_hdr.msg_iov = iov;
+ iov->iov_base = g_malloc(s->header_size);
+ iov->iov_len = s->header_size;
+ iov++ ;
+ iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+ iov->iov_len = BUFFER_SIZE;
+ msgvec->msg_hdr.msg_iovlen = 2;
+ msgvec->msg_hdr.msg_control = NULL;
+ msgvec->msg_hdr.msg_controllen = 0;
+ msgvec->msg_hdr.msg_flags = 0;
+ msgvec++;
+ }
+ return result;
+}
+
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+ qemu_purge_queued_packets(nc);
+ l2tpv3_read_poll(s, false);
+ l2tpv3_write_poll(s, false);
+ if (s->fd > 0) {
+ close(s->fd);
+ }
+ destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+ g_free(s->vec);
+ g_free(s->header_buf);
+ g_free(s->dgram_dst);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+ .size = sizeof(NetL2TPV3State),
+ .receive = net_l2tpv3_receive_dgram,
+ .receive_iov = net_l2tpv3_receive_dgram_iov,
+ .poll = l2tpv3_poll,
+ .cleanup = net_l2tpv3_cleanup,
+};
+
+int net_init_l2tpv3(const NetClientOptions *opts,
+ const char *name,
+ NetClientState *peer)
+{
+
+
+ const NetdevL2TPv3Options *l2tpv3;
+ NetL2TPV3State *s;
+ NetClientState *nc;
+ int fd = -1, gairet;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
+ char *srcport, *dstport;
+
+ nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+ s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+ s->queue_head = 0;
+ s->queue_tail = 0;
+ s->header_mismatch = false;
+
+ assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+ l2tpv3 = opts->l2tpv3;
+
+ if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
+ s->ipv6 = l2tpv3->ipv6;
+ } else {
+ s->ipv6 = false;
+ }
+
+ if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
+ error_report("l2tpv3_open : offset must be less than 256 bytes");
+ goto outerr;
+ }
+
+ if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
+ if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
+ s->cookie = true;
+ } else {
+ goto outerr;
+ }
+ } else {
+ s->cookie = false;
+ }
+
+ if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
+ s->cookie_is_64 = true;
+ } else {
+ s->cookie_is_64 = false;
+ }
+
+ if (l2tpv3->has_udp && l2tpv3->udp) {
+ s->udp = true;
+ if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+ error_report("l2tpv3_open : need both src and dst port for udp");
+ goto outerr;
+ } else {
+ srcport = l2tpv3->srcport;
+ dstport = l2tpv3->dstport;
+ }
+ } else {
+ s->udp = false;
+ srcport = NULL;
+ dstport = NULL;
+ }
+
+
+ s->offset = 4;
+ s->session_offset = 0;
+ s->cookie_offset = 4;
+ s->counter_offset = 4;
+
+ s->tx_session = l2tpv3->txsession;
+ if (l2tpv3->has_rxsession) {
+ s->rx_session = l2tpv3->rxsession;
+ } else {
+ s->rx_session = s->tx_session;
+ }
+
+ if (s->cookie) {
+ s->rx_cookie = l2tpv3->rxcookie;
+ s->tx_cookie = l2tpv3->txcookie;
+ if (s->cookie_is_64 == true) {
+ /* 64 bit cookie */
+ s->offset += 8;
+ s->counter_offset += 8;
+ } else {
+ /* 32 bit cookie */
+ s->offset += 4;
+ s->counter_offset += 4;
+ }
+ }
+
+ memset(&hints, 0, sizeof(hints));
+
+ if (s->ipv6) {
+ hints.ai_family = AF_INET6;
+ } else {
+ hints.ai_family = AF_INET;
+ }
+ if (s->udp) {
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_protocol = 0;
+ s->offset += 4;
+ s->counter_offset += 4;
+ s->session_offset += 4;
+ s->cookie_offset += 4;
+ } else {
+ hints.ai_socktype = SOCK_RAW;
+ hints.ai_protocol = IPPROTO_L2TP;
+ }
+
+ gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+
+ if ((gairet != 0) || (result == NULL)) {
+ error_report(
+ "l2tpv3_open : could not resolve src, errno = %s",
+ gai_strerror(gairet)
+ );
+ goto outerr;
+ }
+ fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+ if (fd == -1) {
+ fd = -errno;
+ error_report("l2tpv3_open : socket creation failed, errno = %d", -fd);
+ freeaddrinfo(result);
+ goto outerr;
+ }
+ if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
+ error_report("l2tpv3_open : could not bind socket err=%i", errno);
+ goto outerr;
+ }
+ if (result) {
+ freeaddrinfo(result);
+ }
+
+ memset(&hints, 0, sizeof(hints));
+
+ if (s->ipv6) {
+ hints.ai_family = AF_INET6;
+ } else {
+ hints.ai_family = AF_INET;
+ }
+ if (s->udp) {
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_protocol = 0;
+ } else {
+ hints.ai_socktype = SOCK_RAW;
+ hints.ai_protocol = IPPROTO_L2TP;
+ }
+
+ result = NULL;
+ gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+ if ((gairet != 0) || (result == NULL)) {
+ error_report(
+ "l2tpv3_open : could not resolve dst, error = %s",
+ gai_strerror(gairet)
+ );
+ goto outerr;
+ }
+
+ s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage));
+ memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage));
+ memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+ s->dst_size = result->ai_addrlen;
+
+ if (result) {
+ freeaddrinfo(result);
+ }
+
+ if (l2tpv3->has_counter && l2tpv3->counter) {
+ s->has_counter = true;
+ s->offset += 4;
+ } else {
+ s->has_counter = false;
+ }
+
+ if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
+ s->has_counter = true; /* pin counter implies that there is counter */
+ s->pin_counter = true;
+ } else {
+ s->pin_counter = false;
+ }
+
+ if (l2tpv3->has_offset) {
+ /* extra offset */
+ s->offset += l2tpv3->offset;
+ }
+
+ if ((s->ipv6) || (s->udp)) {
+ s->header_size = s->offset;
+ } else {
+ s->header_size = s->offset + sizeof(struct iphdr);
+ }
+
+ s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+ s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);
+ s->header_buf = g_malloc(s->header_size);
+
+ qemu_set_nonblock(fd);
+
+ s->fd = fd;
+ s->counter = 0;
+
+ l2tpv3_read_poll(s, true);
+
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+ "l2tpv3: connected");
+ return 0;
+outerr:
+ qemu_del_net_client(nc);
+ if (fd > 0) {
+ close(fd);
+ }
+ if (result) {
+ freeaddrinfo(result);
+ }
+ return -1;
+}
+
diff --git a/net/net.c b/net/net.c
index 3dac29b844..0869c60bee 100644
--- a/net/net.c
+++ b/net/net.c
@@ -250,7 +250,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
{
NetClientState **peers = conf->peers.ncs;
NICState *nic;
- int i, queues = MAX(1, conf->queues);
+ int i, queues = MAX(1, conf->peers.queues);
assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC);
assert(info->size >= sizeof(NICState));
@@ -363,7 +363,7 @@ void qemu_del_net_client(NetClientState *nc)
void qemu_del_nic(NICState *nic)
{
- int i, queues = MAX(nic->conf->queues, 1);
+ int i, queues = MAX(nic->conf->peers.queues, 1);
/* If this is a peer NIC and peer has already been deleted, free it now. */
if (nic->peer_deleted) {
@@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
#ifdef CONFIG_VHOST_NET_USED
[NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
#endif
+#ifdef CONFIG_LINUX
+ [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3,
+#endif
};
@@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
#ifdef CONFIG_VHOST_NET_USED
case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
#endif
+#ifdef CONFIG_LINUX
+ case NET_CLIENT_OPTIONS_KIND_L2TPV3:
+#endif
break;
default:
diff --git a/qapi-schema.json b/qapi-schema.json
index e7727a1153..a83befc05b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -8,6 +8,9 @@
# QAPI block definitions
{ 'include': 'qapi/block.json' }
+# QAPI event definitions
+{ 'include': 'qapi/event.json' }
+
##
# LostTickPolicy:
#
@@ -211,12 +214,18 @@
#
# @filename: the filename of the character device
#
+# @frontend-open: shows whether the frontend device attached to this backend
+# (eg. with the chardev=... option) is in open or closed state
+# (since 2.1)
+#
# Notes: @filename is encoded using the QEMU command line character device
# encoding. See the QEMU man page for details.
#
# Since: 0.14.0
##
-{ 'type': 'ChardevInfo', 'data': {'label': 'str', 'filename': 'str'} }
+{ 'type': 'ChardevInfo', 'data': {'label': 'str',
+ 'filename': 'str',
+ 'frontend-open': 'bool'} }
##
# @query-chardev:
@@ -654,8 +663,9 @@
#
# @host: IP address
#
-# @service: The service name of vnc port. This may depend on the host system's
-# service database so symbolic names should not be relied on.
+# @service: The service name of the vnc port. This may depend on the host
+# system's service database so symbolic names should not be relied
+# on.
#
# @family: address family
#
@@ -694,7 +704,7 @@
##
{ 'type': 'VncClientInfo',
'base': 'VncBasicInfo',
- 'data': { '*x509_dname' : 'str', '*sasl_username': 'str' } }
+ 'data': { '*x509_dname': 'str', '*sasl_username': 'str' } }
##
# @VncInfo:
@@ -2040,6 +2050,62 @@
'*udp': 'str' } }
##
+# @NetdevL2TPv3Options
+#
+# Connect the VLAN to Ethernet over L2TPv3 Static tunnel
+#
+# @src: source address
+#
+# @dst: destination address
+#
+# @srcport: #optional source port - mandatory for udp, optional for ip
+#
+# @dstport: #optional destination port - mandatory for udp, optional for ip
+#
+# @ipv6: #optional - force the use of ipv6
+#
+# @udp: #optional - use the udp version of l2tpv3 encapsulation
+#
+# @cookie64: #optional - use 64 bit coookies
+#
+# @counter: #optional have sequence counter
+#
+# @pincounter: #optional pin sequence counter to zero -
+# workaround for buggy implementations or
+# networks with packet reorder
+#
+# @txcookie: #optional 32 or 64 bit transmit cookie
+#
+# @rxcookie: #optional 32 or 64 bit receive cookie
+#
+# @txsession: 32 bit transmit session
+#
+# @rxsession: #optional 32 bit receive session - if not specified
+# set to the same value as transmit
+#
+# @offset: #optional additional offset - allows the insertion of
+# additional application-specific data before the packet payload
+#
+# Since 2.1
+##
+{ 'type': 'NetdevL2TPv3Options',
+ 'data': {
+ 'src': 'str',
+ 'dst': 'str',
+ '*srcport': 'str',
+ '*dstport': 'str',
+ '*ipv6': 'bool',
+ '*udp': 'bool',
+ '*cookie64': 'bool',
+ '*counter': 'bool',
+ '*pincounter': 'bool',
+ '*txcookie': 'uint64',
+ '*rxcookie': 'uint64',
+ 'txsession': 'uint32',
+ '*rxsession': 'uint32',
+ '*offset': 'uint32' } }
+
+##
# @NetdevVdeOptions
#
# Connect the VLAN to a vde switch running on the host.
@@ -2150,6 +2216,9 @@
# A discriminated record of network device traits.
#
# Since 1.2
+#
+# 'l2tpv3' - since 2.1
+#
##
{ 'union': 'NetClientOptions',
'data': {
@@ -2157,6 +2226,7 @@
'nic': 'NetLegacyNicOptions',
'user': 'NetdevUserOptions',
'tap': 'NetdevTapOptions',
+ 'l2tpv3': 'NetdevL2TPv3Options',
'socket': 'NetdevSocketOptions',
'vde': 'NetdevVdeOptions',
'dump': 'NetdevDumpOptions',
@@ -3398,5 +3468,3 @@
##
{ 'enum': 'GuestPanicAction',
'data': [ 'pause' ] }
-
-{ 'include': 'qapi-event.json' }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index af6b436540..faf394cc76 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -765,6 +765,13 @@
# @format: #optional the format of the new destination, default is to
# probe if @mode is 'existing', else the format of the source
#
+# @node-name: #optional the new block driver state node name in the graph
+# (Since 2.1)
+#
+# @replaces: #optional with sync=full graph node name to be replaced by the new
+# image when a whole image copy is done. This can be used to repair
+# broken Quorum files. (Since 2.1)
+#
# @mode: #optional whether and how QEMU should create a new image, default is
# 'absolute-paths'.
#
@@ -797,6 +804,7 @@
##
{ 'command': 'drive-mirror',
'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+ '*node-name': 'str', '*replaces': 'str',
'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
@@ -1329,12 +1337,15 @@
#
# @vote-threshold: the vote limit under which a read will fail
#
+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
+# (Since 2.1)
+#
# Since: 2.0
##
{ 'type': 'BlockdevOptionsQuorum',
'data': { '*blkverify': 'bool',
'children': [ 'BlockdevRef' ],
- 'vote-threshold': 'int' } }
+ 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
##
# @BlockdevOptions
@@ -1436,7 +1447,8 @@
# @device: device name
#
# @msg: informative message for human consumption, such as the kind of
-# corruption being detected
+# corruption being detected. It should not be parsed by machine as it is
+# not guaranteed to be stable
#
# @offset: #optional, if the corruption resulted from an image access, this is
# the access offset into the image
@@ -1544,19 +1556,32 @@
{ 'event': 'BLOCK_JOB_ERROR',
'data': { 'device' : 'str',
'operation': 'IoOperationType',
- 'action' : 'BlockdevOnError' } }
+ 'action' : 'BlockErrorAction' } }
##
# @BLOCK_JOB_READY
#
# Emitted when a block job is ready to complete
#
+# @type: job type
+#
# @device: device name
#
+# @len: maximum progress value
+#
+# @offset: current progress value. On success this is equal to len.
+# On failure this is less than len
+#
+# @speed: rate limit, bytes per second
+#
# Note: The "ready to complete" status is always reset by a @BLOCK_JOB_ERROR
# event
#
# Since: 1.3
##
{ 'event': 'BLOCK_JOB_READY',
- 'data': { 'device': 'str' } }
+ 'data': { 'type' : 'BlockJobType',
+ 'device': 'str',
+ 'len' : 'int',
+ 'offset': 'int',
+ 'speed' : 'int' } }
diff --git a/qapi-event.json b/qapi/event.json
index e7a47f9927..ff97aeb377 100644
--- a/qapi-event.json
+++ b/qapi/event.json
@@ -1,8 +1,8 @@
##
# @SHUTDOWN
#
-# Emitted when the virtual machine has shutdown, possibly indicating that QEMU
-# is about about to exit.
+# Emitted when the virtual machine has shut down, indicating that qemu is
+# about to exit.
#
# Note: If the command-line option "-no-shutdown" has been specified, qemu will
# not exit, and a STOP event will eventually follow the SHUTDOWN event
@@ -316,3 +316,17 @@
{ 'event': 'QUORUM_REPORT_BAD',
'data': { '*error': 'str', 'node-name': 'str',
'sector-num': 'int', 'sector-count': 'int' } }
+
+##
+# @VSERPORT_CHANGE
+#
+# Emitted when the guest opens or closes a virtio-serial port.
+#
+# @id: device identifier of the virtio-serial port
+#
+# @open: true if the guest has opened the virtio-serial port
+#
+# Since: 2.1
+##
+{ 'event': 'VSERPORT_CHANGE',
+ 'data': { 'id': 'str', 'open': 'bool' } }
diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c
index 6a0974eb48..36eb3bcfd6 100644
--- a/qemu-bridge-helper.c
+++ b/qemu-bridge-helper.c
@@ -229,7 +229,7 @@ int main(int argc, char **argv)
unsigned long ifargs[4];
#endif
int ifindex;
- int fd, ctlfd, unixfd = -1;
+ int fd = -1, ctlfd = -1, unixfd = -1;
int use_vnet = 0;
int mtu;
const char *bridge = NULL;
@@ -436,7 +436,12 @@ int main(int argc, char **argv)
/* profit! */
cleanup:
-
+ if (fd >= 0) {
+ close(fd);
+ }
+ if (ctlfd >= 0) {
+ close(ctlfd);
+ }
while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&acl_list, entry);
g_free(acl_rule);
diff --git a/qemu-char.c b/qemu-char.c
index cbd6b9a025..51917de462 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -3705,6 +3705,7 @@ ChardevInfoList *qmp_query_chardev(Error **errp)
info->value = g_malloc0(sizeof(*info->value));
info->value->label = g_strdup(chr->label);
info->value->filename = g_strdup(chr->filename);
+ info->value->frontend_open = chr->fe_open;
info->next = chr_list;
chr_list = info;
diff --git a/qemu-options.hx b/qemu-options.hx
index ff76ad4830..9e5468678b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
" (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n"
" (default=" DEFAULT_BRIDGE_HELPER ")\n"
#endif
+#ifdef __linux__
+ "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n"
+ " connect the VLAN to an Ethernet over L2TPv3 pseudowire\n"
+ " Linux kernel 3.3+ as well as most routers can talk\n"
+ " L2TPv3. This transport allows to connect a VM to a VM,\n"
+ " VM to a router and even VM to Host. It is a nearly-universal\n"
+ " standard (RFC3391). Note - this implementation uses static\n"
+ " pre-configured tunnels (same as the Linux kernel).\n"
+ " use 'src=' to specify source address\n"
+ " use 'dst=' to specify destination address\n"
+ " use 'udp=on' to specify udp encapsulation\n"
+ " use 'dstport=' to specify destination udp port\n"
+ " use 'dstport=' to specify destination udp port\n"
+ " use 'ipv6=on' to force v6\n"
+ " L2TPv3 uses cookies to prevent misconfiguration as\n"
+ " well as a weak security measure\n"
+ " use 'rxcookie=0x012345678' to specify a rxcookie\n"
+ " use 'txcookie=0x012345678' to specify a txcookie\n"
+ " use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n"
+ " use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n"
+ " use 'pincounter=on' to work around broken counter handling in peer\n"
+ " use 'offset=X' to add an extra offset between header and data\n"
+#endif
"-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
" connect the vlan 'n' to another VLAN using a socket connection\n"
"-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n"
@@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \
-net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
@end example
+@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+Connect VLAN @var{n} to L2TPv3 pseudowire. L2TPv3 (RFC3391) is a popular
+protocol to transport Ethernet (and other Layer 2) data frames between
+two systems. It is present in routers, firewalls and the Linux kernel
+(from version 3.3 onwards).
+
+This transport allows a VM to communicate to another VM, router or firewall directly.
+
+@item src=@var{srcaddr}
+ source address (mandatory)
+@item dst=@var{dstaddr}
+ destination address (mandatory)
+@item udp
+ select udp encapsulation (default is ip).
+@item srcport=@var{srcport}
+ source udp port.
+@item dstport=@var{dstport}
+ destination udp port.
+@item ipv6
+ force v6, otherwise defaults to v4.
+@item rxcookie=@var{rxcookie}
+@item txcookie=@var{txcookie}
+ Cookies are a weak form of security in the l2tpv3 specification.
+Their function is mostly to prevent misconfiguration. By default they are 32
+bit.
+@item cookie64
+ Set cookie size to 64 bit instead of the default 32
+@item counter=off
+ Force a 'cut-down' L2TPv3 with no counter as in
+draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00
+@item pincounter=on
+ Work around broken counter handling in peer. This may also help on
+networks which have packet reorder.
+@item offset=@var{offset}
+ Add an extra offset between header and data
+
+For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan
+on the remote Linux host 1.2.3.4:
+@example
+# Setup tunnel on linux host using raw ip as encapsulation
+# on 1.2.3.4
+ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \
+ encap udp udp_sport 16384 udp_dport 16384
+ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \
+ 0xFFFFFFFF peer_session_id 0xFFFFFFFF
+ifconfig vmtunnel0 mtu 1500
+ifconfig vmtunnel0 up
+brctl addif br-lan vmtunnel0
+
+
+# on 4.3.2.1
+# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter
+
+qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter
+
+
+@end example
+
@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
@item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
diff --git a/qmp-commands.hx b/qmp-commands.hx
index e4a1c80434..65218bc147 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1293,6 +1293,7 @@ EQMP
{
.name = "drive-mirror",
.args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
+ "node-name:s?,replaces:s?,"
"on-source-error:s?,on-target-error:s?,"
"granularity:i?,buf-size:i?",
.mhandler.cmd_new = qmp_marshal_input_drive_mirror,
@@ -1314,6 +1315,10 @@ Arguments:
- "device": device name to operate on (json-string)
- "target": name of new image file (json-string)
- "format": format of new image (json-string, optional)
+- "node-name": the name of the new block driver state in the node graph
+ (json-string, optional)
+- "replaces": the block driver node name to replace when finished
+ (json-string, optional)
- "mode": how an image file should be created into the target
file/device (NewImageMode, optional, default 'absolute-paths')
- "speed": maximum speed of the streaming job, in bytes per second
@@ -1921,19 +1926,28 @@ Each json-object contain the following:
- "label": device's label (json-string)
- "filename": device's file (json-string)
+- "frontend-open": open/closed state of the frontend device attached to this
+ backend (json-bool)
Example:
-> { "execute": "query-chardev" }
<- {
- "return":[
+ "return": [
+ {
+ "label": "charchannel0",
+ "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server",
+ "frontend-open": false
+ },
{
- "label":"monitor",
- "filename":"stdio"
+ "label": "charmonitor",
+ "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server",
+ "frontend-open": true
},
{
- "label":"serial0",
- "filename":"vc"
+ "label": "charserial0",
+ "filename": "pty:/dev/pts/2",
+ "frontend-open": true
}
]
}
diff --git a/scripts/qapi-event.py b/scripts/qapi-event.py
index 3a1cd61914..601e3076ab 100644
--- a/scripts/qapi-event.py
+++ b/scripts/qapi-event.py
@@ -26,9 +26,8 @@ def _generate_event_api_name(event_name, params):
api_name += "bool has_%s,\n" % c_var(argname)
api_name += "".ljust(l)
- if argentry == "str":
- api_name += "const "
- api_name += "%s %s,\n" % (c_type(argentry), c_var(argname))
+ api_name += "%s %s,\n" % (c_type(argentry, is_param=True),
+ c_var(argname))
api_name += "".ljust(l)
api_name += "Error **errp)"
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 54b97cb48e..f2c6d1f840 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -255,7 +255,7 @@ def check_event(expr, expr_info):
if structured:
raise QAPIExprError(expr_info,
"Nested structure define in event is not "
- "supported now, event '%s', argname '%s'"
+ "supported, event '%s', argname '%s'"
% (expr['event'], argname))
def check_union(expr, expr_info):
diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 97a81d8660..9a91af9dbf 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1138,6 +1138,8 @@
"POWER7 v2.3")
POWERPC_DEF("POWER7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7P,
"POWER7+ v2.1")
+ POWERPC_DEF("POWER8E_v1.0", CPU_POWERPC_POWER8E_v10, POWER8E,
+ "POWER8E v1.0")
POWERPC_DEF("POWER8_v1.0", CPU_POWERPC_POWER8_v10, POWER8,
"POWER8 v1.0")
POWERPC_DEF("970", CPU_POWERPC_970, 970,
@@ -1386,6 +1388,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "POWER5gs", "POWER5+" },
{ "POWER7", "POWER7_v2.3" },
{ "POWER7+", "POWER7+_v2.1" },
+ { "POWER8E", "POWER8E_v1.0" },
{ "POWER8", "POWER8_v1.0" },
{ "970fx", "970fx_v3.1" },
{ "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index db75896012..c39d03a504 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -559,9 +559,12 @@ enum {
CPU_POWERPC_POWER7P_BASE = 0x004A0000,
CPU_POWERPC_POWER7P_MASK = 0xFFFF0000,
CPU_POWERPC_POWER7P_v21 = 0x004A0201,
- CPU_POWERPC_POWER8_BASE = 0x004B0000,
+ CPU_POWERPC_POWER8E_BASE = 0x004B0000,
+ CPU_POWERPC_POWER8E_MASK = 0xFFFF0000,
+ CPU_POWERPC_POWER8E_v10 = 0x004B0100,
+ CPU_POWERPC_POWER8_BASE = 0x004D0000,
CPU_POWERPC_POWER8_MASK = 0xFFFF0000,
- CPU_POWERPC_POWER8_v10 = 0x004B0100,
+ CPU_POWERPC_POWER8_v10 = 0x004D0100,
CPU_POWERPC_970 = 0x00390202,
CPU_POWERPC_970FX_v10 = 0x00391100,
CPU_POWERPC_970FX_v20 = 0x003C0200,
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 13c7031265..f1f0a52998 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -119,7 +119,9 @@ void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f,
fprintf_function cpu_fprintf, int flags);
hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg);
int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
int ppc64_cpu_write_elf64_qemunote(WriteCoreDumpFunction f,
CPUState *cpu, void *opaque);
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 74407ee209..08ae527cb6 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2012,7 +2012,7 @@ enum {
PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \
PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
- PPC2_ALTIVEC_207 | PPC2_ISA207S)
+ PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP)
};
/*****************************************************************************/
diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
index 381a3c7e31..694d303e19 100644
--- a/target-ppc/gdbstub.c
+++ b/target-ppc/gdbstub.c
@@ -21,6 +21,31 @@
#include "qemu-common.h"
#include "exec/gdbstub.h"
+static int ppc_gdb_register_len_apple(int n)
+{
+ switch (n) {
+ case 0 ... 31:
+ /* gprs */
+ return 8;
+ case 32 ... 63:
+ /* fprs */
+ return 8;
+ case 64 ... 95:
+ return 16;
+ case 64+32: /* nip */
+ case 65+32: /* msr */
+ case 67+32: /* lr */
+ case 68+32: /* ctr */
+ case 69+32: /* xer */
+ case 70+32: /* fpscr */
+ return 8;
+ case 66+32: /* cr */
+ return 4;
+ default:
+ return 0;
+ }
+}
+
static int ppc_gdb_register_len(int n)
{
switch (n) {
@@ -132,6 +157,65 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
return r;
}
+int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ int r = ppc_gdb_register_len_apple(n);
+
+ if (!r) {
+ return r;
+ }
+
+ if (n < 32) {
+ /* gprs */
+ gdb_get_reg64(mem_buf, env->gpr[n]);
+ } else if (n < 64) {
+ /* fprs */
+ stfq_p(mem_buf, env->fpr[n-32]);
+ } else if (n < 96) {
+ /* Altivec */
+ stq_p(mem_buf, n - 64);
+ stq_p(mem_buf + 8, 0);
+ } else {
+ switch (n) {
+ case 64 + 32:
+ gdb_get_reg64(mem_buf, env->nip);
+ break;
+ case 65 + 32:
+ gdb_get_reg64(mem_buf, env->msr);
+ break;
+ case 66 + 32:
+ {
+ uint32_t cr = 0;
+ int i;
+ for (i = 0; i < 8; i++) {
+ cr |= env->crf[i] << (32 - ((i + 1) * 4));
+ }
+ gdb_get_reg32(mem_buf, cr);
+ break;
+ }
+ case 67 + 32:
+ gdb_get_reg64(mem_buf, env->lr);
+ break;
+ case 68 + 32:
+ gdb_get_reg64(mem_buf, env->ctr);
+ break;
+ case 69 + 32:
+ gdb_get_reg64(mem_buf, env->xer);
+ break;
+ case 70 + 32:
+ gdb_get_reg64(mem_buf, env->fpscr);
+ break;
+ }
+ }
+ if (msr_le) {
+ /* If cpu is in LE mode, convert memory contents to LE. */
+ ppc_gdb_swap_register(mem_buf, n, r);
+ }
+ return r;
+}
+
int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -185,3 +269,56 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
}
return r;
}
+int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ int r = ppc_gdb_register_len_apple(n);
+
+ if (!r) {
+ return r;
+ }
+ if (msr_le) {
+ /* If cpu is in LE mode, convert memory contents to LE. */
+ ppc_gdb_swap_register(mem_buf, n, r);
+ }
+ if (n < 32) {
+ /* gprs */
+ env->gpr[n] = ldq_p(mem_buf);
+ } else if (n < 64) {
+ /* fprs */
+ env->fpr[n-32] = ldfq_p(mem_buf);
+ } else {
+ switch (n) {
+ case 64 + 32:
+ env->nip = ldq_p(mem_buf);
+ break;
+ case 65 + 32:
+ ppc_store_msr(env, ldq_p(mem_buf));
+ break;
+ case 66 + 32:
+ {
+ uint32_t cr = ldl_p(mem_buf);
+ int i;
+ for (i = 0; i < 8; i++) {
+ env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
+ }
+ break;
+ }
+ case 67 + 32:
+ env->lr = ldq_p(mem_buf);
+ break;
+ case 68 + 32:
+ env->ctr = ldq_p(mem_buf);
+ break;
+ case 69 + 32:
+ env->xer = ldq_p(mem_buf);
+ break;
+ case 70 + 32:
+ /* fpscr */
+ store_fpscr(env, ldq_p(mem_buf), 0xffffffff);
+ break;
+ }
+ }
+ return r;
+}
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 561f8ccf2f..2d87108d8b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -63,6 +63,7 @@ static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
+static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
+ cap_spapr_vfio = false;
cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void)
return cap_spapr_multitce;
}
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+ bool vfio_accel)
{
struct kvm_create_spapr_tce args = {
.liobn = liobn,
@@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
* destroying the table, which the upper layers -will- do
*/
*pfd = -1;
- if (!cap_spapr_tce) {
+ if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
return NULL;
}
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 412cc7f3c1..1118122d89 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
#ifndef CONFIG_USER_ONLY
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
bool kvmppc_spapr_use_multitce(void);
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+ bool vfio_accel);
int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
int kvmppc_reset_htab(int shift_hint);
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
@@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void)
}
static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
- uint32_t window_size, int *fd)
+ uint32_t window_size, int *fd,
+ bool vfio_accel)
{
return NULL;
}
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 48017219a4..b23933f7bd 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -426,7 +426,6 @@ static inline uint32_t SPR(uint32_t opcode)
return ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5);
}
/*** Get constants ***/
-EXTRACT_HELPER(IMM, 12, 8);
/* 16 bits signed immediate value */
EXTRACT_SHELPER(SIMM, 0, 16);
/* 16 bits unsigned immediate value */
@@ -459,8 +458,6 @@ EXTRACT_HELPER(FPFLM, 17, 8);
EXTRACT_HELPER(FPW, 16, 1);
/*** Jump target decoding ***/
-/* Displacement */
-EXTRACT_SHELPER(d, 0, 16);
/* Immediate address */
static inline target_ulong LI(uint32_t opcode)
{
@@ -2665,11 +2662,6 @@ static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2)
tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx);
}
-static inline void gen_qemu_ld8s(DisasContext *ctx, TCGv arg1, TCGv arg2)
-{
- tcg_gen_qemu_ld8s(arg1, arg2, ctx->mem_idx);
-}
-
static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2)
{
TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask;
@@ -4123,8 +4115,9 @@ static void gen_mcrxr(DisasContext *ctx)
tcg_gen_trunc_tl_i32(t0, cpu_so);
tcg_gen_trunc_tl_i32(t1, cpu_ov);
tcg_gen_trunc_tl_i32(dst, cpu_ca);
- tcg_gen_shri_i32(t0, t0, 2);
- tcg_gen_shri_i32(t1, t1, 1);
+ tcg_gen_shli_i32(t0, t0, 3);
+ tcg_gen_shli_i32(t1, t1, 2);
+ tcg_gen_shli_i32(dst, dst, 1);
tcg_gen_or_i32(dst, dst, t0);
tcg_gen_or_i32(dst, dst, t1);
tcg_temp_free_i32(t0);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 85581c9537..a3bb336e16 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -34,6 +34,7 @@
//#define PPC_DUMP_CPU
//#define PPC_DEBUG_SPR
//#define PPC_DUMP_SPR_ACCESSES
+/* #define USE_APPLE_GDB */
/* For user-mode emulation, we don't emulate any IRQ controller */
#if defined(CONFIG_USER_ONLY)
@@ -8188,16 +8189,16 @@ static void init_proc_POWER8(CPUPPCState *env)
init_proc_book3s_64(env, BOOK3S_CPU_POWER8);
}
-POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+POWERPC_FAMILY(POWER8E)(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
dc->fw_name = "PowerPC,POWER8";
- dc->desc = "POWER8";
+ dc->desc = "POWER8E";
dc->props = powerpc_servercpu_properties;
- pcc->pvr = CPU_POWERPC_POWER8_BASE;
- pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+ pcc->pvr = CPU_POWERPC_POWER8E_BASE;
+ pcc->pvr_mask = CPU_POWERPC_POWER8E_MASK;
pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
pcc->init_proc = init_proc_POWER8;
pcc->check_pow = check_pow_nocheck;
@@ -8251,6 +8252,18 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
pcc->l1_icache_size = 0x8000;
pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
}
+
+POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+ PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+
+ ppc_POWER8E_cpu_family_class_init(oc, data);
+
+ dc->desc = "POWER8";
+ pcc->pvr = CPU_POWERPC_POWER8_BASE;
+ pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+}
#endif /* defined (TARGET_PPC64) */
@@ -9667,6 +9680,13 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
#endif
cc->gdb_num_core_regs = 71;
+
+#ifdef USE_APPLE_GDB
+ cc->gdb_read_register = ppc_cpu_gdb_read_register_apple;
+ cc->gdb_write_register = ppc_cpu_gdb_write_register_apple;
+ cc->gdb_num_core_regs = 71 + 32;
+#endif
+
#if defined(TARGET_PPC64)
cc->gdb_core_xml_file = "power64-core.xml";
#else
diff --git a/tests/qapi-schema/event-nest-struct.err b/tests/qapi-schema/event-nest-struct.err
index e4a0faac9c..91bde1c967 100644
--- a/tests/qapi-schema/event-nest-struct.err
+++ b/tests/qapi-schema/event-nest-struct.err
@@ -1 +1 @@
-tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported now, event 'EVENT_A', argname 'a'
+tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported, event 'EVENT_A', argname 'a'
diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031
index 1d920ea87a..2a77ba8cbb 100755
--- a/tests/qemu-iotests/031
+++ b/tests/qemu-iotests/031
@@ -56,22 +56,22 @@ for IMGOPTS in "compat=0.10" "compat=1.1"; do
echo === Create image with unknown header extension ===
echo
_make_test_img 64M
- ./qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension"
- ./qcow2.py "$TEST_IMG" dump-header
+ $PYTHON qcow2.py "$TEST_IMG" add-header-ext 0x12345678 "This is a test header extension"
+ $PYTHON qcow2.py "$TEST_IMG" dump-header
_check_test_img
echo
echo === Rewrite header with no backing file ===
echo
$QEMU_IMG rebase -u -b "" "$TEST_IMG"
- ./qcow2.py "$TEST_IMG" dump-header
+ $PYTHON qcow2.py "$TEST_IMG" dump-header
_check_test_img
echo
echo === Add a backing file and format ===
echo
$QEMU_IMG rebase -u -b "/some/backing/file/path" -F host_device "$TEST_IMG"
- ./qcow2.py "$TEST_IMG" dump-header
+ $PYTHON qcow2.py "$TEST_IMG" dump-header
done
# success, all done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 03b6aa9de7..a77365347d 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -53,15 +53,15 @@ IMGOPTS="compat=1.1"
echo === Create image with unknown autoclear feature bit ===
echo
_make_test_img 64M
-./qcow2.py "$TEST_IMG" set-feature-bit autoclear 63
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 63
+$PYTHON qcow2.py "$TEST_IMG" dump-header
echo
echo === Repair image ===
echo
_check_test_img -r all
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
# success, all done
echo "*** done"
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index 27fe4bdacc..84c9167660 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -63,7 +63,7 @@ _make_test_img $size
$QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
# The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img
echo
@@ -75,7 +75,7 @@ _make_test_img $size
_no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io
# The dirty bit must be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img
echo
@@ -84,7 +84,7 @@ echo "== Read-only access must still work =="
$QEMU_IO -r -c "read -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
# The dirty bit must be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "== Repairing the image file must succeed =="
@@ -92,7 +92,7 @@ echo "== Repairing the image file must succeed =="
_check_test_img -r all
# The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "== Data should still be accessible after repair =="
@@ -108,12 +108,12 @@ _make_test_img $size
_no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io
# The dirty bit must be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io
# The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "== Creating an image file with lazy_refcounts=off =="
@@ -124,7 +124,7 @@ _make_test_img $size
_no_dump_exec $QEMU_IO -c "write -P 0x5a 0 512" -c "abort" "$TEST_IMG" 2>&1 | _filter_qemu_io
# The dirty bit must not be set since lazy_refcounts=off
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img
echo
@@ -140,8 +140,8 @@ $QEMU_IO -c "write 0 512" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG commit "$TEST_IMG"
# The dirty bit must not be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
-./qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG".base dump-header | grep incompatible_features
_check_test_img
TEST_IMG="$TEST_IMG".base _check_test_img
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 734b6a6bb4..d1668109dd 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -35,12 +35,13 @@ test_img = os.path.join(iotests.test_dir, 'test.img')
class ImageCommitTestCase(iotests.QMPTestCase):
'''Abstract base class for image commit test cases'''
- def run_commit_test(self, top, base):
+ def run_commit_test(self, top, base, need_ready=False):
self.assert_no_active_block_jobs()
result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
self.assert_qmp(result, 'return', {})
completed = False
+ ready = False
while not completed:
for event in self.vm.get_qmp_events(wait=True):
if event['event'] == 'BLOCK_JOB_COMPLETED':
@@ -48,8 +49,11 @@ class ImageCommitTestCase(iotests.QMPTestCase):
self.assert_qmp(event, 'data/device', 'drive0')
self.assert_qmp(event, 'data/offset', self.image_len)
self.assert_qmp(event, 'data/len', self.image_len)
+ if need_ready:
+ self.assertTrue(ready, "Expecting BLOCK_JOB_COMPLETED event")
completed = True
elif event['event'] == 'BLOCK_JOB_READY':
+ ready = True
self.assert_qmp(event, 'data/type', 'commit')
self.assert_qmp(event, 'data/device', 'drive0')
self.assert_qmp(event, 'data/len', self.image_len)
@@ -63,7 +67,7 @@ class TestSingleDrive(ImageCommitTestCase):
test_len = 1 * 1024 * 256
def setUp(self):
- iotests.create_image(backing_img, TestSingleDrive.image_len)
+ iotests.create_image(backing_img, self.image_len)
qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
qemu_io('-c', 'write -P 0xab 0 524288', backing_img)
@@ -105,7 +109,7 @@ class TestSingleDrive(ImageCommitTestCase):
self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
def test_top_is_active(self):
- self.run_commit_test(test_img, backing_img)
+ self.run_commit_test(test_img, backing_img, need_ready=True)
self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
@@ -238,6 +242,8 @@ class TestSetSpeed(ImageCommitTestCase):
self.cancel_and_wait(resume=True)
+class TestActiveZeroLengthImage(TestSingleDrive):
+ image_len = 0
if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index b6f257674e..42314e9c00 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-................
+........................
----------------------------------------------------------------------
-Ran 16 tests
+Ran 24 tests
OK
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index ec470b2007..0815e19274 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -28,6 +28,12 @@ target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
test_img = os.path.join(iotests.test_dir, 'test.img')
target_img = os.path.join(iotests.test_dir, 'target.img')
+quorum_img1 = os.path.join(iotests.test_dir, 'quorum1.img')
+quorum_img2 = os.path.join(iotests.test_dir, 'quorum2.img')
+quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
+quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
+quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
+
class ImageMirroringTestCase(iotests.QMPTestCase):
'''Abstract base class for image mirroring test cases'''
@@ -42,8 +48,8 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
ready = True
def wait_ready_and_cancel(self, drive='drive0'):
- self.wait_ready(drive)
- event = self.cancel_and_wait()
+ self.wait_ready(drive=drive)
+ event = self.cancel_and_wait(drive=drive)
self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
self.assert_qmp(event, 'data/type', 'mirror')
self.assert_qmp(event, 'data/offset', self.image_len)
@@ -52,19 +58,19 @@ class ImageMirroringTestCase(iotests.QMPTestCase):
def complete_and_wait(self, drive='drive0', wait_ready=True):
'''Complete a block job and wait for it to finish'''
if wait_ready:
- self.wait_ready()
+ self.wait_ready(drive=drive)
result = self.vm.qmp('block-job-complete', device=drive)
self.assert_qmp(result, 'return', {})
- event = self.wait_until_completed()
+ event = self.wait_until_completed(drive=drive)
self.assert_qmp(event, 'data/type', 'mirror')
class TestSingleDrive(ImageMirroringTestCase):
image_len = 1 * 1024 * 1024 # MB
def setUp(self):
- iotests.create_image(backing_img, TestSingleDrive.image_len)
+ iotests.create_image(backing_img, self.image_len)
qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
self.vm = iotests.VM().add_drive(test_img)
self.vm.launch()
@@ -163,7 +169,7 @@ class TestSingleDrive(ImageMirroringTestCase):
self.assert_no_active_block_jobs()
qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d'
- % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img)
+ % (self.image_len, self.image_len), target_img)
result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
buf_size=65536, mode='existing', target=target_img)
self.assert_qmp(result, 'return', {})
@@ -179,7 +185,7 @@ class TestSingleDrive(ImageMirroringTestCase):
self.assert_no_active_block_jobs()
qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
- % (TestSingleDrive.image_len, backing_img), target_img)
+ % (self.image_len, backing_img), target_img)
result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
mode='existing', target=target_img)
self.assert_qmp(result, 'return', {})
@@ -206,6 +212,11 @@ class TestSingleDrive(ImageMirroringTestCase):
target=target_img)
self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+class TestSingleDriveZeroLength(TestSingleDrive):
+ image_len = 0
+ test_small_buffer2 = None
+ test_large_cluster = None
+
class TestMirrorNoBacking(ImageMirroringTestCase):
image_len = 2 * 1024 * 1024 # MB
@@ -718,5 +729,187 @@ class TestUnbackedSource(ImageMirroringTestCase):
self.complete_and_wait()
self.assert_no_active_block_jobs()
+class TestRepairQuorum(ImageMirroringTestCase):
+ """ This class test quorum file repair using drive-mirror.
+ It's mostly a fork of TestSingleDrive """
+ image_len = 1 * 1024 * 1024 # MB
+ IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
+
+ def setUp(self):
+ self.vm = iotests.VM()
+
+ # Add each individual quorum images
+ for i in self.IMAGES:
+ qemu_img('create', '-f', iotests.imgfmt, i,
+ str(TestSingleDrive.image_len))
+ # Assign a node name to each quorum image in order to manipulate
+ # them
+ opts = "node-name=img%i" % self.IMAGES.index(i)
+ self.vm = self.vm.add_drive(i, opts)
+
+ self.vm.launch()
+
+ #assemble the quorum block device from the individual files
+ args = { "options" : { "driver": "quorum", "id": "quorum0",
+ "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } }
+ result = self.vm.qmp("blockdev-add", **args)
+ self.assert_qmp(result, 'return', {})
+
+
+ def tearDown(self):
+ self.vm.shutdown()
+ for i in self.IMAGES + [ quorum_repair_img ]:
+ # Do a try/except because the test may have deleted some images
+ try:
+ os.remove(i)
+ except OSError:
+ pass
+
+ def test_complete(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name="repair0",
+ replaces="img1",
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait(drive="quorum0")
+ result = self.vm.qmp('query-named-block-nodes')
+ self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+ # TODO: a better test requiring some QEMU infrastructure will be added
+ # to check that this file is really driven by quorum
+ self.vm.shutdown()
+ self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+ 'target image does not match source after mirroring')
+
+ def test_cancel(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name="repair0",
+ replaces="img1",
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.cancel_and_wait(drive="quorum0", force=True)
+ # here we check that the last registered quorum file has not been
+ # swapped out and unref
+ result = self.vm.qmp('query-named-block-nodes')
+ self.assert_qmp(result, 'return[0]/file', quorum_img3)
+ self.vm.shutdown()
+
+ def test_cancel_after_ready(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name="repair0",
+ replaces="img1",
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready_and_cancel(drive="quorum0")
+ result = self.vm.qmp('query-named-block-nodes')
+ # here we check that the last registered quorum file has not been
+ # swapped out and unref
+ self.assert_qmp(result, 'return[0]/file', quorum_img3)
+ self.vm.shutdown()
+ self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+ 'target image does not match source after mirroring')
+
+ def test_pause(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name="repair0",
+ replaces="img1",
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-job-pause', device='quorum0')
+ self.assert_qmp(result, 'return', {})
+
+ time.sleep(1)
+ result = self.vm.qmp('query-block-jobs')
+ offset = self.dictpath(result, 'return[0]/offset')
+
+ time.sleep(1)
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/offset', offset)
+
+ result = self.vm.qmp('block-job-resume', device='quorum0')
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait(drive="quorum0")
+ self.vm.shutdown()
+ self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+ 'target image does not match source after mirroring')
+
+ def test_medium_not_found(self):
+ result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full',
+ node_name='repair0',
+ replaces='img1',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ def test_image_not_found(self):
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name='repair0',
+ replaces='img1',
+ mode='existing',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ def test_device_not_found(self):
+ result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full',
+ node_name='repair0',
+ replaces='img1',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+ def test_wrong_sync_mode(self):
+ result = self.vm.qmp('drive-mirror', device='quorum0',
+ node_name='repair0',
+ replaces='img1',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ def test_no_node_name(self):
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ replaces='img1',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ def test_unexistant_replaces(self):
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name='repair0',
+ replaces='img77',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ def test_after_a_quorum_snapshot(self):
+ result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1',
+ snapshot_file=quorum_snapshot_file,
+ snapshot_node_name="snap1");
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name='repair0',
+ replaces="img1",
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ node_name='repair0',
+ replaces="snap1",
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait(drive="quorum0")
+ result = self.vm.qmp('query-named-block-nodes')
+ self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+ # TODO: a better test requiring some QEMU infrastructure will be added
+ # to check that this file is really driven by quorum
+ self.vm.shutdown()
+
if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 6d9bee1a4b..42147c0b58 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-...........................
+..............................................
----------------------------------------------------------------------
-Ran 27 tests
+Ran 46 tests
OK
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index c4af131a66..a41334e022 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -92,6 +92,7 @@ echo
run_qemu -drive file="$TEST_IMG",format=foo
run_qemu -drive file="$TEST_IMG",driver=foo
+run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2
echo
echo === Overriding backing file ===
@@ -99,6 +100,11 @@ echo
echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults
+# Drivers that don't support backing files
+run_qemu -drive file="$TEST_IMG",driver=raw,backing.file.filename="$TEST_IMG.orig"
+run_qemu -drive file="$TEST_IMG",file.backing.driver=file,file.backing.filename="$TEST_IMG.orig"
+run_qemu -drive file="$TEST_IMG",file.backing.driver=qcow2,file.backing.file.filename="$TEST_IMG.orig"
+
echo
echo === Enable and disable lazy refcounting on the command line, plus some invalid values ===
echo
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 31e329e893..d7b0f503af 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -38,7 +38,10 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=foo
QEMU_PROG: -drive file=TEST_DIR/t.qcow2,format=foo: 'foo' invalid format
Testing: -drive file=TEST_DIR/t.qcow2,driver=foo
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Invalid driver: 'foo'
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=foo: could not open disk image TEST_DIR/t.qcow2: Unknown driver 'foo'
+
+Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,format=qcow2: could not open disk image TEST_DIR/t.qcow2: Driver specified twice
=== Overriding backing file ===
@@ -50,6 +53,15 @@ ide0-hd0: TEST_DIR/t.qcow2 (qcow2)
Backing file: TEST_DIR/t.qcow2.orig (chain depth: 1)
(qemu) qququiquit
+Testing: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=raw,backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files
+
+Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=file,file.backing.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files
+
+Testing: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.backing.driver=qcow2,file.backing.file.filename=TEST_DIR/t.qcow2.orig: could not open disk image TEST_DIR/t.qcow2: Driver doesn't support backing files
+
=== Enable and disable lazy refcounting on the command line, plus some invalid values ===
diff --git a/tests/qemu-iotests/054 b/tests/qemu-iotests/054
index c8b7082b4e..bd94153d66 100755
--- a/tests/qemu-iotests/054
+++ b/tests/qemu-iotests/054
@@ -49,7 +49,7 @@ _make_test_img $((1024*1024))T
echo
echo "creating too large image (1 EB) using qcow2.py"
_make_test_img 4G
-./qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
+$PYTHON qcow2.py "$TEST_IMG" set-header size $((1024 ** 6))
_check_test_img
# success, all done
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index f0116aab1d..3cffc12fec 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -68,13 +68,13 @@ poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img
# The corrupt bit should not be set anyway
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
# The corrupt bit must now be set
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
@@ -99,19 +99,19 @@ poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to fix it
_check_test_img -r all
# The corrupt bit should be cleared
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "=== Testing cluster data reference into inactive L2 table ==="
@@ -124,13 +124,13 @@ $QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
"\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index d3a6b388b5..ab98def6d4 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -48,9 +48,9 @@ echo "=== Testing version downgrade with zero expansion ==="
echo
IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
_check_test_img
@@ -59,9 +59,9 @@ echo "=== Testing dirty version downgrade ==="
echo
IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io
_check_test_img
@@ -69,11 +69,11 @@ echo
echo "=== Testing version downgrade with unknown compat/autoclear flags ==="
echo
IMGOPTS="compat=1.1" _make_test_img 64M
-./qcow2.py "$TEST_IMG" set-feature-bit compatible 42
-./qcow2.py "$TEST_IMG" set-feature-bit autoclear 42
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" set-feature-bit compatible 42
+$PYTHON qcow2.py "$TEST_IMG" set-feature-bit autoclear 42
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
_check_test_img
echo
@@ -81,9 +81,9 @@ echo "=== Testing version upgrade and resize ==="
echo
IMGOPTS="compat=0.10" _make_test_img 64M
$QEMU_IO -c "write -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IMG amend -o "compat=1.1,lazy_refcounts=on,size=128M" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IO -c "read -P 0x2a 42M 64k" "$TEST_IMG" | _filter_qemu_io
_check_test_img
@@ -92,9 +92,9 @@ echo "=== Testing dirty lazy_refcounts=off ==="
echo
IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" | _filter_qemu_io
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IMG amend -o "lazy_refcounts=off" "$TEST_IMG"
-./qcow2.py "$TEST_IMG" dump-header
+$PYTHON qcow2.py "$TEST_IMG" dump-header
$QEMU_IO -c "read -P 0x2a 0 128k" "$TEST_IMG" | _filter_qemu_io
_check_test_img
diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065
index ab5445f62d..e89b61d70b 100755
--- a/tests/qemu-iotests/065
+++ b/tests/qemu-iotests/065
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
#
# Test for additional information emitted by qemu-img info on qcow2
# images
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index b512d00cc8..7ae4be2053 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -134,15 +134,28 @@ run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
EOF
echo
+echo "== using quorum rewrite corrupted mode =="
+
+quorum="$quorum,file.rewrite-corrupted=on"
+
+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
+
+echo
+echo "== checking that quorum has corrected the corrupted file =="
+
+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
+echo
echo "== breaking quorum =="
$QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
echo
echo "== checking that quorum is broken =="
$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
-
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 2241cec148..073515ea50 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
+== using quorum rewrite corrupted mode ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking that quorum has corrected the corrupted file ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
== breaking quorum ==
wrote 10485760/10485760 bytes at offset 0
10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== checking that quorum is broken ==
qemu-io: can't open: Could not read image for determining its format: Input/output error
diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083
index f764534782..991a9d91db 100755
--- a/tests/qemu-iotests/083
+++ b/tests/qemu-iotests/083
@@ -44,7 +44,7 @@ choose_tcp_port() {
wait_for_tcp_port() {
while ! (netstat --tcp --listening --numeric | \
- grep "$1.*0.0.0.0:\*.*LISTEN") 2>&1 >/dev/null; do
+ grep "$1.*0\\.0\\.0\\.0:\\*.*LISTEN") 2>&1 >/dev/null; do
sleep 0.1
done
}
@@ -55,8 +55,8 @@ filter_nbd() {
# callbacks sometimes, making them unreliable.
#
# Filter out the TCP port number since this changes between runs.
- sed -e 's#^nbd.c:.*##g' \
- -e 's#nbd:127.0.0.1:[^:]*:#nbd:127.0.0.1:PORT:#g'
+ sed -e 's#^.*nbd\.c:.*##g' \
+ -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g'
}
check_disconnect() {
@@ -81,8 +81,8 @@ EOF
nbd_url="nbd:127.0.0.1:$port:exportname=foo"
fi
- ./nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
- wait_for_tcp_port "127.0.0.1:$port"
+ $PYTHON nbd-fault-injector.py $extra_args "127.0.0.1:$port" "$TEST_DIR/nbd-fault-injector.conf" 2>&1 >/dev/null &
+ wait_for_tcp_port "127\\.0\\.0\\.1:$port"
$QEMU_IO -c "read 0 512" "$nbd_url" 2>&1 | _filter_qemu_io | filter_nbd
echo
diff --git a/tests/qemu-iotests/095 b/tests/qemu-iotests/095
new file mode 100755
index 0000000000..acc7dbf182
--- /dev/null
+++ b/tests/qemu-iotests/095
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Test for commit of larger active layer
+#
+# This tests live snapshots of images on a running QEMU instance, using
+# QMP commands. Both single disk snapshots, and transactional group
+# snapshots are performed.
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_qemu
+ rm -f "${TEST_IMG}.base" "${TEST_IMG}.snp1"
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+size_smaller=5M
+size_larger=100M
+
+_make_test_img $size_smaller
+mv "${TEST_IMG}" "${TEST_IMG}.base"
+
+_make_test_img -b "${TEST_IMG}.base" $size_larger
+mv "${TEST_IMG}" "${TEST_IMG}.snp1"
+
+_make_test_img -b "${TEST_IMG}.snp1" $size_larger
+
+echo
+echo "=== Base image info before commit and resize ==="
+$QEMU_IMG info "${TEST_IMG}.base" | _filter_testdir
+
+echo
+echo === Running QEMU Live Commit Test ===
+echo
+
+qemu_comm_method="qmp"
+_launch_qemu -drive file="${TEST_IMG}",if=virtio,id=test
+h=$QEMU_HANDLE
+
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" "return"
+
+_send_qemu_cmd $h "{ 'execute': 'block-commit',
+ 'arguments': { 'device': 'test',
+ 'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED"
+
+echo
+echo "=== Base image info after commit and resize ==="
+$QEMU_IMG info "${TEST_IMG}.base" | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/095.out b/tests/qemu-iotests/095.out
new file mode 100644
index 0000000000..5864ddac2b
--- /dev/null
+++ b/tests/qemu-iotests/095.out
@@ -0,0 +1,31 @@
+QA output created by 095
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=5242880
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=104857600 backing_file='TEST_DIR/t.IMGFMT.base'
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=104857600 backing_file='TEST_DIR/t.IMGFMT.snp1'
+
+=== Base image info before commit and resize ===
+image: TEST_DIR/t.qcow2.base
+file format: qcow2
+virtual size: 5.0M (5242880 bytes)
+disk size: 196K
+cluster_size: 65536
+Format specific information:
+ compat: 1.1
+ lazy refcounts: false
+
+=== Running QEMU Live Commit Test ===
+
+{"return": {}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "test", "len": 104857600, "offset": 104857600, "speed": 0, "type": "commit"}}
+
+=== Base image info after commit and resize ===
+image: TEST_DIR/t.qcow2.base
+file format: qcow2
+virtual size: 100M (104857600 bytes)
+disk size: 196K
+cluster_size: 65536
+Format specific information:
+ compat: 1.1
+ lazy refcounts: false
+*** done
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index e2ed5a95f8..8ca40116d7 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -34,22 +34,95 @@ timestamp=${TIMESTAMP:=false}
# generic initialization
iam=check
+_init_error()
+{
+ echo "$iam: $1" >&2
+ exit 1
+}
+
+if [ -L "$0" ]
+then
+ # called from the build tree
+ source_iotests=$(dirname "$(readlink "$0")")
+ if [ -z "$source_iotests" ]
+ then
+ _init_error "failed to obtain source tree name from check symlink"
+ fi
+ source_iotests=$(cd "$source_iotests"; pwd) || _init_error "failed to enter source tree"
+ build_iotests=$PWD
+else
+ # called from the source tree
+ source_iotests=$PWD
+ # this may be an in-tree build (note that in the following code we may not
+ # assume that it truly is and have to test whether the build results
+ # actually exist)
+ build_iotests=$PWD
+fi
+
+build_root="$build_iotests/../.."
+
+if [ -x "$build_iotests/socket_scm_helper" ]
+then
+ export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper"
+fi
+
+# if ./qemu exists, it should be prioritized and will be chosen by common.config
+if [[ -z "$QEMU_PROG" && ! -x './qemu' ]]
+then
+ arch=$(uname -m 2> /dev/null)
+
+ if [[ -n $arch && -x "$build_root/$arch-softmmu/qemu-system-$arch" ]]
+ then
+ export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch"
+ else
+ pushd "$build_root" > /dev/null
+ for binary in *-softmmu/qemu-system-*
+ do
+ if [ -x "$binary" ]
+ then
+ export QEMU_PROG="$build_root/$binary"
+ break
+ fi
+ done
+ popd > /dev/null
+ fi
+fi
+
+if [[ -z $QEMU_IMG_PROG && -x "$build_root/qemu-img" && ! -x './qemu-img' ]]
+then
+ export QEMU_IMG_PROG="$build_root/qemu-img"
+fi
+
+if [[ -z $QEMU_IO_PROG && -x "$build_root/qemu-io" && ! -x './qemu-io' ]]
+then
+ export QEMU_IO_PROG="$build_root/qemu-io"
+fi
+
+if [[ -z $QEMU_NBD_PROG && -x "$build_root/qemu-nbd" && ! -x './qemu-nbd' ]]
+then
+ export QEMU_NBD_PROG="$build_root/qemu-nbd"
+fi
+
+# we need common.env
+if ! . "$build_iotests/common.env"
+then
+ _init_error "failed to source common.env (make sure the qemu-iotests are run from tests/qemu-iotests in the build tree)"
+fi
+
# we need common.config
-if ! . ./common.config
+if ! . "$source_iotests/common.config"
then
- echo "$iam: failed to source common.config"
- exit 1
+ _init_error "failed to source common.config"
fi
# we need common.rc
-if ! . ./common.rc
+if ! . "$source_iotests/common.rc"
then
- echo "check: failed to source common.rc"
- exit 1
+ _init_error "failed to source common.rc"
fi
# we need common
-. ./common
+. "$source_iotests/common"
#if [ `id -u` -ne 0 ]
#then
@@ -194,7 +267,7 @@ do
echo " - expunged"
rm -f $seq.out.bad
echo "/^$seq\$/d" >>$tmp.expunged
- elif [ ! -f $seq ]
+ elif [ ! -f "$source_iotests/$seq" ]
then
echo " - no such test?"
echo "/^$seq\$/d" >>$tmp.expunged
@@ -215,9 +288,16 @@ do
start=`_wallclock`
$timestamp && echo -n " ["`date "+%T"`"]"
- [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
+
+ if [ "$(head -n 1 "$source_iotests/$seq")" == "#!/usr/bin/env python" ]; then
+ run_command="$PYTHON $seq"
+ else
+ run_command="./$seq"
+ fi
+ export OUTPUT_DIR=$PWD
+ (cd "$source_iotests";
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
- ./$seq >$tmp.out 2>&1
+ $run_command >$tmp.out 2>&1)
sts=$?
$timestamp && _timestamp
stop=`_wallclock`
@@ -242,17 +322,17 @@ do
err=true
fi
- reference=$seq.out
+ reference="$source_iotests/$seq.out"
if [ "$CACHEMODE" = "none" ]; then
- [ -f $seq.out.nocache ] && reference=$seq.out.nocache
+ [ -f "$source_iotests/$seq.out.nocache" ] && reference="$source_iotests/$seq.out.nocache"
fi
- if [ ! -f $reference ]
+ if [ ! -f "$reference" ]
then
echo " - no qualified output"
err=true
else
- if diff -w $reference $tmp.out >/dev/null 2>&1
+ if diff -w "$reference" $tmp.out >/dev/null 2>&1
then
echo ""
if $err
@@ -264,7 +344,7 @@ do
else
echo " - output mismatch (see $seq.out.bad)"
mv $tmp.out $seq.out.bad
- $diff -w $reference $seq.out.bad
+ $diff -w "$reference" $seq.out.bad
err=true
fi
fi
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 0aaf84d015..e4083f4212 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -25,8 +25,7 @@ _setenvironment()
export MSGVERB
}
-here=`pwd`
-rm -f $here/$iam.out
+rm -f "$OUTPUT_DIR/$iam.out"
_setenvironment
check=${check-true}
@@ -59,7 +58,7 @@ do
if $group
then
# arg after -g
- group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+ group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
s/ .*//p
}'`
if [ -z "$group_list" ]
@@ -84,7 +83,7 @@ s/ .*//p
then
# arg after -x
[ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null
- group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+ group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
s/ .*//p
}'`
if [ -z "$group_list" ]
@@ -366,7 +365,7 @@ testlist options
BEGIN { for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \
| while read id
do
- if grep -s "^$id " group >/dev/null
+ if grep -s "^$id " "$source_iotests/group" >/dev/null
then
# in group file ... OK
echo $id >>$tmp.list
@@ -402,7 +401,7 @@ else
touch $tmp.list
else
# no test numbers, do everything from group file
- sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <group >$tmp.list
+ sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <"$source_iotests/group" >$tmp.list
fi
fi
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index d90a8bca8b..bd6790be63 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -126,7 +126,7 @@ fi
export TEST_DIR
if [ -z "$SAMPLE_IMG_DIR" ]; then
- SAMPLE_IMG_DIR=`pwd`/sample_images
+ SAMPLE_IMG_DIR="$source_iotests/sample_images"
fi
if [ ! -d "$SAMPLE_IMG_DIR" ]; then
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 195c5646aa..e0ea7e3a7d 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -318,9 +318,9 @@ _do()
status=1; exit
fi
- (eval "echo '---' \"$_cmd\"") >>$here/$seq.full
+ (eval "echo '---' \"$_cmd\"") >>"$OUTPUT_DIR/$seq.full"
(eval "$_cmd") >$tmp._out 2>&1; ret=$?
- cat $tmp._out >>$here/$seq.full
+ cat $tmp._out >>"$OUTPUT_DIR/$seq.full"
if [ $# -eq 2 ]; then
if [ $ret -eq 0 ]; then
echo "done"
@@ -344,7 +344,7 @@ _do()
#
_notrun()
{
- echo "$*" >$seq.notrun
+ echo "$*" >"$OUTPUT_DIR/$seq.notrun"
echo "$seq not run: $*"
status=0
exit
@@ -354,7 +354,7 @@ _notrun()
#
_fail()
{
- echo "$*" | tee -a $here/$seq.full
+ echo "$*" | tee -a "$OUTPUT_DIR/$seq.full"
echo "(see $seq.full for details)"
status=1
exit 1
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 0f074403ae..e3dc4e8754 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -99,3 +99,4 @@
090 rw auto quick
091 rw auto
092 rw auto quick
+095 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index f6c437c0c3..39a4cfcf4d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -37,6 +37,7 @@ qemu_args = os.environ.get('QEMU', 'qemu').strip().split(' ')
imgfmt = os.environ.get('IMGFMT', 'raw')
imgproto = os.environ.get('IMGPROTO', 'file')
test_dir = os.environ.get('TEST_DIR', '/var/tmp')
+output_dir = os.environ.get('OUTPUT_DIR', '.')
cachemode = os.environ.get('CACHEMODE')
socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper')
@@ -278,7 +279,7 @@ def notrun(reason):
# Each test in qemu-iotests has a number ("seq")
seq = os.path.basename(sys.argv[0])
- open('%s.notrun' % seq, 'wb').write(reason + '\n')
+ open('%s/%s.notrun' % (output_dir, seq), 'wb').write(reason + '\n')
print '%s not run: %s' % (seq, reason)
sys.exit(0)
diff --git a/trace-events b/trace-events
index ba01ad52cf..d071b97dd7 100644
--- a/trace-events
+++ b/trace-events
@@ -1169,12 +1169,13 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
qxl_render_update_area_done(void *cookie) "%p"
# hw/ppc/spapr_pci.c
-spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
+spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)"
spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
-spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
+spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u"
spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u"
# hw/intc/xics.c
xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x"
@@ -1188,6 +1189,12 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
xics_ics_eoi(int nr) "ics_eoi: irq %#x"
+xics_alloc(int src, int irq) "source#%d, irq %d"
+xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use"
+xics_alloc_failed_no_left(int src) "source#%d, no irq left"
+xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
+xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs"
+xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
# hw/ppc/spapr.c
spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes"
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 43de3add29..6dc27ce04f 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -1111,6 +1111,7 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst,
size_t num_opts, num_dst_opts;
QemuOptDesc *desc;
bool need_init = false;
+ bool need_head_update;
if (!list) {
return dst;
@@ -1121,6 +1122,12 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst,
*/
if (!dst) {
need_init = true;
+ need_head_update = true;
+ } else {
+ /* Moreover, even if dst is not NULL, the realloc may move it to a
+ * different address in which case we may get a stale tail pointer
+ * in dst->head. */
+ need_head_update = QTAILQ_EMPTY(&dst->head);
}
num_opts = count_opts_list(dst);
@@ -1131,9 +1138,11 @@ QemuOptsList *qemu_opts_append(QemuOptsList *dst,
if (need_init) {
dst->name = NULL;
dst->implied_opt_name = NULL;
- QTAILQ_INIT(&dst->head);
dst->merge_lists = false;
}
+ if (need_head_update) {
+ QTAILQ_INIT(&dst->head);
+ }
dst->desc[num_dst_opts].name = NULL;
/* append list->desc to dst->desc */
diff --git a/vmstate.c b/vmstate.c
index c9965205df..ef2f87bdad 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -43,11 +43,18 @@ static int vmstate_size(void *opaque, VMStateField *field)
return size;
}
-static void *vmstate_base_addr(void *opaque, VMStateField *field)
+static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
{
void *base_addr = opaque + field->offset;
if (field->flags & VMS_POINTER) {
+ if (alloc && (field->flags & VMS_ALLOC)) {
+ int n_elems = vmstate_n_elems(opaque, field);
+ if (n_elems) {
+ gsize size = n_elems * field->size;
+ *((void **)base_addr + field->start) = g_malloc(size);
+ }
+ }
base_addr = *(void **)base_addr + field->start;
}
@@ -81,7 +88,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
field->field_exists(opaque, version_id)) ||
(!field->field_exists &&
field->version_id <= version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field);
+ void *base_addr = vmstate_base_addr(opaque, field, true);
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
@@ -135,7 +142,7 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
while (field->name) {
if (!field->field_exists ||
field->field_exists(opaque, vmsd->version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field);
+ void *base_addr = vmstate_base_addr(opaque, field, false);
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);