diff options
168 files changed, 6225 insertions, 1360 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index fb5324285c..adc59735a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -610,6 +610,7 @@ F: hw/*/*vhost* virtio M: Anthony Liguori <aliguori@amazon.com> +M: Michael S. Tsirkin <mst@redhat.com> S: Supported F: hw/*/virtio* @@ -290,7 +290,7 @@ common de-ch es fo fr-ca hu ja mk nl-be pt sl tr \ bepo cz ifdef INSTALL_BLOBS -BLOBS=bios.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \ +BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \ vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \ acpi-dsdt.aml q35-acpi-dsdt.aml \ ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin \ diff --git a/Makefile.objs b/Makefile.objs index 857bb53ae4..ac1d0e1c28 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -43,7 +43,6 @@ libcacard-y += libcacard/vcardt.o ifeq ($(CONFIG_SOFTMMU),y) common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/ common-obj-y += net/ -common-obj-y += readline.o common-obj-y += qdev-monitor.o device-hotplug.o common-obj-$(CONFIG_WIN32) += os-win32.o common-obj-$(CONFIG_POSIX) += os-posix.o @@ -32,6 +32,7 @@ #include "sysemu/sysemu.h" #include "qemu/notify.h" #include "block/coroutine.h" +#include "block/qapi.h" #include "qmp-commands.h" #include "qemu/timer.h" @@ -69,11 +70,11 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, +static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, +static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 
BdrvRequestFlags flags); static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, int64_t sector_num, @@ -90,6 +91,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); +static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = + QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); + static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); @@ -188,7 +192,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs) * @is_write: is the IO a write */ static void bdrv_io_limits_intercept(BlockDriverState *bs, - int nb_sectors, + unsigned int bytes, bool is_write) { /* does this io must wait */ @@ -201,9 +205,8 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs, } /* the IO will be executed, do the accounting */ - throttle_account(&bs->throttle_state, - is_write, - nb_sectors * BDRV_SECTOR_SIZE); + throttle_account(&bs->throttle_state, is_write, bytes); + /* if the next request must wait -> do nothing */ if (throttle_schedule_timer(&bs->throttle_state, is_write)) { @@ -214,6 +217,16 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs, qemu_co_queue_next(&bs->throttled_reqs[is_write]); } +size_t bdrv_opt_mem_align(BlockDriverState *bs) +{ + if (!bs || !bs->drv) { + /* 4k should be on the safe side */ + return 4096; + } + + return bs->bl.opt_mem_alignment; +} + /* check if the path starts with "<protocol>:" */ static int path_has_protocol(const char *path) { @@ -327,7 +340,7 @@ BlockDriverState *bdrv_new(const char *device_name) QLIST_INIT(&bs->dirty_bitmaps); pstrcpy(bs->device_name, sizeof(bs->device_name), device_name); if (device_name[0] != '\0') { - QTAILQ_INSERT_TAIL(&bdrv_states, bs, list); + QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); } bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); @@ -479,6 +492,43 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options, return 
ret; } +int bdrv_refresh_limits(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + + memset(&bs->bl, 0, sizeof(bs->bl)); + + if (!drv) { + return 0; + } + + /* Take some limits from the children as a default */ + if (bs->file) { + bdrv_refresh_limits(bs->file); + bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; + bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; + } else { + bs->bl.opt_mem_alignment = 512; + } + + if (bs->backing_hd) { + bdrv_refresh_limits(bs->backing_hd); + bs->bl.opt_transfer_length = + MAX(bs->bl.opt_transfer_length, + bs->backing_hd->bl.opt_transfer_length); + bs->bl.opt_mem_alignment = + MAX(bs->bl.opt_mem_alignment, + bs->backing_hd->bl.opt_mem_alignment); + } + + /* Then let the driver override it */ + if (drv->bdrv_refresh_limits) { + return drv->bdrv_refresh_limits(bs); + } + + return 0; +} + /* * Create a uniquely-named empty temporary file. * Return 0 upon success, otherwise a negative errno value. @@ -732,6 +782,33 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) return open_flags; } +static int bdrv_assign_node_name(BlockDriverState *bs, + const char *node_name, + Error **errp) +{ + if (!node_name) { + return 0; + } + + /* empty string node name is invalid */ + if (node_name[0] == '\0') { + error_setg(errp, "Empty node name"); + return -EINVAL; + } + + /* takes care of avoiding duplicates node names */ + if (bdrv_find_node(node_name)) { + error_setg(errp, "Duplicate node name"); + return -EINVAL; + } + + /* copy node name into the bs and insert it into the graph list */ + pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); + QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); + + return 0; +} + /* * Common part for opening disk images and files * @@ -742,6 +819,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, { int ret, open_flags; const char *filename; + const char *node_name = NULL; Error *local_err = NULL; assert(drv != NULL); @@ -756,6 +834,13 @@ 
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); + node_name = qdict_get_try_str(options, "node-name"); + ret = bdrv_assign_node_name(bs, node_name, errp); + if (ret < 0) { + return ret; + } + qdict_del(options, "node-name"); + /* bdrv_open() with directly using a protocol as drv. This layer is already * opened, so assign it to bs (while file becomes a closed BlockDriverState) * and return immediately. */ @@ -765,7 +850,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, } bs->open_flags = flags; - bs->buffer_alignment = 512; + bs->guest_block_size = 512; + bs->request_alignment = 512; bs->zero_beyond_eof = true; open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); @@ -833,6 +919,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, goto free_and_fail; } + bdrv_refresh_limits(bs); + assert(bdrv_opt_mem_align(bs) != 0); + assert(bs->request_alignment != 0); + #ifndef _WIN32 if (bs->is_temporary) { assert(bs->filename[0] != '\0'); @@ -858,9 +948,10 @@ free_and_fail: * dictionary, it needs to use QINCREF() before calling bdrv_file_open. 
*/ int bdrv_file_open(BlockDriverState **pbs, const char *filename, - QDict *options, int flags, Error **errp) + const char *reference, QDict *options, int flags, + Error **errp) { - BlockDriverState *bs; + BlockDriverState *bs = NULL; BlockDriver *drv; const char *drvname; bool allow_protocol_prefix = false; @@ -872,6 +963,24 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } + if (reference) { + if (filename || qdict_size(options)) { + error_setg(errp, "Cannot reference an existing block device with " + "additional options or a new filename"); + return -EINVAL; + } + QDECREF(options); + + bs = bdrv_find(reference); + if (!bs) { + error_setg(errp, "Cannot find block device '%s'", reference); + return -ENODEV; + } + bdrv_ref(bs); + *pbs = bs; + return 0; + } + bs = bdrv_new(""); bs->options = options; options = qdict_clone_shallow(options); @@ -929,14 +1038,19 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, goto fail; } - ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err); + if (!drv->bdrv_file_open) { + ret = bdrv_open(bs, filename, options, flags, drv, &local_err); + options = NULL; + } else { + ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err); + } if (ret < 0) { error_propagate(errp, local_err); goto fail; } /* Check if any unknown options were used */ - if (qdict_size(options) != 0) { + if (options && (qdict_size(options) != 0)) { const QDictEntry *entry = qdict_first(options); error_setg(errp, "Block protocol '%s' doesn't support the option '%s'", drv->format_name, entry->key); @@ -1016,12 +1130,92 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_free(local_err); return ret; } - pstrcpy(bs->backing_file, sizeof(bs->backing_file), - bs->backing_hd->file->filename); + + if (bs->backing_hd->file) { + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->backing_hd->file->filename); + } + + /* Recalculate the BlockLimits with the 
backing file */ + bdrv_refresh_limits(bs); + return 0; } /* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. + * + * If force_raw is true, bdrv_file_open() will be used, thereby preventing any + * image format auto-detection. If it is false and a filename is given, + * bdrv_open() will be used for auto-detection. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * + * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. + * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * + * The BlockdevRef will be removed from the options QDict. + */ +int bdrv_open_image(BlockDriverState **pbs, const char *filename, + QDict *options, const char *bdref_key, int flags, + bool force_raw, bool allow_none, Error **errp) +{ + QDict *image_options; + int ret; + char *bdref_key_dot; + const char *reference; + + bdref_key_dot = g_strdup_printf("%s.", bdref_key); + qdict_extract_subqdict(options, &image_options, bdref_key_dot); + g_free(bdref_key_dot); + + reference = qdict_get_try_str(options, bdref_key); + if (!filename && !reference && !qdict_size(image_options)) { + if (allow_none) { + ret = 0; + } else { + error_setg(errp, "A block device must be specified for \"%s\"", + bdref_key); + ret = -EINVAL; + } + goto done; + } + + if (filename && !force_raw) { + /* If a filename is given and the block driver should be detected + automatically (instead of using none), use bdrv_open() in order to do + that auto-detection. 
*/ + BlockDriverState *bs; + + if (reference) { + error_setg(errp, "Cannot reference an existing block device while " + "giving a filename"); + ret = -EINVAL; + goto done; + } + + bs = bdrv_new(""); + ret = bdrv_open(bs, filename, image_options, flags, NULL, errp); + if (ret < 0) { + bdrv_unref(bs); + } else { + *pbs = bs; + } + } else { + ret = bdrv_file_open(pbs, filename, reference, image_options, flags, + errp); + } + +done: + qdict_del(options, bdref_key); + return ret; +} + +/* * Opens a disk image (raw, qcow2, vmdk, ...) * * options is a QDict of options to pass to the block drivers, or NULL for an @@ -1036,7 +1230,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char tmp_filename[PATH_MAX + 1]; BlockDriverState *file = NULL; - QDict *file_options = NULL; const char *drvname; Error *local_err = NULL; @@ -1122,10 +1315,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, flags |= BDRV_O_ALLOW_RDWR; } - qdict_extract_subqdict(options, &file_options, "file."); - - ret = bdrv_file_open(&file, filename, file_options, - bdrv_open_flags(bs, flags | BDRV_O_UNMAP), &local_err); + ret = bdrv_open_image(&file, filename, options, "file", + bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true, + &local_err); if (ret < 0) { goto fail; } @@ -1143,7 +1335,13 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, } if (!drv) { - ret = find_image_format(file, filename, &drv, &local_err); + if (file) { + ret = find_image_format(file, filename, &drv, &local_err); + } else { + error_setg(errp, "Must specify either driver or file"); + ret = -EINVAL; + goto unlink_and_fail; + } } if (!drv) { @@ -1156,7 +1354,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, goto unlink_and_fail; } - if (bs->file != file) { + if (file && (bs->file != file)) { bdrv_unref(file); file = NULL; } @@ -1427,6 +1625,8 @@ void 
bdrv_reopen_commit(BDRVReopenState *reopen_state) reopen_state->bs->enable_write_cache = !!(reopen_state->flags & BDRV_O_CACHE_WB); reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); + + bdrv_refresh_limits(reopen_state->bs); } /* @@ -1501,7 +1701,7 @@ void bdrv_close_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bdrv_close(bs); } } @@ -1530,7 +1730,7 @@ static bool bdrv_requests_pending(BlockDriverState *bs) static bool bdrv_requests_pending_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { if (bdrv_requests_pending(bs)) { return true; } @@ -1557,7 +1757,7 @@ void bdrv_drain_all(void) BlockDriverState *bs; while (busy) { - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bdrv_start_throttled_reqs(bs); } @@ -1566,14 +1766,19 @@ void bdrv_drain_all(void) } } -/* make a BlockDriverState anonymous by removing from bdrv_state list. +/* make a BlockDriverState anonymous by removing from bdrv_state and + * graph_bdrv_state list. 
Also, NULL terminate the device_name to prevent double remove */ void bdrv_make_anon(BlockDriverState *bs) { if (bs->device_name[0] != '\0') { - QTAILQ_REMOVE(&bdrv_states, bs, list); + QTAILQ_REMOVE(&bdrv_states, bs, device_list); } bs->device_name[0] = '\0'; + if (bs->node_name[0] != '\0') { + QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); + } + bs->node_name[0] = '\0'; } static void bdrv_rebind(BlockDriverState *bs) @@ -1593,7 +1798,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->dev_ops = bs_src->dev_ops; bs_dest->dev_opaque = bs_src->dev_opaque; bs_dest->dev = bs_src->dev; - bs_dest->buffer_alignment = bs_src->buffer_alignment; + bs_dest->guest_block_size = bs_src->guest_block_size; bs_dest->copy_on_read = bs_src->copy_on_read; bs_dest->enable_write_cache = bs_src->enable_write_cache; @@ -1627,7 +1832,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* keep the same entry in bdrv_states */ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name), bs_src->device_name); - bs_dest->list = bs_src->list; + bs_dest->device_list = bs_src->device_list; + + /* keep the same entry in graph_bdrv_states + * We do want to swap name but don't want to swap linked list entries + */ + bs_dest->node_list = bs_src->node_list; } /* @@ -1745,7 +1955,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev) bs->dev = NULL; bs->dev_ops = NULL; bs->dev_opaque = NULL; - bs->buffer_alignment = 512; + bs->guest_block_size = 512; } /* TODO change to return DeviceState * when all users are qdevified */ @@ -1876,10 +2086,10 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) int bdrv_commit(BlockDriverState *bs) { BlockDriver *drv = bs->drv; - int64_t sector, total_sectors; + int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; - uint8_t *buf; + uint8_t *buf = NULL; char filename[PATH_MAX]; if (!drv) @@ -1904,7 +2114,29 @@ int bdrv_commit(BlockDriverState *bs) } } - 
total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; + length = bdrv_getlength(bs); + if (length < 0) { + ret = length; + goto ro_cleanup; + } + + backing_length = bdrv_getlength(bs->backing_hd); + if (backing_length < 0) { + ret = backing_length; + goto ro_cleanup; + } + + /* If our top snapshot is larger than the backing file image, + * grow the backing file image if possible. If not possible, + * we must return an error */ + if (length > backing_length) { + ret = bdrv_truncate(bs->backing_hd, length); + if (ret < 0) { + goto ro_cleanup; + } + } + + total_sectors = length >> BDRV_SECTOR_BITS; buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); for (sector = 0; sector < total_sectors; sector += n) { @@ -1913,13 +2145,13 @@ int bdrv_commit(BlockDriverState *bs) goto ro_cleanup; } if (ret) { - if (bdrv_read(bs, sector, buf, n) != 0) { - ret = -EIO; + ret = bdrv_read(bs, sector, buf, n); + if (ret < 0) { goto ro_cleanup; } - if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) { - ret = -EIO; + ret = bdrv_write(bs->backing_hd, sector, buf, n); + if (ret < 0) { goto ro_cleanup; } } @@ -1927,6 +2159,9 @@ int bdrv_commit(BlockDriverState *bs) if (drv->bdrv_make_empty) { ret = drv->bdrv_make_empty(bs); + if (ret < 0) { + goto ro_cleanup; + } bdrv_flush(bs); } @@ -1934,9 +2169,11 @@ int bdrv_commit(BlockDriverState *bs) * Make sure all data we wrote to the backing device is actually * stable on disk. 
*/ - if (bs->backing_hd) + if (bs->backing_hd) { bdrv_flush(bs->backing_hd); + } + ret = 0; ro_cleanup: g_free(buf); @@ -1952,7 +2189,7 @@ int bdrv_commit_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { if (bs->drv && bs->backing_hd) { int ret = bdrv_commit(bs); if (ret < 0) { @@ -1970,6 +2207,10 @@ int bdrv_commit_all(void) */ static void tracked_request_end(BdrvTrackedRequest *req) { + if (req->serialising) { + req->bs->serialising_in_flight--; + } + QLIST_REMOVE(req, list); qemu_co_queue_restart_all(&req->wait_queue); } @@ -1979,15 +2220,18 @@ static void tracked_request_end(BdrvTrackedRequest *req) */ static void tracked_request_begin(BdrvTrackedRequest *req, BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, bool is_write) + int64_t offset, + unsigned int bytes, bool is_write) { *req = (BdrvTrackedRequest){ .bs = bs, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .is_write = is_write, - .co = qemu_coroutine_self(), + .offset = offset, + .bytes = bytes, + .is_write = is_write, + .co = qemu_coroutine_self(), + .serialising = false, + .overlap_offset = offset, + .overlap_bytes = bytes, }; qemu_co_queue_init(&req->wait_queue); @@ -1995,6 +2239,21 @@ static void tracked_request_begin(BdrvTrackedRequest *req, QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); } +static void mark_request_serialising(BdrvTrackedRequest *req, size_t align) +{ + int64_t overlap_offset = req->offset & ~(align - 1); + int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) + - overlap_offset; + + if (!req->serialising) { + req->bs->serialising_in_flight++; + req->serialising = true; + } + + req->overlap_offset = MIN(req->overlap_offset, overlap_offset); + req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); +} + /** * Round a region to cluster boundaries */ @@ -2016,53 +2275,75 @@ void bdrv_round_to_clusters(BlockDriverState *bs, } } +static int 
bdrv_get_cluster_size(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + int ret; + + ret = bdrv_get_info(bs, &bdi); + if (ret < 0 || bdi.cluster_size == 0) { + return bs->request_alignment; + } else { + return bdi.cluster_size; + } +} + static bool tracked_request_overlaps(BdrvTrackedRequest *req, - int64_t sector_num, int nb_sectors) { + int64_t offset, unsigned int bytes) +{ /* aaaa bbbb */ - if (sector_num >= req->sector_num + req->nb_sectors) { + if (offset >= req->overlap_offset + req->overlap_bytes) { return false; } /* bbbb aaaa */ - if (req->sector_num >= sector_num + nb_sectors) { + if (req->overlap_offset >= offset + bytes) { return false; } return true; } -static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) { + BlockDriverState *bs = self->bs; BdrvTrackedRequest *req; - int64_t cluster_sector_num; - int cluster_nb_sectors; bool retry; + bool waited = false; - /* If we touch the same cluster it counts as an overlap. This guarantees - * that allocating writes will be serialized and not race with each other - * for the same cluster. For example, in copy-on-read it ensures that the - * CoR read and write operations are atomic and guest writes cannot - * interleave between them. - */ - bdrv_round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + if (!bs->serialising_in_flight) { + return false; + } do { retry = false; QLIST_FOREACH(req, &bs->tracked_requests, list) { - if (tracked_request_overlaps(req, cluster_sector_num, - cluster_nb_sectors)) { + if (req == self || (!req->serialising && !self->serialising)) { + continue; + } + if (tracked_request_overlaps(req, self->overlap_offset, + self->overlap_bytes)) + { /* Hitting this means there was a reentrant request, for * example, a block driver issuing nested requests. This must * never happen since it means deadlock. 
*/ assert(qemu_coroutine_self() != req->co); - qemu_co_queue_wait(&req->wait_queue); - retry = true; - break; + /* If the request is already (indirectly) waiting for us, or + * will wait for us as soon as it wakes up, then just go on + * (instead of producing a deadlock in the former case). */ + if (!req->waiting_for) { + self->waiting_for = req; + qemu_co_queue_wait(&req->wait_queue); + self->waiting_for = NULL; + retry = true; + waited = true; + break; + } } } } while (retry); + + return waited; } /* @@ -2224,6 +2505,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, } new_top_bs->backing_hd = base_bs; + bdrv_refresh_limits(new_top_bs); QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { /* so that bdrv_close() does not recursively close the chain */ @@ -2271,8 +2553,7 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, typedef struct RwCo { BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; + int64_t offset; QEMUIOVector *qiov; bool is_write; int ret; @@ -2284,34 +2565,32 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) RwCo *rwco = opaque; if (!rwco->is_write) { - rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, - rwco->flags); - } else { - rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, + rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset, + rwco->qiov->size, rwco->qiov, rwco->flags); + } else { + rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset, + rwco->qiov->size, rwco->qiov, + rwco->flags); } } /* * Process a vectored synchronous request using coroutines */ -static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, bool is_write, - BdrvRequestFlags flags) +static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, + QEMUIOVector *qiov, bool is_write, + BdrvRequestFlags flags) { Coroutine *co; RwCo rwco = { .bs = bs, - .sector_num = 
sector_num, - .nb_sectors = qiov->size >> BDRV_SECTOR_BITS, + .offset = offset, .qiov = qiov, .is_write = is_write, .ret = NOT_DONE, .flags = flags, }; - assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0); /** * In sync call context, when the vcpu is blocked, this throttling timer @@ -2350,7 +2629,8 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, }; qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags); + return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, + &qiov, is_write, flags); } /* return < 0 if error. See bdrv_write() for the return codes */ @@ -2386,11 +2666,6 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); } -int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) -{ - return bdrv_rwv_co(bs, sector_num, qiov, true, 0); -} - int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { @@ -2440,117 +2715,53 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) } } -int bdrv_pread(BlockDriverState *bs, int64_t offset, - void *buf, int count1) +int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) { - uint8_t tmp_buf[BDRV_SECTOR_SIZE]; - int len, nb_sectors, count; - int64_t sector_num; + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = bytes, + }; int ret; - count = count1; - /* first read to align to sector start */ - len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); - if (len > count) - len = count; - sector_num = offset >> BDRV_SECTOR_BITS; - if (len > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len); - count -= len; - if (count == 0) - return count1; - sector_num++; - buf += len; - } - - /* read the sectors "in place" */ - nb_sectors = count >> 
BDRV_SECTOR_BITS; - if (nb_sectors > 0) { - if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0) - return ret; - sector_num += nb_sectors; - len = nb_sectors << BDRV_SECTOR_BITS; - buf += len; - count -= len; + if (bytes < 0) { + return -EINVAL; } - /* add data from the last sector */ - if (count > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - memcpy(buf, tmp_buf, count); + qemu_iovec_init_external(&qiov, &iov, 1); + ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); + if (ret < 0) { + return ret; } - return count1; + + return bytes; } int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) { - uint8_t tmp_buf[BDRV_SECTOR_SIZE]; - int len, nb_sectors, count; - int64_t sector_num; int ret; - count = qiov->size; - - /* first write to align to sector start */ - len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); - if (len > count) - len = count; - sector_num = offset >> BDRV_SECTOR_BITS; - if (len > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), - len); - if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - count -= len; - if (count == 0) - return qiov->size; - sector_num++; - } - - /* write the sectors "in place" */ - nb_sectors = count >> BDRV_SECTOR_BITS; - if (nb_sectors > 0) { - QEMUIOVector qiov_inplace; - - qemu_iovec_init(&qiov_inplace, qiov->niov); - qemu_iovec_concat(&qiov_inplace, qiov, len, - nb_sectors << BDRV_SECTOR_BITS); - ret = bdrv_writev(bs, sector_num, &qiov_inplace); - qemu_iovec_destroy(&qiov_inplace); - if (ret < 0) { - return ret; - } - - sector_num += nb_sectors; - len = nb_sectors << BDRV_SECTOR_BITS; - count -= len; + ret = bdrv_prwv_co(bs, offset, qiov, true, 0); + if (ret < 0) { + return ret; } - /* add data from the last sector */ - if (count > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - qemu_iovec_to_buf(qiov, qiov->size 
- count, tmp_buf, count); - if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - } return qiov->size; } int bdrv_pwrite(BlockDriverState *bs, int64_t offset, - const void *buf, int count1) + const void *buf, int bytes) { QEMUIOVector qiov; struct iovec iov = { .iov_base = (void *) buf, - .iov_len = count1, + .iov_len = bytes, }; + if (bytes < 0) { + return -EINVAL; + } + qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_pwritev(bs, offset, &qiov); } @@ -2646,40 +2857,34 @@ err: } /* - * Handle a read request in coroutine context + * Forwards an already correctly aligned request to the BlockDriver. This + * handles copy on read and zeroing after EOF; any other features must be + * implemented by the caller. */ -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, + BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, + int64_t align, QEMUIOVector *qiov, int flags) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; int ret; - if (!drv) { - return -ENOMEDIUM; - } - if (bdrv_check_request(bs, sector_num, nb_sectors)) { - return -EIO; - } + int64_t sector_num = offset >> BDRV_SECTOR_BITS; + unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - if (bs->copy_on_read) { - flags |= BDRV_REQ_COPY_ON_READ; - } - if (flags & BDRV_REQ_COPY_ON_READ) { - bs->copy_on_read_in_flight++; - } + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - if (bs->copy_on_read_in_flight) { - wait_for_overlapping_requests(bs, sector_num, nb_sectors); - } - - /* throttling disk I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, nb_sectors, false); + /* Handle Copy on Read and associated serialisation */ + if (flags & BDRV_REQ_COPY_ON_READ) { + /* If we touch the same cluster it counts as an overlap. 
This + * guarantees that allocating writes will be serialized and not race + * with each other for the same cluster. For example, in copy-on-read + * it ensures that the CoR read and write operations are atomic and + * guest writes cannot interleave between them. */ + mark_request_serialising(req, bdrv_get_cluster_size(bs)); } - tracked_request_begin(&req, bs, sector_num, nb_sectors, false); + wait_serialising_requests(req); if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; @@ -2695,6 +2900,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } } + /* Forward the request to the BlockDriver */ if (!(bs->zero_beyond_eof && bs->growable)) { ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); } else { @@ -2708,7 +2914,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE); - max_nb_sectors = MAX(0, total_sectors - sector_num); + max_nb_sectors = MAX(0, ROUND_UP(total_sectors - sector_num, + align >> BDRV_SECTOR_BITS)); if (max_nb_sectors > 0) { ret = drv->bdrv_co_readv(bs, sector_num, MIN(nb_sectors, max_nb_sectors), qiov); @@ -2726,15 +2933,95 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } out: + return ret; +} + +/* + * Handle a read request in coroutine context + */ +static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; + + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint8_t *head_buf = NULL; + uint8_t *tail_buf = NULL; + QEMUIOVector local_qiov; + bool use_local_qiov = false; + int ret; + + if (!drv) { + return -ENOMEDIUM; + } + if (bdrv_check_byte_request(bs, offset, bytes)) { + return -EIO; + } + + if (bs->copy_on_read) { + flags |= BDRV_REQ_COPY_ON_READ; + } + + /* throttling disk I/O */ + if 
(bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, bytes, false); + } + + /* Align read if necessary by padding qiov */ + if (offset & (align - 1)) { + head_buf = qemu_blockalign(bs, align); + qemu_iovec_init(&local_qiov, qiov->niov + 2); + qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + + bytes += offset & (align - 1); + offset = offset & ~(align - 1); + } + + if ((offset + bytes) & (align - 1)) { + if (!use_local_qiov) { + qemu_iovec_init(&local_qiov, qiov->niov + 1); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + } + tail_buf = qemu_blockalign(bs, align); + qemu_iovec_add(&local_qiov, tail_buf, + align - ((offset + bytes) & (align - 1))); + + bytes = ROUND_UP(bytes, align); + } + + tracked_request_begin(&req, bs, offset, bytes, false); + ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + use_local_qiov ? &local_qiov : qiov, + flags); tracked_request_end(&req); - if (flags & BDRV_REQ_COPY_ON_READ) { - bs->copy_on_read_in_flight--; + if (use_local_qiov) { + qemu_iovec_destroy(&local_qiov); + qemu_vfree(head_buf); + qemu_vfree(tail_buf); } return ret; } +static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) { + return -EINVAL; + } + + return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, flags); +} + int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -2828,46 +3115,37 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, } /* - * Handle a write request in coroutine context + * Forwards an already correctly aligned write request to the BlockDriver. 
*/ -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, + BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, int flags) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; + bool waited; int ret; - if (!bs->drv) { - return -ENOMEDIUM; - } - if (bs->read_only) { - return -EACCES; - } - if (bdrv_check_request(bs, sector_num, nb_sectors)) { - return -EIO; - } + int64_t sector_num = offset >> BDRV_SECTOR_BITS; + unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - if (bs->copy_on_read_in_flight) { - wait_for_overlapping_requests(bs, sector_num, nb_sectors); - } - - /* throttling disk I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, nb_sectors, true); - } + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - tracked_request_begin(&req, bs, sector_num, nb_sectors, true); + waited = wait_serialising_requests(req); + assert(!waited || !req->serialising); - ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); + ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); if (ret < 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); } else { + BLKDBG_EVENT(bs, BLKDBG_PWRITEV); ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); if (ret == 0 && !bs->enable_write_cache) { ret = bdrv_co_flush(bs); @@ -2882,11 +3160,143 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); } + return ret; +} + +/* + * Handle a write request in coroutine context + */ +static int coroutine_fn 
bdrv_co_do_pwritev(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + BdrvTrackedRequest req; + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint8_t *head_buf = NULL; + uint8_t *tail_buf = NULL; + QEMUIOVector local_qiov; + bool use_local_qiov = false; + int ret; + + if (!bs->drv) { + return -ENOMEDIUM; + } + if (bs->read_only) { + return -EACCES; + } + if (bdrv_check_byte_request(bs, offset, bytes)) { + return -EIO; + } + + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, bytes, true); + } + + /* + * Align write if necessary by performing a read-modify-write cycle. + * Pad qiov with the read parts and be sure to have a tracked request not + * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. + */ + tracked_request_begin(&req, bs, offset, bytes, true); + + if (offset & (align - 1)) { + QEMUIOVector head_qiov; + struct iovec head_iov; + + mark_request_serialising(&req, align); + wait_serialising_requests(&req); + + head_buf = qemu_blockalign(bs, align); + head_iov = (struct iovec) { + .iov_base = head_buf, + .iov_len = align, + }; + qemu_iovec_init_external(&head_qiov, &head_iov, 1); + + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); + ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + align, &head_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + + qemu_iovec_init(&local_qiov, qiov->niov + 2); + qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + + bytes += offset & (align - 1); + offset = offset & ~(align - 1); + } + + if ((offset + bytes) & (align - 1)) { + QEMUIOVector tail_qiov; + struct iovec tail_iov; + size_t tail_bytes; + bool waited; + + mark_request_serialising(&req, align); + waited = 
wait_serialising_requests(&req); + assert(!waited || !use_local_qiov); + + tail_buf = qemu_blockalign(bs, align); + tail_iov = (struct iovec) { + .iov_base = tail_buf, + .iov_len = align, + }; + qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); + + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, + align, &tail_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + + if (!use_local_qiov) { + qemu_iovec_init(&local_qiov, qiov->niov + 1); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + } + + tail_bytes = (offset + bytes) & (align - 1); + qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); + + bytes = ROUND_UP(bytes, align); + } + + ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, + use_local_qiov ? &local_qiov : qiov, + flags); + +fail: tracked_request_end(&req); + if (use_local_qiov) { + qemu_iovec_destroy(&local_qiov); + qemu_vfree(head_buf); + qemu_vfree(tail_buf); + } + return ret; } +static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) { + return -EINVAL; + } + + return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, flags); +} + int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -3110,11 +3520,12 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), } } +/* This function is to find block backend bs */ BlockDriverState *bdrv_find(const char *name) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { if (!strcmp(name, bs->device_name)) { return bs; } @@ -3122,19 +3533,83 @@ BlockDriverState *bdrv_find(const char *name) return NULL; } +/* 
This function is to find a node in the bs graph */ +BlockDriverState *bdrv_find_node(const char *node_name) +{ + BlockDriverState *bs; + + assert(node_name); + + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + if (!strcmp(node_name, bs->node_name)) { + return bs; + } + } + return NULL; +} + +/* Put this QMP function here so it can access the static graph_bdrv_states. */ +BlockDeviceInfoList *bdrv_named_nodes_list(void) +{ + BlockDeviceInfoList *list, *entry; + BlockDriverState *bs; + + list = NULL; + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + entry = g_malloc0(sizeof(*entry)); + entry->value = bdrv_block_device_info(bs); + entry->next = list; + list = entry; + } + + return list; +} + +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp) +{ + BlockDriverState *bs = NULL; + + if ((!device && !node_name) || (device && node_name)) { + error_setg(errp, "Use either device or node-name but not both"); + return NULL; + } + + if (device) { + bs = bdrv_find(device); + + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return NULL; + } + + return bs; + } + + bs = bdrv_find_node(node_name); + + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, node_name); + return NULL; + } + + return bs; +} + BlockDriverState *bdrv_next(BlockDriverState *bs) { if (!bs) { return QTAILQ_FIRST(&bdrv_states); } - return QTAILQ_NEXT(bs, list); + return QTAILQ_NEXT(bs, device_list); } void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { it(opaque, bs); } } @@ -3154,7 +3629,7 @@ int bdrv_flush_all(void) BlockDriverState *bs; int result = 0; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { int ret = bdrv_flush(bs); if (ret < 0 && !result) { result = ret; @@ -4278,7 +4753,7 @@ void bdrv_invalidate_cache_all(void) { BlockDriverState *bs; - 
QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bdrv_invalidate_cache(bs); } } @@ -4287,7 +4762,7 @@ void bdrv_clear_incoming_migration_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING); } } @@ -4314,9 +4789,15 @@ int bdrv_flush(BlockDriverState *bs) return rwco.ret; } +typedef struct DiscardCo { + BlockDriverState *bs; + int64_t sector_num; + int nb_sectors; + int ret; +} DiscardCo; static void coroutine_fn bdrv_discard_co_entry(void *opaque) { - RwCo *rwco = opaque; + DiscardCo *rwco = opaque; rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); } @@ -4400,7 +4881,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { Coroutine *co; - RwCo rwco = { + DiscardCo rwco = { .bs = bs, .sector_num = sector_num, .nb_sectors = nb_sectors, @@ -4505,14 +4986,14 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, return NULL; } -void bdrv_set_buffer_alignment(BlockDriverState *bs, int align) +void bdrv_set_guest_block_size(BlockDriverState *bs, int align) { - bs->buffer_alignment = align; + bs->guest_block_size = align; } void *qemu_blockalign(BlockDriverState *bs, size_t size) { - return qemu_memalign((bs && bs->buffer_alignment) ? 
bs->buffer_alignment : 512, size); + return qemu_memalign(bdrv_opt_mem_align(bs), size); } /* @@ -4521,9 +5002,13 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size) bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; + size_t alignment = bdrv_opt_mem_align(bs); for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { + if ((uintptr_t) qiov->iov[i].iov_base % alignment) { + return false; + } + if (qiov->iov[i].iov_len % alignment) { return false; } } @@ -4875,21 +5360,68 @@ int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options) return bs->drv->bdrv_amend_options(bs, options); } -ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs) +/* Used to recurse on single child block filters. + * Single child block filter will store their child in bs->file. + */ +bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) { - if (bs->drv->bdrv_check_ext_snapshot) { - return bs->drv->bdrv_check_ext_snapshot(bs); + if (!bs->drv) { + return false; } - if (bs->file && bs->file->drv && bs->file->drv->bdrv_check_ext_snapshot) { - return bs->file->drv->bdrv_check_ext_snapshot(bs); + if (!bs->drv->authorizations[BS_IS_A_FILTER]) { + if (bs == candidate) { + return true; + } else { + return false; + } + } + + if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) { + return false; + } + + if (!bs->file) { + return false; } - /* external snapshots are allowed by default */ - return EXT_SNAPSHOT_ALLOWED; + return bdrv_recurse_is_first_non_filter(bs->file, candidate); } -ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs) +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) +{ + if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) { + return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); + } + + return bdrv_generic_is_first_non_filter(bs, candidate); +} + +/* This function checks if 
the candidate is the first non filter bs down it's + * bs chain. Since we don't have pointers to parents it explore all bs chains + * from the top. Some filters can choose not to pass down the recursion. + */ +bool bdrv_is_first_non_filter(BlockDriverState *candidate) { - return EXT_SNAPSHOT_FORBIDDEN; + BlockDriverState *bs; + + /* walk down the bs forest recursively */ + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + bool perm; + + if (!bs->file) { + continue; + } + + perm = bdrv_recurse_is_first_non_filter(bs->file, candidate); + + /* candidate is the first non filter */ + if (perm) { + return true; + } + } + + return false; } diff --git a/block/backup.c b/block/backup.c index 0198514043..15a2e55e8e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -181,8 +181,13 @@ static int coroutine_fn backup_before_write_notify( void *opaque) { BdrvTrackedRequest *req = opaque; + int64_t sector_num = req->offset >> BDRV_SECTOR_BITS; + int nb_sectors = req->bytes >> BDRV_SECTOR_BITS; - return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL); + assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + + return backup_do_cow(req->bs, sector_num, nb_sectors, NULL); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/block/blkdebug.c b/block/blkdebug.c index ebc5f13464..56c4cd084f 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -186,6 +186,14 @@ static const char *event_names[BLKDBG_EVENT_MAX] = { [BLKDBG_FLUSH_TO_OS] = "flush_to_os", [BLKDBG_FLUSH_TO_DISK] = "flush_to_disk", + + [BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head", + [BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head", + [BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail", + [BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail", + [BLKDBG_PWRITEV] = "pwritev", + [BLKDBG_PWRITEV_ZERO] = "pwritev_zero", + [BLKDBG_PWRITEV_DONE] = "pwritev_done", }; static int get_event_by_name(const char *name, 
BlkDebugEvent *event) @@ -271,19 +279,33 @@ static void remove_rule(BlkdebugRule *rule) g_free(rule); } -static int read_config(BDRVBlkdebugState *s, const char *filename) +static int read_config(BDRVBlkdebugState *s, const char *filename, + QDict *options, Error **errp) { - FILE *f; + FILE *f = NULL; int ret; struct add_rule_data d; + Error *local_err = NULL; + + if (filename) { + f = fopen(filename, "r"); + if (f == NULL) { + error_setg_errno(errp, errno, "Could not read blkdebug config file"); + return -errno; + } - f = fopen(filename, "r"); - if (f == NULL) { - return -errno; + ret = qemu_config_parse(f, config_groups, filename); + if (ret < 0) { + error_setg(errp, "Could not parse blkdebug config file"); + ret = -EINVAL; + goto fail; + } } - ret = qemu_config_parse(f, config_groups, filename); - if (ret < 0) { + qemu_config_parse_qdict(options, config_groups, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + ret = -EINVAL; goto fail; } @@ -298,7 +320,9 @@ static int read_config(BDRVBlkdebugState *s, const char *filename) fail: qemu_opts_reset(&inject_error_opts); qemu_opts_reset(&set_state_opts); - fclose(f); + if (f) { + fclose(f); + } return ret; } @@ -310,7 +334,9 @@ static void blkdebug_parse_filename(const char *filename, QDict *options, /* Parse the blkdebug: prefix */ if (!strstart(filename, "blkdebug:", &filename)) { - error_setg(errp, "File name string must start with 'blkdebug:'"); + /* There was no prefix; therefore, all options have to be already + present in the QDict (except for the filename) */ + qdict_put(options, "x-image", qstring_from_str(filename)); return; } @@ -346,6 +372,11 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "[internal use only, will be removed]", }, + { + .name = "align", + .type = QEMU_OPT_SIZE, + .help = "Required alignment in bytes", + }, { /* end of list */ } }, }; @@ -356,7 +387,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, 
BDRVBlkdebugState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - const char *filename, *config; + const char *config; + uint64_t align; int ret; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -367,30 +399,31 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - /* Read rules from config file */ + /* Read rules from config file or command line options */ config = qemu_opt_get(opts, "config"); - if (config) { - ret = read_config(s, config); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not read blkdebug config file"); - goto fail; - } + ret = read_config(s, config, options, errp); + if (ret) { + goto fail; } /* Set initial state */ s->state = 1; /* Open the backing file */ - filename = qemu_opt_get(opts, "x-image"); - if (filename == NULL) { - error_setg(errp, "Could not retrieve image file name"); - ret = -EINVAL; + ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image", + flags, true, false, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); goto fail; } - ret = bdrv_file_open(&bs->file, filename, NULL, flags, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); + /* Set request alignment */ + align = qemu_opt_get_size(opts, "align", bs->request_alignment); + if (align > 0 && align < INT_MAX && !(align & (align - 1))) { + bs->request_alignment = align; + } else { + error_setg(errp, "Invalid alignment"); + ret = -EINVAL; goto fail; } diff --git a/block/blkverify.c b/block/blkverify.c index 1c1637f55e..cfcbcf41c3 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -78,7 +78,9 @@ static void blkverify_parse_filename(const char *filename, QDict *options, /* Parse the blkverify: prefix */ if (!strstart(filename, "blkverify:", &filename)) { - error_setg(errp, "File name string must start with 'blkverify:'"); + /* There was no prefix; therefore, all options have to be already + present in the QDict (except for the filename) */ + 
qdict_put(options, "x-image", qstring_from_str(filename)); return; } @@ -122,7 +124,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, BDRVBlkverifyState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - const char *filename, *raw; int ret; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -133,33 +134,19 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - /* Parse the raw image filename */ - raw = qemu_opt_get(opts, "x-raw"); - if (raw == NULL) { - error_setg(errp, "Could not retrieve raw image filename"); - ret = -EINVAL; - goto fail; - } - - ret = bdrv_file_open(&bs->file, raw, NULL, flags, &local_err); + /* Open the raw file */ + ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options, + "raw", flags, true, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto fail; } /* Open the test file */ - filename = qemu_opt_get(opts, "x-image"); - if (filename == NULL) { - error_setg(errp, "Could not retrieve test image filename"); - ret = -EINVAL; - goto fail; - } - - s->test_file = bdrv_new(""); - ret = bdrv_open(s->test_file, filename, NULL, flags, NULL, &local_err); + ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options, + "test", flags, false, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); - bdrv_unref(s->test_file); s->test_file = NULL; goto fail; } @@ -417,7 +404,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_aio_writev = blkverify_aio_writev, .bdrv_aio_flush = blkverify_aio_flush, - .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden, + .authorizations = { true, false }, }; static void bdrv_blkverify_init(void) diff --git a/block/cow.c b/block/cow.c index dc15e46b6c..7fc0b12163 100644 --- a/block/cow.c +++ b/block/cow.c @@ -351,7 +351,8 @@ static int cow_create(const char *filename, QEMUOptionParameter *options, return ret; } - ret = bdrv_file_open(&cow_bs, filename, NULL, 
BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&cow_bs, filename, NULL, NULL, BDRV_O_RDWR, + &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); diff --git a/block/curl.c b/block/curl.c index a6039366da..a8075847b8 100644 --- a/block/curl.c +++ b/block/curl.c @@ -34,6 +34,11 @@ #define DPRINTF(fmt, ...) do { } while (0) #endif +#if LIBCURL_VERSION_NUM >= 0x071000 +/* The multi interface timer callback was introduced in 7.16.0 */ +#define NEED_CURL_TIMER_CALLBACK +#endif + #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \ CURLPROTO_FTP | CURLPROTO_FTPS | \ CURLPROTO_TFTP) @@ -77,6 +82,7 @@ typedef struct CURLState typedef struct BDRVCURLState { CURLM *multi; + QEMUTimer timer; size_t len; CURLState states[CURL_NUM_STATES]; char *url; @@ -87,6 +93,23 @@ typedef struct BDRVCURLState { static void curl_clean_state(CURLState *s); static void curl_multi_do(void *arg); +#ifdef NEED_CURL_TIMER_CALLBACK +static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) +{ + BDRVCURLState *s = opaque; + + DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms); + if (timeout_ms == -1) { + timer_del(&s->timer); + } else { + int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000; + timer_mod(&s->timer, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns); + } + return 0; +} +#endif + static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, void *s, void *sp) { @@ -209,20 +232,10 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, return FIND_RET_NONE; } -static void curl_multi_do(void *arg) +static void curl_multi_read(BDRVCURLState *s) { - BDRVCURLState *s = (BDRVCURLState *)arg; - int running; - int r; int msgs_in_queue; - if (!s->multi) - return; - - do { - r = curl_multi_socket_all(s->multi, &running); - } while(r == CURLM_CALL_MULTI_PERFORM); - /* Try to find done transfers, so we can free the easy * handle again. 
*/ do { @@ -266,6 +279,41 @@ static void curl_multi_do(void *arg) } while(msgs_in_queue); } +static void curl_multi_do(void *arg) +{ + BDRVCURLState *s = (BDRVCURLState *)arg; + int running; + int r; + + if (!s->multi) { + return; + } + + do { + r = curl_multi_socket_all(s->multi, &running); + } while(r == CURLM_CALL_MULTI_PERFORM); + + curl_multi_read(s); +} + +static void curl_multi_timeout_do(void *arg) +{ +#ifdef NEED_CURL_TIMER_CALLBACK + BDRVCURLState *s = (BDRVCURLState *)arg; + int running; + + if (!s->multi) { + return; + } + + curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); + + curl_multi_read(s); +#else + abort(); +#endif +} + static CURLState *curl_init_state(BDRVCURLState *s) { CURLState *state = NULL; @@ -473,12 +521,20 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_easy_cleanup(state->curl); state->curl = NULL; + aio_timer_init(bdrv_get_aio_context(bs), &s->timer, + QEMU_CLOCK_REALTIME, SCALE_NS, + curl_multi_timeout_do, s); + // Now we know the file exists and its size, so let's // initialize the multi interface! 
s->multi = curl_multi_init(); curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s); curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb); +#ifdef NEED_CURL_TIMER_CALLBACK + curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); + curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb); +#endif curl_multi_do(s); qemu_opts_del(opts); @@ -597,6 +653,9 @@ static void curl_close(BlockDriverState *bs) } if (s->multi) curl_multi_cleanup(s->multi); + + timer_del(&s->timer); + g_free(s->url); } diff --git a/block/gluster.c b/block/gluster.c index 563d497dc2..a009b15ded 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -21,19 +21,15 @@ #include "qemu/uri.h" typedef struct GlusterAIOCB { - BlockDriverAIOCB common; int64_t size; int ret; - bool *finished; QEMUBH *bh; + Coroutine *coroutine; } GlusterAIOCB; typedef struct BDRVGlusterState { struct glfs *glfs; - int fds[2]; struct glfs_fd *fd; - int event_reader_pos; - GlusterAIOCB *event_acb; } BDRVGlusterState; #define GLUSTER_FD_READ 0 @@ -231,46 +227,32 @@ out: return NULL; } -static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +static void qemu_gluster_complete_aio(void *opaque) { - int ret; - bool *finished = acb->finished; - BlockDriverCompletionFunc *cb = acb->common.cb; - void *opaque = acb->common.opaque; - - if (!acb->ret || acb->ret == acb->size) { - ret = 0; /* Success */ - } else if (acb->ret < 0) { - ret = acb->ret; /* Read/Write failed */ - } else { - ret = -EIO; /* Partial read/write - fail it */ - } + GlusterAIOCB *acb = (GlusterAIOCB *)opaque; - qemu_aio_release(acb); - cb(opaque, ret); - if (finished) { - *finished = true; - } + qemu_bh_delete(acb->bh); + acb->bh = NULL; + qemu_coroutine_enter(acb->coroutine, NULL); } -static void qemu_gluster_aio_event_reader(void *opaque) +/* + * AIO callback routine called from GlusterFS thread. 
+ */ +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) { - BDRVGlusterState *s = opaque; - ssize_t ret; - - do { - char *p = (char *)&s->event_acb; - - ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, - sizeof(s->event_acb) - s->event_reader_pos); - if (ret > 0) { - s->event_reader_pos += ret; - if (s->event_reader_pos == sizeof(s->event_acb)) { - s->event_reader_pos = 0; - qemu_gluster_complete_aio(s->event_acb, s); - } - } - } while (ret < 0 && errno == EINTR); + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + + if (!ret || ret == acb->size) { + acb->ret = 0; /* Success */ + } else if (ret < 0) { + acb->ret = ret; /* Read/Write failed */ + } else { + acb->ret = -EIO; /* Partial read/write - fail it */ + } + + acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); + qemu_bh_schedule(acb->bh); } /* TODO Convert to fine grained options */ @@ -309,7 +291,6 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, filename = qemu_opt_get(opts, "filename"); - s->glfs = qemu_gluster_init(gconf, filename); if (!s->glfs) { ret = -errno; @@ -329,18 +310,8 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, s->fd = glfs_open(s->glfs, gconf->image, open_flags); if (!s->fd) { ret = -errno; - goto out; } - ret = qemu_pipe(s->fds); - if (ret < 0) { - ret = -errno; - goto out; - } - fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], - qemu_gluster_aio_event_reader, NULL, s); - out: qemu_opts_del(opts); qemu_gluster_gconf_free(gconf); @@ -356,12 +327,65 @@ out: return ret; } +#ifdef CONFIG_GLUSTERFS_ZEROFILL +static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) +{ + int ret; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + BDRVGlusterState *s = bs->opaque; + off_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; + + acb->size = size; + 
acb->ret = 0; + acb->coroutine = qemu_coroutine_self(); + + ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + if (ret < 0) { + ret = -errno; + goto out; + } + + qemu_coroutine_yield(); + ret = acb->ret; + +out: + g_slice_free(GlusterAIOCB, acb); + return ret; +} + +static inline bool gluster_supports_zerofill(void) +{ + return 1; +} + +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, + int64_t size) +{ + return glfs_zerofill(fd, offset, size); +} + +#else +static inline bool gluster_supports_zerofill(void) +{ + return 0; +} + +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, + int64_t size) +{ + return 0; +} +#endif + static int qemu_gluster_create(const char *filename, QEMUOptionParameter *options, Error **errp) { struct glfs *glfs; struct glfs_fd *fd; int ret = 0; + int prealloc = 0; int64_t total_size = 0; GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); @@ -374,6 +398,19 @@ static int qemu_gluster_create(const char *filename, while (options && options->name) { if (!strcmp(options->name, BLOCK_OPT_SIZE)) { total_size = options->value.n / BDRV_SECTOR_SIZE; + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { + if (!options->value.s || !strcmp(options->value.s, "off")) { + prealloc = 0; + } else if (!strcmp(options->value.s, "full") && + gluster_supports_zerofill()) { + prealloc = 1; + } else { + error_setg(errp, "Invalid preallocation mode: '%s'" + " or GlusterFS doesn't support zerofill API", + options->value.s); + ret = -EINVAL; + goto out; + } } options++; } @@ -383,9 +420,15 @@ static int qemu_gluster_create(const char *filename, if (!fd) { ret = -errno; } else { - if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) { + if (prealloc && qemu_gluster_zerofill(fd, 0, + total_size * BDRV_SECTOR_SIZE)) { + ret = -errno; + } + } else { ret = -errno; } + if (glfs_close(fd) != 0) { ret = -errno; } @@ -398,58 
+441,18 @@ out: return ret; } -static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) -{ - GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; - bool finished = false; - - acb->finished = &finished; - while (!finished) { - qemu_aio_wait(); - } -} - -static const AIOCBInfo gluster_aiocb_info = { - .aiocb_size = sizeof(GlusterAIOCB), - .cancel = qemu_gluster_aio_cancel, -}; - -static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) -{ - GlusterAIOCB *acb = (GlusterAIOCB *)arg; - BlockDriverState *bs = acb->common.bs; - BDRVGlusterState *s = bs->opaque; - int retval; - - acb->ret = ret; - retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); - if (retval != sizeof(acb)) { - /* - * Gluster AIO callback thread failed to notify the waiting - * QEMU thread about IO completion. - */ - error_report("Gluster AIO completion failed: %s", strerror(errno)); - abort(); - } -} - -static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int write) +static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) { int ret; - GlusterAIOCB *acb; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); BDRVGlusterState *s = bs->opaque; - size_t size; - off_t offset; - - offset = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; + size_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = size; acb->ret = 0; - acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, @@ -460,13 +463,16 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, } if (ret < 0) { + ret = -errno; goto out; } - return &acb->common; + + qemu_coroutine_yield(); + ret = acb->ret; 
out: - qemu_aio_release(acb); - return NULL; + g_slice_free(GlusterAIOCB, acb); + return ret; } static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) @@ -482,71 +488,68 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) return 0; } -static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); + return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0); } -static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); + return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); } -static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) { int ret; - GlusterAIOCB *acb; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); BDRVGlusterState *s = bs->opaque; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = 0; acb->ret = 0; - acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); if (ret < 0) { + ret = -errno; goto out; } - return &acb->common; + + qemu_coroutine_yield(); + ret = acb->ret; out: - qemu_aio_release(acb); - return NULL; + g_slice_free(GlusterAIOCB, acb); + return ret; } #ifdef CONFIG_GLUSTERFS_DISCARD -static BlockDriverAIOCB 
*qemu_gluster_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb, - void *opaque) +static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { int ret; - GlusterAIOCB *acb; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); BDRVGlusterState *s = bs->opaque; - size_t size; - off_t offset; - - offset = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; + size_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = 0; acb->ret = 0; - acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); if (ret < 0) { + ret = -errno; goto out; } - return &acb->common; + + qemu_coroutine_yield(); + ret = acb->ret; out: - qemu_aio_release(acb); - return NULL; + g_slice_free(GlusterAIOCB, acb); + return ret; } #endif @@ -581,10 +584,6 @@ static void qemu_gluster_close(BlockDriverState *bs) { BDRVGlusterState *s = bs->opaque; - close(s->fds[GLUSTER_FD_READ]); - close(s->fds[GLUSTER_FD_WRITE]); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL); - if (s->fd) { glfs_close(s->fd); s->fd = NULL; @@ -604,6 +603,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = { .type = OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_PREALLOC, + .type = OPT_STRING, + .help = "Preallocation mode (allowed values: off, full)" + }, { NULL } }; @@ -618,12 +622,15 @@ static BlockDriver bdrv_gluster = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + 
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; @@ -639,12 +646,15 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; @@ -660,12 +670,15 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = 
qemu_gluster_create_options, }; @@ -681,12 +694,15 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; diff --git a/block/iscsi.c b/block/iscsi.c index c0ea0c4543..6f4af72a75 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -308,7 +308,7 @@ retry: iscsi_co_generic_cb, &iTask); if (iTask.task == NULL) { g_free(buf); - return -EIO; + return -ENOMEM; } #if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, @@ -376,7 +376,7 @@ retry: break; } if (iTask.task == NULL) { - return -EIO; + return -ENOMEM; } #if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); @@ -419,7 +419,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; + return -ENOMEM; } while (!iTask.complete) { @@ -669,7 +669,7 @@ retry: sector_qemu2lun(sector_num, iscsilun), 8 + 16, iscsi_co_generic_cb, &iTask) == NULL) { - ret = -EIO; + ret = -ENOMEM; goto out; } @@ -753,7 +753,7 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, retry: if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 
0, &list, 1, iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; + return -ENOMEM; } while (!iTask.complete) { @@ -822,7 +822,7 @@ retry: iscsilun->zeroblock, iscsilun->block_size, nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; + return -ENOMEM; } while (!iTask.complete) { @@ -1217,6 +1217,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun); + bs->request_alignment = iscsilun->block_size; /* Medium changer or tape. We dont have any emulation for this so this must * be sg ioctl compatible. We force it to be sg, otherwise qemu will try @@ -1265,23 +1266,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(task); task = NULL; - - if (iscsilun->bl.max_unmap < 0xffffffff) { - bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, - iscsilun); - } - bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, - iscsilun); - - if (iscsilun->bl.max_ws_len < 0xffffffff) { - bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, - iscsilun); - } - bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, - iscsilun); - - bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len, - iscsilun); } #if defined(LIBISCSI_FEATURE_NOP_COUNTER) @@ -1326,6 +1310,41 @@ static void iscsi_close(BlockDriverState *bs) memset(iscsilun, 0, sizeof(IscsiLun)); } +static int iscsi_refresh_limits(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + + /* We don't actually refresh here, but just return data queried in + * iscsi_open(): iscsi targets don't change their limits. 
*/ + if (iscsilun->lbp.lbpu || iscsilun->lbp.lbpws) { + if (iscsilun->bl.max_unmap < 0xffffffff) { + bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, + iscsilun); + } + bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); + + if (iscsilun->bl.max_ws_len < 0xffffffff) { + bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, + iscsilun); + } + bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); + + bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len, + iscsilun); + } + return 0; +} + +/* We have nothing to do for iSCSI reopen, stub just returns + * success */ +static int iscsi_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int iscsi_truncate(BlockDriverState *bs, int64_t offset) { IscsiLun *iscsilun = bs->opaque; @@ -1434,10 +1453,12 @@ static BlockDriver bdrv_iscsi = { .bdrv_close = iscsi_close, .bdrv_create = iscsi_create, .create_options = iscsi_create_options, + .bdrv_reopen_prepare = iscsi_reopen_prepare, .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, .bdrv_truncate = iscsi_truncate, + .bdrv_refresh_limits = iscsi_refresh_limits, #if defined(LIBISCSI_FEATURE_IOVECTOR) .bdrv_co_get_block_status = iscsi_co_get_block_status, diff --git a/block/mirror.c b/block/mirror.c index 2932bab27a..2a4333474e 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -96,6 +96,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); } + qemu_iovec_destroy(&op->qiov); g_slice_free(MirrorOp, op); qemu_coroutine_enter(s->common.co, NULL); } @@ -630,11 +631,49 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { + int64_t length, base_length; + int orig_base_flags; + + orig_base_flags = bdrv_get_flags(base); + if (bdrv_reopen(base, bs->open_flags, errp)) { 
return; } + + length = bdrv_getlength(bs); + if (length < 0) { + error_setg(errp, "Unable to determine length of %s", bs->filename); + goto error_restore_flags; + } + + base_length = bdrv_getlength(base); + if (base_length < 0) { + error_setg(errp, "Unable to determine length of %s", base->filename); + goto error_restore_flags; + } + + if (length > base_length) { + if (bdrv_truncate(base, length) < 0) { + error_setg(errp, "Top image %s is larger than base image %s, and " + "resize of base image failed", + bs->filename, base->filename); + goto error_restore_flags; + } + } + bdrv_ref(base); mirror_start_job(bs, base, speed, 0, 0, on_error, on_error, cb, opaque, errp, &commit_active_job_driver, false, base); + if (error_is_set(errp)) { + goto error_restore_flags; + } + + return; + +error_restore_flags: + /* ignore error and errp for bdrv_reopen, because we want to propagate + * the original error */ + bdrv_reopen(base, orig_base_flags, NULL); + return; } diff --git a/block/qapi.c b/block/qapi.c index a32cb79db8..8f4134b40a 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -29,6 +29,60 @@ #include "qapi/qmp-output-visitor.h" #include "qapi/qmp/types.h" +BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs) +{ + BlockDeviceInfo *info = g_malloc0(sizeof(*info)); + + info->file = g_strdup(bs->filename); + info->ro = bs->read_only; + info->drv = g_strdup(bs->drv->format_name); + info->encrypted = bs->encrypted; + info->encryption_key_missing = bdrv_key_required(bs); + + if (bs->node_name[0]) { + info->has_node_name = true; + info->node_name = g_strdup(bs->node_name); + } + + if (bs->backing_file[0]) { + info->has_backing_file = true; + info->backing_file = g_strdup(bs->backing_file); + } + + info->backing_file_depth = bdrv_get_backing_file_depth(bs); + + if (bs->io_limits_enabled) { + ThrottleConfig cfg; + throttle_get_config(&bs->throttle_state, &cfg); + info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; + info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; + 
info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; + + info->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg; + info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg; + info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg; + + info->has_bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->has_bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max; + info->bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max; + info->has_bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max; + info->bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max; + + info->has_iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max; + info->iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max; + info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + + info->has_iops_size = cfg.op_size; + info->iops_size = cfg.op_size; + } + + return info; +} + /* * Returns 0 on success, with *p_list either set to describe snapshot * information, or NULL because there are no snapshots. 
Returns -errno on @@ -211,60 +265,7 @@ void bdrv_query_info(BlockDriverState *bs, if (bs->drv) { info->has_inserted = true; - info->inserted = g_malloc0(sizeof(*info->inserted)); - info->inserted->file = g_strdup(bs->filename); - info->inserted->ro = bs->read_only; - info->inserted->drv = g_strdup(bs->drv->format_name); - info->inserted->encrypted = bs->encrypted; - info->inserted->encryption_key_missing = bdrv_key_required(bs); - - if (bs->backing_file[0]) { - info->inserted->has_backing_file = true; - info->inserted->backing_file = g_strdup(bs->backing_file); - } - - info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs); - - if (bs->io_limits_enabled) { - ThrottleConfig cfg; - throttle_get_config(&bs->throttle_state, &cfg); - info->inserted->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; - info->inserted->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; - info->inserted->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; - - info->inserted->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg; - info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg; - info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg; - - info->inserted->has_bps_max = - cfg.buckets[THROTTLE_BPS_TOTAL].max; - info->inserted->bps_max = - cfg.buckets[THROTTLE_BPS_TOTAL].max; - info->inserted->has_bps_rd_max = - cfg.buckets[THROTTLE_BPS_READ].max; - info->inserted->bps_rd_max = - cfg.buckets[THROTTLE_BPS_READ].max; - info->inserted->has_bps_wr_max = - cfg.buckets[THROTTLE_BPS_WRITE].max; - info->inserted->bps_wr_max = - cfg.buckets[THROTTLE_BPS_WRITE].max; - - info->inserted->has_iops_max = - cfg.buckets[THROTTLE_OPS_TOTAL].max; - info->inserted->iops_max = - cfg.buckets[THROTTLE_OPS_TOTAL].max; - info->inserted->has_iops_rd_max = - cfg.buckets[THROTTLE_OPS_READ].max; - info->inserted->iops_rd_max = - cfg.buckets[THROTTLE_OPS_READ].max; - info->inserted->has_iops_wr_max = - cfg.buckets[THROTTLE_OPS_WRITE].max; - info->inserted->iops_wr_max = - cfg.buckets[THROTTLE_OPS_WRITE].max; - - 
info->inserted->has_iops_size = cfg.op_size; - info->inserted->iops_size = cfg.op_size; - } + info->inserted = bdrv_block_device_info(bs); bs0 = bs; p_image_info = &info->inserted->image; @@ -318,6 +319,11 @@ BlockStats *bdrv_query_stats(const BlockDriverState *bs) s->parent = bdrv_query_stats(bs->file); } + if (bs->backing_hd) { + s->has_backing = true; + s->backing = bdrv_query_stats(bs->backing_hd); + } + return s; } diff --git a/block/qcow.c b/block/qcow.c index c470e05f60..948b0c5601 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -691,7 +691,8 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options, return ret; } - ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&qcow_bs, filename, NULL, NULL, BDRV_O_RDWR, + &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); diff --git a/block/qcow2.c b/block/qcow2.c index 8ec9db10f8..2da62b8a90 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -718,7 +718,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } qemu_opts_del(opts); - bs->bl.write_zeroes_alignment = s->cluster_sectors; if (s->use_lazy_refcounts && s->qcow_version < 3) { error_setg(errp, "Lazy refcounts require a qcow2 image with at least " @@ -751,6 +750,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, return ret; } +static int qcow2_refresh_limits(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + + bs->bl.write_zeroes_alignment = s->cluster_sectors; + + return 0; +} + static int qcow2_set_key(BlockDriverState *bs, const char *key) { BDRVQcowState *s = bs->opaque; @@ -1483,7 +1491,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, return ret; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -2268,6 +2276,7 @@ static 
BlockDriver bdrv_qcow2 = { .bdrv_change_backing_file = qcow2_change_backing_file, + .bdrv_refresh_limits = qcow2_refresh_limits, .bdrv_invalidate_cache = qcow2_invalidate_cache, .create_options = qcow2_create_options, diff --git a/block/qcow2.h b/block/qcow2.h index 303eb26629..b5b7d13630 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -340,11 +340,11 @@ typedef enum QCow2MetadataOverlap { #define QCOW2_OL_ALL \ (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2) -#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL -#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL +#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL +#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL -#define REFT_OFFSET_MASK 0xffffffffffffff00ULL +#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset) { diff --git a/block/qed.c b/block/qed.c index 450a1fa2e9..694e6e2ee0 100644 --- a/block/qed.c +++ b/block/qed.c @@ -495,7 +495,6 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } } - bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS; s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, qed_need_check_timer_cb, s); @@ -507,6 +506,15 @@ out: return ret; } +static int bdrv_qed_refresh_limits(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS; + + return 0; +} + /* We have nothing to do for QED reopen, stubs just return * success */ static int bdrv_qed_reopen_prepare(BDRVReopenState *state, @@ -563,8 +571,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, return ret; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB, - &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB, &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); @@ -1616,6 +1624,7 @@ static 
BlockDriver bdrv_qed = { .bdrv_truncate = bdrv_qed_truncate, .bdrv_getlength = bdrv_qed_getlength, .bdrv_get_info = bdrv_qed_get_info, + .bdrv_refresh_limits = bdrv_qed_refresh_limits, .bdrv_change_backing_file = bdrv_qed_change_backing_file, .bdrv_invalidate_cache = bdrv_qed_invalidate_cache, .bdrv_check = bdrv_qed_check, diff --git a/block/raw-posix.c b/block/raw-posix.c index 0676037e13..126a634e45 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -127,6 +127,8 @@ typedef struct BDRVRawState { int fd; int type; int open_flags; + size_t buf_align; + #if defined(__linux__) /* linux floppy specific */ int64_t fd_open_time; @@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename) } #endif +static void raw_probe_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + char *buf; + unsigned int sector_size; + + /* For /dev/sg devices the alignment is not really used. + With buffered I/O, we don't have any restrictions. */ + if (bs->sg || !(s->open_flags & O_DIRECT)) { + bs->request_alignment = 1; + s->buf_align = 1; + return; + } + + /* Try a few ioctls to get the right size */ + bs->request_alignment = 0; + s->buf_align = 0; + +#ifdef BLKSSZGET + if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) { + bs->request_alignment = sector_size; + } +#endif +#ifdef DKIOCGETBLOCKSIZE + if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) { + bs->request_alignment = sector_size; + } +#endif +#ifdef DIOCGSECTORSIZE + if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) { + bs->request_alignment = sector_size; + } +#endif +#ifdef CONFIG_XFS + if (s->is_xfs) { + struct dioattr da; + if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) { + bs->request_alignment = da.d_miniosz; + /* The kernel returns wrong information for d_mem */ + /* s->buf_align = da.d_mem; */ + } + } +#endif + + /* If we could not get the sizes so far, we can only guess them */ + if (!s->buf_align) { + size_t align; + buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE); + for 
(align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { + if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) { + s->buf_align = align; + break; + } + } + qemu_vfree(buf); + } + + if (!bs->request_alignment) { + size_t align; + buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE); + for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { + if (pread(s->fd, buf, align, 0) >= 0) { + bs->request_alignment = align; + break; + } + } + qemu_vfree(buf); + } +} + static void raw_parse_flags(int bdrv_flags, int *open_flags) { assert(open_flags != NULL); @@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state, return ret; } - static void raw_reopen_commit(BDRVReopenState *state) { BDRVRawReopenState *raw_s = state->opaque; @@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state) state->opaque = NULL; } +static int raw_refresh_limits(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; -/* XXX: use host sector size if necessary with: -#ifdef DIOCGSECTORSIZE - { - unsigned int sectorsize = 512; - if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) && - sectorsize > bufsize) - bufsize = sectorsize; - } -#endif -#ifdef CONFIG_COCOA - uint32_t blockSize = 512; - if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) { - bufsize = blockSize; - } -#endif -*/ + raw_probe_alignment(bs); + bs->bl.opt_mem_alignment = s->buf_align; + + return 0; +} static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) { @@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = raw_aio_discard, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = hdev_aio_discard, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ 
-1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, diff --git a/block/raw-win32.c b/block/raw-win32.c index ce314fd54f..beb7f2395e 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -202,6 +202,35 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static void raw_probe_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + DWORD sectorsPerCluster, freeClusters, totalClusters, count; + DISK_GEOMETRY_EX dg; + BOOL status; + + if (s->type == FTYPE_CD) { + bs->request_alignment = 2048; + return; + } + if (s->type == FTYPE_HARDDISK) { + status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, + NULL, 0, &dg, sizeof(dg), &count, NULL); + if (status != 0) { + bs->request_alignment = dg.Geometry.BytesPerSector; + return; + } + /* try GetDiskFreeSpace too */ + } + + if (s->drive_path[0]) { + GetDiskFreeSpace(s->drive_path, &sectorsPerCluster, + &dg.Geometry.BytesPerSector, + &freeClusters, &totalClusters); + bs->request_alignment = dg.Geometry.BytesPerSector; + } +} + static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) { assert(access_flags != NULL); @@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } } + if 
(filename[0] && filename[1] == ':') { + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); + } else if (filename[0] == '\\' && filename[1] == '\\') { + s->drive_path[0] = 0; + } else { + /* Relative path. */ + char buf[MAX_PATH]; + GetCurrentDirectory(MAX_PATH, buf); + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]); + } + s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, overlapped, NULL); @@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, s->aio = aio; } + raw_probe_alignment(bs); ret = 0; fail: qemu_opts_del(opts); diff --git a/block/rbd.c b/block/rbd.c index f453f04757..121fae221e 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -95,18 +95,13 @@ typedef struct RADOSCB { #define RBD_FD_WRITE 1 typedef struct BDRVRBDState { - int fds[2]; rados_t cluster; rados_ioctx_t io_ctx; rbd_image_t image; char name[RBD_MAX_IMAGE_NAME_SIZE]; char *snap; - int event_reader_pos; - RADOSCB *event_rcb; } BDRVRBDState; -static void rbd_aio_bh_cb(void *opaque); - static int qemu_rbd_next_tok(char *dst, int dst_len, char *src, char delim, const char *name, @@ -369,9 +364,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options, } /* - * This aio completion is being called from qemu_rbd_aio_event_reader() - * and runs in qemu context. It schedules a bh, but just in case the aio - * was not cancelled before. + * This aio completion is being called from rbd_finish_bh() and runs in qemu + * BH context. */ static void qemu_rbd_complete_aio(RADOSCB *rcb) { @@ -401,36 +395,19 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) acb->ret = r; } } - /* Note that acb->bh can be NULL in case where the aio was cancelled */ - acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); - qemu_bh_schedule(acb->bh); - g_free(rcb); -} -/* - * aio fd read handler. It runs in the qemu context and calls the - * completion handling of completed rados aio operations. 
- */ -static void qemu_rbd_aio_event_reader(void *opaque) -{ - BDRVRBDState *s = opaque; + g_free(rcb); - ssize_t ret; + if (acb->cmd == RBD_AIO_READ) { + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); + } + qemu_vfree(acb->bounce); + acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); + acb->status = 0; - do { - char *p = (char *)&s->event_rcb; - - /* now read the rcb pointer that was sent from a non qemu thread */ - ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos, - sizeof(s->event_rcb) - s->event_reader_pos); - if (ret > 0) { - s->event_reader_pos += ret; - if (s->event_reader_pos == sizeof(s->event_rcb)) { - s->event_reader_pos = 0; - qemu_rbd_complete_aio(s->event_rcb); - } - } - } while (ret < 0 && errno == EINTR); + if (!acb->cancelled) { + qemu_aio_release(acb); + } } /* TODO Convert to fine grained options */ @@ -538,23 +515,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, bs->read_only = (s->snap != NULL); - s->event_reader_pos = 0; - r = qemu_pipe(s->fds); - if (r < 0) { - error_report("error opening eventfd"); - goto failed; - } - fcntl(s->fds[0], F_SETFL, O_NONBLOCK); - fcntl(s->fds[1], F_SETFL, O_NONBLOCK); - qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader, - NULL, s); - - qemu_opts_del(opts); return 0; -failed: - rbd_close(s->image); failed_open: rados_ioctx_destroy(s->io_ctx); failed_shutdown: @@ -569,10 +532,6 @@ static void qemu_rbd_close(BlockDriverState *bs) { BDRVRBDState *s = bs->opaque; - close(s->fds[0]); - close(s->fds[1]); - qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL); - rbd_close(s->image); rados_ioctx_destroy(s->io_ctx); g_free(s->snap); @@ -600,34 +559,11 @@ static const AIOCBInfo rbd_aiocb_info = { .cancel = qemu_rbd_aio_cancel, }; -static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) +static void rbd_finish_bh(void *opaque) { - int ret = 0; - while (1) { - fd_set wfd; - int fd = s->fds[RBD_FD_WRITE]; - - /* send the 
op pointer to the qemu thread that is responsible - for the aio/op completion. Must do it in a qemu thread context */ - ret = write(fd, (void *)&rcb, sizeof(rcb)); - if (ret >= 0) { - break; - } - if (errno == EINTR) { - continue; - } - if (errno != EAGAIN) { - break; - } - - FD_ZERO(&wfd); - FD_SET(fd, &wfd); - do { - ret = select(fd + 1, NULL, &wfd, NULL, NULL); - } while (ret < 0 && errno == EINTR); - } - - return ret; + RADOSCB *rcb = opaque; + qemu_bh_delete(rcb->acb->bh); + qemu_rbd_complete_aio(rcb); } /* @@ -635,40 +571,18 @@ static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) * * Note: this function is being called from a non qemu thread so * we need to be careful about what we do here. Generally we only - * write to the block notification pipe, and do the rest of the - * io completion handling from qemu_rbd_aio_event_reader() which - * runs in a qemu context. + * schedule a BH, and do the rest of the io completion handling + * from rbd_finish_bh() which runs in a qemu context. */ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) { - int ret; + RBDAIOCB *acb = rcb->acb; + rcb->ret = rbd_aio_get_return_value(c); rbd_aio_release(c); - ret = qemu_rbd_send_pipe(rcb->s, rcb); - if (ret < 0) { - error_report("failed writing to acb->s->fds"); - g_free(rcb); - } -} - -/* Callback when all queued rbd_aio requests are complete */ -static void rbd_aio_bh_cb(void *opaque) -{ - RBDAIOCB *acb = opaque; - - if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); - } - qemu_vfree(acb->bounce); - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
0 : acb->ret)); - qemu_bh_delete(acb->bh); - acb->bh = NULL; - acb->status = 0; - - if (!acb->cancelled) { - qemu_aio_release(acb); - } + acb->bh = qemu_bh_new(rbd_finish_bh, rcb); + qemu_bh_schedule(acb->bh); } static int rbd_aio_discard_wrapper(rbd_image_t image, diff --git a/block/sheepdog.c b/block/sheepdog.c index b94ab6e10a..672b9c97a2 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -161,7 +161,7 @@ typedef struct SheepdogVdiReq { uint32_t id; uint32_t data_length; uint64_t vdi_size; - uint32_t vdi_id; + uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; uint8_t reserved[2]; @@ -1493,7 +1493,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot) memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; - hdr.vdi_id = s->inode.vdi_id; + hdr.base_vdi_id = s->inode.vdi_id; wlen = SD_MAX_VDI_LEN; @@ -1534,7 +1534,7 @@ static int sd_prealloc(const char *filename) Error *local_err = NULL; int ret; - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); @@ -1684,7 +1684,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, if (backing_file) { BlockDriverState *bs; - BDRVSheepdogState *s; + BDRVSheepdogState *base; BlockDriver *drv; /* Currently, only Sheepdog backing image is supported. 
*/ @@ -1695,22 +1695,22 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, goto out; } - ret = bdrv_file_open(&bs, backing_file, NULL, 0, &local_err); + ret = bdrv_file_open(&bs, backing_file, NULL, NULL, 0, &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); goto out; } - s = bs->opaque; + base = bs->opaque; - if (!is_snapshot(&s->inode)) { + if (!is_snapshot(&base->inode)) { error_report("cannot clone from a non snapshot vdi"); bdrv_unref(bs); ret = -EINVAL; goto out; } - + s->inode.vdi_id = base->inode.vdi_id; bdrv_unref(bs); } @@ -1743,7 +1743,7 @@ static void sd_close(BlockDriverState *bs) memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_RELEASE_VDI; - hdr.vdi_id = s->inode.vdi_id; + hdr.base_vdi_id = s->inode.vdi_id; wlen = strlen(s->name) + 1; hdr.data_length = wlen; hdr.flags = SD_FLAG_CMD_WRITE; @@ -1846,7 +1846,7 @@ static bool sd_delete(BDRVSheepdogState *s) unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0; SheepdogVdiReq hdr = { .opcode = SD_OP_DEL_VDI, - .vdi_id = s->inode.vdi_id, + .base_vdi_id = s->inode.vdi_id, .data_length = wlen, .flags = SD_FLAG_CMD_WRITE, }; @@ -2442,11 +2442,12 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; - unsigned long start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE, + uint64_t offset = sector_num * BDRV_SECTOR_SIZE; + unsigned long start = offset / SD_DATA_OBJ_SIZE, end = DIV_ROUND_UP((sector_num + nb_sectors) * BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE); unsigned long idx; - int64_t ret = BDRV_BLOCK_DATA; + int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; for (idx = start; idx < end; idx++) { if (inode->data_vdi_id[idx] == 0) { diff --git a/block/stream.c b/block/stream.c index 46bec7d379..dd0b4ac3d2 100644 --- a/block/stream.c +++ b/block/stream.c @@ -75,6 +75,8 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base, 
unused->backing_hd = NULL; bdrv_unref(unused); } + + bdrv_refresh_limits(top); } static void coroutine_fn stream_run(void *opaque) diff --git a/block/vhdx.c b/block/vhdx.c index 1995778945..9ee0a612ff 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1797,7 +1797,7 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options, goto exit; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; diff --git a/block/vmdk.c b/block/vmdk.c index c6b60b4a91..99ca60fdb9 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -428,10 +428,6 @@ static int vmdk_add_extent(BlockDriverState *bs, extent->l2_size = l2_size; extent->cluster_sectors = flat ? sectors : cluster_sectors; - if (!flat) { - bs->bl.write_zeroes_alignment = - MAX(bs->bl.write_zeroes_alignment, cluster_sectors); - } if (s->num_extents > 1) { extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; } else { @@ -640,6 +636,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } + if (bdrv_getlength(file) < + le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) { + error_report("File truncated, expecting at least %lld bytes", + le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE); + return -EINVAL; + } + ret = vmdk_add_extent(bs, file, false, le64_to_cpu(header.capacity), le64_to_cpu(header.gd_offset) << 9, @@ -654,6 +657,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, } extent->compressed = le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; + if (extent->compressed) { + g_free(s->create_type); + s->create_type = g_strdup("streamOptimized"); + } extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; extent->version = le32_to_cpu(header.version); extent->has_zero_grain = le32_to_cpu(header.flags) & 
VMDK4_FLAG_ZERO_GRAIN; @@ -769,8 +776,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, path_combine(extent_path, sizeof(extent_path), desc_file_path, fname); - ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags, - errp); + ret = bdrv_file_open(&extent_file, extent_path, NULL, NULL, + bs->open_flags, errp); if (ret) { return ret; } @@ -891,6 +898,23 @@ fail: return ret; } + +static int vmdk_refresh_limits(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_extents; i++) { + if (!s->extents[i].flat) { + bs->bl.write_zeroes_alignment = + MAX(bs->bl.write_zeroes_alignment, + s->extents[i].cluster_sectors); + } + } + + return 0; +} + static int get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent, uint64_t cluster_offset, @@ -1325,8 +1349,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; - int n, ret; - int64_t index_in_cluster; + int ret; + int64_t index_in_cluster, n; uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1469,7 +1493,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -1807,7 +1831,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options, goto exit; } } - ret = bdrv_file_open(&new_bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&new_bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -2002,6 +2026,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, .bdrv_get_specific_info 
= vmdk_get_specific_info, + .bdrv_refresh_limits = vmdk_refresh_limits, .create_options = vmdk_create_options, }; diff --git a/blockdev.c b/blockdev.c index e457494342..36ceece9ff 100644 --- a/blockdev.c +++ b/blockdev.c @@ -307,12 +307,11 @@ static bool check_throttle_config(ThrottleConfig *cfg, Error **errp) typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType; /* Takes the ownership of bs_opts */ -static DriveInfo *blockdev_init(QDict *bs_opts, +static DriveInfo *blockdev_init(const char *file, QDict *bs_opts, BlockInterfaceType type, Error **errp) { const char *buf; - const char *file = NULL; const char *serial; int ro = 0; int bdrv_flags = 0; @@ -354,7 +353,6 @@ static DriveInfo *blockdev_init(QDict *bs_opts, ro = qemu_opt_get_bool(opts, "read-only", 0); copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false); - file = qemu_opt_get(opts, "file"); serial = qemu_opt_get(opts, "serial"); if ((buf = qemu_opt_get(opts, "discard")) != NULL) { @@ -599,6 +597,10 @@ QemuOptsList qemu_legacy_drive_opts = { .name = "addr", .type = QEMU_OPT_STRING, .help = "pci address (virtio only)", + },{ + .name = "file", + .type = QEMU_OPT_STRING, + .help = "file name", }, /* Options that are passed on, but have special semantics with -drive */ @@ -629,6 +631,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type) const char *devaddr; bool read_only = false; bool copy_on_read; + const char *filename; Error *local_err = NULL; /* Change legacy command line options into QMP ones */ @@ -867,8 +870,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type) } } + filename = qemu_opt_get(legacy_opts, "file"); + /* Actual block device init: Functionality shared with blockdev-add */ - dinfo = blockdev_init(bs_opts, type, &local_err); + dinfo = blockdev_init(filename, bs_opts, type, &local_err); if (dinfo == NULL) { if (error_is_set(&local_err)) { qerror_report_err(local_err); @@ -942,14 +947,22 @@ static void 
blockdev_do_action(int kind, void *data, Error **errp) qmp_transaction(&list, errp); } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +void qmp_blockdev_snapshot_sync(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *snapshot_file, + bool has_snapshot_node_name, + const char *snapshot_node_name, bool has_format, const char *format, - bool has_mode, enum NewImageMode mode, - Error **errp) + bool has_mode, NewImageMode mode, Error **errp) { BlockdevSnapshot snapshot = { + .has_device = has_device, .device = (char *) device, + .has_node_name = has_node_name, + .node_name = (char *) node_name, .snapshot_file = (char *) snapshot_file, + .has_snapshot_node_name = has_snapshot_node_name, + .snapshot_node_name = (char *) snapshot_node_name, .has_format = has_format, .format = (char *) format, .has_mode = has_mode, @@ -1187,8 +1200,14 @@ static void external_snapshot_prepare(BlkTransactionState *common, { BlockDriver *drv; int flags, ret; + QDict *options = NULL; Error *local_err = NULL; + bool has_device = false; const char *device; + bool has_node_name = false; + const char *node_name; + bool has_snapshot_node_name = false; + const char *snapshot_node_name; const char *new_image_file; const char *format = "qcow2"; enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; @@ -1199,7 +1218,14 @@ static void external_snapshot_prepare(BlkTransactionState *common, /* get parameters */ g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); + has_device = action->blockdev_snapshot_sync->has_device; device = action->blockdev_snapshot_sync->device; + has_node_name = action->blockdev_snapshot_sync->has_node_name; + node_name = action->blockdev_snapshot_sync->node_name; + has_snapshot_node_name = + action->blockdev_snapshot_sync->has_snapshot_node_name; + snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name; + new_image_file = 
action->blockdev_snapshot_sync->snapshot_file; if (action->blockdev_snapshot_sync->has_format) { format = action->blockdev_snapshot_sync->format; @@ -1215,9 +1241,21 @@ static void external_snapshot_prepare(BlkTransactionState *common, return; } - state->old_bs = bdrv_find(device); - if (!state->old_bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); + state->old_bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + if (has_node_name && !has_snapshot_node_name) { + error_setg(errp, "New snapshot node name missing"); + return; + } + + if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) { + error_setg(errp, "New snapshot node name already existing"); return; } @@ -1238,7 +1276,7 @@ static void external_snapshot_prepare(BlkTransactionState *common, } } - if (bdrv_check_ext_snapshot(state->old_bs) != EXT_SNAPSHOT_ALLOWED) { + if (!bdrv_is_first_non_filter(state->old_bs)) { error_set(errp, QERR_FEATURE_DISABLED, "snapshot"); return; } @@ -1257,15 +1295,23 @@ static void external_snapshot_prepare(BlkTransactionState *common, } } + if (has_snapshot_node_name) { + options = qdict_new(); + qdict_put(options, "node-name", + qstring_from_str(snapshot_node_name)); + } + /* We will manually add the backing_hd field to the bs later */ state->new_bs = bdrv_new(""); /* TODO Inherit bs->options or only take explicit options with an * extended QMP command? 
*/ - ret = bdrv_open(state->new_bs, new_image_file, NULL, + ret = bdrv_open(state->new_bs, new_image_file, options, flags | BDRV_O_NO_BACKING, drv, &local_err); if (ret != 0) { error_propagate(errp, local_err); } + + QDECREF(options); } static void external_snapshot_commit(BlkTransactionState *common) @@ -1476,14 +1522,19 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp) eject_device(bs, force, errp); } -void qmp_block_passwd(const char *device, const char *password, Error **errp) +void qmp_block_passwd(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *password, Error **errp) { + Error *local_err = NULL; BlockDriverState *bs; int err; - bs = bdrv_find(device); - if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); + bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); return; } @@ -1673,14 +1724,24 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data) return 0; } -void qmp_block_resize(const char *device, int64_t size, Error **errp) +void qmp_block_resize(bool has_device, const char *device, + bool has_node_name, const char *node_name, + int64_t size, Error **errp) { + Error *local_err = NULL; BlockDriverState *bs; int ret; - bs = bdrv_find(device); - if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); + bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? 
node_name : NULL, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + if (!bdrv_is_first_non_filter(bs)) { + error_set(errp, QERR_FEATURE_DISABLED, "resize"); return; } @@ -1947,6 +2008,11 @@ void qmp_drive_backup(const char *device, const char *target, } } +BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) +{ + return bdrv_named_nodes_list(); +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, @@ -2210,7 +2276,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) qdict_flatten(qdict); - blockdev_init(qdict, IF_NONE, &local_err); + blockdev_init(NULL, qdict, IF_NONE, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); goto fail; @@ -2251,10 +2317,6 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_BOOL, .help = "enable/disable snapshot mode", },{ - .name = "file", - .type = QEMU_OPT_STRING, - .help = "disk image", - },{ .name = "discard", .type = QEMU_OPT_STRING, .help = "discard operation (ignore/off, unmap/on)", @@ -256,6 +256,7 @@ coroutine_pool="" seccomp="" glusterfs="" glusterfs_discard="no" +glusterfs_zerofill="no" virtio_blk_data_plane="" gtk="" gtkabi="2.0" @@ -2701,6 +2702,9 @@ if test "$glusterfs" != "no" ; then if $pkg_config --atleast-version=5 glusterfs-api; then glusterfs_discard="yes" fi + if $pkg_config --atleast-version=6 glusterfs-api; then + glusterfs_zerofill="yes" + fi else if test "$glusterfs" = "yes" ; then feature_not_found "GlusterFS backend support" @@ -4229,6 +4233,10 @@ if test "$glusterfs_discard" = "yes" ; then echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak fi +if test "$glusterfs_zerofill" = "yes" ; then + echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak +fi + if test "$libssh2" = "yes" ; then echo "CONFIG_LIBSSH2=y" >> $config_host_mak fi @@ -4766,6 +4774,10 @@ for bios_file in \ do FILES="$FILES pc-bios/`basename $bios_file`" done +for test_file in 
`find $source_path/tests/acpi-test-data -type f` +do + FILES="$FILES tests/acpi-test-data`echo $test_file | sed -e 's/.*acpi-test-data//'`" +done mkdir -p $DIRS for f in $FILES ; do if [ -e "$source_path/$f" ] && [ "$source_path" != `pwd` ]; then diff --git a/disas/i386.c b/disas/i386.c index 47f1f2ea61..044e02c032 100644 --- a/disas/i386.c +++ b/disas/i386.c @@ -2632,17 +2632,17 @@ static const struct dis386 prefix_user_table[][4] = { /* PREGRP87 */ { + { "movbe", { Gv, Ev } }, { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { "movbe", { Gv, Ev } }, { "crc32", { Gdq, { CRC32_Fixup, b_mode } } }, }, /* PREGRP88 */ { + { "movbe", { Ev, Gv } }, { "(bad)", { XX } }, - { "(bad)", { XX } }, - { "(bad)", { XX } }, + { "movbe", { Ev, Gv } }, { "crc32", { Gdq, { CRC32_Fixup, v_mode } } }, }, diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 6b87e9786a..a378c87583 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -479,7 +479,7 @@ Data: None. Example: -{ "event": "WATCHDOG", +{ "event": "WAKEUP", "timestamp": { "seconds": 1344522075, "microseconds": 745528 } } WATCHDOG diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt index f6f577457d..340b751a95 100644 --- a/docs/specs/acpi_cpu_hotplug.txt +++ b/docs/specs/acpi_cpu_hotplug.txt @@ -10,7 +10,9 @@ ACPI GPE block (IO ports 0xafe0-0xafe3, byte access): Generic ACPI GPE block. Bit 2 (GPE.2) used to notify CPU hot-add/remove event to ACPI BIOS, via SCI interrupt. -CPU present bitmap (IO port 0xaf00-0xaf1f, 1-byte access): +CPU present bitmap for: + ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access) + PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) --------------------------------------------------------------- One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. 
@@ -325,7 +325,7 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x hwaddr *plen, bool resolve_subpage) { MemoryRegionSection *section; - Int128 diff; + Int128 diff, diff_page; section = address_space_lookup_region(d, addr, resolve_subpage); /* Compute offset within MemoryRegionSection */ @@ -334,7 +334,9 @@ address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *x /* Compute offset within MemoryRegion */ *xlat = addr + section->offset_within_region; + diff_page = int128_make64(((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr); diff = int128_sub(section->mr->size, int128_make64(addr)); + diff = int128_min(diff, diff_page); *plen = int128_get64(int128_min(diff, int128_make64(*plen))); return section; } @@ -349,7 +351,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, hwaddr len = *plen; for (;;) { - section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true); + section = address_space_translate_internal(as->dispatch, addr, &addr, &len, true); mr = section->mr; if (!mr->iommu_ops) { @@ -1070,7 +1072,7 @@ static void *file_ram_alloc(RAMBlock *block, } /* MAP_POPULATE silently ignores failures */ - for (i = 0; i < (memory/hpagesize)-1; i++) { + for (i = 0; i < (memory/hpagesize); i++) { memset(area + (hpagesize*i), 0, 1); } diff --git a/hmp-commands.hx b/hmp-commands.hx index feca0847d0..f3fc514427 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -35,6 +35,11 @@ STEXI @item commit @findex commit Commit changes to the disk images (if -snapshot is used) or backing files. +If the backing file is smaller than the snapshot, then the backing file will be +resized to be the same size as the snapshot. If the snapshot is smaller than +the backing file, the backing file will not be truncated. If you want the +backing file to match the size of the smaller snapshot, you can safely truncate +it yourself once the commit operation successfully completes. 
ETEXI { @@ -871,7 +871,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict) const char *password = qdict_get_str(qdict, "password"); Error *errp = NULL; - qmp_block_passwd(device, password, &errp); + qmp_block_passwd(true, device, false, NULL, password, &errp); hmp_handle_error(mon, &errp); } @@ -893,7 +893,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) int64_t size = qdict_get_int(qdict, "size"); Error *errp = NULL; - qmp_block_resize(device, size, &errp); + qmp_block_resize(true, device, false, NULL, size, &errp); hmp_handle_error(mon, &errp); } @@ -972,7 +972,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) } mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; - qmp_blockdev_snapshot_sync(device, filename, !!format, format, + qmp_blockdev_snapshot_sync(true, device, false, NULL, + filename, false, NULL, + !!format, format, true, mode, &errp); hmp_handle_error(mon, &errp); } @@ -1092,11 +1094,11 @@ void hmp_eject(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &err); } -static void hmp_change_read_arg(Monitor *mon, const char *password, - void *opaque) +static void hmp_change_read_arg(void *opaque, const char *password, + void *readline_opaque) { qmp_change_vnc_password(password, NULL); - monitor_read_command(mon, 1); + monitor_read_command(opaque, 1); } void hmp_change(Monitor *mon, const QDict *qdict) diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index a0b63b5626..397d32babd 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -1,2 +1 @@ -common-obj-$(CONFIG_ACPI) += core.o piix4.o ich9.o - +common-obj-$(CONFIG_ACPI) += core.o piix4.o ich9.o pcihp.o cpu_hotplug.o diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c new file mode 100644 index 0000000000..48928dc0ea --- /dev/null +++ b/hw/acpi/cpu_hotplug.c @@ -0,0 +1,64 @@ +/* + * QEMU ACPI hotplug utilities + * + * Copyright (C) 2013 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This 
work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "hw/hw.h" +#include "hw/acpi/cpu_hotplug.h" + +static uint64_t cpu_status_read(void *opaque, hwaddr addr, unsigned int size) +{ + AcpiCpuHotplug *cpus = opaque; + uint64_t val = cpus->sts[addr]; + + return val; +} + +static void cpu_status_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + /* TODO: implement VCPU removal on guest signal that CPU can be removed */ +} + +static const MemoryRegionOps AcpiCpuHotplug_ops = { + .read = cpu_status_read, + .write = cpu_status_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +void AcpiCpuHotplug_add(ACPIGPE *gpe, AcpiCpuHotplug *g, CPUState *cpu) +{ + CPUClass *k = CPU_GET_CLASS(cpu); + int64_t cpu_id; + + *gpe->sts = *gpe->sts | ACPI_CPU_HOTPLUG_STATUS; + cpu_id = k->get_arch_id(CPU(cpu)); + g->sts[cpu_id / 8] |= (1 << (cpu_id % 8)); +} + +void AcpiCpuHotplug_init(MemoryRegion *parent, Object *owner, + AcpiCpuHotplug *gpe_cpu, uint16_t base) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + CPUClass *cc = CPU_GET_CLASS(cpu); + int64_t id = cc->get_arch_id(cpu); + + g_assert((id / 8) < ACPI_GPE_PROC_LEN); + gpe_cpu->sts[id / 8] |= (1 << (id % 8)); + } + memory_region_init_io(&gpe_cpu->io, owner, &AcpiCpuHotplug_ops, + gpe_cpu, "acpi-cpu-hotplug", ACPI_GPE_PROC_LEN); + memory_region_add_subregion(parent, base, &gpe_cpu->io); +} diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 30f0df8713..0afac425ec 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -185,6 +185,15 @@ static void pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&pm->acpi_regs); } +static void ich9_cpu_added_req(Notifier *n, void *opaque) +{ + ICH9LPCPMRegs *pm = container_of(n, ICH9LPCPMRegs, cpu_added_notifier); + + assert(pm != NULL); + AcpiCpuHotplug_add(&pm->acpi_regs.gpe, &pm->gpe_cpu, CPU(opaque)); + 
acpi_update_sci(&pm->acpi_regs, pm->irq); +} + void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq) { @@ -210,6 +219,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; qemu_register_powerdown_notifier(&pm->powerdown_notifier); + + AcpiCpuHotplug_init(pci_address_space_io(lpc_pci), OBJECT(lpc_pci), + &pm->gpe_cpu, ICH9_CPU_HOTPLUG_IO_BASE); + pm->cpu_added_notifier.notify = ich9_cpu_added_req; + qemu_register_cpu_added_notifier(&pm->cpu_added_notifier); } static void ich9_pm_get_gpe0_blk(Object *obj, Visitor *v, diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c new file mode 100644 index 0000000000..3fa3d7c290 --- /dev/null +++ b/hw/acpi/pcihp.c @@ -0,0 +1,316 @@ +/* + * QEMU<->ACPI BIOS PCI hotplug interface + * + * QEMU supports PCI hotplug via ACPI. This module + * implements the interface between QEMU and the ACPI BIOS. + * Interface specification - see docs/specs/acpi_pci_hotplug.txt + * + * Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) + * Copyright (c) 2006 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "hw/acpi/pcihp.h" + +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "hw/pci/pci.h" +#include "hw/acpi/acpi.h" +#include "sysemu/sysemu.h" +#include "qemu/range.h" +#include "exec/ioport.h" +#include "exec/address-spaces.h" +#include "hw/pci/pci_bus.h" +#include "qom/qom-qobject.h" +#include "qapi/qmp/qint.h" + +//#define DEBUG + +#ifdef DEBUG +# define ACPI_PCIHP_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) +#else +# define ACPI_PCIHP_DPRINTF(format, ...) do { } while (0) +#endif + +#define PCI_HOTPLUG_ADDR 0xae00 +#define PCI_HOTPLUG_SIZE 0x0014 +#define PCI_UP_BASE 0xae00 +#define PCI_DOWN_BASE 0xae04 +#define PCI_EJ_BASE 0xae08 +#define PCI_RMV_BASE 0xae0c +#define PCI_SEL_BASE 0xae10 + +typedef struct AcpiPciHpFind { + int bsel; + PCIBus *bus; +} AcpiPciHpFind; + +static int acpi_pcihp_get_bsel(PCIBus *bus) +{ + QObject *o = object_property_get_qobject(OBJECT(bus), + ACPI_PCIHP_PROP_BSEL, NULL); + int64_t bsel = -1; + if (o) { + bsel = qint_get_int(qobject_to_qint(o)); + } + if (bsel < 0) { + return -1; + } + return bsel; +} + +static void acpi_pcihp_test_hotplug_bus(PCIBus *bus, void *opaque) +{ + AcpiPciHpFind *find = opaque; + if (find->bsel == acpi_pcihp_get_bsel(bus)) { + find->bus = bus; + } +} + +static PCIBus *acpi_pcihp_find_hotplug_bus(AcpiPciHpState *s, int bsel) +{ + AcpiPciHpFind find = { .bsel = bsel, .bus = NULL }; + + if (bsel < 0) { + return NULL; + } + + pci_for_each_bus(s->root, acpi_pcihp_test_hotplug_bus, &find); + + /* Make bsel 0 eject root bus if bsel property is not set, + * for compatibility with non acpi setups. + * TODO: really needed? + */ + if (!bsel && !find.bus) { + find.bus = s->root; + } + return find.bus; +} + +static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev) +{ + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + /* + * ACPI doesn't allow hotplug of bridge devices. 
Don't allow + * hot-unplug of bridge devices unless they were added by hotplug + * (and so, not described by acpi). + */ + return (pc->is_bridge && !dev->qdev.hotplugged) || pc->no_hotplug; +} + +static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots) +{ + BusChild *kid, *next; + int slot = ffs(slots) - 1; + bool slot_free = true; + PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel); + + if (!bus) { + return; + } + + /* Mark request as complete */ + s->acpi_pcihp_pci_status[bsel].down &= ~(1U << slot); + + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + DeviceState *qdev = kid->child; + PCIDevice *dev = PCI_DEVICE(qdev); + if (PCI_SLOT(dev->devfn) == slot) { + if (acpi_pcihp_pc_no_hotplug(s, dev)) { + slot_free = false; + } else { + object_unparent(OBJECT(qdev)); + } + } + } + if (slot_free) { + s->acpi_pcihp_pci_status[bsel].device_present &= ~(1U << slot); + } +} + +static void acpi_pcihp_update_hotplug_bus(AcpiPciHpState *s, int bsel) +{ + BusChild *kid, *next; + PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel); + + /* Execute any pending removes during reset */ + while (s->acpi_pcihp_pci_status[bsel].down) { + acpi_pcihp_eject_slot(s, bsel, s->acpi_pcihp_pci_status[bsel].down); + } + + s->acpi_pcihp_pci_status[bsel].hotplug_enable = ~0; + s->acpi_pcihp_pci_status[bsel].device_present = 0; + + if (!bus) { + return; + } + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + DeviceState *qdev = kid->child; + PCIDevice *pdev = PCI_DEVICE(qdev); + int slot = PCI_SLOT(pdev->devfn); + + if (acpi_pcihp_pc_no_hotplug(s, pdev)) { + s->acpi_pcihp_pci_status[bsel].hotplug_enable &= ~(1U << slot); + } + + s->acpi_pcihp_pci_status[bsel].device_present |= (1U << slot); + } +} + +static void acpi_pcihp_update(AcpiPciHpState *s) +{ + int i; + + for (i = 0; i < ACPI_PCIHP_MAX_HOTPLUG_BUS; ++i) { + acpi_pcihp_update_hotplug_bus(s, i); + } +} + +void acpi_pcihp_reset(AcpiPciHpState *s) +{ + acpi_pcihp_update(s); +} + 
+static void enable_device(AcpiPciHpState *s, unsigned bsel, int slot) +{ + s->acpi_pcihp_pci_status[bsel].device_present |= (1U << slot); +} + +static void disable_device(AcpiPciHpState *s, unsigned bsel, int slot) +{ + s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); +} + +int acpi_pcihp_device_hotplug(AcpiPciHpState *s, PCIDevice *dev, + PCIHotplugState state) +{ + int slot = PCI_SLOT(dev->devfn); + int bsel = acpi_pcihp_get_bsel(dev->bus); + if (bsel < 0) { + return -1; + } + + /* Don't send event when device is enabled during qemu machine creation: + * it is present on boot, no hotplug event is necessary. We do send an + * event when the device is disabled later. */ + if (state == PCI_COLDPLUG_ENABLED) { + s->acpi_pcihp_pci_status[bsel].device_present |= (1U << slot); + return 0; + } + + if (state == PCI_HOTPLUG_ENABLED) { + enable_device(s, bsel, slot); + } else { + disable_device(s, bsel, slot); + } + + return 0; +} + +static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) +{ + AcpiPciHpState *s = opaque; + uint32_t val = 0; + int bsel = s->hotplug_select; + + if (bsel < 0 || bsel > ACPI_PCIHP_MAX_HOTPLUG_BUS) { + return 0; + } + + switch (addr) { + case PCI_UP_BASE - PCI_HOTPLUG_ADDR: + /* Manufacture an "up" value to cause a device check on any hotplug + * slot with a device. Extra device checks are harmless. 
*/ + val = s->acpi_pcihp_pci_status[bsel].device_present & + s->acpi_pcihp_pci_status[bsel].hotplug_enable; + ACPI_PCIHP_DPRINTF("pci_up_read %" PRIu32 "\n", val); + break; + case PCI_DOWN_BASE - PCI_HOTPLUG_ADDR: + val = s->acpi_pcihp_pci_status[bsel].down; + ACPI_PCIHP_DPRINTF("pci_down_read %" PRIu32 "\n", val); + break; + case PCI_EJ_BASE - PCI_HOTPLUG_ADDR: + /* No feature defined yet */ + ACPI_PCIHP_DPRINTF("pci_features_read %" PRIu32 "\n", val); + break; + case PCI_RMV_BASE - PCI_HOTPLUG_ADDR: + val = s->acpi_pcihp_pci_status[bsel].hotplug_enable; + ACPI_PCIHP_DPRINTF("pci_rmv_read %" PRIu32 "\n", val); + break; + case PCI_SEL_BASE - PCI_HOTPLUG_ADDR: + val = s->hotplug_select; + ACPI_PCIHP_DPRINTF("pci_sel_read %" PRIu32 "\n", val); + default: + break; + } + + return val; +} + +static void pci_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + AcpiPciHpState *s = opaque; + switch (addr) { + case PCI_EJ_BASE - PCI_HOTPLUG_ADDR: + if (s->hotplug_select >= ACPI_PCIHP_MAX_HOTPLUG_BUS) { + break; + } + acpi_pcihp_eject_slot(s, s->hotplug_select, data); + ACPI_PCIHP_DPRINTF("pciej write %" HWADDR_PRIx " <== %" PRIu64 "\n", + addr, data); + break; + case PCI_SEL_BASE - PCI_HOTPLUG_ADDR: + s->hotplug_select = data; + ACPI_PCIHP_DPRINTF("pcisel write %" HWADDR_PRIx " <== %" PRIu64 "\n", + addr, data); + default: + break; + } +} + +static const MemoryRegionOps acpi_pcihp_io_ops = { + .read = pci_read, + .write = pci_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +void acpi_pcihp_init(AcpiPciHpState *s, PCIBus *root_bus, + MemoryRegion *address_space_io) +{ + s->root= root_bus; + memory_region_init_io(&s->io, NULL, &acpi_pcihp_io_ops, s, + "acpi-pci-hotplug", + PCI_HOTPLUG_SIZE); + memory_region_add_subregion(address_space_io, PCI_HOTPLUG_ADDR, &s->io); +} + +const VMStateDescription vmstate_acpi_pcihp_pci_status = { + .name = "acpi_pcihp_pci_status", + .version_id = 1, + 
.minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_UINT32(up, AcpiPciHpPciStatus), + VMSTATE_UINT32(down, AcpiPciHpPciStatus), + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 20353b983e..5d55a3c222 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -30,6 +30,8 @@ #include "hw/nvram/fw_cfg.h" #include "exec/address-spaces.h" #include "hw/acpi/piix4.h" +#include "hw/acpi/pcihp.h" +#include "hw/acpi/cpu_hotplug.h" //#define DEBUG @@ -49,21 +51,13 @@ #define PCI_EJ_BASE 0xae08 #define PCI_RMV_BASE 0xae0c -#define PIIX4_PROC_BASE 0xaf00 -#define PIIX4_PROC_LEN 32 - #define PIIX4_PCI_HOTPLUG_STATUS 2 -#define PIIX4_CPU_HOTPLUG_STATUS 4 struct pci_status { uint32_t up; /* deprecated, maintained for migration compatibility */ uint32_t down; }; -typedef struct CPUStatus { - uint8_t sts[PIIX4_PROC_LEN]; -} CPUStatus; - typedef struct PIIX4PMState { /*< private >*/ PCIDevice parent_obj; @@ -73,8 +67,6 @@ typedef struct PIIX4PMState { uint32_t io_base; MemoryRegion io_gpe; - MemoryRegion io_pci; - MemoryRegion io_cpu; ACPIREGS ar; APMState apm; @@ -88,16 +80,21 @@ typedef struct PIIX4PMState { Notifier machine_ready; Notifier powerdown_notifier; - /* for pci hotplug */ + /* for legacy pci hotplug (compatible with qemu 1.6 and older) */ + MemoryRegion io_pci; struct pci_status pci0_status; uint32_t pci0_hotplug_enable; uint32_t pci0_slot_device_present; + /* for new pci hotplug (with PCI2PCI bridge support) */ + AcpiPciHpState acpi_pci_hotplug; + bool use_acpi_pci_hotplug; + uint8_t disable_s3; uint8_t disable_s4; uint8_t s4_val; - CPUStatus gpe_cpu; + AcpiCpuHotplug gpe_cpu; Notifier cpu_added_notifier; } PIIX4PMState; @@ -263,6 +260,18 @@ static int acpi_load_old(QEMUFile *f, void *opaque, int version_id) return ret; } +static bool vmstate_test_use_acpi_pci_hotplug(void *opaque, int version_id) +{ + PIIX4PMState *s = opaque; + return s->use_acpi_pci_hotplug; +} + +static bool 
vmstate_test_no_use_acpi_pci_hotplug(void *opaque, int version_id) +{ + PIIX4PMState *s = opaque; + return !s->use_acpi_pci_hotplug; +} + /* qemu-kvm 1.2 uses version 3 but advertised as 2 * To support incoming qemu-kvm 1.2 migration, change version_id * and minimum_version_id to 2 below (which breaks migration from @@ -285,8 +294,12 @@ static const VMStateDescription vmstate_acpi = { VMSTATE_TIMER(ar.tmr.timer, PIIX4PMState), VMSTATE_INT64(ar.tmr.overflow_time, PIIX4PMState), VMSTATE_STRUCT(ar.gpe, PIIX4PMState, 2, vmstate_gpe, ACPIGPE), - VMSTATE_STRUCT(pci0_status, PIIX4PMState, 2, vmstate_pci_status, - struct pci_status), + VMSTATE_STRUCT_TEST(pci0_status, PIIX4PMState, + vmstate_test_no_use_acpi_pci_hotplug, + 2, vmstate_pci_status, + struct pci_status), + VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState, + vmstate_test_use_acpi_pci_hotplug), VMSTATE_END_OF_LIST() } }; @@ -364,7 +377,11 @@ static void piix4_reset(void *opaque) pci_conf[0x5B] = 0x02; } pm_io_space_update(s); - piix4_update_hotplug(s); + if (s->use_acpi_pci_hotplug) { + acpi_pcihp_reset(&s->acpi_pci_hotplug); + } else { + piix4_update_hotplug(s); + } } static void piix4_pm_powerdown_req(Notifier *n, void *opaque) @@ -375,6 +392,26 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&s->ar); } +static int piix4_acpi_pci_hotplug(DeviceState *qdev, PCIDevice *dev, + PCIHotplugState state) +{ + PIIX4PMState *s = PIIX4_PM(qdev); + int ret = acpi_pcihp_device_hotplug(&s->acpi_pci_hotplug, dev, state); + if (ret < 0) { + return ret; + } + s->ar.gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS; + + acpi_update_sci(&s->ar, s->irq); + return 0; +} + +static void piix4_update_bus_hotplug(PCIBus *bus, void *opaque) +{ + PIIX4PMState *s = opaque; + pci_bus_hotplug(bus, piix4_acpi_pci_hotplug, DEVICE(s)); +} + static void piix4_pm_machine_ready(Notifier *n, void *opaque) { PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready); @@ -388,6 +425,10 @@ static void 
piix4_pm_machine_ready(Notifier *n, void *opaque) pci_conf[0x63] = 0x60; pci_conf[0x67] = (memory_region_present(io_as, 0x3f8) ? 0x08 : 0) | (memory_region_present(io_as, 0x2f8) ? 0x90 : 0); + + if (s->use_acpi_pci_hotplug) { + pci_for_each_bus(d->bus, piix4_update_bus_hotplug, s); + } } static void piix4_pm_add_propeties(PIIX4PMState *s) @@ -509,6 +550,8 @@ static Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + use_acpi_pci_hotplug, true), DEFINE_PROP_END_OF_LIST(), }; @@ -632,61 +675,13 @@ static const MemoryRegionOps piix4_pci_ops = { }, }; -static uint64_t cpu_status_read(void *opaque, hwaddr addr, unsigned int size) -{ - PIIX4PMState *s = opaque; - CPUStatus *cpus = &s->gpe_cpu; - uint64_t val = cpus->sts[addr]; - - return val; -} - -static void cpu_status_write(void *opaque, hwaddr addr, uint64_t data, - unsigned int size) -{ - /* TODO: implement VCPU removal on guest signal that CPU can be removed */ -} - -static const MemoryRegionOps cpu_hotplug_ops = { - .read = cpu_status_read, - .write = cpu_status_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - -typedef enum { - PLUG, - UNPLUG, -} HotplugEventType; - -static void piix4_cpu_hotplug_req(PIIX4PMState *s, CPUState *cpu, - HotplugEventType action) -{ - CPUStatus *g = &s->gpe_cpu; - ACPIGPE *gpe = &s->ar.gpe; - CPUClass *k = CPU_GET_CLASS(cpu); - int64_t cpu_id; - - assert(s != NULL); - - *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS; - cpu_id = k->get_arch_id(CPU(cpu)); - if (action == PLUG) { - g->sts[cpu_id / 8] |= (1 << (cpu_id % 8)); - } else { - g->sts[cpu_id / 8] &= ~(1 << (cpu_id % 8)); - } - acpi_update_sci(&s->ar, s->irq); -} - static void 
piix4_cpu_added_req(Notifier *n, void *opaque) { PIIX4PMState *s = container_of(n, PIIX4PMState, cpu_added_notifier); - piix4_cpu_hotplug_req(s, CPU(opaque), PLUG); + assert(s != NULL); + AcpiCpuHotplug_add(&s->ar.gpe, &s->gpe_cpu, CPU(opaque)); + acpi_update_sci(&s->ar, s->irq); } static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, @@ -695,28 +690,22 @@ static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, PCIBus *bus, PIIX4PMState *s) { - CPUState *cpu; - memory_region_init_io(&s->io_gpe, OBJECT(s), &piix4_gpe_ops, s, "acpi-gpe0", GPE_LEN); memory_region_add_subregion(parent, GPE_BASE, &s->io_gpe); - memory_region_init_io(&s->io_pci, OBJECT(s), &piix4_pci_ops, s, - "acpi-pci-hotplug", PCI_HOTPLUG_SIZE); - memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR, - &s->io_pci); - pci_bus_hotplug(bus, piix4_device_hotplug, DEVICE(s)); - - CPU_FOREACH(cpu) { - CPUClass *cc = CPU_GET_CLASS(cpu); - int64_t id = cc->get_arch_id(cpu); - - g_assert((id / 8) < PIIX4_PROC_LEN); - s->gpe_cpu.sts[id / 8] |= (1 << (id % 8)); + if (s->use_acpi_pci_hotplug) { + acpi_pcihp_init(&s->acpi_pci_hotplug, bus, parent); + } else { + memory_region_init_io(&s->io_pci, OBJECT(s), &piix4_pci_ops, s, + "acpi-pci-hotplug", PCI_HOTPLUG_SIZE); + memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR, + &s->io_pci); + pci_bus_hotplug(bus, piix4_device_hotplug, DEVICE(s)); } - memory_region_init_io(&s->io_cpu, OBJECT(s), &cpu_hotplug_ops, s, - "acpi-cpu-hotplug", PIIX4_PROC_LEN); - memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu); + + AcpiCpuHotplug_init(parent, OBJECT(s), &s->gpe_cpu, + PIIX4_CPU_HOTPLUG_IO_BASE); s->cpu_added_notifier.notify = piix4_cpu_added_req; qemu_register_cpu_added_notifier(&s->cpu_added_notifier); } diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c index 07a43bfe89..986f2a9c92 100644 --- a/hw/audio/hda-codec.c +++ b/hw/audio/hda-codec.c @@ -559,6 +559,21 @@ 
static int hda_audio_post_load(void *opaque, int version) return 0; } +static void hda_audio_reset(DeviceState *dev) +{ + HDAAudioState *a = DO_UPCAST(HDAAudioState, hda.qdev, dev); + HDAAudioStream *st; + int i; + + dprint(a, 1, "%s\n", __func__); + for (i = 0; i < ARRAY_SIZE(a->st); i++) { + st = a->st + i; + if (st->node != NULL) { + hda_audio_set_running(st, false); + } + } +} + static const VMStateDescription vmstate_hda_audio_stream = { .name = "hda-audio-stream", .version_id = 1, @@ -640,6 +655,7 @@ static void hda_audio_output_class_init(ObjectClass *klass, void *data) k->stream = hda_audio_stream; set_bit(DEVICE_CATEGORY_SOUND, dc->categories); dc->desc = "HDA Audio Codec, output-only (line-out)"; + dc->reset = hda_audio_reset; dc->vmsd = &vmstate_hda_audio; dc->props = hda_audio_properties; } @@ -662,6 +678,7 @@ static void hda_audio_duplex_class_init(ObjectClass *klass, void *data) k->stream = hda_audio_stream; set_bit(DEVICE_CATEGORY_SOUND, dc->categories); dc->desc = "HDA Audio Codec, duplex (line-out, line-in)"; + dc->reset = hda_audio_reset; dc->vmsd = &vmstate_hda_audio; dc->props = hda_audio_properties; } @@ -684,6 +701,7 @@ static void hda_audio_micro_class_init(ObjectClass *klass, void *data) k->stream = hda_audio_stream; set_bit(DEVICE_CATEGORY_SOUND, dc->categories); dc->desc = "HDA Audio Codec, duplex (speaker, microphone)"; + dc->reset = hda_audio_reset; dc->vmsd = &vmstate_hda_audio; dc->props = hda_audio_properties; } diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 19d0961a47..8a568e5edb 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -731,7 +731,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, virtio_blk_save, virtio_blk_load, s); bdrv_set_dev_ops(s->bs, &virtio_block_ops, s); - bdrv_set_buffer_alignment(s->bs, s->conf->logical_block_size); + bdrv_set_guest_block_size(s->bs, s->conf->logical_block_size); 
bdrv_iostatus_enable(s->bs); diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index f0333a0cad..cb9d456814 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -495,7 +495,7 @@ static int xenfb_map_fb(struct XenFB *xenfb) munmap(map, n_fbdirs * XC_PAGE_SIZE); xenfb->pixels = xc_map_foreign_pages(xen_xc, xenfb->c.xendev.dom, - PROT_READ | PROT_WRITE, fbmfns, xenfb->fbpages); + PROT_READ, fbmfns, xenfb->fbpages); if (xenfb->pixels == NULL) goto out; @@ -903,6 +903,11 @@ static void fb_disconnect(struct XenDevice *xendev) fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + if (fb->pixels == MAP_FAILED) { + xen_be_printf(xendev, 0, + "Couldn't replace the framebuffer with anonymous memory errno=%d\n", + errno); + } common_unbind(&fb->c); fb->feature_update = 0; fb->bug_trigger = 0; diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 09ac433cf9..3df1612651 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -17,7 +17,7 @@ iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \ ifdef IASL #IASL Present. 
Generate hex files from .dsl hw/i386/%.hex: $(SRC_PATH)/hw/i386/%.dsl $(SRC_PATH)/scripts/acpi_extract_preprocess.py $(SRC_PATH)/scripts/acpi_extract.py - $(call quiet-command, cpp -P $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") + $(call quiet-command, cpp -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract_preprocess.py $*.dsl.i.orig > $*.dsl.i, " ACPI_PREPROCESS $(TARGET_DIR)$*.dsl.i") $(call quiet-command, $(IASL) $(call iasl-option,$(IASL),-Pn,) -vs -l -tc -p $* $*.dsl.i $(if $(V), , > /dev/null) 2>&1 ," IASL $(TARGET_DIR)$*.dsl.i") $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract.py $*.lst > $*.off, " ACPI_EXTRACT $(TARGET_DIR)$*.off") diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 48312f5a83..50e83f3b46 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -36,9 +36,11 @@ #include "hw/nvram/fw_cfg.h" #include "bios-linker-loader.h" #include "hw/loader.h" +#include "hw/isa/isa.h" /* Supported chipsets: */ #include "hw/acpi/piix4.h" +#include "hw/acpi/pcihp.h" #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" @@ -78,8 +80,15 @@ typedef struct AcpiMiscInfo { uint16_t pvpanic_port; } AcpiMiscInfo; +typedef struct AcpiBuildPciBusHotplugState { + GArray *device_table; + GArray *notify_table; + struct AcpiBuildPciBusHotplugState *parent; +} AcpiBuildPciBusHotplugState; + static void acpi_get_dsdt(AcpiMiscInfo *info) { + uint16_t *applesmc_sta; Object *piix = piix4_pm_find(); Object *lpc = ich9_lpc_find(); assert(!!piix != !!lpc); @@ -87,11 +96,17 @@ static void acpi_get_dsdt(AcpiMiscInfo *info) if (piix) { info->dsdt_code = AcpiDsdtAmlCode; info->dsdt_size = sizeof AcpiDsdtAmlCode; + applesmc_sta = piix_dsdt_applesmc_sta; } if (lpc) { info->dsdt_code = Q35AcpiDsdtAmlCode; info->dsdt_size = sizeof Q35AcpiDsdtAmlCode; + applesmc_sta = q35_dsdt_applesmc_sta; } + + /* Patch in 
appropriate value for AppleSMC _STA */ + *(uint8_t *)(info->dsdt_code + *applesmc_sta) = + applesmc_find() ? 0x0b : 0x00; } static @@ -171,38 +186,6 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) NULL); } -static void acpi_get_hotplug_info(AcpiMiscInfo *misc) -{ - int i; - PCIBus *bus = find_i440fx(); - - if (!bus) { - /* Only PIIX supports ACPI hotplug */ - memset(misc->slot_hotplug_enable, 0, sizeof misc->slot_hotplug_enable); - return; - } - - memset(misc->slot_hotplug_enable, 0xff, - DIV_ROUND_UP(PCI_SLOT_MAX, BITS_PER_BYTE)); - - for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { - PCIDeviceClass *pc; - PCIDevice *pdev = bus->devices[i]; - - if (!pdev) { - continue; - } - - pc = PCI_DEVICE_GET_CLASS(pdev); - - if (pc->no_hotplug) { - int slot = PCI_SLOT(i); - - clear_bit(slot, misc->slot_hotplug_enable); - } - } -} - static void acpi_get_misc_info(AcpiMiscInfo *info) { info->has_hpet = hpet_find(); @@ -368,6 +351,12 @@ static void build_package(GArray *package, uint8_t op, unsigned min_bytes) build_prepend_byte(package, op); } +static void build_extop_package(GArray *package, uint8_t op) +{ + build_package(package, op, 1); + build_prepend_byte(package, 0x5B); /* ExtOpPrefix */ +} + static void build_append_value(GArray *table, uint32_t value, int size) { uint8_t prefix; @@ -394,8 +383,44 @@ static void build_append_value(GArray *table, uint32_t value, int size) } } -static void build_append_notify_target(GArray *method, GArray *target_name, - uint32_t value, int size) +static void build_append_int(GArray *table, uint32_t value) +{ + if (value == 0x00) { + build_append_byte(table, 0x00); /* ZeroOp */ + } else if (value == 0x01) { + build_append_byte(table, 0x01); /* OneOp */ + } else if (value <= 0xFF) { + build_append_value(table, value, 1); + } else if (value <= 0xFFFFF) { + build_append_value(table, value, 2); + } else { + build_append_value(table, value, 4); + } +} + +static GArray *build_alloc_method(const char *name, uint8_t arg_count) +{ + GArray *method 
= build_alloc_array(); + + build_append_nameseg(method, "%s", name); + build_append_byte(method, arg_count); /* MethodFlags: ArgCount */ + + return method; +} + +static void build_append_and_cleanup_method(GArray *device, GArray *method) +{ + uint8_t op = 0x14; /* MethodOp */ + + build_package(method, op, 0); + + build_append_array(device, method); + build_free_array(method); +} + +static void build_append_notify_target_ifequal(GArray *method, + GArray *target_name, + uint32_t value, int size) { GArray *notify = build_alloc_array(); uint8_t op = 0xA0; /* IfOp */ @@ -415,6 +440,7 @@ static void build_append_notify_target(GArray *method, GArray *target_name, build_free_array(notify); } +/* End here */ #define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */ static inline void *acpi_data_push(GArray *table_data, unsigned size) @@ -624,44 +650,236 @@ static inline char acpi_get_hex(uint32_t val) #include "hw/i386/ssdt-pcihp.hex" static void -build_append_notify(GArray *device, const char *name, - const char *format, int skip, int count) +build_append_notify_method(GArray *device, const char *name, + const char *format, int count) { int i; - GArray *method = build_alloc_array(); - uint8_t op = 0x14; /* MethodOp */ + GArray *method = build_alloc_method(name, 2); - build_append_nameseg(method, "%s", name); - build_append_byte(method, 0x02); /* MethodFlags: ArgCount */ - for (i = skip; i < count; i++) { + for (i = 0; i < count; i++) { GArray *target = build_alloc_array(); build_append_nameseg(target, format, i); assert(i < 256); /* Fits in 1 byte */ - build_append_notify_target(method, target, i, 1); + build_append_notify_target_ifequal(method, target, i, 1); build_free_array(target); } - build_package(method, op, 2); - build_append_array(device, method); - build_free_array(method); + build_append_and_cleanup_method(device, method); } -static void patch_pcihp(int slot, uint8_t *ssdt_ptr, uint32_t eject) +static void patch_pcihp(int slot, uint8_t *ssdt_ptr) { - 
ssdt_ptr[ACPI_PCIHP_OFFSET_HEX] = acpi_get_hex(slot >> 4); - ssdt_ptr[ACPI_PCIHP_OFFSET_HEX + 1] = acpi_get_hex(slot); + unsigned devfn = PCI_DEVFN(slot, 0); + + ssdt_ptr[ACPI_PCIHP_OFFSET_HEX] = acpi_get_hex(devfn >> 4); + ssdt_ptr[ACPI_PCIHP_OFFSET_HEX + 1] = acpi_get_hex(devfn); ssdt_ptr[ACPI_PCIHP_OFFSET_ID] = slot; ssdt_ptr[ACPI_PCIHP_OFFSET_ADR + 2] = slot; +} + +/* Assign BSEL property to all buses. In the future, this can be changed + * to only assign to buses that support hotplug. + */ +static void *acpi_set_bsel(PCIBus *bus, void *opaque) +{ + unsigned *bsel_alloc = opaque; + unsigned *bus_bsel; + + if (bus->qbus.allow_hotplug) { + bus_bsel = g_malloc(sizeof *bus_bsel); - /* Runtime patching of ACPI_EJ0: to disable hotplug for a slot, - * replace the method name: _EJ0 by ACPI_EJ0_. + *bus_bsel = (*bsel_alloc)++; + object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, + bus_bsel, NULL); + } + + return bsel_alloc; +} + +static void acpi_set_pci_info(void) +{ + PCIBus *bus = find_i440fx(); /* TODO: Q35 support */ + unsigned bsel_alloc = 0; + + if (bus) { + /* Scan all PCI buses. Set property to enable acpi based hotplug. 
*/ + pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &bsel_alloc); + } +} + +static void build_pci_bus_state_init(AcpiBuildPciBusHotplugState *state, + AcpiBuildPciBusHotplugState *parent) +{ + state->parent = parent; + state->device_table = build_alloc_array(); + state->notify_table = build_alloc_array(); +} + +static void build_pci_bus_state_cleanup(AcpiBuildPciBusHotplugState *state) +{ + build_free_array(state->device_table); + build_free_array(state->notify_table); +} + +static void *build_pci_bus_begin(PCIBus *bus, void *parent_state) +{ + AcpiBuildPciBusHotplugState *parent = parent_state; + AcpiBuildPciBusHotplugState *child = g_malloc(sizeof *child); + + build_pci_bus_state_init(child, parent); + + return child; +} + +static void build_pci_bus_end(PCIBus *bus, void *bus_state) +{ + AcpiBuildPciBusHotplugState *child = bus_state; + AcpiBuildPciBusHotplugState *parent = child->parent; + GArray *bus_table = build_alloc_array(); + DECLARE_BITMAP(slot_hotplug_enable, PCI_SLOT_MAX); + uint8_t op; + int i; + QObject *bsel; + GArray *method; + bool bus_hotplug_support = false; + + if (bus->parent_dev) { + op = 0x82; /* DeviceOp */ + build_append_nameseg(bus_table, "S%.02X_", + bus->parent_dev->devfn); + build_append_byte(bus_table, 0x08); /* NameOp */ + build_append_nameseg(bus_table, "_SUN"); + build_append_value(bus_table, PCI_SLOT(bus->parent_dev->devfn), 1); + build_append_byte(bus_table, 0x08); /* NameOp */ + build_append_nameseg(bus_table, "_ADR"); + build_append_value(bus_table, (PCI_SLOT(bus->parent_dev->devfn) << 16) | + PCI_FUNC(bus->parent_dev->devfn), 4); + } else { + op = 0x10; /* ScopeOp */; + build_append_nameseg(bus_table, "PCI0"); + } + + bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); + if (bsel) { + build_append_byte(bus_table, 0x08); /* NameOp */ + build_append_nameseg(bus_table, "BSEL"); + build_append_int(bus_table, qint_get_int(qobject_to_qint(bsel))); + + memset(slot_hotplug_enable, 0xff, sizeof 
slot_hotplug_enable); + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDeviceClass *pc; + PCIDevice *pdev = bus->devices[i]; + + if (!pdev) { + continue; + } + + pc = PCI_DEVICE_GET_CLASS(pdev); + + if (pc->no_hotplug || pc->is_bridge) { + int slot = PCI_SLOT(i); + + clear_bit(slot, slot_hotplug_enable); + } + } + + /* Append Device object for each slot which supports eject */ + for (i = 0; i < PCI_SLOT_MAX; i++) { + bool can_eject = test_bit(i, slot_hotplug_enable); + if (can_eject) { + void *pcihp = acpi_data_push(bus_table, + ACPI_PCIHP_SIZEOF); + memcpy(pcihp, ACPI_PCIHP_AML, ACPI_PCIHP_SIZEOF); + patch_pcihp(i, pcihp); + bus_hotplug_support = true; + } + } + + method = build_alloc_method("DVNT", 2); + + for (i = 0; i < PCI_SLOT_MAX; i++) { + GArray *notify; + uint8_t op; + + if (!test_bit(i, slot_hotplug_enable)) { + continue; + } + + notify = build_alloc_array(); + op = 0xA0; /* IfOp */ + + build_append_byte(notify, 0x7B); /* AndOp */ + build_append_byte(notify, 0x68); /* Arg0Op */ + build_append_int(notify, 0x1 << i); + build_append_byte(notify, 0x00); /* NullName */ + build_append_byte(notify, 0x86); /* NotifyOp */ + build_append_nameseg(notify, "S%.02X_", PCI_DEVFN(i, 0)); + build_append_byte(notify, 0x69); /* Arg1Op */ + + /* Pack it up */ + build_package(notify, op, 0); + + build_append_array(method, notify); + + build_free_array(notify); + } + + build_append_and_cleanup_method(bus_table, method); + } + + /* Append PCNT method to notify about events on local and child buses. + * Add unconditionally for root since DSDT expects it. 
*/ - /* Sanity check */ - assert(!memcmp(ssdt_ptr + ACPI_PCIHP_OFFSET_EJ0, "_EJ0", 4)); + if (bus_hotplug_support || child->notify_table->len || !bus->parent_dev) { + method = build_alloc_method("PCNT", 0); + + /* If bus supports hotplug select it and notify about local events */ + if (bsel) { + build_append_byte(method, 0x70); /* StoreOp */ + build_append_int(method, qint_get_int(qobject_to_qint(bsel))); + build_append_nameseg(method, "BNUM"); + build_append_nameseg(method, "DVNT"); + build_append_nameseg(method, "PCIU"); + build_append_int(method, 1); /* Device Check */ + build_append_nameseg(method, "DVNT"); + build_append_nameseg(method, "PCID"); + build_append_int(method, 3); /* Eject Request */ + } + + /* Notify about child bus events in any case */ + build_append_array(method, child->notify_table); + + build_append_and_cleanup_method(bus_table, method); + + /* Append description of child buses */ + build_append_array(bus_table, child->device_table); + + /* Pack it up */ + if (bus->parent_dev) { + build_extop_package(bus_table, op); + } else { + build_package(bus_table, op, 0); + } - if (!eject) { - memcpy(ssdt_ptr + ACPI_PCIHP_OFFSET_EJ0, "EJ0_", 4); + /* Append our bus description to parent table */ + build_append_array(parent->device_table, bus_table); + + /* Also tell parent how to notify us, invoking PCNT method. + * At the moment this is not needed for root as we have a single root. 
+ */ + if (bus->parent_dev) { + build_append_byte(parent->notify_table, '^'); /* ParentPrefixChar */ + build_append_byte(parent->notify_table, 0x2E); /* DualNamePrefix */ + build_append_nameseg(parent->notify_table, "S%.02X_", + bus->parent_dev->devfn); + build_append_nameseg(parent->notify_table, "PCNT"); + } } + + build_free_array(bus_table); + build_pci_bus_state_cleanup(child); + g_free(child); } static void patch_pci_windows(PcPciInfo *pci, uint8_t *start, unsigned size) @@ -733,7 +951,7 @@ build_ssdt(GArray *table_data, GArray *linker, * Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...} */ /* Arg0 = Processor ID = APIC ID */ - build_append_notify(sb_scope, "NTFY", "CP%0.02X", 0, acpi_cpus); + build_append_notify_method(sb_scope, "NTFY", "CP%0.02X", acpi_cpus); /* build "Name(CPON, Package() { One, One, ..., Zero, Zero, ... })" */ build_append_byte(sb_scope, 0x08); /* NameOp */ @@ -755,24 +973,19 @@ build_ssdt(GArray *table_data, GArray *linker, } { - GArray *pci0 = build_alloc_array(); - uint8_t op = 0x10; /* ScopeOp */; + AcpiBuildPciBusHotplugState hotplug_state; + PCIBus *bus = find_i440fx(); /* TODO: Q35 support */ - build_append_nameseg(pci0, "PCI0"); + build_pci_bus_state_init(&hotplug_state, NULL); - /* build Device object for each slot */ - for (i = 1; i < PCI_SLOT_MAX; i++) { - bool eject = test_bit(i, misc->slot_hotplug_enable); - void *pcihp = acpi_data_push(pci0, ACPI_PCIHP_SIZEOF); - - memcpy(pcihp, ACPI_PCIHP_AML, ACPI_PCIHP_SIZEOF); - patch_pcihp(i, pcihp, eject); + if (bus) { + /* Scan all PCI buses. Generate tables to support hotplug. 
*/ + pci_for_each_bus_depth_first(bus, build_pci_bus_begin, + build_pci_bus_end, &hotplug_state); } - build_append_notify(pci0, "PCNT", "S%0.02X_", 1, PCI_SLOT_MAX); - build_package(pci0, op, 3); - build_append_array(sb_scope, pci0); - build_free_array(pci0); + build_append_array(sb_scope, hotplug_state.device_table); + build_pci_bus_state_cleanup(&hotplug_state); } build_package(sb_scope, op, 3); @@ -867,16 +1080,16 @@ build_srat(GArray *table_data, GArray *linker, next_base = mem_base + mem_len; /* Cut out the ACPI_PCI hole */ - if (mem_base <= guest_info->ram_size && - next_base > guest_info->ram_size) { - mem_len -= next_base - guest_info->ram_size; + if (mem_base <= guest_info->ram_size_below_4g && + next_base > guest_info->ram_size_below_4g) { + mem_len -= next_base - guest_info->ram_size_below_4g; if (mem_len > 0) { numamem = acpi_data_push(table_data, sizeof *numamem); acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); } mem_base = 1ULL << 32; - mem_len = next_base - guest_info->ram_size; - next_base += (1ULL << 32) - guest_info->ram_size; + mem_len = next_base - guest_info->ram_size_below_4g; + next_base += (1ULL << 32) - guest_info->ram_size_below_4g; } numamem = acpi_data_push(table_data, sizeof *numamem); acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, 1); @@ -1055,7 +1268,6 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) acpi_get_cpu_info(&cpu); acpi_get_pm_info(&pm); acpi_get_dsdt(&misc); - acpi_get_hotplug_info(&misc); acpi_get_misc_info(&misc); acpi_get_pci_info(&pci); @@ -1200,6 +1412,8 @@ void acpi_setup(PcGuestInfo *guest_info) build_state->guest_info = guest_info; + acpi_set_pci_info(); + acpi_build_tables_init(&tables); acpi_build(build_state->guest_info, &tables); diff --git a/hw/i386/acpi-dsdt-cpu-hotplug.dsl b/hw/i386/acpi-dsdt-cpu-hotplug.dsl index 995b415bae..dee4843cde 100644 --- a/hw/i386/acpi-dsdt-cpu-hotplug.dsl +++ b/hw/i386/acpi-dsdt-cpu-hotplug.dsl @@ -16,6 +16,7 @@ 
/**************************************************************** * CPU hotplug ****************************************************************/ +#define CPU_HOTPLUG_RESOURCE_DEVICE PRES Scope(\_SB) { /* Objects filled in by run-time generated SSDT */ @@ -52,7 +53,8 @@ Scope(\_SB) { Sleep(200) } - OperationRegion(PRST, SystemIO, 0xaf00, 32) +#define CPU_STATUS_LEN ACPI_GPE_PROC_LEN + OperationRegion(PRST, SystemIO, CPU_STATUS_BASE, CPU_STATUS_LEN) Field(PRST, ByteAcc, NoLock, Preserve) { PRS, 256 } @@ -89,4 +91,14 @@ Scope(\_SB) { Increment(Local0) } } + + Device(CPU_HOTPLUG_RESOURCE_DEVICE) { + Name(_HID, "ACPI0004") + + Name(_CRS, ResourceTemplate() { + IO(Decode16, CPU_STATUS_BASE, CPU_STATUS_BASE, 0, CPU_STATUS_LEN) + }) + + Name(_STA, 0xB) /* present, functioning, decoding, not shown in UI */ + } } diff --git a/hw/i386/acpi-dsdt-isa.dsl b/hw/i386/acpi-dsdt-isa.dsl index 89caa1649d..deb37de92e 100644 --- a/hw/i386/acpi-dsdt-isa.dsl +++ b/hw/i386/acpi-dsdt-isa.dsl @@ -16,6 +16,17 @@ /* Common legacy ISA style devices. 
*/ Scope(\_SB.PCI0.ISA) { + Device (SMC) { + Name(_HID, EisaId("APP0001")) + /* _STA will be patched to 0x0B if AppleSMC is present */ + ACPI_EXTRACT_NAME_BYTE_CONST DSDT_APPLESMC_STA + Name(_STA, 0xF0) + Name(_CRS, ResourceTemplate () { + IO (Decode16, 0x0300, 0x0300, 0x01, 0x20) + IRQNoFlags() { 6 } + }) + } + Device(RTC) { Name(_HID, EisaId("PNP0B00")) Name(_CRS, ResourceTemplate() { diff --git a/hw/i386/acpi-dsdt-pci-crs.dsl b/hw/i386/acpi-dsdt-pci-crs.dsl index b375a19cf6..4648e90366 100644 --- a/hw/i386/acpi-dsdt-pci-crs.dsl +++ b/hw/i386/acpi-dsdt-pci-crs.dsl @@ -30,20 +30,7 @@ Scope(\_SB.PCI0) { 0x01, // Address Alignment 0x08, // Address Length ) - WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, - 0x0000, // Address Space Granularity - 0x0000, // Address Range Minimum - 0x0CF7, // Address Range Maximum - 0x0000, // Address Translation Offset - 0x0CF8, // Address Length - ,, , TypeStatic) - WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, - 0x0000, // Address Space Granularity - 0x0D00, // Address Range Minimum - 0xFFFF, // Address Range Maximum - 0x0000, // Address Translation Offset - 0xF300, // Address Length - ,, , TypeStatic) + BOARD_SPECIFIC_PCI_RESOURSES DWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, 0x00000000, // Address Space Granularity 0x000A0000, // Address Range Minimum diff --git a/hw/i386/acpi-dsdt.dsl b/hw/i386/acpi-dsdt.dsl index a377424f39..b23d5e0eac 100644 --- a/hw/i386/acpi-dsdt.dsl +++ b/hw/i386/acpi-dsdt.dsl @@ -35,6 +35,45 @@ DefinitionBlock ( /**************************************************************** * PCI Bus definition ****************************************************************/ +#define BOARD_SPECIFIC_PCI_RESOURSES \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 0x0000, \ + 0x0000, \ + 0x0CF7, \ + 0x0000, \ + 0x0CF8, \ + ,, , TypeStatic) \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 
0x0000, \ + 0x0D00, \ + 0xADFF, \ + 0x0000, \ + 0xA100, \ + ,, , TypeStatic) \ + /* 0xae00-0xae0e hole for PCI hotplug, hw/acpi/piix4.c:PCI_HOTPLUG_ADDR */ \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 0x0000, \ + 0xAE0F, \ + 0xAEFF, \ + 0x0000, \ + 0x00F1, \ + ,, , TypeStatic) \ + /* 0xaf00-0xaf1f hole for CPU hotplug, hw/acpi/piix4.c:PIIX4_PROC_BASE */ \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 0x0000, \ + 0xAF20, \ + 0xAFDF, \ + 0x0000, \ + 0x00C0, \ + ,, , TypeStatic) \ + /* 0xafe0-0xafe3 hole for ACPI.GPE0, hw/acpi/piix4.c:GPE_BASE */ \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 0x0000, \ + 0xAFE4, \ + 0xFFFF, \ + 0x0000, \ + 0x501C, \ + ,, , TypeStatic) Scope(\_SB) { Device(PCI0) { @@ -114,6 +153,7 @@ DefinitionBlock ( } } +#define DSDT_APPLESMC_STA piix_dsdt_applesmc_sta #include "acpi-dsdt-isa.dsl" @@ -133,32 +173,28 @@ DefinitionBlock ( B0EJ, 32, } + OperationRegion(BNMR, SystemIO, 0xae10, 0x04) + Field(BNMR, DWordAcc, NoLock, WriteAsZeros) { + BNUM, 32, + } + + /* Lock to protect access to fields above. 
*/ + Mutex(BLCK, 0) + /* Methods called by bulk generated PCI devices below */ /* Methods called by hotplug devices */ - Method(PCEJ, 1, NotSerialized) { + Method(PCEJ, 2, NotSerialized) { // _EJ0 method - eject callback - Store(ShiftLeft(1, Arg0), B0EJ) + Acquire(BLCK, 0xFFFF) + Store(Arg0, BNUM) + Store(ShiftLeft(1, Arg1), B0EJ) + Release(BLCK) Return (0x0) } /* Hotplug notification method supplied by SSDT */ External(\_SB.PCI0.PCNT, MethodObj) - - /* PCI hotplug notify method */ - Method(PCNF, 0) { - // Local0 = iterator - Store(Zero, Local0) - While (LLess(Local0, 31)) { - Increment(Local0) - If (And(PCIU, ShiftLeft(1, Local0))) { - PCNT(Local0, 1) - } - If (And(PCID, ShiftLeft(1, Local0))) { - PCNT(Local0, 3) - } - } - } } @@ -293,6 +329,8 @@ DefinitionBlock ( } } +#include "hw/acpi/cpu_hotplug_defs.h" +#define CPU_STATUS_BASE PIIX4_CPU_HOTPLUG_IO_BASE #include "acpi-dsdt-cpu-hotplug.dsl" @@ -307,7 +345,9 @@ DefinitionBlock ( } Method(_E01) { // PCI hotplug event - \_SB.PCI0.PCNF() + Acquire(\_SB.PCI0.BLCK, 0xFFFF) + \_SB.PCI0.PCNT() + Release(\_SB.PCI0.BLCK) } Method(_E02) { // CPU hotplug event diff --git a/hw/i386/acpi-dsdt.hex.generated b/hw/i386/acpi-dsdt.hex.generated index f8bd4ea1b5..1e58801b2a 100644 --- a/hw/i386/acpi-dsdt.hex.generated +++ b/hw/i386/acpi-dsdt.hex.generated @@ -3,12 +3,12 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x53, 0x44, 0x54, -0x37, +0x87, 0x11, 0x0, 0x0, 0x1, -0xd8, +0xb8, 0x42, 0x58, 0x50, @@ -860,8 +860,8 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x4e, 0x1, 0x10, -0x4c, -0x1b, +0x4b, +0x1e, 0x2f, 0x3, 0x5f, @@ -879,6 +879,53 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x5b, 0x82, 0x2d, +0x53, +0x4d, +0x43, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x6, +0x10, +0x0, +0x1, +0x8, +0x5f, +0x53, +0x54, +0x41, +0xb, +0x0, +0xff, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0x0, +0x3, +0x0, +0x3, +0x1, +0x20, +0x22, +0x40, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x2d, 0x52, 0x54, 0x43, @@ 
-1305,7 +1352,7 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x79, 0x0, 0x10, -0x4b, +0x48, 0x8, 0x2e, 0x5f, @@ -1371,79 +1418,76 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x45, 0x4a, 0x20, +0x5b, +0x80, +0x42, +0x4e, +0x4d, +0x52, +0x1, +0xb, +0x10, +0xae, +0xa, +0x4, +0x5b, +0x81, +0xb, +0x42, +0x4e, +0x4d, +0x52, +0x43, +0x42, +0x4e, +0x55, +0x4d, +0x20, +0x5b, +0x1, +0x42, +0x4c, +0x43, +0x4b, +0x0, 0x14, -0x11, +0x25, 0x50, 0x43, 0x45, 0x4a, -0x1, +0x2, +0x5b, +0x23, +0x42, +0x4c, +0x43, +0x4b, +0xff, +0xff, 0x70, -0x79, -0x1, 0x68, -0x0, 0x42, -0x30, -0x45, -0x4a, -0xa4, -0x0, -0x14, -0x36, -0x50, -0x43, 0x4e, -0x46, -0x0, -0x70, -0x0, -0x60, -0xa2, -0x2c, -0x95, -0x60, -0xa, -0x1f, -0x75, -0x60, -0xa0, -0x11, -0x7b, -0x50, -0x43, -0x49, 0x55, +0x4d, +0x70, 0x79, 0x1, -0x60, +0x69, 0x0, -0x0, -0x50, -0x43, -0x4e, -0x54, -0x60, -0x1, -0xa0, -0x12, -0x7b, -0x50, +0x42, +0x30, +0x45, +0x4a, +0x5b, +0x27, +0x42, +0x4c, 0x43, -0x49, -0x44, -0x79, -0x1, -0x60, -0x0, +0x4b, +0xa4, 0x0, -0x50, -0x43, -0x4e, -0x54, -0x60, -0xa, -0x3, 0x10, 0x4a, 0xa0, @@ -4248,8 +4292,8 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x75, 0x60, 0x10, -0x4e, -0x9, +0x42, +0xc, 0x5f, 0x47, 0x50, @@ -4277,12 +4321,31 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x30, 0x0, 0x14, -0x15, +0x39, 0x5f, 0x45, 0x30, 0x31, 0x0, +0x5b, +0x23, +0x5c, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x42, +0x4c, +0x43, +0x4b, +0xff, +0xff, 0x5c, 0x2f, 0x3, @@ -4297,7 +4360,24 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x50, 0x43, 0x4e, -0x46, +0x54, +0x5b, +0x27, +0x5c, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x42, +0x4c, +0x43, +0x4b, 0x14, 0x10, 0x5f, @@ -4407,3 +4487,6 @@ static unsigned char AcpiDsdtAmlCode[] = { 0x46, 0x0 }; +static unsigned short piix_dsdt_applesmc_sta[] = { +0x384 +}; diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 6f0be37d8b..348b15f267 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1072,6 +1072,7 @@ PcGuestInfo 
*pc_guest_info_init(ram_addr_t below_4g_mem_size, PcGuestInfo *guest_info = &guest_info_state->info; int i, j; + guest_info->ram_size_below_4g = below_4g_mem_size; guest_info->ram_size = below_4g_mem_size + above_4g_mem_size; guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); guest_info->apic_xrupt_override = kvm_allows_irq0_override(); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 276641436e..a327d71fb1 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -393,6 +393,10 @@ static QEMUMachine pc_i440fx_machine_v1_7 = { PC_I440FX_1_7_MACHINE_OPTIONS, .name = "pc-i440fx-1.7", .init = pc_init_pci_1_7, + .compat_props = (GlobalProperty[]) { + PC_COMPAT_1_7, + { /* end of list */ } + }, }; #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 07f38ff704..a7f626096a 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -51,6 +51,11 @@ static bool has_pci_info; static bool has_acpi_build = true; static bool smbios_type1_defaults = true; +/* Make sure that guest addresses aligned at 1Gbyte boundaries get mapped to + * host addresses aligned at 1Gbyte boundaries. This way we can use 1GByte + * pages in the host. + */ +static bool gigabyte_align = true; /* PC hardware initialisation */ static void pc_q35_init(QEMUMachineInitArgs *args) @@ -92,9 +97,19 @@ static void pc_q35_init(QEMUMachineInitArgs *args) kvmclock_create(); + /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory + * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping + * also known as MMCFG). + * If it doesn't, we need to split it in chunks below and above 4G. + * In any case, try to make sure that guest addresses aligned at + * 1G boundaries get mapped to host addresses aligned at 1G boundaries. + * For old machine types, use whatever split we used historically to avoid + * breaking migration. 
+ */ if (args->ram_size >= 0xb0000000) { - above_4g_mem_size = args->ram_size - 0xb0000000; - below_4g_mem_size = 0xb0000000; + ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000; + above_4g_mem_size = args->ram_size - lowmem; + below_4g_mem_size = lowmem; } else { above_4g_mem_size = 0; below_4g_mem_size = args->ram_size; @@ -228,6 +243,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) static void pc_compat_1_7(QEMUMachineInitArgs *args) { smbios_type1_defaults = false; + gigabyte_align = false; } static void pc_compat_1_6(QEMUMachineInitArgs *args) diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl index 7934a9ddfb..d618e9e2d2 100644 --- a/hw/i386/q35-acpi-dsdt.dsl +++ b/hw/i386/q35-acpi-dsdt.dsl @@ -48,6 +48,22 @@ DefinitionBlock ( /**************************************************************** * PCI Bus definition ****************************************************************/ +#define BOARD_SPECIFIC_PCI_RESOURSES \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 0x0000, \ + 0x0000, \ + 0x0CD7, \ + 0x0000, \ + 0x0CD8, \ + ,, , TypeStatic) \ + /* 0xcd8-0xcf7 hole for CPU hotplug, hw/acpi/ich9.c:ICH9_PROC_BASE */ \ + WordIO(ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, \ + 0x0000, \ + 0x0D00, \ + 0xFFFF, \ + 0x0000, \ + 0xF300, \ + ,, , TypeStatic) Scope(\_SB) { Device(PCI0) { @@ -171,6 +187,7 @@ DefinitionBlock ( } } +#define DSDT_APPLESMC_STA q35_dsdt_applesmc_sta #include "acpi-dsdt-isa.dsl" @@ -404,6 +421,8 @@ DefinitionBlock ( define_gsi_link(GSIH, 0, 0x17) } +#include "hw/acpi/cpu_hotplug_defs.h" +#define CPU_STATUS_BASE ICH9_CPU_HOTPLUG_IO_BASE #include "acpi-dsdt-cpu-hotplug.dsl" diff --git a/hw/i386/q35-acpi-dsdt.hex.generated b/hw/i386/q35-acpi-dsdt.hex.generated index 111ad3e9c2..6d885a9055 100644 --- a/hw/i386/q35-acpi-dsdt.hex.generated +++ b/hw/i386/q35-acpi-dsdt.hex.generated @@ -3,12 +3,12 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x53, 0x44, 0x54, -0xb0, +0xdf, 
0x1c, 0x0, 0x0, 0x1, -0xfe, +0xff, 0x42, 0x58, 0x50, @@ -1033,8 +1033,8 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x4e, 0x1, 0x10, -0x4c, -0x1b, +0x4b, +0x1e, 0x2f, 0x3, 0x5f, @@ -1052,6 +1052,53 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x5b, 0x82, 0x2d, +0x53, +0x4d, +0x43, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x6, +0x10, +0x0, +0x1, +0x8, +0x5f, +0x53, +0x54, +0x41, +0xb, +0x0, +0xff, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0x0, +0x3, +0x0, +0x3, +0x1, +0x20, +0x22, +0x40, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x2d, 0x52, 0x54, 0x43, @@ -7229,12 +7276,19 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x30, 0x0, 0x14, -0x10, +0x6, 0x5f, 0x4c, 0x30, 0x31, 0x0, +0x14, +0x10, +0x5f, +0x45, +0x30, +0x32, +0x0, 0x5c, 0x2e, 0x5f, @@ -7250,13 +7304,6 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x5f, 0x4c, 0x30, -0x32, -0x0, -0x14, -0x6, -0x5f, -0x4c, -0x30, 0x33, 0x0, 0x14, @@ -7344,3 +7391,6 @@ static unsigned char Q35AcpiDsdtAmlCode[] = { 0x46, 0x0 }; +static unsigned short q35_dsdt_applesmc_sta[] = { +0x431 +}; diff --git a/hw/i386/ssdt-pcihp.dsl b/hw/i386/ssdt-pcihp.dsl index d29a5b95d2..cc245c3e7c 100644 --- a/hw/i386/ssdt-pcihp.dsl +++ b/hw/i386/ssdt-pcihp.dsl @@ -25,6 +25,7 @@ DefinitionBlock ("ssdt-pcihp.aml", "SSDT", 0x01, "BXPC", "BXSSDTPCIHP", 0x1) /* Objects supplied by DSDT */ External(\_SB.PCI0, DeviceObj) External(\_SB.PCI0.PCEJ, MethodObj) + External(BSEL, IntObj) Scope(\_SB.PCI0) { @@ -33,19 +34,17 @@ DefinitionBlock ("ssdt-pcihp.aml", "SSDT", 0x01, "BXPC", "BXSSDTPCIHP", 0x1) ACPI_EXTRACT_DEVICE_END ssdt_pcihp_end ACPI_EXTRACT_DEVICE_STRING ssdt_pcihp_name - // Method _EJ0 can be patched by BIOS to EJ0_ - // at runtime, if the slot is detected to not support hotplug. - // Extract the offset of the address dword and the - // _EJ0 name to allow this patching. + // Extract the offsets of the device name, address dword and the slot + // name byte - we fill them in for each device. 
Device(SAA) { ACPI_EXTRACT_NAME_BYTE_CONST ssdt_pcihp_id Name(_SUN, 0xAA) ACPI_EXTRACT_NAME_DWORD_CONST ssdt_pcihp_adr Name(_ADR, 0xAA0000) - ACPI_EXTRACT_METHOD_STRING ssdt_pcihp_ej0 Method(_EJ0, 1) { - Return (PCEJ(_SUN)) + PCEJ(BSEL, _SUN) } } + } } diff --git a/hw/i386/ssdt-pcihp.hex.generated b/hw/i386/ssdt-pcihp.hex.generated index b3c2cd5cf9..610a631fd1 100644 --- a/hw/i386/ssdt-pcihp.hex.generated +++ b/hw/i386/ssdt-pcihp.hex.generated @@ -5,19 +5,19 @@ static unsigned char ssdt_pcihp_adr[] = { 0x44 }; static unsigned char ssdt_pcihp_end[] = { -0x58 +0x5b }; static unsigned char ssdp_pcihp_aml[] = { 0x53, 0x53, 0x44, 0x54, -0x58, +0x5b, 0x0, 0x0, 0x0, 0x1, -0x76, +0xe8, 0x42, 0x58, 0x50, @@ -45,7 +45,7 @@ static unsigned char ssdp_pcihp_aml[] = { 0x13, 0x20, 0x10, -0x33, +0x36, 0x5c, 0x2e, 0x5f, @@ -58,7 +58,7 @@ static unsigned char ssdp_pcihp_aml[] = { 0x30, 0x5b, 0x82, -0x26, +0x29, 0x53, 0x41, 0x41, @@ -81,17 +81,20 @@ static unsigned char ssdp_pcihp_aml[] = { 0xaa, 0x0, 0x14, -0xf, +0x12, 0x5f, 0x45, 0x4a, 0x30, 0x1, -0xa4, 0x50, 0x43, 0x45, 0x4a, +0x42, +0x53, +0x45, +0x4c, 0x5f, 0x53, 0x55, @@ -103,6 +106,3 @@ static unsigned char ssdt_pcihp_start[] = { static unsigned char ssdt_pcihp_id[] = { 0x3d }; -static unsigned char ssdt_pcihp_ej0[] = { -0x4a -}; diff --git a/hw/i386/ssdt-proc.hex.generated b/hw/i386/ssdt-proc.hex.generated index bb9920d3c9..97e28d4820 100644 --- a/hw/i386/ssdt-proc.hex.generated +++ b/hw/i386/ssdt-proc.hex.generated @@ -11,7 +11,7 @@ static unsigned char ssdp_proc_aml[] = { 0x0, 0x0, 0x1, -0xb8, +0x78, 0x42, 0x58, 0x50, @@ -47,8 +47,8 @@ static unsigned char ssdp_proc_aml[] = { 0x41, 0x41, 0xaa, -0x10, -0xb0, +0x0, +0x0, 0x0, 0x0, 0x0, diff --git a/hw/ide/core.c b/hw/ide/core.c index e1f4c33fb8..e1dfe54df6 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1321,6 +1321,7 @@ static bool cmd_exec_dev_diagnostic(IDEState *s, uint8_t cmd) s->status = 0; /* ATAPI spec (v6) section 9.10 defines packet * devices to return a clear 
status register * with READY_STAT *not* set. */ + s->error = 0x01; } else { s->status = READY_STAT | SEEK_STAT; /* The bits of the error register are not as usual for this command! @@ -2103,7 +2104,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind, s->smart_selftest_count = 0; if (kind == IDE_CD) { bdrv_set_dev_ops(bs, &ide_cd_block_ops, s); - bdrv_set_buffer_alignment(bs, 2048); + bdrv_set_guest_block_size(bs, 2048); } else { if (!bdrv_is_inserted(s->bs)) { error_report("Device needs media, but drive is empty"); diff --git a/hw/misc/applesmc.c b/hw/misc/applesmc.c index 1e8d183e7f..627adb97c9 100644 --- a/hw/misc/applesmc.c +++ b/hw/misc/applesmc.c @@ -66,7 +66,6 @@ struct AppleSMCData { QLIST_ENTRY(AppleSMCData) node; }; -#define TYPE_APPLE_SMC "isa-applesmc" #define APPLE_SMC(obj) OBJECT_CHECK(AppleSMCState, (obj), TYPE_APPLE_SMC) typedef struct AppleSMCState AppleSMCState; diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c index 2315f996d4..e528290b41 100644 --- a/hw/net/lan9118.c +++ b/hw/net/lan9118.c @@ -727,14 +727,14 @@ static void tx_fifo_push(lan9118_state *s, uint32_t val) s->txp->cmd_a = val & 0x831f37ff; s->txp->fifo_used++; s->txp->state = TX_B; + s->txp->buffer_size = extract32(s->txp->cmd_a, 0, 11); + s->txp->offset = extract32(s->txp->cmd_a, 16, 5); break; case TX_B: if (s->txp->cmd_a & 0x2000) { /* First segment */ s->txp->cmd_b = val; s->txp->fifo_used++; - s->txp->buffer_size = s->txp->cmd_a & 0x7ff; - s->txp->offset = (s->txp->cmd_a >> 16) & 0x1f; /* End alignment does not include command words. 
*/ n = (s->txp->buffer_size + s->txp->offset + 3) >> 2; switch ((n >> 24) & 3) { @@ -763,7 +763,7 @@ static void tx_fifo_push(lan9118_state *s, uint32_t val) if (s->txp->buffer_size <= 0 && s->txp->pad != 0) { s->txp->pad--; } else { - n = 4; + n = MIN(4, s->txp->buffer_size + s->txp->offset); while (s->txp->offset) { val >>= 8; n--; diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 006576db31..854997d9ba 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -321,7 +321,7 @@ void vhost_net_ack_features(struct vhost_net *net, unsigned features) bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) { - return -ENOSYS; + return false; } void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, diff --git a/hw/pci/pci.c b/hw/pci/pci.c index aa2a395499..1221f32847 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -793,6 +793,15 @@ static void pci_config_free(PCIDevice *pci_dev) g_free(pci_dev->used); } +static void do_pci_unregister_device(PCIDevice *pci_dev) +{ + pci_dev->bus->devices[pci_dev->devfn] = NULL; + pci_config_free(pci_dev); + + address_space_destroy(&pci_dev->bus_master_as); + memory_region_destroy(&pci_dev->bus_master_enable_region); +} + /* -1 for devfn means auto assign */ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, const char *name, int devfn) @@ -858,7 +867,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, pci_init_mask_bridge(pci_dev); } if (pci_init_multifunction(bus, pci_dev)) { - pci_config_free(pci_dev); + do_pci_unregister_device(pci_dev); return NULL; } @@ -873,15 +882,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, return pci_dev; } -static void do_pci_unregister_device(PCIDevice *pci_dev) -{ - pci_dev->bus->devices[pci_dev->devfn] = NULL; - pci_config_free(pci_dev); - - address_space_destroy(&pci_dev->bus_master_as); - memory_region_destroy(&pci_dev->bus_master_enable_region); -} - static void 
pci_unregister_io_regions(PCIDevice *pci_dev) { PCIIORegion *r; @@ -1704,6 +1704,34 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) return NULL; } +void pci_for_each_bus_depth_first(PCIBus *bus, + void *(*begin)(PCIBus *bus, void *parent_state), + void (*end)(PCIBus *bus, void *state), + void *parent_state) +{ + PCIBus *sec; + void *state; + + if (!bus) { + return; + } + + if (begin) { + state = begin(bus, parent_state); + } else { + state = parent_state; + } + + QLIST_FOREACH(sec, &bus->child, sibling) { + pci_for_each_bus_depth_first(sec, begin, end, state); + } + + if (end) { + end(bus, state); + } +} + + PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) { bus = pci_find_bus_nr(bus, bus_num); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 3496c0bbd8..50b89ad4aa 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -469,6 +469,8 @@ static int32_t scsi_target_send_command(SCSIRequest *req, uint8_t *buf) r->req.dev->sense_is_ua = false; } break; + case TEST_UNIT_READY: + break; default: scsi_req_build_sense(req, SENSE_CODE(LUN_NOT_SUPPORTED)); scsi_req_complete(req, CHECK_CONDITION); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bce617cb93..a8d0f15ebe 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2254,7 +2254,7 @@ static int scsi_initfn(SCSIDevice *dev) } else { bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_block_ops, s); } - bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize); + bdrv_set_guest_block_size(s->qdev.conf.bs, s->qdev.blocksize); bdrv_iostatus_enable(s->qdev.conf.bs); add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, NULL); @@ -2306,6 +2306,7 @@ static const SCSIReqOps scsi_disk_emulate_reqops = { .send_command = scsi_disk_emulate_command, .read_data = scsi_disk_emulate_read_data, .write_data = scsi_disk_emulate_write_data, + .cancel_io = scsi_cancel_io, .get_buf = scsi_get_buf, }; diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 
8f195bec00..f08b64e177 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -210,7 +210,7 @@ static void scsi_read_complete(void * opaque, int ret) s->blocksize = ldl_be_p(&r->buf[8]); s->max_lba = ldq_be_p(&r->buf[0]); } - bdrv_set_buffer_alignment(s->conf.bs, s->blocksize); + bdrv_set_guest_block_size(s->conf.bs, s->blocksize); scsi_req_data(&r->req, len); if (!r->req.io_canceled) { diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 6dcdd1b91c..6610b3aab3 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -306,6 +306,10 @@ static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status, VirtIOSCSIReq *req = r->hba_private; uint32_t sense_len; + if (r->io_canceled) { + return; + } + req->resp.cmd->response = VIRTIO_SCSI_S_OK; req->resp.cmd->status = status; if (req->resp.cmd->status == GOOD) { @@ -516,7 +520,7 @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, evt->event = event; evt->reason = reason; if (!dev) { - assert(event == VIRTIO_SCSI_T_NO_EVENT); + assert(event == VIRTIO_SCSI_T_EVENTS_MISSED); } else { evt->lun[0] = 1; evt->lun[1] = dev->id; diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index a3eac3e5c1..97b457541f 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -1,5 +1,5 @@ # usb subsystem core -common-obj-y += core.o combined-packet.o bus.o desc.o +common-obj-y += core.o combined-packet.o bus.o desc.o desc-msos.o common-obj-y += libhw.o # usb host adapters diff --git a/hw/usb/bus.c b/hw/usb/bus.c index 09848c6320..fe70429304 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -16,6 +16,8 @@ static Property usb_props[] = { DEFINE_PROP_STRING("serial", USBDevice, serial), DEFINE_PROP_BIT("full-path", USBDevice, flags, USB_DEV_FLAG_FULL_PATH, true), + DEFINE_PROP_BIT("msos-desc", USBDevice, flags, + USB_DEV_FLAG_MSOS_DESC_ENABLE, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/usb/desc-msos.c b/hw/usb/desc-msos.c new file mode 100644 index 
0000000000..ed8d62cab8 --- /dev/null +++ b/hw/usb/desc-msos.c @@ -0,0 +1,234 @@ +#include "hw/usb.h" +#include "hw/usb/desc.h" + +/* + * Microsoft OS Descriptors + * + * Windows tries to fetch some special descriptors with information + * specifically for windows. Presence is indicated using a special + * string @ index 0xee. There are two kinds of descriptors: + * + * compatid descriptor + * Used to bind drivers, if usb class isn't specific enough. + * Used for PTP/MTP for example (both share the same usb class). + * + * properties descriptor + * Does carry registry entries. They show up in + * HKLM\SYSTEM\CurrentControlSet\Enum\USB\<devid>\<serial>\Device Parameters + * + * Note that Windows caches the stuff it got in the registry, so when + * playing with this you have to delete registry subtrees to make + * windows query the device again: + * HKLM\SYSTEM\CurrentControlSet\Control\usbflags + * HKLM\SYSTEM\CurrentControlSet\Enum\USB + * Windows will complain it can't delete entries on the second one. + * It has deleted everything it had permission to, which is enough + * as this includes "Device Parameters". 
+ * + * http://msdn.microsoft.com/en-us/library/windows/hardware/ff537430.aspx + * + */ + +/* ------------------------------------------------------------------ */ + +typedef struct msos_compat_hdr { + uint32_t dwLength; + uint8_t bcdVersion_lo; + uint8_t bcdVersion_hi; + uint8_t wIndex_lo; + uint8_t wIndex_hi; + uint8_t bCount; + uint8_t reserved[7]; +} QEMU_PACKED msos_compat_hdr; + +typedef struct msos_compat_func { + uint8_t bFirstInterfaceNumber; + uint8_t reserved_1; + uint8_t compatibleId[8]; + uint8_t subCompatibleId[8]; + uint8_t reserved_2[6]; +} QEMU_PACKED msos_compat_func; + +static int usb_desc_msos_compat(const USBDesc *desc, uint8_t *dest) +{ + msos_compat_hdr *hdr = (void *)dest; + msos_compat_func *func; + int length = sizeof(*hdr); + int count = 0; + + func = (void *)(dest + length); + func->bFirstInterfaceNumber = 0; + func->reserved_1 = 0x01; + length += sizeof(*func); + count++; + + hdr->dwLength = cpu_to_le32(length); + hdr->bcdVersion_lo = 0x00; + hdr->bcdVersion_hi = 0x01; + hdr->wIndex_lo = 0x04; + hdr->wIndex_hi = 0x00; + hdr->bCount = count; + return length; +} + +/* ------------------------------------------------------------------ */ + +typedef struct msos_prop_hdr { + uint32_t dwLength; + uint8_t bcdVersion_lo; + uint8_t bcdVersion_hi; + uint8_t wIndex_lo; + uint8_t wIndex_hi; + uint8_t wCount_lo; + uint8_t wCount_hi; +} QEMU_PACKED msos_prop_hdr; + +typedef struct msos_prop { + uint32_t dwLength; + uint32_t dwPropertyDataType; + uint8_t dwPropertyNameLength_lo; + uint8_t dwPropertyNameLength_hi; + uint8_t bPropertyName[]; +} QEMU_PACKED msos_prop; + +typedef struct msos_prop_data { + uint32_t dwPropertyDataLength; + uint8_t bPropertyData[]; +} QEMU_PACKED msos_prop_data; + +typedef enum msos_prop_type { + MSOS_REG_SZ = 1, + MSOS_REG_EXPAND_SZ = 2, + MSOS_REG_BINARY = 3, + MSOS_REG_DWORD_LE = 4, + MSOS_REG_DWORD_BE = 5, + MSOS_REG_LINK = 6, + MSOS_REG_MULTI_SZ = 7, +} msos_prop_type; + +static int usb_desc_msos_prop_name(struct 
msos_prop *prop, + const wchar_t *name) +{ + int length = wcslen(name) + 1; + int i; + + prop->dwPropertyNameLength_lo = usb_lo(length*2); + prop->dwPropertyNameLength_hi = usb_hi(length*2); + for (i = 0; i < length; i++) { + prop->bPropertyName[i*2] = usb_lo(name[i]); + prop->bPropertyName[i*2+1] = usb_hi(name[i]); + } + return length*2; +} + +static int usb_desc_msos_prop_str(uint8_t *dest, msos_prop_type type, + const wchar_t *name, const wchar_t *value) +{ + struct msos_prop *prop = (void *)dest; + struct msos_prop_data *data; + int length = sizeof(*prop); + int i, vlen = wcslen(value) + 1; + + prop->dwPropertyDataType = cpu_to_le32(type); + length += usb_desc_msos_prop_name(prop, name); + data = (void *)(dest + length); + + data->dwPropertyDataLength = cpu_to_le32(vlen*2); + length += sizeof(*prop); + + for (i = 0; i < vlen; i++) { + data->bPropertyData[i*2] = usb_lo(value[i]); + data->bPropertyData[i*2+1] = usb_hi(value[i]); + } + length += vlen*2; + + prop->dwLength = cpu_to_le32(length); + return length; +} + +static int usb_desc_msos_prop_dword(uint8_t *dest, const wchar_t *name, + uint32_t value) +{ + struct msos_prop *prop = (void *)dest; + struct msos_prop_data *data; + int length = sizeof(*prop); + + prop->dwPropertyDataType = cpu_to_le32(MSOS_REG_DWORD_LE); + length += usb_desc_msos_prop_name(prop, name); + data = (void *)(dest + length); + + data->dwPropertyDataLength = cpu_to_le32(4); + data->bPropertyData[0] = (value) & 0xff; + data->bPropertyData[1] = (value >> 8) & 0xff; + data->bPropertyData[2] = (value >> 16) & 0xff; + data->bPropertyData[3] = (value >> 24) & 0xff; + length += sizeof(*prop) + 4; + + prop->dwLength = cpu_to_le32(length); + return length; +} + +static int usb_desc_msos_prop(const USBDesc *desc, uint8_t *dest) +{ + msos_prop_hdr *hdr = (void *)dest; + int length = sizeof(*hdr); + int count = 0; + + if (desc->msos->Label) { + /* + * Given as example in the specs. Havn't figured yet where + * this label shows up in the windows gui. 
+ */ + length += usb_desc_msos_prop_str(dest+length, MSOS_REG_SZ, + L"Label", desc->msos->Label); + count++; + } + + if (desc->msos->SelectiveSuspendEnabled) { + /* + * Signaling remote wakeup capability in the standard usb + * descriptors isn't enouth to make windows actually use it. + * This is the "Yes, we really mean it" registy entry to flip + * the switch in the windows drivers. + */ + length += usb_desc_msos_prop_dword(dest+length, + L"SelectiveSuspendEnabled", 1); + count++; + } + + hdr->dwLength = cpu_to_le32(length); + hdr->bcdVersion_lo = 0x00; + hdr->bcdVersion_hi = 0x01; + hdr->wIndex_lo = 0x05; + hdr->wIndex_hi = 0x00; + hdr->wCount_lo = usb_lo(count); + hdr->wCount_hi = usb_hi(count); + return length; +} + +/* ------------------------------------------------------------------ */ + +int usb_desc_msos(const USBDesc *desc, USBPacket *p, + int index, uint8_t *dest, size_t len) +{ + void *buf = g_malloc0(4096); + int length = 0; + + switch (index) { + case 0x0004: + length = usb_desc_msos_compat(desc, buf); + break; + case 0x0005: + length = usb_desc_msos_prop(desc, buf); + break; + } + + if (length > len) { + length = len; + } + memcpy(dest, buf, length); + free(buf); + + p->actual_length = length; + return 0; +} diff --git a/hw/usb/desc.c b/hw/usb/desc.c index f18a043500..f133ddb9db 100644 --- a/hw/usb/desc.c +++ b/hw/usb/desc.c @@ -7,7 +7,7 @@ /* ------------------------------------------------------------------ */ int usb_desc_device(const USBDescID *id, const USBDescDevice *dev, - uint8_t *dest, size_t len) + bool msos, uint8_t *dest, size_t len) { uint8_t bLength = 0x12; USBDescriptor *d = (void *)dest; @@ -19,8 +19,18 @@ int usb_desc_device(const USBDescID *id, const USBDescDevice *dev, d->bLength = bLength; d->bDescriptorType = USB_DT_DEVICE; - d->u.device.bcdUSB_lo = usb_lo(dev->bcdUSB); - d->u.device.bcdUSB_hi = usb_hi(dev->bcdUSB); + if (msos && dev->bcdUSB < 0x0200) { + /* + * Version 2.0+ required for microsoft os descriptors to work. 
+ * Done this way so msos-desc compat property will handle both + * the version and the new descriptors being present. + */ + d->u.device.bcdUSB_lo = usb_lo(0x0200); + d->u.device.bcdUSB_hi = usb_hi(0x0200); + } else { + d->u.device.bcdUSB_lo = usb_lo(dev->bcdUSB); + d->u.device.bcdUSB_hi = usb_hi(dev->bcdUSB); + } d->u.device.bDeviceClass = dev->bDeviceClass; d->u.device.bDeviceSubClass = dev->bDeviceSubClass; d->u.device.bDeviceProtocol = dev->bDeviceProtocol; @@ -499,6 +509,10 @@ void usb_desc_init(USBDevice *dev) if (desc->super) { dev->speedmask |= USB_SPEED_MASK_SUPER; } + if (desc->msos && (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_ENABLE))) { + dev->flags |= (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE); + usb_desc_set_string(dev, 0xee, "MSFT100Q"); + } usb_desc_setdefaults(dev); } @@ -626,6 +640,7 @@ int usb_desc_string(USBDevice *dev, int index, uint8_t *dest, size_t len) int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p, int value, uint8_t *dest, size_t len) { + bool msos = (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE)); const USBDesc *desc = usb_device_get_usb_desc(dev); const USBDescDevice *other_dev; uint8_t buf[256]; @@ -646,7 +661,7 @@ int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p, switch(type) { case USB_DT_DEVICE: - ret = usb_desc_device(&desc->id, dev->device, buf, sizeof(buf)); + ret = usb_desc_device(&desc->id, dev->device, msos, buf, sizeof(buf)); trace_usb_desc_device(dev->addr, len, ret); break; case USB_DT_CONFIG: @@ -703,6 +718,7 @@ int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p, int usb_desc_handle_control(USBDevice *dev, USBPacket *p, int request, int value, int index, int length, uint8_t *data) { + bool msos = (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE)); const USBDesc *desc = usb_device_get_usb_desc(dev); int ret = -1; @@ -782,6 +798,19 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, trace_usb_set_interface(dev->addr, index, value, ret); break; + case VendorDeviceRequest | 'Q': + if (msos) { 
+ ret = usb_desc_msos(desc, p, index, data, length); + trace_usb_desc_msos(dev->addr, index, length, ret); + } + break; + case VendorInterfaceRequest | 'Q': + if (msos) { + ret = usb_desc_msos(desc, p, index, data, length); + trace_usb_desc_msos(dev->addr, index, length, ret); + } + break; + } return ret; } diff --git a/hw/usb/desc.h b/hw/usb/desc.h index 81327b0e74..2b4fcdae76 100644 --- a/hw/usb/desc.h +++ b/hw/usb/desc.h @@ -2,6 +2,7 @@ #define QEMU_HW_USB_DESC_H #include <inttypes.h> +#include <wchar.h> /* binary representation */ typedef struct USBDescriptor { @@ -182,6 +183,11 @@ struct USBDescOther { const uint8_t *data; }; +struct USBDescMSOS { + const wchar_t *Label; + bool SelectiveSuspendEnabled; +}; + typedef const char *USBDescStrings[256]; struct USBDesc { @@ -190,6 +196,7 @@ struct USBDesc { const USBDescDevice *high; const USBDescDevice *super; const char* const *str; + const USBDescMSOS *msos; }; #define USB_DESC_FLAG_SUPER (1 << 1) @@ -207,7 +214,7 @@ static inline uint8_t usb_hi(uint16_t val) /* generate usb packages from structs */ int usb_desc_device(const USBDescID *id, const USBDescDevice *dev, - uint8_t *dest, size_t len); + bool msos, uint8_t *dest, size_t len); int usb_desc_device_qualifier(const USBDescDevice *dev, uint8_t *dest, size_t len); int usb_desc_config(const USBDescConfig *conf, int flags, @@ -219,6 +226,8 @@ int usb_desc_iface(const USBDescIface *iface, int flags, int usb_desc_endpoint(const USBDescEndpoint *ep, int flags, uint8_t *dest, size_t len); int usb_desc_other(const USBDescOther *desc, uint8_t *dest, size_t len); +int usb_desc_msos(const USBDesc *desc, USBPacket *p, + int index, uint8_t *dest, size_t len); /* control message emulation helpers */ void usb_desc_init(USBDevice *dev); diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c index 5e667f0199..2966066682 100644 --- a/hw/usb/dev-hid.c +++ b/hw/usb/dev-hid.c @@ -261,6 +261,10 @@ static const USBDescDevice desc_device_keyboard = { }, }; +static const USBDescMSOS 
desc_msos_suspend = { + .SelectiveSuspendEnabled = true, +}; + static const USBDesc desc_mouse = { .id = { .idVendor = 0x0627, @@ -272,6 +276,7 @@ static const USBDesc desc_mouse = { }, .full = &desc_device_mouse, .str = desc_strings, + .msos = &desc_msos_suspend, }; static const USBDesc desc_tablet = { @@ -285,6 +290,7 @@ static const USBDesc desc_tablet = { }, .full = &desc_device_tablet, .str = desc_strings, + .msos = &desc_msos_suspend, }; static const USBDesc desc_tablet2 = { @@ -299,6 +305,7 @@ static const USBDesc desc_tablet2 = { .full = &desc_device_tablet, .high = &desc_device_tablet2, .str = desc_strings, + .msos = &desc_msos_suspend, }; static const USBDesc desc_keyboard = { @@ -312,6 +319,7 @@ static const USBDesc desc_keyboard = { }, .full = &desc_device_keyboard, .str = desc_strings, + .msos = &desc_msos_suspend, }; static const uint8_t qemu_mouse_hid_report_descriptor[] = { diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 250d45ec3d..665a1ffcb3 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -376,7 +376,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, barrier(); if (desc.flags & VRING_DESC_F_INDIRECT) { - int ret = get_indirect(vring, elem, &desc); + ret = get_indirect(vring, elem, &desc); if (ret < 0) { goto out; } diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index d9754dbd33..a470a0b3a6 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -263,7 +263,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) config.num_pages = cpu_to_le32(dev->num_pages); config.actual = cpu_to_le32(dev->actual); - memcpy(config_data, &config, 8); + memcpy(config_data, &config, sizeof(struct virtio_balloon_config)); } static void virtio_balloon_set_config(VirtIODevice *vdev, @@ -272,7 +272,7 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); struct virtio_balloon_config config; 
uint32_t oldactual = dev->actual; - memcpy(&config, config_data, 8); + memcpy(&config, config_data, sizeof(struct virtio_balloon_config)); dev->actual = le32_to_cpu(config.actual); if (dev->actual != oldactual) { qemu_balloon_changed(ram_size - @@ -343,7 +343,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) VirtIOBalloon *s = VIRTIO_BALLOON(dev); int ret; - virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, 8); + virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, + sizeof(struct virtio_balloon_config)); ret = qemu_add_balloon_handler(virtio_balloon_to_target, virtio_balloon_stat, s); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index d58cb616b1..be4220b415 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -420,8 +420,8 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s) "xen-pci-pt-bar", r->size); pci_register_bar(&s->dev, i, type, &s->bar[i]); - XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%lx"PRIx64 - " base_addr=0x%lx"PRIx64" type: %#x)\n", + XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64 + " base_addr=0x%08"PRIx64" type: %#x)\n", i, r->size, r->base_addr, type); } @@ -440,8 +440,8 @@ static int xen_pt_register_regions(XenPCIPassthroughState *s) s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr; - memory_region_init_rom_device(&s->rom, OBJECT(s), NULL, NULL, - "xen-pci-pt-rom", d->rom.size); + memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev, + "xen-pci-pt-rom", d->rom.size); pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->rom); diff --git a/include/block/block.h b/include/block/block.h index 36efaeac2d..963a61fa4c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -184,7 +184,11 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char 
*filename, - QDict *options, int flags, Error **errp); + const char *reference, QDict *options, int flags, + Error **errp); +int bdrv_open_image(BlockDriverState **pbs, const char *filename, + QDict *options, const char *bdref_key, int flags, + bool force_raw, bool allow_none, Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, int flags, BlockDriver *drv, Error **errp); @@ -220,7 +224,6 @@ BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, int64_t sector_num int nb_sectors, BdrvRequestFlags flags, BlockDriverCompletionFunc *cb, void *opaque); int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); -int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); int bdrv_pwrite(BlockDriverState *bs, int64_t offset, @@ -249,6 +252,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +int bdrv_refresh_limits(BlockDriverState *bs); int bdrv_commit(BlockDriverState *bs); int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, @@ -283,16 +287,16 @@ int bdrv_amend_options(BlockDriverState *bs_new, QEMUOptionParameter *options); /* external snapshots */ typedef enum { - EXT_SNAPSHOT_ALLOWED, - EXT_SNAPSHOT_FORBIDDEN, -} ExtSnapshotPerm; + BS_IS_A_FILTER, + BS_FILTER_PASS_DOWN, + BS_AUTHORIZATION_COUNT, +} BsAuthorization; -/* return EXT_SNAPSHOT_ALLOWED if external snapshot is allowed - * return EXT_SNAPSHOT_FORBIDDEN if external snapshot is forbidden - */ -ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs); -/* helper used to forbid external snapshots like in blkverify */ -ExtSnapshotPerm 
bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs); +bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate); +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate); +bool bdrv_is_first_non_filter(BlockDriverState *candidate); /* async block I/O */ typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, @@ -374,6 +378,11 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked); void bdrv_eject(BlockDriverState *bs, bool eject_flag); const char *bdrv_get_format_name(BlockDriverState *bs); BlockDriverState *bdrv_find(const char *name); +BlockDriverState *bdrv_find_node(const char *node_name); +BlockDeviceInfoList *bdrv_named_nodes_list(void); +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp); BlockDriverState *bdrv_next(BlockDriverState *bs); void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque); @@ -418,7 +427,10 @@ void bdrv_img_create(const char *filename, const char *fmt, char *options, uint64_t img_size, int flags, Error **errp, bool quiet); -void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); +/* Returns the alignment in bytes that is required so that no bounce buffer + * is required throughout the stack */ +size_t bdrv_opt_mem_align(BlockDriverState *bs); +void bdrv_set_guest_block_size(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); @@ -515,6 +527,14 @@ typedef enum { BLKDBG_FLUSH_TO_OS, BLKDBG_FLUSH_TO_DISK, + BLKDBG_PWRITEV_RMW_HEAD, + BLKDBG_PWRITEV_RMW_AFTER_HEAD, + BLKDBG_PWRITEV_RMW_TAIL, + BLKDBG_PWRITEV_RMW_AFTER_TAIL, + BLKDBG_PWRITEV, + BLKDBG_PWRITEV_ZERO, + BLKDBG_PWRITEV_DONE, + BLKDBG_EVENT_MAX, } BlkDebugEvent; diff --git a/include/block/block_int.h b/include/block/block_int.h index 2772f2f1bd..0bcf1c9b8c 100644 --- a/include/block/block_int.h +++ 
b/include/block/block_int.h @@ -57,22 +57,35 @@ typedef struct BdrvTrackedRequest { BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; + int64_t offset; + unsigned int bytes; bool is_write; + + bool serialising; + int64_t overlap_offset; + unsigned int overlap_bytes; + QLIST_ENTRY(BdrvTrackedRequest) list; Coroutine *co; /* owner, used for deadlock detection */ CoQueue wait_queue; /* coroutines blocked on this request */ + + struct BdrvTrackedRequest *waiting_for; } BdrvTrackedRequest; struct BlockDriver { const char *format_name; int instance_size; - /* if not defined external snapshots are allowed - * future block filters will query their children to build the response + /* this table of boolean contains authorizations for the block operations */ + bool authorizations[BS_AUTHORIZATION_COUNT]; + /* for snapshots complex block filter like Quorum can implement the + * following recursive callback instead of BS_IS_A_FILTER. + * It's purpose is to recurse on the filter children while calling + * bdrv_recurse_is_first_non_filter on them. + * For a sample implementation look in the future Quorum block filter. */ - ExtSnapshotPerm (*bdrv_check_ext_snapshot)(BlockDriverState *bs); + bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, + BlockDriverState *candidate); int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); int (*bdrv_probe_device)(const char *filename); @@ -226,6 +239,8 @@ struct BlockDriver { int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + int (*bdrv_refresh_limits)(BlockDriverState *bs); + /* * Returns 1 if newly created images are guaranteed to contain only * zeros, 0 otherwise. 
@@ -250,6 +265,9 @@ typedef struct BlockLimits { /* optimal transfer length in sectors */ int opt_transfer_length; + + /* memory alignment so that no bounce buffer is needed */ + size_t opt_mem_alignment; } BlockLimits; /* @@ -291,8 +309,8 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight copy-on-read requests */ - unsigned int copy_on_read_in_flight; + /* number of in-flight serialising requests */ + unsigned int serialising_in_flight; /* I/O throttling */ ThrottleState throttle_state; @@ -314,8 +332,11 @@ struct BlockDriverState { /* Whether produces zeros when read beyond eof */ bool zero_beyond_eof; - /* the memory alignment required for the buffers handled by this driver */ - int buffer_alignment; + /* Alignment requirement for offset/length of I/O requests */ + unsigned int request_alignment; + + /* the block size for which the guest device expects atomicity */ + int guest_block_size; /* do we need to tell the quest if we have a volatile write cache? */ int enable_write_cache; @@ -325,11 +346,18 @@ struct BlockDriverState { BlockdevOnError on_read_error, on_write_error; bool iostatus_enabled; BlockDeviceIoStatus iostatus; + + /* the following member gives a name to every node on the bs graph. */ + char node_name[32]; + /* element of the list of named nodes building the graph */ + QTAILQ_ENTRY(BlockDriverState) node_list; + /* Device name is the name associated with the "drive" the guest sees */ char device_name[32]; + /* element of the list of "drives" the guest sees */ + QTAILQ_ENTRY(BlockDriverState) device_list; QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; int refcnt; int in_use; /* users other than guest access, eg. 
block migration */ - QTAILQ_ENTRY(BlockDriverState) list; QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; diff --git a/include/block/qapi.h b/include/block/qapi.h index 9518ee4001..e92c00daf6 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -29,6 +29,7 @@ #include "block/block.h" #include "block/snapshot.h" +BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs); int bdrv_query_snapshot_info_list(BlockDriverState *bs, SnapshotInfoList **p_list, Error **errp); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 33c8acc02e..481a447417 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -79,6 +79,7 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, xen_modified_memory(start, length); } +#if !defined(_WIN32) static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, ram_addr_t start, ram_addr_t pages) @@ -127,6 +128,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, } } } +#endif /* not _WIN32 */ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, ram_addr_t length, diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h new file mode 100644 index 0000000000..4576400fd7 --- /dev/null +++ b/include/hw/acpi/cpu_hotplug.h @@ -0,0 +1,27 @@ +/* + * QEMU ACPI hotplug utilities + * + * Copyright (C) 2013 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#ifndef ACPI_HOTPLUG_H +#define ACPI_HOTPLUG_H + +#include "hw/acpi/acpi.h" +#include "hw/acpi/cpu_hotplug_defs.h" + +typedef struct AcpiCpuHotplug { + MemoryRegion io; + uint8_t sts[ACPI_GPE_PROC_LEN]; +} AcpiCpuHotplug; + +void AcpiCpuHotplug_add(ACPIGPE *gpe, AcpiCpuHotplug *g, CPUState *cpu); + +void AcpiCpuHotplug_init(MemoryRegion *parent, Object *owner, + AcpiCpuHotplug *gpe_cpu, uint16_t base); +#endif diff --git a/include/hw/acpi/cpu_hotplug_defs.h b/include/hw/acpi/cpu_hotplug_defs.h new file mode 100644 index 0000000000..2725b50aac --- /dev/null +++ b/include/hw/acpi/cpu_hotplug_defs.h @@ -0,0 +1,24 @@ +/* + * QEMU ACPI hotplug utilities shared defines + * + * Copyright (C) 2013 Red Hat Inc + * + * Authors: + * Igor Mammedov <imammedo@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef ACPI_HOTPLUG_DEFS_H +#define ACPI_HOTPLUG_DEFS_H + +/* + * ONLY DEFINEs are permitted in this file since it's shared + * between C and ASL code. 
+ */ +#define ACPI_CPU_HOTPLUG_STATUS 4 +#define ACPI_GPE_PROC_LEN 32 +#define ICH9_CPU_HOTPLUG_IO_BASE 0x0CD8 +#define PIIX4_CPU_HOTPLUG_IO_BASE 0xaf00 + +#endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index 82fcf9f2eb..104f419852 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -22,6 +22,7 @@ #define HW_ACPI_ICH9_H #include "hw/acpi/acpi.h" +#include "hw/acpi/cpu_hotplug.h" typedef struct ICH9LPCPMRegs { /* @@ -42,6 +43,9 @@ typedef struct ICH9LPCPMRegs { uint32_t pm_io_base; Notifier powerdown_notifier; + + AcpiCpuHotplug gpe_cpu; + Notifier cpu_added_notifier; } ICH9LPCPMRegs; void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h new file mode 100644 index 0000000000..6230e60954 --- /dev/null +++ b/include/hw/acpi/pcihp.h @@ -0,0 +1,72 @@ +/* + * QEMU<->ACPI BIOS PCI hotplug interface + * + * QEMU supports PCI hotplug via ACPI. This module + * implements the interface between QEMU and the ACPI BIOS. + * Interface specification - see docs/specs/acpi_pci_hotplug.txt + * + * Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) + * Copyright (c) 2006 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#ifndef HW_ACPI_PCIHP_H +#define HW_ACPI_PCIHP_H + +#include <inttypes.h> +#include <qemu/typedefs.h> +#include "hw/pci/pci.h" /* for PCIHotplugState */ + +typedef struct AcpiPciHpPciStatus { + uint32_t up; /* deprecated, maintained for migration compatibility */ + uint32_t down; + uint32_t hotplug_enable; + uint32_t device_present; +} AcpiPciHpPciStatus; + +#define ACPI_PCIHP_PROP_BSEL "acpi-pcihp-bsel" +#define ACPI_PCIHP_MAX_HOTPLUG_BUS 256 + +typedef struct AcpiPciHpState { + AcpiPciHpPciStatus acpi_pcihp_pci_status[ACPI_PCIHP_MAX_HOTPLUG_BUS]; + uint32_t hotplug_select; + PCIBus *root; + MemoryRegion io; +} AcpiPciHpState; + +void acpi_pcihp_init(AcpiPciHpState *, PCIBus *root, + MemoryRegion *address_space_io); + +/* Invoke on device hotplug */ +int acpi_pcihp_device_hotplug(AcpiPciHpState *, PCIDevice *, + PCIHotplugState state); + +/* Called on reset */ +void acpi_pcihp_reset(AcpiPciHpState *s); + +extern const VMStateDescription vmstate_acpi_pcihp_pci_status; + +#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp) \ + VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \ + test_pcihp), \ + VMSTATE_STRUCT_ARRAY_TEST(pcihp.acpi_pcihp_pci_status, state, \ + ACPI_PCIHP_MAX_HOTPLUG_BUS, \ + test_pcihp, 1, \ + vmstate_acpi_pcihp_pci_status, \ + AcpiPciHpPciStatus) + +#endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index eb3da964f0..3e1e81b27b 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -35,7 +35,7 @@ typedef struct PcPciInfo { struct PcGuestInfo { bool has_pci_info; bool isapc_ram_fw; - hwaddr ram_size; + hwaddr ram_size, ram_size_below_4g; unsigned apic_id_limit; bool apic_xrupt_override; uint64_t numa_nodes; @@ -241,6 +241,7 @@ uint16_t pvpanic_port(void); int e820_add_entry(uint64_t, uint64_t, uint32_t); #define PC_Q35_COMPAT_1_7 \ + PC_COMPAT_1_7, \ {\ .driver = "hpet",\ .property = HPET_INTCAP,\ @@ -259,7 +260,20 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); PC_COMPAT_1_4, \ PC_Q35_COMPAT_1_5 +#define 
PC_COMPAT_1_7 \ + {\ + .driver = TYPE_USB_DEVICE,\ + .property = "msos-desc",\ + .value = "no",\ + },\ + {\ + .driver = "PIIX4_PM",\ + .property = "acpi-pci-hotplug-with-bridge-support",\ + .value = "off",\ + } + #define PC_COMPAT_1_6 \ + PC_COMPAT_1_7, \ {\ .driver = "e1000",\ .property = "mitigation",\ diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h index fa45a5b094..e0c749f9e9 100644 --- a/include/hw/isa/isa.h +++ b/include/hw/isa/isa.h @@ -20,6 +20,13 @@ #define TYPE_ISA_BUS "ISA" #define ISA_BUS(obj) OBJECT_CHECK(ISABus, (obj), TYPE_ISA_BUS) +#define TYPE_APPLE_SMC "isa-applesmc" + +static inline bool applesmc_find(void) +{ + return object_resolve_path_type("", TYPE_APPLE_SMC, NULL); +} + typedef struct ISADeviceClass { DeviceClass parent_class; } ISADeviceClass; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 754b82de81..52523467b6 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -387,6 +387,20 @@ int pci_bus_num(PCIBus *s); void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), void *opaque); +void pci_for_each_bus_depth_first(PCIBus *bus, + void *(*begin)(PCIBus *bus, void *parent_state), + void (*end)(PCIBus *bus, void *state), + void *parent_state); + +/* Use this wrapper when specific scan order is not required. 
*/ +static inline +void pci_for_each_bus(PCIBus *bus, + void (*fn)(PCIBus *bus, void *opaque), + void *opaque) +{ + pci_for_each_bus_depth_first(bus, NULL, fn, opaque); +} + PCIBus *pci_find_primary_bus(void); PCIBus *pci_device_root_bus(const PCIDevice *d); const char *pci_root_bus_path(PCIDevice *dev); diff --git a/include/hw/usb.h b/include/hw/usb.h index 2a3ea0c92e..3ef7af7413 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -182,6 +182,7 @@ typedef struct USBDescIface USBDescIface; typedef struct USBDescEndpoint USBDescEndpoint; typedef struct USBDescOther USBDescOther; typedef struct USBDescString USBDescString; +typedef struct USBDescMSOS USBDescMSOS; struct USBDescString { uint8_t index; @@ -208,6 +209,8 @@ struct USBEndpoint { enum USBDeviceFlags { USB_DEV_FLAG_FULL_PATH, USB_DEV_FLAG_IS_HOST, + USB_DEV_FLAG_MSOS_DESC_ENABLE, + USB_DEV_FLAG_MSOS_DESC_IN_USE, }; /* definition of a USB device */ diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index 22d8b8f3e0..7e5f752b7a 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -5,7 +5,7 @@ #include "qapi/qmp/qerror.h" #include "qapi/qmp/qdict.h" #include "block/block.h" -#include "monitor/readline.h" +#include "qemu/readline.h" extern Monitor *cur_mon; extern Monitor *default_mon; diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 5cefd8022a..1ddf97b1c3 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -68,5 +68,6 @@ QDict *qdict_clone_shallow(const QDict *src); void qdict_flatten(QDict *qdict); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); +void qdict_array_split(QDict *src, QList **dst); #endif /* QDICT_H */ diff --git a/include/qemu-io.h b/include/qemu-io.h index a418b46a40..7e7c07c09b 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -42,5 +42,8 @@ bool qemuio_command(BlockDriverState *bs, const char *cmd); void qemuio_add_command(const cmdinfo_t *ci); int qemuio_command_usage(const 
cmdinfo_t *ci); +void qemuio_complete_command(const char *input, + void (*fn)(const char *cmd, void *opaque), + void *opaque); #endif /* QEMU_IO_H */ diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h index 508428ff32..dbd97c4bdb 100644 --- a/include/qemu/config-file.h +++ b/include/qemu/config-file.h @@ -4,6 +4,7 @@ #include <stdio.h> #include "qemu/option.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" QemuOptsList *qemu_find_opts(const char *group); QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); @@ -18,6 +19,11 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname); int qemu_read_config_file(const char *filename); +/* Parse QDict options as a replacement for a config file (allowing multiple + enumerated (0..(n-1)) configuration "sections") */ +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, + Error **errp); + /* Read default QEMU config files */ int qemu_read_default_config_files(bool userconfig); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index b3e2b6d8ea..eac7172bcb 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -240,4 +240,6 @@ static inline void qemu_init_auxval(char **envp) { } void qemu_init_auxval(char **envp); #endif +void qemu_set_tty_echo(int fd, bool echo); + #endif diff --git a/include/monitor/readline.h b/include/qemu/readline.h index 0faf6e1db7..a89fe4a9a9 100644 --- a/include/monitor/readline.h +++ b/include/qemu/readline.h @@ -1,14 +1,15 @@ #ifndef READLINE_H #define READLINE_H -#include "qemu-common.h" - #define READLINE_CMD_BUF_SIZE 4095 #define READLINE_MAX_CMDS 64 #define READLINE_MAX_COMPLETIONS 256 -typedef void ReadLineFunc(Monitor *mon, const char *str, void *opaque); -typedef void ReadLineCompletionFunc(Monitor *mon, +typedef void ReadLinePrintfFunc(void *opaque, const char *fmt, ...); +typedef void ReadLineFlushFunc(void *opaque); +typedef void ReadLineFunc(void *opaque, const char *str, + void *readline_opaque); +typedef 
void ReadLineCompletionFunc(void *opaque, const char *cmdline); typedef struct ReadLineState { @@ -35,7 +36,10 @@ typedef struct ReadLineState { void *readline_opaque; int read_password; char prompt[256]; - Monitor *mon; + + ReadLinePrintfFunc *printf_func; + ReadLineFlushFunc *flush_func; + void *opaque; } ReadLineState; void readline_add_completion(ReadLineState *rs, const char *str); @@ -46,11 +50,13 @@ const char *readline_get_history(ReadLineState *rs, unsigned int index); void readline_handle_byte(ReadLineState *rs, int ch); void readline_start(ReadLineState *rs, const char *prompt, int read_password, - ReadLineFunc *readline_func, void *opaque); + ReadLineFunc *readline_func, void *readline_opaque); void readline_restart(ReadLineState *rs); void readline_show_prompt(ReadLineState *rs); -ReadLineState *readline_init(Monitor *mon, +ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, + ReadLineFlushFunc *flush_func, + void *opaque, ReadLineCompletionFunc *completion_finder); #endif /* !READLINE_H */ diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 5afcffc3f9..7f9a074c2a 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -405,7 +405,7 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg); * timer_init: * @ts: the timer to be initialised * @timer_list: the timer list to attach the timer to - * @scale: the scale value for the tiemr + * @scale: the scale value for the timer * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * @@ -422,7 +422,7 @@ void timer_init(QEMUTimer *ts, /** * timer_new_tl: * @timer_list: the timer list to attach the timer to - * @scale: the scale value for the tiemr + * @scale: the scale value for the timer * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * @@ -447,7 +447,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, /** * timer_new: * @type: the 
clock type to use - * @scale: the scale value for the tiemr + * @scale: the scale value for the timer * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * @@ -499,7 +499,7 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return ret; } -static int kvm_set_ioeventfd_mmio(int fd, uint32_t addr, uint32_t val, +static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val, bool assign, uint32_t size, bool datamatch) { int ret; @@ -1423,16 +1423,22 @@ int kvm_init(void) nc++; } - s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0); - if (s->vmfd < 0) { + do { + ret = kvm_ioctl(s, KVM_CREATE_VM, 0); + } while (ret == -EINTR); + + if (ret < 0) { + fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -s->vmfd, + strerror(-ret)); + #ifdef TARGET_S390X fprintf(stderr, "Please add the 'switch_amode' kernel parameter to " "your host kernel command line\n"); #endif - ret = s->vmfd; goto err; } + s->vmfd = ret; missing_cap = kvm_check_extension_list(s, kvm_required_capabilites); if (!missing_cap) { missing_cap = diff --git a/linux-user/s390x/syscall.h b/linux-user/s390x/syscall.h index ea8c304840..e5ce30b667 100644 --- a/linux-user/s390x/syscall.h +++ b/linux-user/s390x/syscall.h @@ -22,4 +22,4 @@ struct target_pt_regs { #define UNAME_MACHINE "s390x" -#define TARGET_CLONE_BACKWARDS +#define TARGET_CLONE_BACKWARDS2 diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 0ac05b85f2..bc0ac98d4f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -2340,7 +2340,7 @@ static abi_long do_socketcall(int num, abi_ulong vptr) size_t len; abi_ulong flags; abi_ulong addr; - socklen_t addrlen; + abi_ulong addrlen; if (get_user_ual(sockfd, vptr) || get_user_ual(msg, vptr + n) @@ -2406,7 +2406,7 @@ static abi_long do_socketcall(int num, abi_ulong vptr) abi_ulong level; abi_ulong optname; abi_ulong optval; - socklen_t optlen; + abi_ulong optlen; if (get_user_ual(sockfd, vptr) || get_user_ual(level, 
vptr + n) @@ -37,7 +37,7 @@ #include "ui/qemu-spice.h" #include "sysemu/sysemu.h" #include "monitor/monitor.h" -#include "monitor/readline.h" +#include "qemu/readline.h" #include "ui/console.h" #include "sysemu/blockdev.h" #include "audio/audio.h" @@ -217,8 +217,8 @@ static const mon_cmd_t qmp_cmds[]; Monitor *cur_mon; Monitor *default_mon; -static void monitor_command_cb(Monitor *mon, const char *cmdline, - void *opaque); +static void monitor_command_cb(void *opaque, const char *cmdline, + void *readline_opaque); static inline int qmp_cmd_mode(const Monitor *mon) { @@ -4338,9 +4338,10 @@ static void monitor_find_completion_by_table(Monitor *mon, } } -static void monitor_find_completion(Monitor *mon, +static void monitor_find_completion(void *opaque, const char *cmdline) { + Monitor *mon = opaque; char *args[MAX_ARGS]; int nb_args, len; @@ -4751,8 +4752,11 @@ static void monitor_read(void *opaque, const uint8_t *buf, int size) cur_mon = old_mon; } -static void monitor_command_cb(Monitor *mon, const char *cmdline, void *opaque) +static void monitor_command_cb(void *opaque, const char *cmdline, + void *readline_opaque) { + Monitor *mon = opaque; + monitor_suspend(mon); handle_user_command(mon, cmdline); monitor_resume(mon); @@ -4881,6 +4885,22 @@ static void sortcmdlist(void) * End: */ +/* These functions just adapt the readline interface in a typesafe way. We + * could cast function pointers but that discards compiler checks. + */ +static void monitor_readline_printf(void *opaque, const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + monitor_vprintf(opaque, fmt, ap); + va_end(ap); +} + +static void monitor_readline_flush(void *opaque) +{ + monitor_flush(opaque); +} + void monitor_init(CharDriverState *chr, int flags) { static int is_first_init = 1; @@ -4898,7 +4918,10 @@ void monitor_init(CharDriverState *chr, int flags) mon->chr = chr; mon->flags = flags; if (flags & MONITOR_USE_READLINE) { - mon->rs = readline_init(mon, monitor_find_completion); + mon->rs = readline_init(monitor_readline_printf, + monitor_readline_flush, + mon, + monitor_find_completion); monitor_read_command(mon, 0); } @@ -4920,9 +4943,11 @@ void monitor_init(CharDriverState *chr, int flags) default_mon = mon; } -static void bdrv_password_cb(Monitor *mon, const char *password, void *opaque) +static void bdrv_password_cb(void *opaque, const char *password, + void *readline_opaque) { - BlockDriverState *bs = opaque; + Monitor *mon = opaque; + BlockDriverState *bs = readline_opaque; int ret = 0; if (bdrv_set_key(bs, password) != 0) { @@ -164,7 +164,6 @@ void qemu_macaddr_default_if_unset(MACAddr *macaddr) static char *assign_name(NetClientState *nc1, const char *model) { NetClientState *nc; - char buf[256]; int id = 0; QTAILQ_FOREACH(nc, &net_clients, next) { @@ -176,9 +175,7 @@ static char *assign_name(NetClientState *nc1, const char *model) } } - snprintf(buf, sizeof(buf), "%s.%d", model, id); - - return g_strdup(buf); + return g_strdup_printf("%s.%d", model, id); } static void qemu_net_client_destructor(NetClientState *nc) diff --git a/net/tap-linux.c b/net/tap-linux.c index 36c09e24d8..812bf2dfc6 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -52,14 +52,17 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - if (ioctl(fd, TUNGETFEATURES, &features) == 0 && - features & IFF_ONE_QUEUE) { + if (ioctl(fd, TUNGETFEATURES, &features) == -1) { + error_report("warning: TUNGETFEATURES failed: %s", 
strerror(errno)); + features = 0; + } + + if (features & IFF_ONE_QUEUE) { ifr.ifr_flags |= IFF_ONE_QUEUE; } if (*vnet_hdr) { - if (ioctl(fd, TUNGETFEATURES, &features) == 0 && - features & IFF_VNET_HDR) { + if (features & IFF_VNET_HDR) { *vnet_hdr = 1; ifr.ifr_flags |= IFF_VNET_HDR; } else { @@ -82,8 +85,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, } if (mq_required) { - if ((ioctl(fd, TUNGETFEATURES, &features) != 0) || - !(features & IFF_MULTI_QUEUE)) { + if (!(features & IFF_MULTI_QUEUE)) { error_report("multiqueue required, but no kernel " "support for IFF_MULTI_QUEUE available"); close(fd); diff --git a/pc-bios/kvmvapic.bin b/pc-bios/kvmvapic.bin Binary files differindex 045f5c2884..045f5c2884 100755..100644 --- a/pc-bios/kvmvapic.bin +++ b/pc-bios/kvmvapic.bin diff --git a/pc-bios/multiboot.bin b/pc-bios/multiboot.bin Binary files differindex e772713c95..e772713c95 100755..100644 --- a/pc-bios/multiboot.bin +++ b/pc-bios/multiboot.bin diff --git a/pc-bios/sgabios.bin b/pc-bios/sgabios.bin Binary files differindex c3da4c3d0a..c3da4c3d0a 100755..100644 --- a/pc-bios/sgabios.bin +++ b/pc-bios/sgabios.bin diff --git a/qapi-schema.json b/qapi-schema.json index f27c48a285..05ced9d572 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -810,6 +810,8 @@ # # @file: the filename of the backing device # +# @node-name: #optional the name of the block driver node (Since 2.0) +# # @ro: true if the backing device was open read-only # # @drv: the name of the block format used to open the backing device. As of @@ -857,10 +859,9 @@ # # Since: 0.14.0 # -# Notes: This interface is only found in @BlockInfo. 
## { 'type': 'BlockDeviceInfo', - 'data': { 'file': 'str', 'ro': 'bool', 'drv': 'str', + 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', '*backing_file': 'str', 'backing_file_depth': 'int', 'encrypted': 'bool', 'encryption_key_missing': 'bool', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', @@ -1022,15 +1023,17 @@ # # @stats: A @BlockDeviceStats for the device. # -# @parent: #optional This may point to the backing block device if this is a -# a virtual block device. If it's a backing block, this will point -# to the backing file is one is present. +# @parent: #optional This describes the file block device if it has one. +# +# @backing: #optional This describes the backing block device if it has one. +# (Since 2.0) # # Since: 0.14.0 ## { 'type': 'BlockStats', 'data': {'*device': 'str', 'stats': 'BlockDeviceStats', - '*parent': 'BlockStats'} } + '*parent': 'BlockStats', + '*backing': 'BlockStats'} } ## # @query-blockstats: @@ -1675,7 +1678,11 @@ # determine which ones are encrypted, set the passwords with this command, and # then start the guest with the @cont command. # -# @device: the name of the device to set the password on +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the block backend device to set the password on +# +# @node-name: #optional graph node name to set the password on (Since 2.0) # # @password: the password to use for the device # @@ -1689,7 +1696,8 @@ # # Since: 0.14.0 ## -{ 'command': 'block_passwd', 'data': {'device': 'str', 'password': 'str'} } +{ 'command': 'block_passwd', 'data': {'*device': 'str', + '*node-name': 'str', 'password': 'str'} } ## # @balloon: @@ -1716,7 +1724,11 @@ # # Resize a block image while a guest is running. # -# @device: the name of the device to get the image resized +# Either @device or @node-name must be set but not both. 
+# +# @device: #optional the name of the device to get the image resized +# +# @node-name: #optional graph node name to get the image resized (Since 2.0) # # @size: new image size in bytes # @@ -1725,7 +1737,9 @@ # # Since: 0.14.0 ## -{ 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }} +{ 'command': 'block_resize', 'data': { '*device': 'str', + '*node-name': 'str', + 'size': 'int' }} ## # @NewImageMode @@ -1747,18 +1761,25 @@ ## # @BlockdevSnapshot # -# @device: the name of the device to generate the snapshot from. +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the device to generate the snapshot from. +# +# @node-name: #optional graph node name to generate the snapshot from (Since 2.0) # # @snapshot-file: the target of the new image. A new file will be created. # +# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0) +# # @format: #optional the format of the snapshot image, default is 'qcow2'. # # @mode: #optional whether and how QEMU should create a new image, default is # 'absolute-paths'. ## { 'type': 'BlockdevSnapshot', - 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', - '*mode': 'NewImageMode' } } + 'data': { '*device': 'str', '*node-name': 'str', + 'snapshot-file': 'str', '*snapshot-node-name': 'str', + '*format': 'str', '*mode': 'NewImageMode' } } ## # @BlockdevSnapshotInternal @@ -1973,6 +1994,13 @@ # user needs to complete the job with the block-job-complete # command after getting the ready event. (Since 2.0) # +# If the base image is smaller than top, then the base image +# will be resized to be the same size as top. If top is +# smaller than the base image, the base will not be +# truncated. If you want the base image size to match the +# size of the smaller top, you can safely truncate it +# yourself once the commit operation successfully completes. 
+# # # @speed: #optional the maximum speed, in bytes per second # @@ -2009,6 +2037,17 @@ { 'command': 'drive-backup', 'data': 'DriveBackup' } ## +# @query-named-block-nodes +# +# Get the named block driver list +# +# Returns: the list of BlockDeviceInfo +# +# Since 2.0 +## +{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] } + +## # @drive-mirror # # Start mirroring a block device's writes to a new destination. @@ -4090,6 +4129,7 @@ # @id: #optional id by which the new block device can be referred to. # This is a required option on the top level of blockdev-add, and # currently not allowed on any other level. +# @node-name: #optional the name of a block driver state node (Since 2.0) # @discard: #optional discard-related options (default: ignore) # @cache: #optional cache-related options # @aio: #optional AIO backend (default: threads) @@ -4105,6 +4145,7 @@ { 'type': 'BlockdevOptionsBase', 'data': { 'driver': 'str', '*id': 'str', + '*node-name': 'str', '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', '*aio': 'BlockdevAioOptions', @@ -4201,6 +4242,116 @@ '*pass-discard-other': 'bool' } } ## +# @BlkdebugEvent +# +# Trigger events supported by blkdebug. 
+## +{ 'enum': 'BlkdebugEvent', + 'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table', + 'l1_grow.activate_table', 'l2_load', 'l2_update', + 'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write', + 'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio', + 'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read', + 'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update', + 'refblock_load', 'refblock_update', 'refblock_update_part', + 'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write', + 'refblock_alloc.write_blocks', 'refblock_alloc.write_table', + 'refblock_alloc.switch_table', 'cluster_alloc', + 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', + 'flush_to_disk' ] } + +## +# @BlkdebugInjectErrorOptions +# +# Describes a single error injection for blkdebug. +# +# @event: trigger event +# +# @state: #optional the state identifier blkdebug needs to be in to +# actually trigger the event; defaults to "any" +# +# @errno: #optional error identifier (errno) to be returned; defaults to +# EIO +# +# @sector: #optional specifies the sector index which has to be affected +# in order to actually trigger the event; defaults to "any +# sector" +# +# @once: #optional disables further events after this one has been +# triggered; defaults to false +# +# @immediately: #optional fail immediately; defaults to false +# +# Since: 2.0 +## +{ 'type': 'BlkdebugInjectErrorOptions', + 'data': { 'event': 'BlkdebugEvent', + '*state': 'int', + '*errno': 'int', + '*sector': 'int', + '*once': 'bool', + '*immediately': 'bool' } } + +## +# @BlkdebugSetStateOptions +# +# Describes a single state-change event for blkdebug. 
+# +# @event: trigger event +# +# @state: #optional the current state identifier blkdebug needs to be in; +# defaults to "any" +# +# @new_state: the state identifier blkdebug is supposed to assume if +# this event is triggered +# +# Since: 2.0 +## +{ 'type': 'BlkdebugSetStateOptions', + 'data': { 'event': 'BlkdebugEvent', + '*state': 'int', + 'new_state': 'int' } } + +## +# @BlockdevOptionsBlkdebug +# +# Driver specific block device options for blkdebug. +# +# @image: underlying raw block device (or image file) +# +# @config: #optional filename of the configuration file +# +# @align: #optional required alignment for requests in bytes +# +# @inject-error: #optional array of error injection descriptions +# +# @set-state: #optional array of state-change descriptions +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsBlkdebug', + 'data': { 'image': 'BlockdevRef', + '*config': 'str', + '*align': 'int', + '*inject-error': ['BlkdebugInjectErrorOptions'], + '*set-state': ['BlkdebugSetStateOptions'] } } + +## +# @BlockdevOptionsBlkverify +# +# Driver specific block device options for blkverify. +# +# @test: block device to be tested +# +# @raw: raw image used for verification +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsBlkverify', + 'data': { 'test': 'BlockdevRef', + 'raw': 'BlockdevRef' } } + +## # @BlockdevOptions # # Options for creating a block device. @@ -4224,10 +4375,8 @@ # TODO sheepdog: Wait for structured options # TODO ssh: Should take InetSocketAddress for 'host'? 
'vvfat': 'BlockdevOptionsVVFAT', - -# TODO blkdebug: Wait for structured options -# TODO blkverify: Wait for structured options - + 'blkdebug': 'BlockdevOptionsBlkdebug', + 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', 'cow': 'BlockdevOptionsGenericCOWFormat', diff --git a/qemu-doc.texi b/qemu-doc.texi index 4e9c6e9b6e..ce61f30d6e 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -536,11 +536,11 @@ support of multiple VM snapshots. Supported options: @table @code @item compat -Determines the qcow2 version to use. @code{compat=0.10} uses the traditional -image format that can be read by any QEMU since 0.10 (this is the default). +Determines the qcow2 version to use. @code{compat=0.10} uses the +traditional image format that can be read by any QEMU since 0.10. @code{compat=1.1} enables image format extensions that only QEMU 1.1 and -newer understand. Amongst others, this includes zero clusters, which allow -efficient copy-on-read for sparse images. +newer understand (this is the default). Amongst others, this includes +zero clusters, which allow efficient copy-on-read for sparse images. @item backing_file File name of a base image (see @option{create} subcommand) diff --git a/qemu-img.texi b/qemu-img.texi index 1bba91efde..526d56a458 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -57,7 +57,9 @@ indicates that target image must be compressed (qcow format only) @item -h with or without a command shows help and lists the supported formats @item -p -display progress bar (convert and rebase commands only) +display progress bar (compare, convert and rebase commands only). +If the @var{-p} option is not used for a command that supports it, the +progress is reported when the process receives a @code{SIGUSR1} signal. @item -q Quiet mode - do not print any output (except errors). There's no progress bar in case both @var{-q} and @var{-p} options are used. 
@@ -140,7 +142,12 @@ it doesn't need to be specified separately in this case. @item commit [-f @var{fmt}] [-t @var{cache}] @var{filename} -Commit the changes recorded in @var{filename} in its base image. +Commit the changes recorded in @var{filename} in its base image or backing file. +If the backing file is smaller than the snapshot, then the backing file will be +resized to be the same size as the snapshot. If the snapshot is smaller than +the backing file, the backing file will not be truncated. If you want the +backing file to match the size of the smaller snapshot, you can safely truncate +it yourself once the commit operation successfully completes. @item compare [-f @var{fmt}] [-F @var{fmt}] [-p] [-s] [-q] @var{filename1} @var{filename2} @@ -391,11 +398,11 @@ support of multiple VM snapshots. Supported options: @table @code @item compat -Determines the qcow2 version to use. @code{compat=0.10} uses the traditional -image format that can be read by any QEMU since 0.10 (this is the default). +Determines the qcow2 version to use. @code{compat=0.10} uses the +traditional image format that can be read by any QEMU since 0.10. @code{compat=1.1} enables image format extensions that only QEMU 1.1 and -newer understand. Amongst others, this includes zero clusters, which allow -efficient copy-on-read for sparse images. +newer understand (this is the default). Amongst others, this includes zero +clusters, which allow efficient copy-on-read for sparse images. 
@item backing_file File name of a base image (see @option{create} subcommand) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 85e4982bd8..f1de24c91c 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -12,6 +12,7 @@ #include "block/block_int.h" #include "block/qapi.h" #include "qemu/main-loop.h" +#include "qemu/timer.h" #define CMD_NOFILE_OK 0x01 @@ -94,6 +95,21 @@ static const cmdinfo_t *find_command(const char *cmd) return NULL; } +/* Invoke fn() for commands with a matching prefix */ +void qemuio_complete_command(const char *input, + void (*fn)(const char *cmd, void *opaque), + void *opaque) +{ + cmdinfo_t *ct; + size_t input_len = strlen(input); + + for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) { + if (strncmp(input, ct->name, input_len) == 0) { + fn(ct->name, opaque); + } + } +} + static char **breakline(char *input, int *count) { int c = 0; @@ -2038,6 +2054,46 @@ static const cmdinfo_t abort_cmd = { .oneline = "simulate a program crash using abort(3)", }; +static void sleep_cb(void *opaque) +{ + bool *expired = opaque; + *expired = true; +} + +static int sleep_f(BlockDriverState *bs, int argc, char **argv) +{ + char *endptr; + long ms; + struct QEMUTimer *timer; + bool expired = false; + + ms = strtol(argv[1], &endptr, 0); + if (ms < 0 || *endptr != '\0') { + printf("%s is not a valid number\n", argv[1]); + return 0; + } + + timer = timer_new_ns(QEMU_CLOCK_HOST, sleep_cb, &expired); + timer_mod(timer, qemu_clock_get_ns(QEMU_CLOCK_HOST) + SCALE_MS * ms); + + while (!expired) { + main_loop_wait(false); + } + + timer_free(timer); + + return 0; +} + +static const cmdinfo_t sleep_cmd = { + .name = "sleep", + .argmin = 1, + .argmax = 1, + .cfunc = sleep_f, + .flags = CMD_NOFILE_OK, + .oneline = "waits for the given value in milliseconds", +}; + static void help_oneline(const char *cmd, const cmdinfo_t *ct) { if (cmd) { @@ -2151,4 +2207,5 @@ static void __attribute((constructor)) init_qemuio_commands(void) qemuio_add_command(&resume_cmd); 
qemuio_add_command(&wait_break_cmd); qemuio_add_command(&abort_cmd); + qemuio_add_command(&sleep_cmd); } @@ -18,6 +18,7 @@ #include "qemu/main-loop.h" #include "qemu/option.h" #include "qemu/config-file.h" +#include "qemu/readline.h" #include "block/block_int.h" #include "trace/control.h" @@ -32,6 +33,8 @@ extern int qemuio_misalign; static int ncmdline; static char **cmdline; +static ReadLineState *readline_state; + static int close_f(BlockDriverState *bs, int argc, char **argv) { bdrv_unref(bs); @@ -56,7 +59,7 @@ static int openfile(char *name, int flags, int growable, QDict *opts) } if (growable) { - if (bdrv_file_open(&qemuio_bs, name, opts, flags, &local_err)) { + if (bdrv_file_open(&qemuio_bs, name, NULL, opts, flags, &local_err)) { fprintf(stderr, "%s: can't open device %s: %s\n", progname, name, error_get_pretty(local_err)); error_free(local_err); @@ -160,11 +163,13 @@ static int open_f(BlockDriverState *bs, int argc, char **argv) flags |= BDRV_O_RDWR; } - if (optind != argc - 1) { + if (optind == argc - 1) { + return openfile(argv[optind], flags, growable, opts); + } else if (optind == argc) { + return openfile(NULL, flags, growable, opts); + } else { return qemuio_command_usage(&open_cmd); } - - return openfile(argv[optind], flags, growable, opts); } static int quit_f(BlockDriverState *bs, int argc, char **argv) @@ -203,14 +208,6 @@ static void usage(const char *name) name); } - -#if defined(ENABLE_READLINE) -# include <readline/history.h> -# include <readline/readline.h> -#elif defined(ENABLE_EDITLINE) -# include <histedit.h> -#endif - static char *get_prompt(void) { static char prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ]; @@ -222,52 +219,53 @@ static char *get_prompt(void) return prompt; } -#if defined(ENABLE_READLINE) -static char *fetchline(void) +static void readline_printf_func(void *opaque, const char *fmt, ...) 
{ - char *line = readline(get_prompt()); - if (line && *line) { - add_history(line); - } - return line; + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); } -#elif defined(ENABLE_EDITLINE) -static char *el_get_prompt(EditLine *e) + +static void readline_flush_func(void *opaque) { - return get_prompt(); + fflush(stdout); } -static char *fetchline(void) +static void readline_func(void *opaque, const char *str, void *readline_opaque) { - static EditLine *el; - static History *hist; - HistEvent hevent; - char *line; - int count; - - if (!el) { - hist = history_init(); - history(hist, &hevent, H_SETSIZE, 100); - el = el_init(progname, stdin, stdout, stderr); - el_source(el, NULL); - el_set(el, EL_SIGNAL, 1); - el_set(el, EL_PROMPT, el_get_prompt); - el_set(el, EL_HIST, history, (const char *)hist); - } - line = strdup(el_gets(el, &count)); - if (line) { - if (count > 0) { - line[count-1] = '\0'; - } - if (*line) { - history(hist, &hevent, H_ENTER, line); + char **line = readline_opaque; + *line = g_strdup(str); +} + +static void completion_match(const char *cmd, void *opaque) +{ + readline_add_completion(readline_state, cmd); +} + +static void readline_completion_func(void *opaque, const char *str) +{ + readline_set_completion_index(readline_state, strlen(str)); + qemuio_complete_command(str, completion_match, NULL); +} + +static char *fetchline_readline(void) +{ + char *line = NULL; + + readline_start(readline_state, get_prompt(), 0, readline_func, &line); + while (!line) { + int ch = getchar(); + if (ch == EOF) { + break; } + readline_handle_byte(readline_state, ch); } return line; } -#else -# define MAXREADLINESZ 1024 -static char *fetchline(void) + +#define MAXREADLINESZ 1024 +static char *fetchline_fgets(void) { char *p, *line = g_malloc(MAXREADLINESZ); @@ -283,7 +281,15 @@ static char *fetchline(void) return line; } -#endif + +static char *fetchline(void) +{ + if (readline_state) { + return fetchline_readline(); + } else { + return 
fetchline_fgets(); + } +} static void prep_fetchline(void *opaque) { @@ -339,6 +345,11 @@ static void add_user_command(char *optarg) cmdline[ncmdline-1] = optarg; } +static void reenable_tty_echo(void) +{ + qemu_set_tty_echo(STDIN_FILENO, true); +} + int main(int argc, char **argv) { int readonly = 0; @@ -435,6 +446,15 @@ int main(int argc, char **argv) qemuio_add_command(&open_cmd); qemuio_add_command(&close_cmd); + if (isatty(STDIN_FILENO)) { + readline_state = readline_init(readline_printf_func, + readline_flush_func, + NULL, + readline_completion_func); + qemu_set_tty_echo(STDIN_FILENO, false); + atexit(reenable_tty_echo); + } + /* open the device */ if (!readonly) { flags |= BDRV_O_RDWR; @@ -453,5 +473,6 @@ int main(int argc, char **argv) if (qemuio_bs) { bdrv_unref(qemuio_bs); } + g_free(readline_state); return 0; } diff --git a/qemu-seccomp.c b/qemu-seccomp.c index b7c125364c..caa926ebf2 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -220,7 +220,12 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { { SCMP_SYS(io_cancel), 241 }, { SCMP_SYS(io_setup), 241 }, { SCMP_SYS(io_destroy), 241 }, - { SCMP_SYS(arch_prctl), 240 } + { SCMP_SYS(arch_prctl), 240 }, + { SCMP_SYS(mkdir), 240 }, + { SCMP_SYS(fchmod), 240 }, + { SCMP_SYS(shmget), 240 }, + { SCMP_SYS(shmat), 240 }, + { SCMP_SYS(shmdt), 240 } }; int seccomp_start(void) diff --git a/qmp-commands.hx b/qmp-commands.hx index 02cc815bc5..cce6b81da4 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -931,7 +931,7 @@ EQMP { .name = "block_resize", - .args_type = "device:B,size:o", + .args_type = "device:s?,node-name:s?,size:o", .mhandler.cmd_new = qmp_marshal_input_block_resize, }, @@ -944,6 +944,7 @@ Resize a block image while a guest is running. 
Arguments: - "device": the device's ID, must be unique (json-string) +- "node-name": the node name in the block driver state graph (json-string) - "size": new size Example: @@ -965,6 +966,45 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_commit, }, +SQMP +block-commit +------------ + +Live commit of data from overlay image nodes into backing nodes - i.e., writes +data between 'top' and 'base' into 'base'. + +Arguments: + +- "device": The device's ID, must be unique (json-string) +- "base": The file name of the backing image to write data into. + If not specified, this is the deepest backing image + (json-string, optional) +- "top": The file name of the backing image within the image chain, + which contains the topmost data to be committed down. + + If top == base, that is an error. + If top == active, the job will not be completed by itself, + user needs to complete the job with the block-job-complete + command after getting the ready event. (Since 2.0) + + If the base image is smaller than top, then the base image + will be resized to be the same size as top. If top is + smaller than the base image, the base will not be + truncated. If you want the base image size to match the + size of the smaller top, you can safely truncate it + yourself once the commit operation successfully completes. + (json-string) +- "speed": the maximum speed, in bytes per second (json-int, optional) + + +Example: + +-> { "execute": "block-commit", "arguments": { "device": "virtio0", + "top": "/tmp/snap1.qcow2" } } +<- { "return": {} } + +EQMP + { .name = "drive-backup", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," @@ -1088,7 +1128,9 @@ actions array: - "data": a dictionary. The contents depend on the value of "type". 
When "type" is "blockdev-snapshot-sync": - "device": device name to snapshot (json-string) + - "node-name": graph node name to snapshot (json-string) - "snapshot-file": name of new image file (json-string) + - "snapshot-node-name": graph node name of the new snapshot (json-string) - "format": format of new image (json-string, optional) - "mode": whether and how QEMU should create the snapshot file (NewImageMode, optional, default "absolute-paths") @@ -1103,6 +1145,11 @@ Example: { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0", "snapshot-file": "/some/place/my-image", "format": "qcow2" } }, + { 'type': 'blockdev-snapshot-sync', 'data' : { "node-name": "myfile", + "snapshot-file": "/some/place/my-image2", + "snapshot-node-name": "node3432", + "mode": "existing", + "format": "qcow2" } }, { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1", "snapshot-file": "/some/place/my-image2", "mode": "existing", @@ -1116,7 +1163,7 @@ EQMP { .name = "blockdev-snapshot-sync", - .args_type = "device:B,snapshot-file:s,format:s?,mode:s?", + .args_type = "device:s?,node-name:s?,snapshot-file:s,snapshot-node-name:s?,format:s?,mode:s?", .mhandler.cmd_new = qmp_marshal_input_blockdev_snapshot_sync, }, @@ -1133,7 +1180,9 @@ snapshot image, default is qcow2. Arguments: - "device": device name to snapshot (json-string) +- "node-name": graph node name to snapshot (json-string) - "snapshot-file": name of new image file (json-string) +- "snapshot-node-name": graph node name of the new snapshot (json-string) - "mode": whether and how QEMU should create the snapshot file (NewImageMode, optional, default "absolute-paths") - "format": format of new image (json-string, optional) @@ -1503,7 +1552,7 @@ EQMP { .name = "block_passwd", - .args_type = "device:B,password:s", + .args_type = "device:s?,node-name:s?,password:s", .mhandler.cmd_new = qmp_marshal_input_block_passwd, }, @@ -1516,6 +1565,7 @@ Set the password of encrypted block devices. 
Arguments: - "device": device name (json-string) +- "node-name": name in the block driver state graph (json-string) - "password": password (json-string) Example: @@ -3346,3 +3396,64 @@ Example (2): <- { "return": {} } EQMP + + { + .name = "query-named-block-nodes", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_named_block_nodes, + }, + +SQMP +@query-named-block-nodes +------------------------ + +Return a list of BlockDeviceInfo for all the named block driver nodes + +Example: + +-> { "execute": "query-named-block-nodes" } +<- { "return": [ { "ro":false, + "drv":"qcow2", + "encrypted":false, + "file":"disks/test.qcow2", + "node-name": "my-node", + "backing_file_depth":1, + "bps":1000000, + "bps_rd":0, + "bps_wr":0, + "iops":1000000, + "iops_rd":0, + "iops_wr":0, + "bps_max": 8000000, + "bps_rd_max": 0, + "bps_wr_max": 0, + "iops_max": 0, + "iops_rd_max": 0, + "iops_wr_max": 0, + "iops_size": 0, + "image":{ + "filename":"disks/test.qcow2", + "format":"qcow2", + "virtual-size":2048000, + "backing_file":"base.qcow2", + "full-backing-filename":"disks/base.qcow2", + "backing-filename-format":"qcow2", + "snapshots":[ + { + "id": "1", + "name": "snapshot1", + "vm-state-size": 0, + "date-sec": 10000200, + "date-nsec": 12, + "vm-clock-sec": 206, + "vm-clock-nsec": 30 + } + ], + "backing-image":{ + "filename":"disks/base.qcow2", + "format":"qcow2", + "virtual-size":2048000 + } + } } ] } + +EQMP diff --git a/qobject/qdict.c b/qobject/qdict.c index 17e14f08b1..a3924f24bd 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -477,7 +477,43 @@ static void qdict_destroy_obj(QObject *obj) g_free(qdict); } -static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix) +static void qdict_flatten_qdict(QDict *qdict, QDict *target, + const char *prefix); + +static void qdict_flatten_qlist(QList *qlist, QDict *target, const char *prefix) +{ + QObject *value; + const QListEntry *entry; + char *new_key; + int i; + + /* This function is never called with
prefix == NULL, i.e., it is always + * called from within qdict_flatten_q(list|dict)(). Therefore, it does not + * need to remove list entries during the iteration (the whole list will be + * deleted eventually anyway from qdict_flatten_qdict()). */ + assert(prefix); + + entry = qlist_first(qlist); + + for (i = 0; entry; entry = qlist_next(entry), i++) { + value = qlist_entry_obj(entry); + new_key = g_strdup_printf("%s.%i", prefix, i); + + if (qobject_type(value) == QTYPE_QDICT) { + qdict_flatten_qdict(qobject_to_qdict(value), target, new_key); + } else if (qobject_type(value) == QTYPE_QLIST) { + qdict_flatten_qlist(qobject_to_qlist(value), target, new_key); + } else { + /* All other types are moved to the target unchanged. */ + qobject_incref(value); + qdict_put_obj(target, new_key, value); + } + + g_free(new_key); + } +} + +static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) { QObject *value; const QDictEntry *entry, *next; @@ -500,8 +536,12 @@ static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix) if (qobject_type(value) == QTYPE_QDICT) { /* Entries of QDicts are processed recursively, the QDict object * itself disappears. */ - qdict_do_flatten(qobject_to_qdict(value), target, - new_key ? new_key : entry->key); + qdict_flatten_qdict(qobject_to_qdict(value), target, + new_key ? new_key : entry->key); + delete = true; + } else if (qobject_type(value) == QTYPE_QLIST) { + qdict_flatten_qlist(qobject_to_qlist(value), target, + new_key ? new_key : entry->key); delete = true; } else if (prefix) { /* All other objects are moved to the target unchanged. */ @@ -526,12 +566,14 @@ static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix) /** * qdict_flatten(): For each nested QDict with key x, all fields with key y - * are moved to this QDict and their key is renamed to "x.y". This operation - * is applied recursively for nested QDicts. + * are moved to this QDict and their key is renamed to "x.y". 
For each nested + * QList with key x, the field at index y is moved to this QDict with the key + * "x.y" (i.e., the reverse of what qdict_array_split() does). + * This operation is applied recursively for nested QDicts and QLists. */ void qdict_flatten(QDict *qdict) { - qdict_do_flatten(qdict, qdict, NULL); + qdict_flatten_qdict(qdict, qdict, NULL); } /* extract all the src QDict entries starting by start into dst */ @@ -554,3 +596,40 @@ void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start) entry = next; } } + +/** + * qdict_array_split(): This function moves array-like elements of a QDict into + * a new QList of QDicts. Every entry in the original QDict with a key prefixed + * "%u.", where %u designates an unsigned integer starting at 0 and + * incrementally counting up, will be moved to a new QDict at index %u in the + * output QList with the key prefix removed. The function terminates when there + * is no entry in the QDict with a prefix directly (incrementally) following the + * last one. 
+ * Example: {"0.a": 42, "0.b": 23, "1.x": 0, "3.y": 1, "o.o": 7} + * (or {"1.x": 0, "3.y": 1, "0.a": 42, "o.o": 7, "0.b": 23}) + * => [{"a": 42, "b": 23}, {"x": 0}] + * and {"3.y": 1, "o.o": 7} (remainder of the old QDict) + */ +void qdict_array_split(QDict *src, QList **dst) +{ + unsigned i; + + *dst = qlist_new(); + + for (i = 0; i < UINT_MAX; i++) { + QDict *subqdict; + char prefix[32]; + size_t snprintf_ret; + + snprintf_ret = snprintf(prefix, 32, "%u.", i); + assert(snprintf_ret < 32); + + qdict_extract_subqdict(src, &subqdict, prefix); + if (!qdict_size(subqdict)) { + QDECREF(subqdict); + break; + } + + qlist_append_obj(*dst, QOBJECT(subqdict)); + } +} diff --git a/scripts/create_config b/scripts/create_config index b1adbf5897..06f5316d9d 100755 --- a/scripts/create_config +++ b/scripts/create_config @@ -26,6 +26,10 @@ case $line in # save for the next definitions prefix=${line#*=} ;; + IASL=*) # iasl executable + value=${line#*=} + echo "#define CONFIG_IASL $value" + ;; CONFIG_AUDIO_DRIVERS=*) drivers=${line#*=} echo "#define CONFIG_AUDIO_DRIVERS \\" diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py new file mode 100644 index 0000000000..1ed8b67883 --- /dev/null +++ b/scripts/dump-guest-memory.py @@ -0,0 +1,339 @@ +# This python script adds a new gdb command, "dump-guest-memory". It +# should be loaded with "source dump-guest-memory.py" at the (gdb) +# prompt. +# +# Copyright (C) 2013, Red Hat, Inc. +# +# Authors: +# Laszlo Ersek <lersek@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or later. See +# the COPYING file in the top-level directory. +# +# The leading docstring doesn't have idiomatic Python formatting. It is +# printed by gdb's "help" command (the first line is printed in the +# "help data" summary), and it should match how other help texts look in +# gdb. + +import struct + +class DumpGuestMemory(gdb.Command): + """Extract guest vmcore from qemu process coredump. 
+ +The sole argument is FILE, identifying the target file to write the +guest vmcore to. + +This GDB command reimplements the dump-guest-memory QMP command in +python, using the representation of guest memory as captured in the qemu +coredump. The qemu process that has been dumped must have had the +command line option "-machine dump-guest-core=on". + +For simplicity, the "paging", "begin" and "end" parameters of the QMP +command are not supported -- no attempt is made to get the guest's +internal paging structures (ie. paging=false is hard-wired), and guest +memory is always fully dumped. + +Only x86_64 guests are supported. + +The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are +not written to the vmcore. Preparing these would require context that is +only present in the KVM host kernel module when the guest is alive. A +fake ELF note is written instead, only to keep the ELF parser of "crash" +happy. + +Dependent on how busted the qemu process was at the time of the +coredump, this command might produce unpredictable results. If qemu +deliberately called abort(), or it was dumped in response to a signal at +a halfway fortunate point, then its coredump should be in reasonable +shape and this command should mostly work.""" + + TARGET_PAGE_SIZE = 0x1000 + TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 + + # Various ELF constants + EM_X86_64 = 62 # AMD x86-64 target machine + ELFDATA2LSB = 1 # little endian + ELFCLASS64 = 2 + ELFMAG = "\x7FELF" + EV_CURRENT = 1 + ET_CORE = 4 + PT_LOAD = 1 + PT_NOTE = 4 + + # Special value for e_phnum. This indicates that the real number of + # program headers is too large to fit into e_phnum. Instead the real + # value is in the field sh_info of section 0. + PN_XNUM = 0xFFFF + + # Format strings for packing and header size calculation. 
+ ELF64_EHDR = ("4s" # e_ident/magic + "B" # e_ident/class + "B" # e_ident/data + "B" # e_ident/version + "B" # e_ident/osabi + "8s" # e_ident/pad + "H" # e_type + "H" # e_machine + "I" # e_version + "Q" # e_entry + "Q" # e_phoff + "Q" # e_shoff + "I" # e_flags + "H" # e_ehsize + "H" # e_phentsize + "H" # e_phnum + "H" # e_shentsize + "H" # e_shnum + "H" # e_shstrndx + ) + ELF64_PHDR = ("I" # p_type + "I" # p_flags + "Q" # p_offset + "Q" # p_vaddr + "Q" # p_paddr + "Q" # p_filesz + "Q" # p_memsz + "Q" # p_align + ) + + def __init__(self): + super(DumpGuestMemory, self).__init__("dump-guest-memory", + gdb.COMMAND_DATA, + gdb.COMPLETE_FILENAME) + self.uintptr_t = gdb.lookup_type("uintptr_t") + self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR) + self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR) + + def int128_get64(self, val): + assert (val["hi"] == 0) + return val["lo"] + + def qtailq_foreach(self, head, field_str): + var_p = head["tqh_first"] + while (var_p != 0): + var = var_p.dereference() + yield var + var_p = var[field_str]["tqe_next"] + + def qemu_get_ram_block(self, ram_addr): + ram_blocks = gdb.parse_and_eval("ram_list.blocks") + for block in self.qtailq_foreach(ram_blocks, "next"): + if (ram_addr - block["offset"] < block["length"]): + return block + raise gdb.GdbError("Bad ram offset %x" % ram_addr) + + def qemu_get_ram_ptr(self, ram_addr): + block = self.qemu_get_ram_block(ram_addr) + return block["host"] + (ram_addr - block["offset"]) + + def memory_region_get_ram_ptr(self, mr): + if (mr["alias"] != 0): + return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) + + mr["alias_offset"]) + return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK) + + def guest_phys_blocks_init(self): + self.guest_phys_blocks = [] + + def guest_phys_blocks_append(self): + print "guest RAM blocks:" + print ("target_start target_end host_addr message " + "count") + print ("---------------- ---------------- ---------------- ------- " + 
"-----") + + current_map_p = gdb.parse_and_eval("address_space_memory.current_map") + current_map = current_map_p.dereference() + for cur in range(current_map["nr"]): + flat_range = (current_map["ranges"] + cur).dereference() + mr = flat_range["mr"].dereference() + + # we only care about RAM + if (not mr["ram"]): + continue + + section_size = self.int128_get64(flat_range["addr"]["size"]) + target_start = self.int128_get64(flat_range["addr"]["start"]) + target_end = target_start + section_size + host_addr = (self.memory_region_get_ram_ptr(mr) + + flat_range["offset_in_region"]) + predecessor = None + + # find continuity in guest physical address space + if (len(self.guest_phys_blocks) > 0): + predecessor = self.guest_phys_blocks[-1] + predecessor_size = (predecessor["target_end"] - + predecessor["target_start"]) + + # the memory API guarantees monotonically increasing + # traversal + assert (predecessor["target_end"] <= target_start) + + # we want continuity in both guest-physical and + # host-virtual memory + if (predecessor["target_end"] < target_start or + predecessor["host_addr"] + predecessor_size != host_addr): + predecessor = None + + if (predecessor is None): + # isolated mapping, add it to the list + self.guest_phys_blocks.append({"target_start": target_start, + "target_end" : target_end, + "host_addr" : host_addr}) + message = "added" + else: + # expand predecessor until @target_end; predecessor's + # start doesn't change + predecessor["target_end"] = target_end + message = "joined" + + print ("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(self.uintptr_t), + message, len(self.guest_phys_blocks))) + + def cpu_get_dump_info(self): + # We can't synchronize the registers with KVM post-mortem, and + # the bits in (first_x86_cpu->env.hflags) seem to be stale; they + # may not reflect long mode for example. Hence just assume the + # most common values. This also means that instruction pointer + # etc. 
will be bogus in the dump, but at least the RAM contents + # should be valid. + self.dump_info = {"d_machine": self.EM_X86_64, + "d_endian" : self.ELFDATA2LSB, + "d_class" : self.ELFCLASS64} + + def encode_elf64_ehdr_le(self): + return self.elf64_ehdr_le.pack( + self.ELFMAG, # e_ident/magic + self.dump_info["d_class"], # e_ident/class + self.dump_info["d_endian"], # e_ident/data + self.EV_CURRENT, # e_ident/version + 0, # e_ident/osabi + "", # e_ident/pad + self.ET_CORE, # e_type + self.dump_info["d_machine"], # e_machine + self.EV_CURRENT, # e_version + 0, # e_entry + self.elf64_ehdr_le.size, # e_phoff + 0, # e_shoff + 0, # e_flags + self.elf64_ehdr_le.size, # e_ehsize + self.elf64_phdr_le.size, # e_phentsize + self.phdr_num, # e_phnum + 0, # e_shentsize + 0, # e_shnum + 0 # e_shstrndx + ) + + def encode_elf64_note_le(self): + return self.elf64_phdr_le.pack(self.PT_NOTE, # p_type + 0, # p_flags + (self.memory_offset - + len(self.note)), # p_offset + 0, # p_vaddr + 0, # p_paddr + len(self.note), # p_filesz + len(self.note), # p_memsz + 0 # p_align + ) + + def encode_elf64_load_le(self, offset, start_hwaddr, range_size): + return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type + 0, # p_flags + offset, # p_offset + 0, # p_vaddr + start_hwaddr, # p_paddr + range_size, # p_filesz + range_size, # p_memsz + 0 # p_align + ) + + def note_init(self, name, desc, type): + # name must include a trailing NUL + namesz = (len(name) + 1 + 3) / 4 * 4 + descsz = (len(desc) + 3) / 4 * 4 + fmt = ("<" # little endian + "I" # n_namesz + "I" # n_descsz + "I" # n_type + "%us" # name + "%us" # desc + % (namesz, descsz)) + self.note = struct.pack(fmt, + len(name) + 1, len(desc), type, name, desc) + + def dump_init(self): + self.guest_phys_blocks_init() + self.guest_phys_blocks_append() + self.cpu_get_dump_info() + # we have no way to retrieve the VCPU status from KVM + # post-mortem + self.note_init("NONE", "EMPTY", 0) + + # Account for PT_NOTE. 
+ self.phdr_num = 1 + + # We should never reach PN_XNUM for paging=false dumps: there's + # just a handful of discontiguous ranges after merging. + self.phdr_num += len(self.guest_phys_blocks) + assert (self.phdr_num < self.PN_XNUM) + + # Calculate the ELF file offset where the memory dump commences: + # + # ELF header + # PT_NOTE + # PT_LOAD: 1 + # PT_LOAD: 2 + # ... + # PT_LOAD: len(self.guest_phys_blocks) + # ELF note + # memory dump + self.memory_offset = (self.elf64_ehdr_le.size + + self.elf64_phdr_le.size * self.phdr_num + + len(self.note)) + + def dump_begin(self, vmcore): + vmcore.write(self.encode_elf64_ehdr_le()) + vmcore.write(self.encode_elf64_note_le()) + running = self.memory_offset + for block in self.guest_phys_blocks: + range_size = block["target_end"] - block["target_start"] + vmcore.write(self.encode_elf64_load_le(running, + block["target_start"], + range_size)) + running += range_size + vmcore.write(self.note) + + def dump_iterate(self, vmcore): + qemu_core = gdb.inferiors()[0] + for block in self.guest_phys_blocks: + cur = block["host_addr"] + left = block["target_end"] - block["target_start"] + print ("dumping range at %016x for length %016x" % + (cur.cast(self.uintptr_t), left)) + while (left > 0): + chunk_size = min(self.TARGET_PAGE_SIZE, left) + chunk = qemu_core.read_memory(cur, chunk_size) + vmcore.write(chunk) + cur += chunk_size + left -= chunk_size + + def create_vmcore(self, filename): + vmcore = open(filename, "wb") + self.dump_begin(vmcore) + self.dump_iterate(vmcore) + vmcore.close() + + def invoke(self, args, from_tty): + # Unwittingly pressing the Enter key after the command should + # not dump the same multi-gig coredump to the same file. 
+ self.dont_repeat() + + argv = gdb.string_to_argv(args) + if (len(argv) != 1): + raise gdb.GdbError("usage: dump-guest-memory FILE") + + self.dump_init() + self.create_vmcore(argv[0]) + +DumpGuestMemory() diff --git a/scripts/qapi.py b/scripts/qapi.py index 750e9fb552..9b3de4c7c3 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -247,7 +247,7 @@ def c_var(name, protect=True): 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq']) # namespace pollution: - polluted_words = set(['unix']) + polluted_words = set(['unix', 'errno']) if protect and (name in c89_words | c99_words | c11_words | gcc_words | cpp_words | polluted_words): return "q_" + name return name.replace('-', '_').lstrip("*") diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index 37ef599324..3dde372e46 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -56,7 +56,7 @@ def c(events): out('', - ' TraceEvent *eventp = trace_event_id(%(event_id)s);', + ' TraceEvent *eventp = trace_event_id(%(event_enum)s);', ' bool _state = trace_event_get_state_dynamic(eventp);', ' if (!_state) {', ' return;', @@ -65,6 +65,7 @@ def c(events): ' if (trace_record_start(&rec, %(event_id)s, %(size_str)s)) {', ' return; /* Trace Buffer Full, Event Dropped ! 
*/', ' }', + event_enum = 'TRACE_' + event.name.upper(), event_id = num, size_str = sizestr, ) @@ -93,9 +94,6 @@ def c(events): def h(events): - out('#include "trace/simple.h"', - '') - for event in events: out('void trace_%(name)s(%(args)s);', name = event.name, diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0eea8c7160..e6f7eaf5cd 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -336,6 +336,10 @@ typedef struct ExtSaveArea { static const ExtSaveArea ext_save_areas[] = { [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX, .offset = 0x240, .size = 0x100 }, + [3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, + .offset = 0x3c0, .size = 0x40 }, + [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, + .offset = 0x400, .size = 0x10 }, }; const char *get_register_name_32(unsigned int reg) @@ -2461,6 +2465,9 @@ static void x86_cpu_reset(CPUState *s) cpu_breakpoint_remove_all(env, BP_CPU); cpu_watchpoint_remove_all(env, BP_CPU); + env->tsc_adjust = 0; + env->tsc = 0; + #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. 
*/ if (s->cpu_index == 0) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 1d94a9dbd7..1fcbc82698 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -380,9 +380,14 @@ #define MSR_VM_HSAVE_PA 0xc0010117 -#define XSTATE_FP 1 -#define XSTATE_SSE 2 -#define XSTATE_YMM 4 +#define MSR_IA32_BNDCFGS 0x00000d90 + +#define XSTATE_FP (1ULL << 0) +#define XSTATE_SSE (1ULL << 1) +#define XSTATE_YMM (1ULL << 2) +#define XSTATE_BNDREGS (1ULL << 3) +#define XSTATE_BNDCSR (1ULL << 4) + /* CPUID feature words */ typedef enum FeatureWord { @@ -545,6 +550,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_ERMS (1 << 9) #define CPUID_7_0_EBX_INVPCID (1 << 10) #define CPUID_7_0_EBX_RTM (1 << 11) +#define CPUID_7_0_EBX_MPX (1 << 14) #define CPUID_7_0_EBX_RDSEED (1 << 18) #define CPUID_7_0_EBX_ADX (1 << 19) #define CPUID_7_0_EBX_SMAP (1 << 20) @@ -695,6 +701,16 @@ typedef union { uint64_t q; } MMXReg; +typedef struct BNDReg { + uint64_t lb; + uint64_t ub; +} BNDReg; + +typedef struct BNDCSReg { + uint64_t cfgu; + uint64_t sts; +} BNDCSReg; + #ifdef HOST_WORDS_BIGENDIAN #define XMM_B(n) _b[15 - (n)] #define XMM_W(n) _w[7 - (n)] @@ -908,6 +924,9 @@ typedef struct CPUX86State { uint64_t xstate_bv; XMMReg ymmh_regs[CPU_NB_REGS]; + BNDReg bnd_regs[4]; + BNDCSReg bndcs_regs; + uint64_t msr_bndcfgs; uint64_t xcr0; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7522e98072..0a21c3085d 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -69,6 +69,7 @@ static bool has_msr_feature_control; static bool has_msr_async_pf_en; static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; +static bool has_msr_bndcfgs; static bool has_msr_kvm_steal_time; static int lm_capable_kernel; @@ -772,6 +773,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_misc_enable = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_BNDCFGS) { + has_msr_bndcfgs = true; + continue; + } } } @@ -975,6 +980,8 @@ static int kvm_put_fpu(X86CPU *cpu) 
#define XSAVE_XMM_SPACE 40 #define XSAVE_XSTATE_BV 128 #define XSAVE_YMMH_SPACE 144 +#define XSAVE_BNDREGS 240 +#define XSAVE_BNDCSR 256 static int kvm_put_xsave(X86CPU *cpu) { @@ -1007,6 +1014,10 @@ static int kvm_put_xsave(X86CPU *cpu) *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv; memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs, sizeof env->ymmh_regs); + memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs, + sizeof env->bnd_regs); + memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs, + sizeof(env->bndcs_regs)); r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; } @@ -1104,6 +1115,25 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); } +/* + * Provide a separate write service for the feature control MSR in order to + * kick the VCPU out of VMXON or even guest mode on reset. This has to be done + * before writing any other state because forcibly leaving nested mode + * invalidates the VCPU state. 
+ */ +static int kvm_put_msr_feature_control(X86CPU *cpu) +{ + struct { + struct kvm_msrs info; + struct kvm_msr_entry entry; + } msr_data; + + kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL, + cpu->env.msr_ia32_feature_control); + msr_data.info.nmsrs = 1; + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); +} + static int kvm_put_msrs(X86CPU *cpu, int level) { CPUX86State *env = &cpu->env; @@ -1131,6 +1161,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE, env->msr_ia32_misc_enable); } + if (has_msr_bndcfgs) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1139,22 +1172,12 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); } #endif - if (level == KVM_PUT_FULL_STATE) { - /* - * KVM is yet unable to synchronize TSC values of multiple VCPUs on - * writeback. Until this is fixed, we only write the offset to SMP - * guests after migration, desynchronizing the VCPUs, but avoiding - * huge jump-backs that would occur without any writeback at all. - */ - if (smp_cpus == 1 || env->tsc != 0) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); - } - } /* * The following MSRs have side effects on the guest or are too heavy * for normal writeback. Limit them to reset or full state updates. 
*/ if (level >= KVM_PUT_RESET_STATE) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); @@ -1204,10 +1227,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_vapic) { kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_feature_control) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL, - env->msr_ia32_feature_control); - } + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ } if (env->mcg_cap) { int i; @@ -1289,6 +1311,10 @@ static int kvm_get_xsave(X86CPU *cpu) env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV]; memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE], sizeof env->ymmh_regs); + memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS], + sizeof env->bnd_regs); + memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR], + sizeof(env->bndcs_regs)); return 0; } @@ -1435,6 +1461,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_feature_control) { msrs[n++].index = MSR_IA32_FEATURE_CONTROL; } + if (has_msr_bndcfgs) { + msrs[n++].index = MSR_IA32_BNDCFGS; + } if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1550,6 +1579,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_FEATURE_CONTROL: env->msr_ia32_feature_control = msrs[i].data; break; + case MSR_IA32_BNDCFGS: + env->msr_bndcfgs = msrs[i].data; + break; default: if (msrs[i].index >= MSR_MC0_CTL && msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { @@ -1799,6 +1831,13 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) { + ret = kvm_put_msr_feature_control(x86_cpu); + if (ret < 0) { + return ret; + } + } + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret; diff --git a/target-i386/machine.c 
b/target-i386/machine.c index e568da2ba4..2de196428d 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -63,6 +63,21 @@ static const VMStateDescription vmstate_ymmh_reg = { #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg) +static const VMStateDescription vmstate_bnd_regs = { + .name = "bnd_regs", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(lb, BNDReg), + VMSTATE_UINT64(ub, BNDReg), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_BND_REGS(_field, _state, _n) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_bnd_regs, BNDReg) + static const VMStateDescription vmstate_mtrr_var = { .name = "mtrr_var", .version_id = 1, @@ -506,6 +521,39 @@ static const VMStateDescription vmstate_msr_architectural_pmu = { } }; +static bool mpx_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (env->bnd_regs[i].lb || env->bnd_regs[i].ub) { + return true; + } + } + + if (env->bndcs_regs.cfgu || env->bndcs_regs.sts) { + return true; + } + + return !!env->msr_bndcfgs; +} + +static const VMStateDescription vmstate_mpx = { + .name = "cpu/mpx", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField[]) { + VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4), + VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU), + VMSTATE_UINT64(env.bndcs_regs.sts, X86CPU), + VMSTATE_UINT64(env.msr_bndcfgs, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -638,6 +686,9 @@ const VMStateDescription vmstate_x86_cpu = { .vmsd = &vmstate_msr_architectural_pmu, .needed = pmu_enable_needed, } , { + .vmsd = &vmstate_mpx, + .needed = mpx_needed, + } , { /* empty */ } } diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 495b901080..5d4cf9386e 
100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -99,18 +99,31 @@ static const int tcg_target_call_oarg_regs[] = { # define TCG_REG_L1 TCG_REG_EDX #endif +/* The host compiler should supply <cpuid.h> to enable runtime features + detection, as we're not going to go so far as our own inline assembly. + If not available, default values will be assumed. */ +#if defined(CONFIG_CPUID_H) +#include <cpuid.h> +#endif + /* For 32-bit, we are going to attempt to determine at runtime whether cmov - is available. However, the host compiler must supply <cpuid.h>, as we're - not going to go so far as our own inline assembly. */ + is available. */ #if TCG_TARGET_REG_BITS == 64 # define have_cmov 1 #elif defined(CONFIG_CPUID_H) -#include <cpuid.h> static bool have_cmov; #else # define have_cmov 0 #endif +/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are + going to attempt to determine at runtime whether movbe is available. */ +#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE) +static bool have_movbe; +#else +# define have_movbe 0 +#endif + static uint8_t *tb_ret_addr; static void patch_reloc(uint8_t *code_ptr, int type, @@ -240,13 +253,14 @@ static inline int tcg_target_const_match(tcg_target_long val, #endif #define P_EXT 0x100 /* 0x0f opcode prefix */ -#define P_DATA16 0x200 /* 0x66 opcode prefix */ +#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ +#define P_DATA16 0x400 /* 0x66 opcode prefix */ #if TCG_TARGET_REG_BITS == 64 -# define P_ADDR32 0x400 /* 0x67 opcode prefix */ -# define P_REXW 0x800 /* Set REX.W = 1 */ -# define P_REXB_R 0x1000 /* REG field as byte register */ -# define P_REXB_RM 0x2000 /* R/M field as byte register */ -# define P_GS 0x4000 /* gs segment override */ +# define P_ADDR32 0x800 /* 0x67 opcode prefix */ +# define P_REXW 0x1000 /* Set REX.W = 1 */ +# define P_REXB_R 0x2000 /* REG field as byte register */ +# define P_REXB_RM 0x4000 /* R/M field as byte register */ +# define P_GS 0x8000 /* gs segment override */ 
#else # define P_ADDR32 0 # define P_REXW 0 @@ -279,6 +293,8 @@ static inline int tcg_target_const_match(tcg_target_long val, #define OPC_MOVB_EvIz (0xc6) #define OPC_MOVL_EvIz (0xc7) #define OPC_MOVL_Iv (0xb8) +#define OPC_MOVBE_GyMy (0xf0 | P_EXT38) +#define OPC_MOVBE_MyGy (0xf1 | P_EXT38) #define OPC_MOVSBL (0xbe | P_EXT) #define OPC_MOVSWL (0xbf | P_EXT) #define OPC_MOVSLQ (0x63 | P_REXW) @@ -381,7 +397,7 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) } rex = 0; - rex |= (opc & P_REXW) >> 8; /* REX.W */ + rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ rex |= (r & 8) >> 1; /* REX.R */ rex |= (x & 8) >> 2; /* REX.X */ rex |= (rm & 8) >> 3; /* REX.B */ @@ -398,9 +414,13 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) tcg_out8(s, (uint8_t)(rex | 0x40)); } - if (opc & P_EXT) { + if (opc & (P_EXT | P_EXT38)) { tcg_out8(s, 0x0f); + if (opc & P_EXT38) { + tcg_out8(s, 0x38); + } } + tcg_out8(s, opc); } #else @@ -409,8 +429,11 @@ static void tcg_out_opc(TCGContext *s, int opc) if (opc & P_DATA16) { tcg_out8(s, 0x66); } - if (opc & P_EXT) { + if (opc & (P_EXT | P_EXT38)) { tcg_out8(s, 0x0f); + if (opc & P_EXT38) { + tcg_out8(s, 0x38); + } } tcg_out8(s, opc); } @@ -1336,7 +1359,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg base, intptr_t ofs, int seg, TCGMemOp memop) { - const TCGMemOp bswap = memop & MO_BSWAP; + const TCGMemOp real_bswap = memop & MO_BSWAP; + TCGMemOp bswap = real_bswap; + int movop = OPC_MOVL_GvEv; + + if (have_movbe && real_bswap) { + bswap = 0; + movop = OPC_MOVBE_GyMy; + } switch (memop & MO_SSIZE) { case MO_UB: @@ -1347,14 +1377,19 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, break; case MO_UW: tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); - if (bswap) { + if (real_bswap) { tcg_out_rolw_8(s, datalo); } break; case MO_SW: - if (bswap) { - tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); - 
tcg_out_rolw_8(s, datalo); + if (real_bswap) { + if (have_movbe) { + tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, + datalo, base, ofs); + } else { + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); + tcg_out_rolw_8(s, datalo); + } tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); } else { tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg, @@ -1362,16 +1397,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } break; case MO_UL: - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); if (bswap) { tcg_out_bswap32(s, datalo); } break; #if TCG_TARGET_REG_BITS == 64 case MO_SL: - if (bswap) { - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); - tcg_out_bswap32(s, datalo); + if (real_bswap) { + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } tcg_out_ext32s(s, datalo, datalo); } else { tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs); @@ -1380,27 +1417,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, #endif case MO_Q: if (TCG_TARGET_REG_BITS == 64) { - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg, - datalo, base, ofs); + tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); if (bswap) { tcg_out_bswap64(s, datalo); } } else { - if (bswap) { + if (real_bswap) { int t = datalo; datalo = datahi; datahi = t; } if (base != datalo) { - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, - datalo, base, ofs); - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, - datahi, base, ofs + 4); + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4); } else { - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, - datahi, base, ofs + 4); - tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, - datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4); + 
tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); } if (bswap) { tcg_out_bswap32(s, datalo); @@ -1476,13 +1508,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg base, intptr_t ofs, int seg, TCGMemOp memop) { - const TCGMemOp bswap = memop & MO_BSWAP; - /* ??? Ideally we wouldn't need a scratch register. For user-only, we could perform the bswap twice to restore the original value instead of moving to the scratch. But as it is, the L constraint means that TCG_REG_L0 is definitely free here. */ const TCGReg scratch = TCG_REG_L0; + const TCGMemOp real_bswap = memop & MO_BSWAP; + TCGMemOp bswap = real_bswap; + int movop = OPC_MOVL_EvGv; + + if (have_movbe && real_bswap) { + bswap = 0; + movop = OPC_MOVBE_MyGy; + } switch (memop & MO_SIZE) { case MO_8: @@ -1501,8 +1539,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_rolw_8(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg, - datalo, base, ofs); + tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); break; case MO_32: if (bswap) { @@ -1510,7 +1547,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_bswap32(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); break; case MO_64: if (TCG_TARGET_REG_BITS == 64) { @@ -1519,8 +1556,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_bswap64(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg, - datalo, base, ofs); + tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); } else if (bswap) { tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); tcg_out_bswap32(s, scratch); @@ -1529,8 +1565,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_bswap32(s, scratch); 
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); } else { - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4); + if (real_bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); } break; default: @@ -1985,9 +2026,7 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_setcond_i32, { "q", "r", "ri" } }, { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, -#if TCG_TARGET_HAS_movcond_i32 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, -#endif { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, @@ -2157,13 +2196,23 @@ static void tcg_target_qemu_prologue(TCGContext *s) static void tcg_target_init(TCGContext *s) { - /* For 32-bit, 99% certainty that we're running on hardware that supports - cmov, but we still need to check. In case cmov is not available, we'll - use a small forward branch. */ -#ifndef have_cmov +#if !(defined(have_cmov) && defined(have_movbe)) { unsigned a, b, c, d; - have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV)); + int ret = __get_cpuid(1, &a, &b, &c, &d); + +# ifndef have_cmov + /* For 32-bit, 99% certainty that we're running on hardware that + supports cmov, but we still need to check. In case cmov is not + available, we'll use a small forward branch. */ + have_cmov = ret && (d & bit_CMOV); +# endif + +# ifndef have_movbe + /* MOVBE is only available on Intel Atom and Haswell CPUs, so we + need to probe for it. */ + have_movbe = ret && (c & bit_MOVBE); +# endif } #endif @@ -586,7 +586,7 @@ static void tcg_temp_free_internal(int idx) assert(ts->temp_allocated != 0); ts->temp_allocated = 0; - k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0); + k = ts->base_type + (ts->temp_local ? 
TCG_TYPE_COUNT : 0); set_bit(idx, s->free_temps[k].l); } diff --git a/tests/Makefile b/tests/Makefile index 0aaf657be5..fd36eee641 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -151,7 +151,7 @@ tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a tests/check-qlist$(EXESUF): tests/check-qlist.o libqemuutil.a tests/check-qfloat$(EXESUF): tests/check-qfloat.o libqemuutil.a tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a -tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a +tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a libqemustub.a tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a diff --git a/tests/acpi-test-data/pc/APIC b/tests/acpi-test-data/pc/APIC Binary files differnew file mode 100644 index 0000000000..84509e0ae4 --- /dev/null +++ b/tests/acpi-test-data/pc/APIC diff --git a/tests/acpi-test-data/pc/DSDT b/tests/acpi-test-data/pc/DSDT Binary files differnew file mode 100644 index 0000000000..fbf1c3e6e8 --- /dev/null +++ b/tests/acpi-test-data/pc/DSDT diff --git a/tests/acpi-test-data/pc/FACP b/tests/acpi-test-data/pc/FACP Binary files differnew file mode 100644 index 0000000000..0639999ed1 --- /dev/null +++ b/tests/acpi-test-data/pc/FACP diff --git a/tests/acpi-test-data/pc/FACS b/tests/acpi-test-data/pc/FACS Binary files differnew file mode 100644 index 0000000000..fc67ecc407 --- /dev/null +++ b/tests/acpi-test-data/pc/FACS diff --git a/tests/acpi-test-data/pc/HPET b/tests/acpi-test-data/pc/HPET Binary files differnew file mode 100644 index 0000000000..df689b8f99 --- /dev/null +++ b/tests/acpi-test-data/pc/HPET diff --git a/tests/acpi-test-data/pc/SSDT b/tests/acpi-test-data/pc/SSDT Binary files differnew 
file mode 100644 index 0000000000..a51c68e21b --- /dev/null +++ b/tests/acpi-test-data/pc/SSDT diff --git a/tests/acpi-test-data/q35/APIC b/tests/acpi-test-data/q35/APIC Binary files differnew file mode 100644 index 0000000000..84509e0ae4 --- /dev/null +++ b/tests/acpi-test-data/q35/APIC diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT Binary files differnew file mode 100644 index 0000000000..5086b839a6 --- /dev/null +++ b/tests/acpi-test-data/q35/DSDT diff --git a/tests/acpi-test-data/q35/FACP b/tests/acpi-test-data/q35/FACP Binary files differnew file mode 100644 index 0000000000..19f3ac3ce6 --- /dev/null +++ b/tests/acpi-test-data/q35/FACP diff --git a/tests/acpi-test-data/q35/FACS b/tests/acpi-test-data/q35/FACS Binary files differnew file mode 100644 index 0000000000..fc67ecc407 --- /dev/null +++ b/tests/acpi-test-data/q35/FACS diff --git a/tests/acpi-test-data/q35/HPET b/tests/acpi-test-data/q35/HPET Binary files differnew file mode 100644 index 0000000000..df689b8f99 --- /dev/null +++ b/tests/acpi-test-data/q35/HPET diff --git a/tests/acpi-test-data/q35/MCFG b/tests/acpi-test-data/q35/MCFG Binary files differnew file mode 100644 index 0000000000..79ceb27a03 --- /dev/null +++ b/tests/acpi-test-data/q35/MCFG diff --git a/tests/acpi-test-data/q35/SSDT b/tests/acpi-test-data/q35/SSDT Binary files differnew file mode 100644 index 0000000000..9c6cad8b0b --- /dev/null +++ b/tests/acpi-test-data/q35/SSDT diff --git a/tests/acpi-test-data/rebuild-expected-aml.sh b/tests/acpi-test-data/rebuild-expected-aml.sh new file mode 100755 index 0000000000..ab98498884 --- /dev/null +++ b/tests/acpi-test-data/rebuild-expected-aml.sh @@ -0,0 +1,36 @@ +#! /bin/bash + +# +# Rebuild expected AML files for acpi unit-test +# +# Copyright (c) 2013 Red Hat Inc. +# +# Authors: +# Marcel Apfelbaum <marcel.a@redhat.com> +# +# This work is licensed under the terms of the GNU GPLv2. +# See the COPYING.LIB file in the top-level directory. 
+ +qemu= + +if [ -e x86_64-softmmu/qemu-system-x86_64 ]; then + qemu="x86_64-softmmu/qemu-system-x86_64" +elif [ -e i386-softmmu/qemu-system-i386 ]; then + qemu="i386-softmmu/qemu-system-i386" +else + echo "Run 'make' to build the qemu exectutable!" + echo "Run this script from the build directory." + exit 1; +fi + +if [ ! -e "tests/acpi-test" ]; then + echo "Test: acpi-test is required! Run make check before this script." + echo "Run this script from the build directory." + exit 1; +fi + +TEST_ACPI_REBUILD_AML=y QTEST_QEMU_BINARY=$qemu tests/acpi-test + +echo "The files were rebuilt and can be added to git." +echo "However, if new files were created, please copy them manually" \ + "to tests/acpi-test-data/pc/ or tests/acpi-test-data/q35/ ." diff --git a/tests/acpi-test.c b/tests/acpi-test.c index df1af83158..31f5359787 100644 --- a/tests/acpi-test.c +++ b/tests/acpi-test.c @@ -13,19 +13,32 @@ #include <string.h> #include <stdio.h> #include <glib.h> +#include <glib/gstdio.h> #include "qemu-common.h" #include "libqtest.h" #include "qemu/compiler.h" #include "hw/i386/acpi-defs.h" +#define MACHINE_PC "pc" +#define MACHINE_Q35 "q35" + +#define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" +#define ACPI_SSDT_SIGNATURE 0x54445353 /* SSDT */ + /* DSDT and SSDTs format */ typedef struct { AcpiTableHeader header; - uint8_t *aml; - int aml_len; -} AcpiSdtTable; + gchar *aml; /* aml bytecode from guest */ + gsize aml_len; + gchar *aml_file; + gchar *asl; /* asl code generated from aml */ + gsize asl_len; + gchar *asl_file; + bool asl_file_retain; /* do not delete the temp asl */ +} QEMU_PACKED AcpiSdtTable; typedef struct { + const char *machine; uint32_t rsdp_addr; AcpiRsdpDescriptor rsdp_table; AcpiRsdtDescriptorRev1 rsdt_table; @@ -33,8 +46,7 @@ typedef struct { AcpiFacsDescriptorRev1 facs_table; uint32_t *rsdt_tables_addr; int rsdt_tables_nr; - AcpiSdtTable dsdt_table; - GArray *ssdt_tables; + GArray *tables; } test_data; #define LOW(x) ((x) & 0xff) @@ -51,13 +63,13 
@@ typedef struct { field = readb(addr); \ break; \ case 2: \ - field = le16_to_cpu(readw(addr)); \ + field = readw(addr); \ break; \ case 4: \ - field = le32_to_cpu(readl(addr)); \ + field = readl(addr); \ break; \ case 8: \ - field = le64_to_cpu(readq(addr)); \ + field = readq(addr); \ break; \ default: \ g_assert(false); \ @@ -91,8 +103,10 @@ typedef struct { /* Boot sector code: write SIGNATURE into memory, * then halt. + * Q35 machine requires a minimum 0x7e000 bytes disk. + * (bug or feature?) */ -static uint8_t boot_sector[0x200] = { +static uint8_t boot_sector[0x7e000] = { /* 7c00: mov $0xdead,%ax */ [0x00] = 0xb8, [0x01] = LOW(SIGNATURE), @@ -117,17 +131,45 @@ static uint8_t boot_sector[0x200] = { }; static const char *disk = "tests/acpi-test-disk.raw"; +static const char *data_dir = "tests/acpi-test-data"; +#ifdef CONFIG_IASL +static const char *iasl = stringify(CONFIG_IASL); +#else +static const char *iasl; +#endif static void free_test_data(test_data *data) { + AcpiSdtTable *temp; int i; - g_free(data->rsdt_tables_addr); - for (i = 0; i < data->ssdt_tables->len; ++i) { - g_free(g_array_index(data->ssdt_tables, AcpiSdtTable, i).aml); + if (data->rsdt_tables_addr) { + g_free(data->rsdt_tables_addr); + } + + for (i = 0; i < data->tables->len; ++i) { + temp = &g_array_index(data->tables, AcpiSdtTable, i); + if (temp->aml) { + g_free(temp->aml); + } + if (temp->aml_file) { + if (g_strstr_len(temp->aml_file, -1, "aml-")) { + unlink(temp->aml_file); + } + g_free(temp->aml_file); + } + if (temp->asl) { + g_free(temp->asl); + } + if (temp->asl_file) { + if (!temp->asl_file_retain) { + unlink(temp->asl_file); + } + g_free(temp->asl_file); + } } - g_array_free(data->ssdt_tables, false); - g_free(data->dsdt_table.aml); + + g_array_free(data->tables, false); } static uint8_t acpi_checksum(const uint8_t *data, int len) @@ -292,34 +334,219 @@ static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr) ACPI_READ_ARRAY_PTR(sdt_table->aml, sdt_table->aml_len, 
addr); checksum = acpi_checksum((uint8_t *)sdt_table, sizeof(AcpiTableHeader)) + - acpi_checksum(sdt_table->aml, sdt_table->aml_len); + acpi_checksum((uint8_t *)sdt_table->aml, sdt_table->aml_len); g_assert(!checksum); } static void test_acpi_dsdt_table(test_data *data) { - AcpiSdtTable *dsdt_table = &data->dsdt_table; + AcpiSdtTable dsdt_table; uint32_t addr = data->fadt_table.dsdt; - test_dst_table(dsdt_table, addr); - g_assert_cmphex(dsdt_table->header.signature, ==, ACPI_DSDT_SIGNATURE); + memset(&dsdt_table, 0, sizeof(dsdt_table)); + data->tables = g_array_new(false, true, sizeof(AcpiSdtTable)); + + test_dst_table(&dsdt_table, addr); + g_assert_cmphex(dsdt_table.header.signature, ==, ACPI_DSDT_SIGNATURE); + + /* Place DSDT first */ + g_array_append_val(data->tables, dsdt_table); } -static void test_acpi_ssdt_tables(test_data *data) +static void test_acpi_tables(test_data *data) { - GArray *ssdt_tables; - int ssdt_tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */ + int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */ int i; - ssdt_tables = g_array_sized_new(false, true, sizeof(AcpiSdtTable), - ssdt_tables_nr); - for (i = 0; i < ssdt_tables_nr; i++) { + for (i = 0; i < tables_nr; i++) { AcpiSdtTable ssdt_table; + + memset(&ssdt_table, 0 , sizeof(ssdt_table)); uint32_t addr = data->rsdt_tables_addr[i + 1]; /* fadt is first */ test_dst_table(&ssdt_table, addr); - g_array_append_val(ssdt_tables, ssdt_table); + g_array_append_val(data->tables, ssdt_table); + } +} + +static void dump_aml_files(test_data *data, bool rebuild) +{ + AcpiSdtTable *sdt; + GError *error = NULL; + gchar *aml_file = NULL; + gint fd; + ssize_t ret; + int i; + + for (i = 0; i < data->tables->len; ++i) { + sdt = &g_array_index(data->tables, AcpiSdtTable, i); + g_assert(sdt->aml); + + if (rebuild) { + aml_file = g_strdup_printf("%s/%s/%.4s", data_dir, data->machine, + (gchar *)&sdt->header.signature); + fd = g_open(aml_file, O_WRONLY|O_TRUNC|O_CREAT, + 
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH); + } else { + fd = g_file_open_tmp("aml-XXXXXX", &sdt->aml_file, &error); + g_assert_no_error(error); + } + g_assert(fd >= 0); + + ret = qemu_write_full(fd, sdt, sizeof(AcpiTableHeader)); + g_assert(ret == sizeof(AcpiTableHeader)); + ret = qemu_write_full(fd, sdt->aml, sdt->aml_len); + g_assert(ret == sdt->aml_len); + + close(fd); + + if (aml_file) { + g_free(aml_file); + } + } +} + +static bool compare_signature(AcpiSdtTable *sdt, uint32_t signature) +{ + return sdt->header.signature == signature; +} + +static void load_asl(GArray *sdts, AcpiSdtTable *sdt) +{ + AcpiSdtTable *temp; + GError *error = NULL; + GString *command_line = g_string_new(iasl); + gint fd; + gchar *out, *out_err; + gboolean ret; + int i; + + fd = g_file_open_tmp("asl-XXXXXX.dsl", &sdt->asl_file, &error); + g_assert_no_error(error); + close(fd); + + /* build command line */ + g_string_append_printf(command_line, " -p %s ", sdt->asl_file); + if (compare_signature(sdt, ACPI_DSDT_SIGNATURE) || + compare_signature(sdt, ACPI_SSDT_SIGNATURE)) { + for (i = 0; i < sdts->len; ++i) { + temp = &g_array_index(sdts, AcpiSdtTable, i); + if (compare_signature(temp, ACPI_DSDT_SIGNATURE) || + compare_signature(temp, ACPI_SSDT_SIGNATURE)) { + g_string_append_printf(command_line, "-e %s ", temp->aml_file); + } + } } - data->ssdt_tables = ssdt_tables; + g_string_append_printf(command_line, "-d %s", sdt->aml_file); + + /* pass 'out' and 'out_err' in order to be redirected */ + g_spawn_command_line_sync(command_line->str, &out, &out_err, NULL, &error); + g_assert_no_error(error); + + ret = g_file_get_contents(sdt->asl_file, (gchar **)&sdt->asl, + &sdt->asl_len, &error); + g_assert(ret); + g_assert_no_error(error); + g_assert(sdt->asl_len); + + g_free(out); + g_free(out_err); + g_string_free(command_line, true); +} + +#define COMMENT_END "*/" +#define DEF_BLOCK "DefinitionBlock (" +#define BLOCK_NAME_END ".aml" + +static GString *normalize_asl(gchar *asl_code) +{ + GString 
*asl = g_string_new(asl_code); + gchar *comment, *block_name; + + /* strip comments (different generation days) */ + comment = g_strstr_len(asl->str, asl->len, COMMENT_END); + if (comment) { + asl = g_string_erase(asl, 0, comment + sizeof(COMMENT_END) - asl->str); + } + + /* strip def block name (it has file path in it) */ + if (g_str_has_prefix(asl->str, DEF_BLOCK)) { + block_name = g_strstr_len(asl->str, asl->len, BLOCK_NAME_END); + g_assert(block_name); + asl = g_string_erase(asl, 0, + block_name + sizeof(BLOCK_NAME_END) - asl->str); + } + + return asl; +} + +static GArray *load_expected_aml(test_data *data) +{ + int i; + AcpiSdtTable *sdt; + gchar *aml_file; + GError *error = NULL; + gboolean ret; + + GArray *exp_tables = g_array_new(false, true, sizeof(AcpiSdtTable)); + for (i = 0; i < data->tables->len; ++i) { + AcpiSdtTable exp_sdt; + sdt = &g_array_index(data->tables, AcpiSdtTable, i); + + memset(&exp_sdt, 0, sizeof(exp_sdt)); + exp_sdt.header.signature = sdt->header.signature; + + aml_file = g_strdup_printf("%s/%s/%.4s", data_dir, data->machine, + (gchar *)&exp_sdt.header.signature); + exp_sdt.aml_file = aml_file; + g_assert(g_file_test(aml_file, G_FILE_TEST_EXISTS)); + ret = g_file_get_contents(aml_file, &exp_sdt.aml, + &exp_sdt.aml_len, &error); + g_assert(ret); + g_assert_no_error(error); + g_assert(exp_sdt.aml); + g_assert(exp_sdt.aml_len); + + g_array_append_val(exp_tables, exp_sdt); + } + + return exp_tables; +} + +static void test_acpi_asl(test_data *data) +{ + int i; + AcpiSdtTable *sdt, *exp_sdt; + test_data exp_data; + + memset(&exp_data, 0, sizeof(exp_data)); + exp_data.tables = load_expected_aml(data); + dump_aml_files(data, false); + for (i = 0; i < data->tables->len; ++i) { + GString *asl, *exp_asl; + + sdt = &g_array_index(data->tables, AcpiSdtTable, i); + exp_sdt = &g_array_index(exp_data.tables, AcpiSdtTable, i); + + load_asl(data->tables, sdt); + asl = normalize_asl(sdt->asl); + + load_asl(exp_data.tables, exp_sdt); + exp_asl = 
normalize_asl(exp_sdt->asl); + + if (g_strcmp0(asl->str, exp_asl->str)) { + sdt->asl_file_retain = true; + exp_sdt->asl_file_retain = true; + fprintf(stderr, + "acpi-test: Warning! %.4s mismatch. " + "Orig asl: %s, expected asl %s.\n", + (gchar *)&exp_sdt->header.signature, + sdt->asl_file, exp_sdt->asl_file); + } + g_string_free(asl, true); + g_string_free(exp_asl, true); + } + + free_test_data(&exp_data); } static void test_acpi_one(const char *params, test_data *data) @@ -329,10 +556,14 @@ static void test_acpi_one(const char *params, test_data *data) uint8_t signature_high; uint16_t signature; int i; + const char *device = ""; + + if (!g_strcmp0(data->machine, MACHINE_Q35)) { + device = ",id=hd -device ide-hd,drive=hd"; + } - memset(data, 0, sizeof(*data)); - args = g_strdup_printf("-net none -display none %s %s", - params ? params : "", disk); + args = g_strdup_printf("-net none -display none %s -drive file=%s%s,", + params ? params : "", disk, device); qtest_start(args); /* Wait at most 1 minute */ @@ -360,7 +591,15 @@ static void test_acpi_one(const char *params, test_data *data) test_acpi_fadt_table(data); test_acpi_facs_table(data); test_acpi_dsdt_table(data); - test_acpi_ssdt_tables(data); + test_acpi_tables(data); + + if (iasl) { + if (getenv(ACPI_REBUILD_EXPECTED_AML)) { + dump_aml_files(data, true); + } else { + test_acpi_asl(data); + } + } qtest_quit(global_qtest); g_free(args); @@ -373,8 +612,14 @@ static void test_acpi_tcg(void) /* Supplying -machine accel argument overrides the default (qtest). * This is to make guest actually run. 
*/ + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_PC; test_acpi_one("-machine accel=tcg", &data); + free_test_data(&data); + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_Q35; + test_acpi_one("-machine q35,accel=tcg", &data); free_test_data(&data); } diff --git a/tests/check-qdict.c b/tests/check-qdict.c index dc5f05a85f..7a7461b0b2 100644 --- a/tests/check-qdict.c +++ b/tests/check-qdict.c @@ -227,6 +227,160 @@ static void qdict_iterapi_test(void) QDECREF(tests_dict); } +static void qdict_flatten_test(void) +{ + QList *list1 = qlist_new(); + QList *list2 = qlist_new(); + QDict *dict1 = qdict_new(); + QDict *dict2 = qdict_new(); + QDict *dict3 = qdict_new(); + + /* + * Test the flattening of + * + * { + * "e": [ + * 42, + * [ + * 23, + * 66, + * { + * "a": 0, + * "b": 1 + * } + * ] + * ], + * "f": { + * "c": 2, + * "d": 3, + * }, + * "g": 4 + * } + * + * to + * + * { + * "e.0": 42, + * "e.1.0": 23, + * "e.1.1": 66, + * "e.1.2.a": 0, + * "e.1.2.b": 1, + * "f.c": 2, + * "f.d": 3, + * "g": 4 + * } + */ + + qdict_put(dict1, "a", qint_from_int(0)); + qdict_put(dict1, "b", qint_from_int(1)); + + qlist_append_obj(list1, QOBJECT(qint_from_int(23))); + qlist_append_obj(list1, QOBJECT(qint_from_int(66))); + qlist_append_obj(list1, QOBJECT(dict1)); + qlist_append_obj(list2, QOBJECT(qint_from_int(42))); + qlist_append_obj(list2, QOBJECT(list1)); + + qdict_put(dict2, "c", qint_from_int(2)); + qdict_put(dict2, "d", qint_from_int(3)); + qdict_put_obj(dict3, "e", QOBJECT(list2)); + qdict_put_obj(dict3, "f", QOBJECT(dict2)); + qdict_put(dict3, "g", qint_from_int(4)); + + qdict_flatten(dict3); + + g_assert(qdict_get_int(dict3, "e.0") == 42); + g_assert(qdict_get_int(dict3, "e.1.0") == 23); + g_assert(qdict_get_int(dict3, "e.1.1") == 66); + g_assert(qdict_get_int(dict3, "e.1.2.a") == 0); + g_assert(qdict_get_int(dict3, "e.1.2.b") == 1); + g_assert(qdict_get_int(dict3, "f.c") == 2); + g_assert(qdict_get_int(dict3, "f.d") == 3); + g_assert(qdict_get_int(dict3, 
"g") == 4); + + g_assert(qdict_size(dict3) == 8); + + QDECREF(dict3); +} + +static void qdict_array_split_test(void) +{ + QDict *test_dict = qdict_new(); + QDict *dict1, *dict2; + QList *test_list; + + /* + * Test the split of + * + * { + * "1.x": 0, + * "3.y": 1, + * "0.a": 42, + * "o.o": 7, + * "0.b": 23 + * } + * + * to + * + * [ + * { + * "a": 42, + * "b": 23 + * }, + * { + * "x": 0 + * } + * ] + * + * and + * + * { + * "3.y": 1, + * "o.o": 7 + * } + * + * (remaining in the old QDict) + * + * This example is given in the comment of qdict_array_split(). + */ + + qdict_put(test_dict, "1.x", qint_from_int(0)); + qdict_put(test_dict, "3.y", qint_from_int(1)); + qdict_put(test_dict, "0.a", qint_from_int(42)); + qdict_put(test_dict, "o.o", qint_from_int(7)); + qdict_put(test_dict, "0.b", qint_from_int(23)); + + qdict_array_split(test_dict, &test_list); + + dict1 = qobject_to_qdict(qlist_pop(test_list)); + dict2 = qobject_to_qdict(qlist_pop(test_list)); + + g_assert(dict1); + g_assert(dict2); + g_assert(qlist_empty(test_list)); + + QDECREF(test_list); + + g_assert(qdict_get_int(dict1, "a") == 42); + g_assert(qdict_get_int(dict1, "b") == 23); + + g_assert(qdict_size(dict1) == 2); + + QDECREF(dict1); + + g_assert(qdict_get_int(dict2, "x") == 0); + + g_assert(qdict_size(dict2) == 1); + + QDECREF(dict2); + + g_assert(qdict_get_int(test_dict, "3.y") == 1); + g_assert(qdict_get_int(test_dict, "o.o") == 7); + + g_assert(qdict_size(test_dict) == 2); + + QDECREF(test_dict); +} + /* * Errors test-cases */ @@ -365,6 +519,8 @@ int main(int argc, char **argv) g_test_add_func("/public/del", qdict_del_test); g_test_add_func("/public/to_qdict", qobject_to_qdict_test); g_test_add_func("/public/iterapi", qdict_iterapi_test); + g_test_add_func("/public/flatten", qdict_flatten_test); + g_test_add_func("/public/array_split", qdict_array_split_test); g_test_add_func("/errors/put_exists", qdict_put_exists_test); g_test_add_func("/errors/get_not_exists", qdict_get_not_exists_test); diff 
--git a/tests/fdc-test.c b/tests/fdc-test.c index 38b5b178d0..37096dcc13 100644 --- a/tests/fdc-test.c +++ b/tests/fdc-test.c @@ -518,7 +518,6 @@ static void fuzz_registers(void) int main(int argc, char **argv) { const char *arch = qtest_get_arch(); - char *cmdline; int fd; int ret; @@ -538,9 +537,7 @@ int main(int argc, char **argv) /* Run the tests */ g_test_init(&argc, &argv, NULL); - cmdline = g_strdup_printf("-vnc none "); - - qtest_start(cmdline); + qtest_start(NULL); qtest_irq_intercept_in(global_qtest, "ioapic"); qtest_add_func("/fdc/cmos", test_cmos); qtest_add_func("/fdc/no_media_on_start", test_no_media_on_start); diff --git a/tests/ide-test.c b/tests/ide-test.c index d5cec5a1fc..4a0d97f197 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -380,7 +380,6 @@ static void test_bmdma_no_busmaster(void) static void test_bmdma_setup(void) { ide_test_start( - "-vnc none " "-drive file=%s,if=ide,serial=%s,cache=writeback " "-global ide-hd.ver=%s", tmp_path, "testdisk", "version"); @@ -410,7 +409,6 @@ static void test_identify(void) int ret; ide_test_start( - "-vnc none " "-drive file=%s,if=ide,serial=%s,cache=writeback " "-global ide-hd.ver=%s", tmp_path, "testdisk", "version"); @@ -455,7 +453,6 @@ static void test_flush(void) uint8_t data; ide_test_start( - "-vnc none " "-drive file=blkdebug::%s,if=ide,cache=writeback", tmp_path); diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 index aba3faf712..3af3cdfbc3 100755 --- a/tests/qemu-iotests/017 +++ b/tests/qemu-iotests/017 @@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" TEST_OFFSETS="0 4294967296" diff --git a/tests/qemu-iotests/018 b/tests/qemu-iotests/018 index 15fcfe5670..6f7f0545d0 100755 --- a/tests/qemu-iotests/018 +++ b/tests/qemu-iotests/018 @@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow 
qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" TEST_OFFSETS="0 4294967296" diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019 index 5bb18d0c0a..b43e70f3cb 100755 --- a/tests/qemu-iotests/019 +++ b/tests/qemu-iotests/019 @@ -47,6 +47,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" TEST_OFFSETS="0 4294967296" CLUSTER_SIZE=65536 diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 index b3c86d844e..73a0429481 100755 --- a/tests/qemu-iotests/020 +++ b/tests/qemu-iotests/020 @@ -45,6 +45,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" TEST_OFFSETS="0 4294967296" diff --git a/tests/qemu-iotests/034 b/tests/qemu-iotests/034 index 67f1959690..7349789583 100755 --- a/tests/qemu-iotests/034 +++ b/tests/qemu-iotests/034 @@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" CLUSTER_SIZE=4k size=128M diff --git a/tests/qemu-iotests/037 b/tests/qemu-iotests/037 index 743bae33d3..e444349e6d 100755 --- a/tests/qemu-iotests/037 +++ b/tests/qemu-iotests/037 @@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" CLUSTER_SIZE=4k size=128M diff --git a/tests/qemu-iotests/051.out 
b/tests/qemu-iotests/051.out index c2cadba2fc..d0c5173626 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -222,7 +222,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Can't use 'qcow2' as a block driver for the protocol level +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename' === Parsing protocol from file name === diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059 index 65bea1d6c6..2d604d3a91 100755 --- a/tests/qemu-iotests/059 +++ b/tests/qemu-iotests/059 @@ -42,6 +42,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt vmdk _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" capacity_offset=16 granularity_offset=20 @@ -95,10 +98,23 @@ EOF _img_info echo +echo "=== Testing truncated sparse ===" +IMGOPTS="subformat=monolithicSparse" _make_test_img 100G +truncate -s 10M $TEST_IMG +_img_info + +echo echo "=== Testing version 3 ===" _use_sample_img iotest-version3.vmdk.bz2 _img_info +echo +echo "=== Testing 4TB monolithicFlat creation and IO ===" +IMGOPTS="subformat=monolithicFlat" _make_test_img 4T +_img_info +$QEMU_IO -c "write -P 0xa 900G 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -v 900G 1024" "$TEST_IMG" | _filter_qemu_io + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out index 16ab7c6c1f..4ffeb54710 100644 --- a/tests/qemu-iotests/059.out +++ b/tests/qemu-iotests/059.out @@ -2043,8 +2043,87 @@ qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Invalid extent lines: RW 12582912 VMFS 
"dummy.IMGFMT" 1 +=== Testing truncated sparse === +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=107374182400 +qemu-img: File truncated, expecting at least 13172736 bytes +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Wrong medium type + === Testing version 3 === image: TEST_DIR/iotest-version3.IMGFMT file format: IMGFMT virtual size: 1.0G (1073741824 bytes) + +=== Testing 4TB monolithicFlat creation and IO === +Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104 +image: TEST_DIR/iotest-version3.IMGFMT +file format: IMGFMT +virtual size: 4.0T (4398046511104 bytes) +wrote 512/512 bytes at offset 966367641600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +e100000000: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000010: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000020: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000030: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000040: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000050: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000060: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000070: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000080: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000090: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000a0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000b0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000c0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000d0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000e0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000f0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ 
+e100000100: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000110: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000120: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000130: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000140: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000150: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000160: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000170: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000180: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000190: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001a0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001b0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001c0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001d0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001e0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001f0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000220: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000230: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000240: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000250: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000260: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000270: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+e100000290: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000310: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000320: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000330: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000340: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000350: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000360: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000370: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000390: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+read 1024/1024 bytes at offset 966367641600 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063 index 2ab8f20e02..77503a2984 100755 --- a/tests/qemu-iotests/063 +++ b/tests/qemu-iotests/063 @@ -44,6 +44,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed raw _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" _make_test_img 4M diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069 index 3042803a81..50347d91d2 100755 --- a/tests/qemu-iotests/069 +++ b/tests/qemu-iotests/069 @@ -41,6 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt cow qed qcow qcow2 vmdk _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" IMG_SIZE=128K diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071 new file mode 100755 index 0000000000..2a22546e1a --- /dev/null +++ b/tests/qemu-iotests/071 @@ -0,0 +1,239 @@ +#!/bin/bash +# +# Test case for the QMP blkdebug and blkverify interfaces +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto generic +_supported_os Linux + +function do_run_qemu() +{ + echo Testing: "$@" | _filter_imgfmt + $QEMU -nographic -qmp stdio -serial none "$@" + echo +} + +function run_qemu() +{ + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io +} + +IMG_SIZE=64M + +echo +echo "=== Testing blkverify through filename ===" +echo + +TEST_IMG="$TEST_IMG.base" IMGOPTS="" IMGFMT="raw" _make_test_img $IMG_SIZE |\ + _filter_imgfmt +_make_test_img $IMG_SIZE +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \ + -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 512' | _filter_qemu_io + +$QEMU_IO -c 'write -P 42 0 512' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \ + -c 'read -P 42 0 512' | _filter_qemu_io + +echo +echo "=== Testing blkverify through file blockref ===" +echo + +TEST_IMG="$TEST_IMG.base" IMGOPTS="" IMGFMT="raw" _make_test_img $IMG_SIZE |\ + _filter_imgfmt +_make_test_img $IMG_SIZE +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base,file.test.driver=$IMGFMT,file.test.file.filename=$TEST_IMG" \ + -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 512' | _filter_qemu_io + +$QEMU_IO -c 'write -P 42 0 512' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \ + -c 'read -P 42 0 512' | _filter_qemu_io + +echo +echo "=== Testing blkdebug through filename ===" +echo + +$QEMU_IO -c "open -o file.driver=blkdebug,file.inject-error.event=l2_load $TEST_IMG" \ + 
-c 'read -P 42 0x38000 512' + +echo +echo "=== Testing blkdebug through file blockref ===" +echo + +$QEMU_IO -c "open -o driver=$IMGFMT,file.driver=blkdebug,file.inject-error.event=l2_load,file.image.filename=$TEST_IMG" \ + -c 'read -P 42 0x38000 512' + +echo +echo "=== Testing blkdebug on existing block device ===" +echo + +run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "$IMGFMT", + "id": "drive0-debug", + "file": { + "driver": "blkdebug", + "image": "drive0", + "inject-error": [{ + "event": "l2_load" + }] + } + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +echo +echo "=== Testing blkverify on existing block device ===" +echo + +run_qemu -drive "file=$TEST_IMG,format=$IMGFMT,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "blkverify", + "id": "drive0-verify", + "test": "drive0", + "raw": { + "driver": "raw", + "file": { + "driver": "file", + "filename": "$TEST_IMG.base" + } + } + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-verify "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +echo +echo "=== Testing blkverify on existing raw block device ===" +echo + +run_qemu -drive "file=$TEST_IMG.base,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "blkverify", + "id": "drive0-verify", + "test": { + "driver": "$IMGFMT", + "file": { + "driver": "file", + "filename": "$TEST_IMG" + } + }, + "raw": "drive0" + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-verify "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +echo +echo "=== Testing blkdebug's set-state through 
QMP ===" +echo + +run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "$IMGFMT", + "id": "drive0-debug", + "file": { + "driver": "blkdebug", + "image": "drive0", + "inject-error": [{ + "event": "read_aio", + "state": 42 + }], + "set-state": [{ + "event": "write_aio", + "new_state": 42 + }] + } + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "read 0 512"' + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "write 0 512"' + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out new file mode 100644 index 0000000000..5f840a9980 --- /dev/null +++ b/tests/qemu-iotests/071.out @@ -0,0 +1,90 @@ +QA output created by 071 + +=== Testing blkverify through filename === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0 + +=== Testing blkverify through file blockref === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at 
offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0 + +=== Testing blkdebug through filename === + +read failed: Input/output error + +=== Testing blkdebug through file blockref === + +read failed: Input/output error + +=== Testing blkdebug on existing block device === + +Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +read failed: Input/output error +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} + + +=== Testing blkverify on existing block device === + +Testing: -drive file=TEST_DIR/t.IMGFMT,format=IMGFMT,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 + + +=== Testing blkverify on existing raw block device === + +Testing: -drive file=TEST_DIR/t.IMGFMT.base,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 + + +=== Testing blkdebug's set-state through QMP === + +Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +read 
failed: Input/output error +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} + +*** done diff --git a/tests/qemu-iotests/072 b/tests/qemu-iotests/072 new file mode 100755 index 0000000000..a3876c2161 --- /dev/null +++ b/tests/qemu-iotests/072 @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Test case for nested image formats +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow cow +_supported_proto generic +_supported_os Linux + +IMG_SIZE=64M + +echo +echo "=== Testing nested image formats ===" +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE + +$QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \ + -c 'write -P 66 1024 512' "$TEST_IMG.base" | _filter_qemu_io + +$QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG" + +$QEMU_IO -c "open -o driver=$IMGFMT,file.driver=$IMGFMT,file.file.filename=$TEST_IMG" \ + -c 'read -P 42 0 512' -c 'read -P 23 512 512' \ + -c 'read -P 66 1024 512' | _filter_qemu_io + +# When not giving any format, qemu should open only one "layer". Therefore, this +# should not work for any image formats with a header. +$QEMU_IO -c 'read -P 42 0 512' "$TEST_IMG" | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/072.out b/tests/qemu-iotests/072.out new file mode 100644 index 0000000000..efe577c1c0 --- /dev/null +++ b/tests/qemu-iotests/072.out @@ -0,0 +1,21 @@ +QA output created by 072 + +=== Testing nested image formats === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 1024 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 1024 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Pattern verification failed at offset 0, 512 bytes +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/077 b/tests/qemu-iotests/077 new file 
mode 100755 index 0000000000..bbf7b5145a --- /dev/null +++ b/tests/qemu-iotests/077 @@ -0,0 +1,278 @@ +#!/bin/bash +# +# Test concurrent pread/pwrite +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt generic +_supported_proto generic +_supported_os Linux + +CLUSTER_SIZE=4k +size=128M + +_make_test_img $size + +echo +echo "== Some concurrent requests involving RMW ==" + +function test_io() +{ +echo "open -o file.align=4k blkdebug::$TEST_IMG" +# A simple RMW request +cat <<EOF +aio_write -P 10 0x200 0x200 +aio_flush +EOF + +# Sequential RMW requests on the same physical sector +off=0x1000 +for ev in "head" "after_head" "tail" "after_tail"; do +cat <<EOF +break pwritev_rmw.$ev A +aio_write -P 10 $((off + 0x200)) 0x200 +wait_break A +aio_write -P 11 $((off + 0x400)) 0x200 +sleep 100 +resume A +aio_flush +EOF +off=$((off + 0x1000)) +done + +# Chained dependencies +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0x5000 0x200 +wait_break A +aio_write -P 11 0x5200 0x200 +aio_write -P 12 0x5400 0x200 +aio_write -P 13 0x5600 0x200 +aio_write -P 14 0x5800 0x200 +aio_write -P 15 0x5a00 0x200 +aio_write -P 16 0x5c00 0x200 +aio_write -P 17 0x5e00 0x200 +sleep 100 +resume A +aio_flush +EOF + +# Overlapping multiple requests +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0x6000 0x200 +wait_break A +break pwritev_rmw.after_head B +aio_write -P 10 0x7e00 0x200 +wait_break B +aio_write -P 11 0x6800 0x1000 +resume A +sleep 100 +resume B +aio_flush +EOF + +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0x8000 0x200 +wait_break A +break pwritev_rmw.after_head B +aio_write -P 10 0x9e00 0x200 +wait_break B +aio_write -P 11 0x8800 0x1000 +resume B +sleep 100 +resume A +aio_flush +EOF + +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0xa000 0x200 +wait_break A +aio_write -P 11 0xa800 0x1000 +break pwritev_rmw.after_head B +aio_write -P 10 0xbe00 0x200 +wait_break B +resume A +sleep 100 +resume B +aio_flush +EOF + +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0xc000 0x200 +wait_break A +aio_write -P 11 0xc800 0x1000 +break pwritev_rmw.after_head B +aio_write -P 10 0xde00 0x200 +wait_break B 
+resume B +sleep 100 +resume A +aio_flush +EOF + +# Only RMW for the tail part +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0xe000 0x1800 +wait_break A +aio_write -P 11 0xf000 0xc00 +sleep 100 +resume A +aio_flush +EOF + +cat <<EOF +break pwritev A +aio_write -P 10 0x10000 0x800 +wait_break A +break pwritev_rmw.after_tail B +aio_write -P 11 0x10000 0x400 +break pwritev_done C +resume A +wait_break C +resume C +sleep 100 +wait_break B +resume B +aio_flush +EOF + +cat <<EOF +break pwritev A +aio_write -P 10 0x11000 0x800 +wait_break A +aio_write -P 11 0x11000 0x1000 +sleep 100 +resume A +aio_flush +EOF +} + +test_io | $QEMU_IO | _filter_qemu_io | \ + sed -e 's,[0-9/]* bytes at offset [0-9]*,XXX/XXX bytes at offset XXX,g' \ + -e 's/^[0-9]* \(bytes\|KiB\)/XXX bytes/' \ + -e '/Suspended/d' + +echo +echo "== Verify image content ==" + +function verify_io() +{ + # A simple RMW request + echo read -P 0 0 0x200 + echo read -P 10 0x200 0x200 + echo read -P 0 0x400 0xc00 + + # Sequential RMW requests on the same physical sector + echo read -P 0 0x1000 0x200 + echo read -P 10 0x1200 0x200 + echo read -P 11 0x1400 0x200 + echo read -P 0 0x1600 0xa00 + + echo read -P 0 0x2000 0x200 + echo read -P 10 0x2200 0x200 + echo read -P 11 0x2400 0x200 + echo read -P 0 0x2600 0xa00 + + echo read -P 0 0x3000 0x200 + echo read -P 10 0x3200 0x200 + echo read -P 11 0x3400 0x200 + echo read -P 0 0x3600 0xa00 + + echo read -P 0 0x4000 0x200 + echo read -P 10 0x4200 0x200 + echo read -P 11 0x4400 0x200 + echo read -P 0 0x4600 0xa00 + + # Chained dependencies + echo read -P 10 0x5000 0x200 + echo read -P 11 0x5200 0x200 + echo read -P 12 0x5400 0x200 + echo read -P 13 0x5600 0x200 + echo read -P 14 0x5800 0x200 + echo read -P 15 0x5a00 0x200 + echo read -P 16 0x5c00 0x200 + echo read -P 17 0x5e00 0x200 + + # Overlapping multiple requests + echo read -P 10 0x6000 0x200 + echo read -P 0 0x6200 0x600 + echo read -P 11 0x6800 0x1000 + echo read -P 0 0x7800 0x600 + echo read -P 10 
0x7e00 0x200 + + echo read -P 10 0x8000 0x200 + echo read -P 0 0x8200 0x600 + echo read -P 11 0x8800 0x1000 + echo read -P 0 0x9800 0x600 + echo read -P 10 0x9e00 0x200 + + echo read -P 10 0xa000 0x200 + echo read -P 0 0xa200 0x600 + echo read -P 11 0xa800 0x1000 + echo read -P 0 0xb800 0x600 + echo read -P 10 0xbe00 0x200 + + echo read -P 10 0xc000 0x200 + echo read -P 0 0xc200 0x600 + echo read -P 11 0xc800 0x1000 + echo read -P 0 0xd800 0x600 + echo read -P 10 0xde00 0x200 + + # Only RMW for the tail part + echo read -P 10 0xe000 0x1000 + echo read -P 11 0xf800 0x400 + echo read -P 0 0xfc00 0x400 + + echo read -P 11 0x10000 0x400 + echo read -P 10 0x10400 0x400 + + echo read -P 11 0x11800 0x800 +} + +verify_io | $QEMU_IO "$TEST_IMG" | _filter_qemu_io + +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/077.out b/tests/qemu-iotests/077.out new file mode 100644 index 0000000000..ab612344d6 --- /dev/null +++ b/tests/qemu-iotests/077.out @@ -0,0 +1,202 @@ +QA output created by 077 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 + +== Some concurrent requests involving RMW == +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX 
bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: 
Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +blkdebug: Resuming request 'C' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Verify image content == +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 3072/3072 bytes at offset 1024 +3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 4096 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 4608 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 5120 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 5632 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 8192 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 8704 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 9216 +512 bytes, X 
ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 9728 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 12288 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 12800 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 13312 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 13824 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 16384 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 16896 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 17408 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 17920 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 20480 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 20992 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 21504 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 22016 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 22528 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 23040 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 23552 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 24064 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 24576 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 25088 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 26624 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at 
offset 30720 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 32256 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 32768 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 33280 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 34816 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 38912 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 40448 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 40960 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 41472 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 43008 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 47104 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 48640 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 49152 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 49664 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 51200 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 55296 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 56832 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 57344 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 63488 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 64512 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 65536 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 
1024/1024 bytes at offset 66560 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2048/2048 bytes at offset 71680 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 28ba0d9ad5..0f68156400 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -170,6 +170,17 @@ _make_test_img() fi } +_rm_test_img() +{ + local img=$1 + if [ "$IMGFMT" = "vmdk" ]; then + # Remove all the extents for vmdk + $QEMU_IMG info $img 2>/dev/null | grep 'filename:' | cut -f 2 -d: \ + | xargs -I {} rm -f "{}" + fi + rm -f $img +} + _cleanup_test_img() { case "$IMGPROTO" in @@ -179,9 +190,9 @@ _cleanup_test_img() rm -f "$TEST_IMG_FILE" ;; file) - rm -f "$TEST_DIR/t.$IMGFMT" - rm -f "$TEST_DIR/t.$IMGFMT.orig" - rm -f "$TEST_DIR/t.$IMGFMT.base" + _rm_test_img "$TEST_DIR/t.$IMGFMT" + _rm_test_img "$TEST_DIR/t.$IMGFMT.orig" + _rm_test_img "$TEST_DIR/t.$IMGFMT.base" if [ -n "$SAMPLE_IMG_FILE" ] then rm -f "$TEST_DIR/$SAMPLE_IMG_FILE" @@ -406,6 +417,17 @@ _default_cache_mode() fi } +_unsupported_imgopts() +{ + for bad_opt + do + if echo "$IMGOPTS" | grep -q 2>/dev/null "$bad_opt" + then + _notrun "not suitable for image option: $bad_opt" + fi + done +} + # this test requires that a specified command (executable) exists # _require_command() diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index cc750c986e..03c762fb4f 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -77,5 +77,8 @@ 068 rw auto 069 rw auto 070 rw auto +071 rw auto +072 rw auto 073 rw auto 074 rw auto +077 rw auto diff --git a/trace-events b/trace-events index 9f4456a82e..1b668d1ac2 100644 --- a/trace-events +++ b/trace-events @@ -402,6 +402,7 @@ usb_desc_config(int addr, int index, int len, int ret) "dev %d query config %d, usb_desc_other_speed_config(int addr, int index, int len, int ret) "dev %d query config %d, len %d, ret %d" 
usb_desc_string(int addr, int index, int len, int ret) "dev %d query string %d, len %d, ret %d" usb_desc_bos(int addr, int len, int ret) "dev %d bos, len %d, ret %d" +usb_desc_msos(int addr, int index, int len, int ret) "dev %d msos, index 0x%x, len %d, ret %d" usb_set_addr(int addr) "dev %d" usb_set_config(int addr, int config, int ret) "dev %d, config %d, ret %d" usb_set_interface(int addr, int iface, int alt, int ret) "dev %d, interface %d, altsetting %d, ret %d" diff --git a/trace/simple.c b/trace/simple.c index 1e3f6914c5..57572c4905 100644 --- a/trace/simple.c +++ b/trace/simple.c @@ -19,6 +19,7 @@ #include "qemu/timer.h" #include "trace.h" #include "trace/control.h" +#include "trace/simple.h" /** Trace file header event ID */ #define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ @@ -39,7 +40,17 @@ * Trace records are written out by a dedicated thread. The thread waits for * records to become available, writes them out, and then waits again. */ +#if GLIB_CHECK_VERSION(2, 32, 0) +static GMutex trace_lock; +#define lock_trace_lock() g_mutex_lock(&trace_lock) +#define unlock_trace_lock() g_mutex_unlock(&trace_lock) +#define get_trace_lock_mutex() (&trace_lock) +#else static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT; +#define lock_trace_lock() g_static_mutex_lock(&trace_lock) +#define unlock_trace_lock() g_static_mutex_unlock(&trace_lock) +#define get_trace_lock_mutex() g_static_mutex_get_mutex(&trace_lock) +#endif /* g_cond_new() was deprecated in glib 2.31 but we still need to support it */ #if GLIB_CHECK_VERSION(2, 31, 0) @@ -139,27 +150,26 @@ static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) */ static void flush_trace_file(bool wait) { - g_static_mutex_lock(&trace_lock); + lock_trace_lock(); trace_available = true; g_cond_signal(trace_available_cond); if (wait) { - g_cond_wait(trace_empty_cond, g_static_mutex_get_mutex(&trace_lock)); + g_cond_wait(trace_empty_cond, get_trace_lock_mutex()); } - 
g_static_mutex_unlock(&trace_lock); + unlock_trace_lock(); } static void wait_for_trace_records_available(void) { - g_static_mutex_lock(&trace_lock); + lock_trace_lock(); while (!(trace_available && trace_writeout_enabled)) { g_cond_signal(trace_empty_cond); - g_cond_wait(trace_available_cond, - g_static_mutex_get_mutex(&trace_lock)); + g_cond_wait(trace_available_cond, get_trace_lock_mutex()); } trace_available = false; - g_static_mutex_unlock(&trace_lock); + unlock_trace_lock(); } static gpointer writeout_thread(gpointer opaque) @@ -34,6 +34,10 @@ #define GETTEXT_PACKAGE "qemu" #define LOCALEDIR "po" +#ifdef _WIN32 +# define _WIN32_WINNT 0x0601 /* needed to get definition of MAPVK_VK_TO_VSC */ +#endif + #include "qemu-common.h" #ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE @@ -704,11 +708,18 @@ static gboolean gd_button_event(GtkWidget *widget, GdkEventButton *button, static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque) { GtkDisplayState *s = opaque; - int gdk_keycode; - int qemu_keycode; + int gdk_keycode = key->hardware_keycode; int i; - gdk_keycode = key->hardware_keycode; +#ifdef _WIN32 + UINT qemu_keycode = MapVirtualKey(gdk_keycode, MAPVK_VK_TO_VSC); + switch (qemu_keycode) { + case 103: /* alt gr */ + qemu_keycode = 56 | SCANCODE_GREY; + break; + } +#else + int qemu_keycode; if (gdk_keycode < 9) { qemu_keycode = 0; @@ -723,6 +734,7 @@ static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque) } else { qemu_keycode = 0; } +#endif trace_gd_key_event(gdk_keycode, qemu_keycode, (key->type == GDK_KEY_PRESS) ? 
"down" : "up"); diff --git a/util/Makefile.objs b/util/Makefile.objs index af3e5cb157..937376b082 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -13,3 +13,4 @@ util-obj-y += hexdump.o util-obj-y += crc32c.o util-obj-y += throttle.o util-obj-y += getauxval.o +util-obj-y += readline.o diff --git a/util/error.c b/util/error.c index e5de34f3bb..f11f1d57a0 100644 --- a/util/error.c +++ b/util/error.c @@ -44,7 +44,7 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...) err->err_class = err_class; if (errp == &error_abort) { - fprintf(stderr, "%s\n", error_get_pretty(err)); + error_report("%s", error_get_pretty(err)); abort(); } @@ -80,7 +80,7 @@ void error_set_errno(Error **errp, int os_errno, ErrorClass err_class, err->err_class = err_class; if (errp == &error_abort) { - fprintf(stderr, "%s\n", error_get_pretty(err)); + error_report("%s", error_get_pretty(err)); abort(); } @@ -125,7 +125,7 @@ void error_set_win32(Error **errp, int win32_err, ErrorClass err_class, err->err_class = err_class; if (errp == &error_abort) { - fprintf(stderr, "%s\n", error_get_pretty(err)); + error_report("%s", error_get_pretty(err)); abort(); } @@ -171,7 +171,7 @@ void error_free(Error *err) void error_propagate(Error **dst_err, Error *local_err) { if (local_err && dst_err == &error_abort) { - fprintf(stderr, "%s\n", error_get_pretty(local_err)); + error_report("%s", error_get_pretty(local_err)); abort(); } else if (dst_err && !*dst_err) { *dst_err = local_err; diff --git a/util/oslib-posix.c b/util/oslib-posix.c index e00a44c86f..d5dca4729a 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -47,6 +47,9 @@ extern int daemon(int, int); # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#include <termios.h> +#include <unistd.h> + #include <glib/gprintf.h> #include "config-host.h" @@ -85,6 +88,11 @@ void *qemu_oom_check(void *ptr) void *qemu_memalign(size_t alignment, size_t size) { void *ptr; + + if (alignment < sizeof(void*)) { + alignment = 
sizeof(void*); + } + #if defined(_POSIX_C_SOURCE) && !defined(__sun__) int ret; ret = posix_memalign(&ptr, alignment, size); @@ -251,3 +259,18 @@ qemu_get_local_state_pathname(const char *relative_pathname) return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, relative_pathname); } + +void qemu_set_tty_echo(int fd, bool echo) +{ + struct termios tty; + + tcgetattr(fd, &tty); + + if (echo) { + tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; + } else { + tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); + } + + tcsetattr(fd, TCSANOW, &tty); +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 776ccfaaf0..50be0440f2 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -189,3 +189,22 @@ qemu_get_local_state_pathname(const char *relative_pathname) return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path, relative_pathname); } + +void qemu_set_tty_echo(int fd, bool echo) +{ + HANDLE handle = (HANDLE)_get_osfhandle(fd); + DWORD dwMode = 0; + + if (handle == INVALID_HANDLE_VALUE) { + return; + } + + GetConsoleMode(handle, &dwMode); + + if (echo) { + SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT); + } else { + SetConsoleMode(handle, + dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT)); + } +} diff --git a/util/qemu-config.c b/util/qemu-config.c index 7973659518..9298f55ecf 100644 --- a/util/qemu-config.c +++ b/util/qemu-config.c @@ -356,3 +356,103 @@ int qemu_read_config_file(const char *filename) return -EINVAL; } } + +static void config_parse_qdict_section(QDict *options, QemuOptsList *opts, + Error **errp) +{ + QemuOpts *subopts; + QDict *subqdict; + QList *list = NULL; + Error *local_err = NULL; + size_t orig_size, enum_size; + char *prefix; + + prefix = g_strdup_printf("%s.", opts->name); + qdict_extract_subqdict(options, &subqdict, prefix); + g_free(prefix); + orig_size = qdict_size(subqdict); + if (!orig_size) { + goto out; + } + + subopts = qemu_opts_create(opts, NULL, 0, &local_err); + if (error_is_set(&local_err)) 
{ + error_propagate(errp, local_err); + goto out; + } + + qemu_opts_absorb_qdict(subopts, subqdict, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + + enum_size = qdict_size(subqdict); + if (enum_size < orig_size && enum_size) { + error_setg(errp, "Unknown option '%s' for [%s]", + qdict_first(subqdict)->key, opts->name); + goto out; + } + + if (enum_size) { + /* Multiple, enumerated sections */ + QListEntry *list_entry; + unsigned i = 0; + + /* Not required anymore */ + qemu_opts_del(subopts); + + qdict_array_split(subqdict, &list); + if (qdict_size(subqdict)) { + error_setg(errp, "Unused option '%s' for [%s]", + qdict_first(subqdict)->key, opts->name); + goto out; + } + + QLIST_FOREACH_ENTRY(list, list_entry) { + QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry)); + char *opt_name; + + opt_name = g_strdup_printf("%s.%u", opts->name, i++); + subopts = qemu_opts_create(opts, opt_name, 1, &local_err); + g_free(opt_name); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + + qemu_opts_absorb_qdict(subopts, section, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + qemu_opts_del(subopts); + goto out; + } + + if (qdict_size(section)) { + error_setg(errp, "[%s] section doesn't support the option '%s'", + opts->name, qdict_first(section)->key); + qemu_opts_del(subopts); + goto out; + } + } + } + +out: + QDECREF(subqdict); + QDECREF(list); +} + +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, + Error **errp) +{ + int i; + Error *local_err = NULL; + + for (i = 0; lists[i]; i++) { + config_parse_qdict_section(options, lists[i], &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + } +} diff --git a/util/qemu-progress.c b/util/qemu-progress.c index 9a3f96cd47..4ee5cd07f2 100644 --- a/util/qemu-progress.c +++ b/util/qemu-progress.c @@ -24,7 +24,6 @@ #include "qemu-common.h" 
#include "qemu/osdep.h" -#include "sysemu/sysemu.h" #include <stdio.h> struct progress_state { @@ -83,12 +82,22 @@ static void progress_dummy_init(void) { #ifdef CONFIG_POSIX struct sigaction action; + sigset_t set; memset(&action, 0, sizeof(action)); sigfillset(&action.sa_mask); action.sa_handler = sigusr_print; action.sa_flags = 0; sigaction(SIGUSR1, &action, NULL); + + /* + * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the + * tools that use the progress report SIGUSR1 isn't used in this meaning + * and instead should print the progress, so reenable it. + */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); #endif state.print = progress_dummy_print; diff --git a/readline.c b/util/readline.c index abf27ddec3..8441be484c 100644 --- a/readline.c +++ b/util/readline.c @@ -21,21 +21,19 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "monitor/readline.h" -#include "monitor/monitor.h" + +#include "qemu-common.h" +#include "qemu/readline.h" #define IS_NORM 0 #define IS_ESC 1 #define IS_CSI 2 #define IS_SS3 3 -#undef printf -#define printf do_not_use_printf - void readline_show_prompt(ReadLineState *rs) { - monitor_printf(rs->mon, "%s", rs->prompt); - monitor_flush(rs->mon); + rs->printf_func(rs->opaque, "%s", rs->prompt); + rs->flush_func(rs->opaque); rs->last_cmd_buf_index = 0; rs->last_cmd_buf_size = 0; rs->esc_state = IS_NORM; @@ -49,17 +47,17 @@ static void readline_update(ReadLineState *rs) if (rs->cmd_buf_size != rs->last_cmd_buf_size || memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) { for(i = 0; i < rs->last_cmd_buf_index; i++) { - monitor_printf(rs->mon, "\033[D"); + rs->printf_func(rs->opaque, "\033[D"); } rs->cmd_buf[rs->cmd_buf_size] = '\0'; if (rs->read_password) { len = strlen(rs->cmd_buf); for(i = 0; i < len; i++) - monitor_printf(rs->mon, "*"); + rs->printf_func(rs->opaque, "*"); } else { - monitor_printf(rs->mon, 
"%s", rs->cmd_buf); + rs->printf_func(rs->opaque, "%s", rs->cmd_buf); } - monitor_printf(rs->mon, "\033[K"); + rs->printf_func(rs->opaque, "\033[K"); memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size); rs->last_cmd_buf_size = rs->cmd_buf_size; rs->last_cmd_buf_index = rs->cmd_buf_size; @@ -68,17 +66,17 @@ static void readline_update(ReadLineState *rs) delta = rs->cmd_buf_index - rs->last_cmd_buf_index; if (delta > 0) { for(i = 0;i < delta; i++) { - monitor_printf(rs->mon, "\033[C"); + rs->printf_func(rs->opaque, "\033[C"); } } else { delta = -delta; for(i = 0;i < delta; i++) { - monitor_printf(rs->mon, "\033[D"); + rs->printf_func(rs->opaque, "\033[D"); } } rs->last_cmd_buf_index = rs->cmd_buf_index; } - monitor_flush(rs->mon); + rs->flush_func(rs->opaque); } static void readline_insert_char(ReadLineState *rs, int ch) @@ -284,7 +282,7 @@ static void readline_completion(ReadLineState *rs) cmdline = g_malloc(rs->cmd_buf_index + 1); memcpy(cmdline, rs->cmd_buf, rs->cmd_buf_index); cmdline[rs->cmd_buf_index] = '\0'; - rs->completion_finder(rs->mon, cmdline); + rs->completion_finder(rs->opaque, cmdline); g_free(cmdline); /* no completion found */ @@ -299,7 +297,7 @@ static void readline_completion(ReadLineState *rs) if (len > 0 && rs->completions[0][len - 1] != '/') readline_insert_char(rs, ' '); } else { - monitor_printf(rs->mon, "\n"); + rs->printf_func(rs->opaque, "\n"); max_width = 0; max_prefix = 0; for(i = 0; i < rs->nb_completions; i++) { @@ -329,9 +327,9 @@ static void readline_completion(ReadLineState *rs) nb_cols = 80 / max_width; j = 0; for(i = 0; i < rs->nb_completions; i++) { - monitor_printf(rs->mon, "%-*s", max_width, rs->completions[i]); + rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]); if (++j == nb_cols || i == (rs->nb_completions - 1)) { - monitor_printf(rs->mon, "\n"); + rs->printf_func(rs->opaque, "\n"); j = 0; } } @@ -365,12 +363,12 @@ void readline_handle_byte(ReadLineState *rs, int ch) rs->cmd_buf[rs->cmd_buf_size] = 
'\0'; if (!rs->read_password) readline_hist_add(rs, rs->cmd_buf); - monitor_printf(rs->mon, "\n"); + rs->printf_func(rs->opaque, "\n"); rs->cmd_buf_index = 0; rs->cmd_buf_size = 0; rs->last_cmd_buf_index = 0; rs->last_cmd_buf_size = 0; - rs->readline_func(rs->mon, rs->cmd_buf, rs->readline_opaque); + rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque); break; case 23: /* ^W */ @@ -480,13 +478,17 @@ const char *readline_get_history(ReadLineState *rs, unsigned int index) return rs->history[index]; } -ReadLineState *readline_init(Monitor *mon, +ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, + ReadLineFlushFunc *flush_func, + void *opaque, ReadLineCompletionFunc *completion_finder) { ReadLineState *rs = g_malloc0(sizeof(*rs)); rs->hist_entry = -1; - rs->mon = mon; + rs->opaque = opaque; + rs->printf_func = printf_func; + rs->flush_func = flush_func; rs->completion_finder = completion_finder; return rs; @@ -2925,7 +2925,7 @@ int main(int argc, char **argv, char **envp) bdrv_init_with_whitelist(); - autostart= 1; + autostart = 1; /* first pass of option parsing */ optind = 1; @@ -3879,8 +3879,10 @@ int main(int argc, char **argv, char **envp) qemu_set_log(mask); } - if (!trace_backend_init(trace_events, trace_file)) { - exit(1); + if (!is_daemonized()) { + if (!trace_backend_init(trace_events, trace_file)) { + exit(1); + } } /* If no data_dir is specified then try to find it relative to the @@ -4379,6 +4381,12 @@ int main(int argc, char **argv, char **envp) os_setup_post(); + if (is_daemonized()) { + if (!trace_backend_init(trace_events, trace_file)) { + exit(1); + } + } + main_loop(); bdrv_close_all(); pause_all_vcpus(); |