diff options
158 files changed, 6116 insertions, 1855 deletions
diff --git a/.shippable.yml b/.shippable.yml index 1a1fd7a91d..653bd750fe 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -5,6 +5,8 @@ env: TARGET_LIST=arm-softmmu,arm-linux-user - IMAGE=debian-arm64-cross TARGET_LIST=aarch64-softmmu,aarch64-linux-user + - IMAGE=debian-s390x-cross + TARGET_LIST=s390x-softmmu,s390x-linux-user build: pre_ci: - make docker-image-${IMAGE} @@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) return 0; } +static char *bdrv_child_get_parent_desc(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + return g_strdup(bdrv_get_device_or_node_name(parent)); +} + static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, }; +static void bdrv_backing_attach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + BlockDriverState *backing_hd = c->bs; + + assert(!parent->backing_blocker); + error_setg(&parent->backing_blocker, + "node is used as backing hd of '%s'", + bdrv_get_device_or_node_name(parent)); + + parent->open_flags &= ~BDRV_O_NO_BACKING; + pstrcpy(parent->backing_file, sizeof(parent->backing_file), + backing_hd->filename); + pstrcpy(parent->backing_format, sizeof(parent->backing_format), + backing_hd->drv ? backing_hd->drv->format_name : ""); + + bdrv_op_block_all(backing_hd, parent->backing_blocker); + /* Otherwise we won't be able to commit or stream */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + parent->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. + */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + parent->backing_blocker); +} + +static void bdrv_backing_detach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + + assert(parent->backing_blocker); + bdrv_op_unblock_all(c->bs, parent->backing_blocker); + error_free(parent->backing_blocker); + parent->backing_blocker = NULL; +} + /* * Returns the options and flags that bs->backing should get, based on the * given options and flags for the parent BDS @@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, *child_flags = flags; } -static const BdrvChildRole child_backing = { +const BdrvChildRole child_backing = { + .get_parent_desc = bdrv_child_get_parent_desc, + .attach = bdrv_backing_attach, + .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +/* + * Check whether permissions on this node can be changed in a way that + * @cumulative_perms and @cumulative_shared_perms are the new cumulative + * permissions of all its parents. This involves checking whether all necessary + * permission changes to child nodes can be performed. + * + * A call to this function must always be followed by a call to bdrv_set_perm() + * or bdrv_abort_perm_update(). + */ +static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + int ret; + + /* Write permissions never work with read-only images */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + bdrv_is_read_only(bs)) + { + error_setg(errp, "Block node is read-only"); + return -EPERM; + } + + /* Check this node */ + if (!drv) { + return 0; + } + + if (drv->bdrv_check_perm) { + return drv->bdrv_check_perm(bs, cumulative_perms, + cumulative_shared_perms, errp); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return 0; + } + + /* Check all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * Notifies drivers that after a previous bdrv_check_perm() call, the + * permission update is not performed and any preparations made for it (e.g. + * taken file locks) need to be undone. + * + * This function recursively notifies all child nodes. + */ +static void bdrv_abort_perm_update(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + if (drv->bdrv_abort_perm_update) { + drv->bdrv_abort_perm_update(bs); + } + + QLIST_FOREACH(c, &bs->children, next) { + bdrv_child_abort_perm_update(c); + } +} + +static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + /* Update this node */ + if (drv->bdrv_set_perm) { + drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return; + } + + /* Update all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + bdrv_child_set_perm(c, cur_perm, cur_shared); + } +} + +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm) +{ + BdrvChild *c; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + *perm = cumulative_perms; + *shared_perm = cumulative_shared_perms; +} + +static char *bdrv_child_user_desc(BdrvChild *c) +{ + if (c->role->get_parent_desc) { + return c->role->get_parent_desc(c); + } + + return g_strdup("another user"); +} + +static char *bdrv_perm_names(uint64_t perm) +{ + struct perm_name { + uint64_t perm; + const char *name; + } permissions[] = { + { BLK_PERM_CONSISTENT_READ, "consistent read" }, + { BLK_PERM_WRITE, "write" }, + { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, + { BLK_PERM_RESIZE, "resize" }, + { BLK_PERM_GRAPH_MOD, "change children" }, + { 0, NULL } + }; + + char *result = g_strdup(""); + struct perm_name *p; + + for (p = permissions; p->name; p++) { + if (perm & p->perm) { + char *old = result; + result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name); + g_free(old); + } + } + + return result; +} + +/* + * Checks whether a new reference to @bs can be added if the new user requires + * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set, + * this old reference is ignored in the calculations; this allows checking + * permission updates for an existing reference. + * + * Needs to be followed by a call to either bdrv_set_perm() or + * bdrv_abort_perm_update(). */ +static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, + uint64_t new_shared_perm, + BdrvChild *ignore_child, Error **errp) +{ + BdrvChild *c; + uint64_t cumulative_perms = new_used_perm; + uint64_t cumulative_shared_perms = new_shared_perm; + + /* There is no reason why anyone couldn't tolerate write_unchanged */ + assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c == ignore_child) { + continue; + } + + if ((new_used_perm & c->shared_perm) != new_used_perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which does not " + "allow '%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + if ((c->perm & new_shared_perm) != c->perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which uses " + "'%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp); +} + +/* Needs to be followed by a call to either bdrv_child_set_perm() or + * bdrv_child_abort_perm_update(). */ +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + return bdrv_check_update_perm(c->bs, perm, shared, c, errp); +} + +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) +{ + uint64_t cumulative_perms, cumulative_shared_perms; + + c->perm = perm; + c->shared_perm = shared; + + bdrv_get_cumulative_perm(c->bs, &cumulative_perms, + &cumulative_shared_perms); + bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); +} + +void bdrv_child_abort_perm_update(BdrvChild *c) +{ + bdrv_abort_perm_update(c->bs); +} + +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + int ret; + + ret = bdrv_child_check_perm(c, perm, shared, errp); + if (ret < 0) { + bdrv_child_abort_perm_update(c); + return ret; + } + + bdrv_child_set_perm(c, perm, shared); + + return 0; +} + +#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_WRITE_UNCHANGED \ + | BLK_PERM_RESIZE) +#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) + +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (c == NULL) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | + (c->perm & DEFAULT_PERM_UNCHANGED); + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | + (c->shared_perm & DEFAULT_PERM_UNCHANGED); +} + +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + bool backing = (role == &child_backing); + assert(role == &child_backing || role == &child_file); + + if (!backing) { + /* Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters */ + bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + + /* Format drivers may touch metadata even if the guest doesn't write */ + if (!bdrv_is_read_only(bs)) { + perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + /* bs->file always needs to be consistent because of the metadata. We + * can never allow other users to resize or write to it. */ + perm |= BLK_PERM_CONSISTENT_READ; + shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } else { + /* We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. */ + perm &= BLK_PERM_CONSISTENT_READ; + + /* If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. */ + /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */ + if (shared & BLK_PERM_WRITE) { + shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; + } else { + shared = 0; + } + + shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + } + + *nperm = perm; + *nshared = shared; +} + +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, + bool check_new_perm) { BlockDriverState *old_bs = child->bs; + uint64_t perm, shared_perm; if (old_bs) { if (old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); } + if (child->role->detach) { + child->role->detach(child); + } QLIST_REMOVE(child, next_parent); + + /* Update permissions for old node. This is guaranteed to succeed + * because we're just taking a parent away, so we're loosening + * restrictions. */ + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); + bdrv_check_perm(old_bs, perm, shared_perm, &error_abort); + bdrv_set_perm(old_bs, perm, shared_perm); } child->bs = new_bs; @@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) if (new_bs->quiesce_counter && child->role->drained_begin) { child->role->drained_begin(child); } + + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + if (check_new_perm) { + bdrv_check_perm(new_bs, perm, shared_perm, &error_abort); + } + bdrv_set_perm(new_bs, perm, shared_perm); + + if (child->role->attach) { + child->role->attach(child); + } } } BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque) + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp) { - BdrvChild *child = g_new(BdrvChild, 1); + BdrvChild *child; + int ret; + + ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); + if (ret < 0) { + bdrv_abort_perm_update(child_bs); + return NULL; + } + + child = g_new(BdrvChild, 1); *child = (BdrvChild) { - .bs = NULL, - .name = g_strdup(child_name), - .role = child_role, - .opaque = opaque, + .bs = NULL, + .name = g_strdup(child_name), + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, }; - bdrv_replace_child(child, child_bs); + /* This performs the matching bdrv_set_perm() for the above check. */ + bdrv_replace_child(child, child_bs, false); return child; } @@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role) + const BdrvChildRole *child_role, + Error **errp) { - BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, - parent_bs); + BdrvChild *child; + uint64_t perm, shared_perm; + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + + assert(parent_bs->drv); + parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); + + child = bdrv_root_attach_child(child_bs, child_name, child_role, + perm, shared_perm, parent_bs, errp); + if (child == NULL) { + return NULL; + } + QLIST_INSERT_HEAD(&parent_bs->children, child, next); return child; } @@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child) child->next.le_prev = NULL; } - bdrv_replace_child(child, NULL); + bdrv_replace_child(child, NULL, false); g_free(child->name); g_free(child); @@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). */ -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) { if (backing_hd) { bdrv_ref(backing_hd); } if (bs->backing) { - assert(bs->backing_blocker); - bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); bdrv_unref_child(bs, bs->backing); - } else if (backing_hd) { - error_setg(&bs->backing_blocker, - "node is used as backing hd of '%s'", - bdrv_get_device_or_node_name(bs)); } if (!backing_hd) { - error_free(bs->backing_blocker); - bs->backing_blocker = NULL; bs->backing = NULL; goto out; } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing); - bs->open_flags &= ~BDRV_O_NO_BACKING; - pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); - pstrcpy(bs->backing_format, sizeof(bs->backing_format), - backing_hd->drv ? backing_hd->drv->format_name : ""); - bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit or stream */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, - bs->backing_blocker); - /* - * We do backup in 3 ways: - * 1. drive backup - * The target bs is new opened, and the source is top BDS - * 2. blockdev backup - * Both the source and the target are top BDSes. - * 3. internal backup(used for block replication) - * Both the source and the target are backing file - * - * In case 1 and 2, neither the source nor the target is the backing file. - * In case 3, we will block the top BDS, so there is only one block job - * for the top BDS and its backing chain. - */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, - bs->backing_blocker); + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, + errp); + if (!bs->backing) { + bdrv_unref(backing_hd); + } + out: bdrv_refresh_limits(bs, NULL); } @@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, /* Hook up the backing file link; drop our reference, bs owns the * backing_hd reference now */ - bdrv_set_backing_hd(bs, backing_hd); + bdrv_set_backing_hd(bs, backing_hd, &local_err); bdrv_unref(backing_hd); + if (local_err) { + ret = -EINVAL; + goto free_exit; + } qdict_del(parent_options, bdref_key); @@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { + BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - return bdrv_attach_child(parent, bs, bdref_key, child_role); + c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); + if (!c) { + bdrv_unref(bs); + return NULL; + } + + return c; } static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, @@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, * call bdrv_unref() on it), so in order to be able to return one, we have * to increase bs_snapshot's refcount here */ bdrv_ref(bs_snapshot); - bdrv_append(bs_snapshot, bs); + bdrv_append(bs_snapshot, bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } g_free(tmp_filename); return bs_snapshot; @@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } if (file_bs != NULL) { - file = blk_new(); - blk_insert_bs(file, file_bs); + file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); + if (local_err) { + goto fail; + } qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file_bs))); @@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv->bdrv_close(bs); bs->drv = NULL; - bdrv_set_backing_hd(bs, NULL); + bdrv_set_backing_hd(bs, NULL, &error_abort); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); @@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from, BdrvChild *c, *next, *to_c; QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (c->role->stay_at_node) { + continue; + } if (c->role == &child_backing) { - /* @from is generally not allowed to be a backing file, except for - * when @to is the overlay. In that case, @from may not be replaced - * by @to as @to's backing node. */ + /* If @from is a backing file of @to, ignore the child to avoid + * creating a loop. We only want to change the pointer of other + * parents. */ QLIST_FOREACH(to_c, &to->children, next) { if (to_c == c) { break; @@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from, } } - assert(c->role != &child_backing); bdrv_ref(to); - bdrv_replace_child(c, to); + /* FIXME Are we sure that bdrv_replace_child() can't run into + * &error_abort because of permissions? */ + bdrv_replace_child(c, to, true); bdrv_unref(from); } } @@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from, * parents of bs_top after bdrv_append() returns. If the caller needs to keep a * reference of its own, it must call bdrv_ref(). */ -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) { - assert(!bdrv_requests_pending(bs_top)); - assert(!bdrv_requests_pending(bs_new)); + Error *local_err = NULL; - bdrv_ref(bs_top); + assert(!atomic_read(&bs_top->in_flight)); + assert(!atomic_read(&bs_new->in_flight)); + + bdrv_set_backing_hd(bs_new, bs_top, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } change_parent_backing_link(bs_top, bs_new); - bdrv_set_backing_hd(bs_new, bs_top); - bdrv_unref(bs_top); /* bs_new is now referenced by its new parents, we don't need the * additional reference any more. */ +out: bdrv_unref(bs_new); } @@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base, const char *backing_file_str) { BlockDriverState *new_top_bs = NULL; + Error *local_err = NULL; int ret = -EIO; if (!top->drv || !base->drv) { @@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, if (ret) { goto exit; } - bdrv_set_backing_hd(new_top_bs, base); + + bdrv_set_backing_hd(new_top_bs, base, &local_err); + if (local_err) { + ret = -EPERM; + error_report_err(local_err); + goto exit; + } ret = 0; exit: @@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset) BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; int ret; + + assert(child->perm & BLK_PERM_RESIZE); + if (!drv) return -ENOMEDIUM; if (!drv->bdrv_truncate) diff --git a/block/backup.c b/block/backup.c index fe010e78e3..d1ab617c7e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - job = block_job_create(job_id, &backup_job_driver, bs, speed, - creation_flags, cb, opaque, errp); + /* job->common.len is fixed, so we can't allow resize */ + job = block_job_create(job_id, &backup_job_driver, bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, + speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - job->target = blk_new(); - blk_insert_bs(job->target, target); + /* The target must match the source in size, so no resize here either */ + job->target = blk_new(BLK_PERM_WRITE, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(job->target, target, errp); + if (ret < 0) { + goto error; + } job->on_source_error = on_source_error; job->on_target_error = on_target_error; @@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - block_job_add_bdrv(&job->common, target); + /* Required permissions are already taken with target's blk_new() */ + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); job->common.len = len; block_job_txn_add_job(txn, &job->common); diff --git a/block/blkdebug.c b/block/blkdebug.c index 6117ce5fca..67e8024e36 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, .bdrv_reopen_prepare = blkdebug_reopen_prepare, + .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkreplay.c b/block/blkreplay.c index cfc8c5be02..e1102119fb 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = { .bdrv_file_open = blkreplay_open, .bdrv_close = blkreplay_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkreplay_getlength, .bdrv_co_preadv = blkreplay_co_preadv, diff --git a/block/blkverify.c b/block/blkverify.c index 43a940c2f5..9a1e21c6ad 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_parse_filename = blkverify_parse_filename, .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename = blkverify_refresh_filename, diff --git a/block/block-backend.c b/block/block-backend.c index 492e71e41f..daa7908d01 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -59,6 +59,9 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + uint64_t perm; + uint64_t shared_perm; + bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); +static char *blk_get_attached_dev_id(BlockBackend *blk); /* All BlockBackends */ static QTAILQ_HEAD(, BlockBackend) block_backends = @@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static char *blk_root_get_parent_desc(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + char *dev_id; + + if (blk->name) { + return g_strdup(blk->name); + } + + dev_id = blk_get_attached_dev_id(blk); + if (*dev_id) { + return dev_id; + } else { + /* TODO Callback into the BB owner for something more detailed */ + g_free(dev_id); + return g_strdup("a block device"); + } +} + static const char *blk_root_get_name(BdrvChild *child) { return blk_name(child->opaque); @@ -110,6 +133,7 @@ static const BdrvChildRole child_root = { .change_media = blk_root_change_media, .resize = blk_root_resize, .get_name = blk_root_get_name, + .get_parent_desc = blk_root_get_parent_desc, .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, @@ -117,15 +141,23 @@ static const BdrvChildRole child_root = { /* * Create a new BlockBackend with a reference count of one. - * Store an error through @errp on failure, unless it's null. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions + * to request for a block driver node that is attached to this BlockBackend. + * @shared_perm is a bitmask which describes which permissions may be granted + * to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(void) +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->perm = perm; + blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); qemu_co_queue_init(&blk->public.throttled_reqs[0]); @@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, { BlockBackend *blk; BlockDriverState *bs; + uint64_t perm; + + /* blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a concern because the BDS stays private, so we + * just request permission according to the flags. + * + * The exceptions are xen_disk and blockdev_init(); in these cases, the + * caller of blk_new_open() doesn't make use of the permissions, but they + * shouldn't hurt either. We can still share everything here because the + * guest devices will add their own blockers if they can't share. */ + perm = BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } + if (flags & BDRV_O_RESIZE) { + perm |= BLK_PERM_RESIZE; + } - blk = blk_new(); + blk = blk_new(perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + perm, BLK_PERM_ALL, blk, &error_abort); return blk; } @@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk) /* * Associates a new BlockDriverState with @blk. */ -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->perm, blk->shared_perm, blk, errp); + if (blk->root == NULL) { + return -EPERM; + } bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { throttle_timers_attach_aio_context( &blk->public.throttle_timers, bdrv_get_aio_context(bs)); } + + return 0; +} + +/* + * Sets the permission bitmasks that the user of the BlockBackend needs. + */ +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + + if (blk->root) { + ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + blk->perm = perm; + blk->shared_perm = shared_perm; + + return 0; +} + +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) +{ + *perm = blk->perm; + *shared_perm = blk->shared_perm; } static int blk_do_attach_dev(BlockBackend *blk, void *dev) @@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; blk->guest_block_size = 512; + blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } @@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, /* * Notify @blk's attached device model of media change. - * If @load is true, notify of media load. - * Else, notify of media eject. + * + * If @load is true, notify of media load. This action can fail, meaning that + * the medium cannot be loaded. @errp is set then. + * + * If @load is false, notify of media eject. This can never fail. + * * Also send DEVICE_TRAY_MOVED events as appropriate. */ -void blk_dev_change_media_cb(BlockBackend *blk, bool load) +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; + Error *local_err = NULL; assert(!blk->legacy_dev); tray_was_open = blk_dev_is_tray_open(blk); - blk->dev_ops->change_media_cb(blk->dev_opaque, load); + blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); + if (local_err) { + assert(load == true); + error_propagate(errp, local_err); + return; + } tray_is_open = blk_dev_is_tray_open(blk); if (tray_was_open != tray_is_open) { @@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load) static void blk_root_change_media(BdrvChild *child, bool load) { - blk_dev_change_media_cb(child->opaque, load); + blk_dev_change_media_cb(child->opaque, load, NULL); } /* diff --git a/block/bochs.c b/block/bochs.c index 7dd2ac4f51..516da56c3b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = { .instance_size = sizeof(BDRVBochsState), .bdrv_probe = bochs_probe, .bdrv_open = bochs_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = bochs_refresh_limits, .bdrv_co_preadv = bochs_co_preadv, .bdrv_close = bochs_close, diff --git a/block/cloop.c b/block/cloop.c index 877c9b0d1b..a6c7b9dbe6 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = { .instance_size = sizeof(BDRVCloopState), .bdrv_probe = cloop_probe, .bdrv_open = cloop_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = cloop_refresh_limits, .bdrv_co_preadv = cloop_co_preadv, .bdrv_close = cloop_close, diff --git a/block/commit.c b/block/commit.c index c284e8535d..22a0a4db98 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,6 +36,7 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; + BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; @@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque) BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, top); + BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); int ret = data->ret; + bool remove_commit_top_bs = false; + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. */ + blk_unref(s->base); if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); + ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + s->backing_file_str); + } else if (overlay_bs) { + /* XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + remove_commit_top_bs = true; } /* restore base open flags here if appropriate (e.g., change the base back @@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque) } g_free(s->backing_file_str); blk_unref(s->top); - blk_unref(s->base); block_job_completed(&s->common, ret); g_free(data); + + /* If bdrv_drop_intermediate() didn't already do that, remove the commit + * filter driver from the backing chain. Do this as the final step so that + * the 'consistent read' permission can be granted. */ + if (remove_commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } } static void coroutine_fn commit_run(void *opaque) @@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = { .start = commit_run, }; +static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static void bdrv_commit_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_commit_top = { + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_close = bdrv_commit_top_close, + .bdrv_child_perm = bdrv_commit_top_child_perm, +}; + void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp) + const char *filter_node_name, Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; @@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, int orig_base_flags; BlockDriverState *iter; BlockDriverState *overlay_bs; + BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; + int ret; assert(top != bs); if (top == base) { @@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - s = block_job_create(job_id, &commit_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); + s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } @@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs, bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); - block_job_unref(&s->common); - return; + goto fail; } } + /* Insert commit_top block node above top, so we can block consistent read + * on the backing chain below it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, + errp); + if (commit_top_bs == NULL) { + goto fail; + } + + bdrv_set_backing_hd(commit_top_bs, top, &error_abort); + bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort); + + s->commit_top_bs = commit_top_bs; + bdrv_unref(commit_top_bs); /* Block all nodes between top and base, because they will * disappear from the chain after this operation. */ assert(bdrv_chain_contains(top, base)); - for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + for (iter = top; iter != base; iter = backing_bs(iter)) { + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves + * at s->base (if writes are blocked for a node, they are also blocked + * for its backing file). The other options would be a second filter + * driver above s->base. */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } + } + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } + /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string, but if it's the root node then - * don't block it again */ - if (bs != overlay_bs) { - block_job_add_bdrv(&s->common, overlay_bs); + * update the backing image string. */ + ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, + BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } - s->base = blk_new(); - blk_insert_bs(s->base, base); + s->base = blk_new(BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_RESIZE, + BLK_PERM_CONSISTENT_READ + | BLK_PERM_GRAPH_MOD + | BLK_PERM_WRITE_UNCHANGED); + ret = blk_insert_bs(s->base, base, errp); + if (ret < 0) { + goto fail; + } - s->top = blk_new(); - blk_insert_bs(s->top, top); + /* Required permissions are already taken with block_job_add_bdrv() */ + s->top = blk_new(0, BLK_PERM_ALL); + blk_insert_bs(s->top, top, errp); + if (ret < 0) { + goto fail; + } s->active = bs; @@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, trace_commit_start(bs, base, top, s); block_job_start(&s->common); + return; + +fail: + if (s->base) { + blk_unref(s->base); + } + if (s->top) { + blk_unref(s->top); + } + if (commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } + block_job_unref(&s->common); } @@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs, int bdrv_commit(BlockDriverState *bs) { BlockBackend *src, *backing; + BlockDriverState *backing_file_bs = NULL; + BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; uint8_t *buf = NULL; + Error *local_err = NULL; if (!drv) return -ENOMEDIUM; @@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(); - blk_insert_bs(src, bs); + src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); - backing = blk_new(); - blk_insert_bs(backing, bs->backing->bs); + ret = blk_insert_bs(src, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + + /* Insert commit_top block node above backing, so we can write to it */ + backing_file_bs = backing_bs(bs); + + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, + &local_err); + if (commit_top_bs == NULL) { + error_report_err(local_err); + goto ro_cleanup; + } + + bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); + bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); + + ret = blk_insert_bs(backing, backing_file_bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } length = blk_getlength(src); if (length < 0) { @@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs) ro_cleanup: qemu_vfree(buf); - blk_unref(src); blk_unref(backing); + if (backing_file_bs) { + bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); + } + bdrv_unref(commit_top_bs); + blk_unref(src); if (ro) { /* ignoring error return here */ diff --git a/block/crypto.c b/block/crypto.c index 7cb2ff2946..4a2038888d 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_probe = block_crypto_probe_luks, .bdrv_open = block_crypto_open_luks, .bdrv_close = block_crypto_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = block_crypto_create_luks, .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, diff --git a/block/dmg.c b/block/dmg.c index 8e387cdfe5..a7d25fc47b 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = { .bdrv_probe = dmg_probe, .bdrv_open = dmg_open, .bdrv_refresh_limits = dmg_refresh_limits, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = dmg_co_preadv, .bdrv_close = dmg_close, }; diff --git a/block/io.c b/block/io.c index d5c45447fd..8f38d46de0 100644 --- a/block/io.c +++ b/block/io.c @@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); } -static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { + BlockDriverState *bs = child->bs; + /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up * modifying the image file. This is critical for zero-copy guest I/O @@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, size_t skip_bytes; int ret; + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ @@ -1001,10 +1005,11 @@ err: * handles copy on read, zeroing after EOF, and fragmentation of large * reads; any other features must be implemented by the caller. */ -static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; int ret = 0; uint64_t bytes_remaining = bytes; @@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov); goto out; } } @@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); @@ -1306,10 +1311,11 @@ fail: * Forwards an already correctly aligned write request to the BlockDriver, * after possibly fragmenting it. */ -static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; bool waited; int ret; @@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(child->perm & BLK_PERM_WRITE); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); @@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } -static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, BdrvRequestFlags flags, BdrvTrackedRequest *req) { + BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; @@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); if (ret < 0) { @@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, if (bytes >= align) { /* Write the aligned part in the middle. */ uint64_t aligned_bytes = bytes & ~(align - 1); - ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, NULL, flags); if (ret < 0) { goto fail; @@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, req, offset, align, + ret = bdrv_aligned_preadv(child, req, offset, align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); memset(buf, 0, bytes); - ret = bdrv_aligned_pwritev(bs, req, offset, align, align, + ret = bdrv_aligned_pwritev(child, req, offset, align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); } fail: @@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { - ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); goto out; } @@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&head_qiov, &head_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, align, &head_qiov, 0); if (ret < 0) { goto fail; @@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, - align, &tail_qiov, 0); + ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), + align, align, &tail_qiov, 0); if (ret < 0) { goto fail; } @@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, bytes = ROUND_UP(bytes, align); } - ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); diff --git a/block/mirror.c b/block/mirror.c index 1b34b366d0..57f26c33a4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -38,7 +38,10 @@ typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; BlockBackend *target; + BlockDriverState *mirror_top_bs; + BlockDriverState *source; BlockDriverState *base; + /* The name of the graph node to replace */ char *replaces; /* The BDS to replace */ @@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = blk_bs(s->common.blk); + BlockDriverState *source = s->source; int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ @@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = blk_bs(s->common.blk); + BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); + bdrv_ref(mirror_top_bs); + + /* We don't access the source any more. Dropping any WRITE/RESIZE is + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + } + } + } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); @@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_drained_begin(target_bs); bdrv_replace_in_backing_chain(to_replace, target_bs); bdrv_drained_end(target_bs); - - /* We just changed the BDS the job BB refers to */ - blk_remove_bs(job->blk); - blk_insert_bs(job->blk, src); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque) g_free(s->replaces); blk_unref(s->target); s->target = NULL; + + /* Remove the mirror filter driver from the graph. Before this, get rid of + * the blockers on the intermediate nodes so that the resulting state is + * valid. */ + block_job_remove_all_bdrv(job); + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); + + /* We just changed the BDS the job BB refers to (with either or both of the + * bdrv_replace_in_backing_chain() calls), so switch the BB back so the + * cleanup does the right thing. We don't need any permissions any more + * now. */ + blk_remove_bs(job->blk); + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(job->blk, mirror_top_bs, &error_abort); + block_job_completed(&s->common, data->ret); + g_free(data); bdrv_drained_end(src); + bdrv_unref(mirror_top_bs); bdrv_unref(src); } @@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t sector_num, end; BlockDriverState *base = s->base; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret, n; @@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; @@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - BlockDriverState *src, *target; + BlockDriverState *target; - src = blk_bs(job->blk); target = blk_bs(s->target); if (!s->synced) { @@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp) replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); + /* TODO Translate this into permission system. Current definition of + * GRAPH_MOD would require to request it for the parents; they might + * not even be BlockDriverStates, however, so a BdrvChild can't address + * them. May need redefinition of GRAPH_MOD. */ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); @@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } - if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; - if (backing_bs(target) != backing) { - bdrv_set_backing_hd(target, backing); - } - } - s->should_complete = true; block_job_enter(&s->common); } @@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = { .drain = mirror_drain, }; +static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) +{ + return bdrv_co_flush(bs->backing->bs); +} + +static int64_t coroutine_fn bdrv_mirror_top_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->backing->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); +} + +static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags); +} + +static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + return bdrv_co_pdiscard(bs->backing->bs, offset, count); +} + +static void bdrv_mirror_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* Must be able to forward guest writes to the real image */ + *nperm = 0; + if (perm & BLK_PERM_WRITE) { + *nperm |= BLK_PERM_WRITE; + } + + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_mirror_top = { + .format_name = "mirror_top", + .bdrv_co_preadv = bdrv_mirror_top_preadv, + .bdrv_co_pwritev = bdrv_mirror_top_pwritev, + .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, + .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, + .bdrv_co_flush = bdrv_mirror_top_flush, + .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, + .bdrv_close = bdrv_mirror_top_close, + .bdrv_child_perm = bdrv_mirror_top_child_perm, +}; + static void mirror_start_job(const char *job_id, BlockDriverState *bs, int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, @@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, void *opaque, Error **errp, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, - bool auto_complete) + bool auto_complete, const char *filter_node_name) { MirrorBlockJob *s; + BlockDriverState *mirror_top_bs; + bool target_graph_mod; + bool target_is_backing; + Error *local_err = NULL; + int ret; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - s = block_job_create(job_id, driver, bs, speed, creation_flags, - cb, opaque, errp); - if (!s) { + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. */ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, + BDRV_O_RDWR, errp); + if (mirror_top_bs == NULL) { + return; + } + mirror_top_bs->total_sectors = bs->total_sectors; + + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep + * it alive until block_job_create() even if bs has no parent. */ + bdrv_ref(mirror_top_bs); + bdrv_drained_begin(bs); + bdrv_append(mirror_top_bs, bs, &local_err); + bdrv_drained_end(bs); + + if (local_err) { + bdrv_unref(mirror_top_bs); + error_propagate(errp, local_err); return; } - s->target = blk_new(); - blk_insert_bs(s->target, target); + /* Make sure that the source is not resized while the job is running */ + s = block_job_create(job_id, driver, mirror_top_bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + creation_flags, cb, opaque, errp); + bdrv_unref(mirror_top_bs); + if (!s) { + goto fail; + } + s->source = bs; + s->mirror_top_bs = mirror_top_bs; + + /* No resize for the target either; while the mirror is still running, a + * consistent read isn't necessarily possible. We could possibly allow + * writes and graph modifications, though it would likely defeat the + * purpose of a mirror, so leave them blocked for now. + * + * In the case of active commit, things look a bit different, though, + * because the target is an already populated backing file in active use. + * We can allow anything except resize there.*/ + target_is_backing = bdrv_chain_contains(bs, target); + target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); + s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), + BLK_PERM_WRITE_UNCHANGED | + (target_is_backing ? BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE | + BLK_PERM_GRAPH_MOD : 0)); + ret = blk_insert_bs(s->target, target, errp); + if (ret < 0) { + goto fail; + } s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; @@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - block_job_add_bdrv(&s->common, target); + /* Required permissions are already taken with blk_new() */ + block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ - if (bdrv_chain_contains(bs, target)) { + if (target_is_backing) { BlockDriverState *iter; for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } } } trace_mirror_start(bs, s, opaque); block_job_start(&s->common); + return; + +fail: + if (s) { + g_free(s->replaces); + blk_unref(s->target); + block_job_unref(&s->common); + } + + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); } void mirror_start(const char *job_id, BlockDriverState *bs, @@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp) + bool unmap, const char *filter_node_name, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, errp, - &mirror_job_driver, is_none_mode, base, false); + &mirror_job_driver, is_none_mode, base, false, + filter_node_name); } void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, BlockCompletionFunc *cb, void *opaque, Error **errp, bool auto_complete) { @@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &local_err, - &commit_active_job_driver, false, base, auto_complete); + &commit_active_job_driver, false, base, auto_complete, + filter_node_name); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/parallels.c b/block/parallels.c index b2ec09f7e6..19935e29a9 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) } file = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (file == NULL) { error_propagate(errp, local_err); return -EIO; @@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = { .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_flush_to_os = parallels_co_flush_to_os, diff --git a/block/qcow.c b/block/qcow.c index 038b05ab1b..9d6ac83959 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } qcow_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (qcow_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/qcow2.c b/block/qcow2.c index 21e61427eb..6a92d2ef3f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, uint64_t cluster_size = s->cluster_size; bool encrypt; int refcount_bits = s->refcount_bits; + Error *local_err = NULL; int ret; QemuOptDesc *desc = opts->list->desc; Qcow2AmendHelperCBInfo helper_cb_info; @@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - BlockBackend *blk = blk_new(); - blk_insert_bs(blk, bs); + BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + blk_unref(blk); + return ret; + } + ret = blk_truncate(blk, new_size); blk_unref(blk); - if (ret < 0) { return ret; } @@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, diff --git a/block/qed.c b/block/qed.c index 62a0a09326..5ec7fd83f2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, diff --git a/block/quorum.c b/block/quorum.c index 86e2072dce..40205fb1b3 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, /* We can safely add the child now */ bdrv_ref(child_bs); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); + + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); + if (child == NULL) { + s->next_child_index--; + bdrv_unref(child_bs); + goto out; + } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; +out: bdrv_drained_end(bs); } @@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, + .bdrv_child_perm = bdrv_filter_default_perms, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/block/raw-format.c b/block/raw-format.c index ce34d1b1cd..86fbc657eb 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -467,6 +467,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_create = &raw_create, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/replication.c b/block/replication.c index eff85c77ba..22f170fd33 100644 --- a/block/replication.c +++ b/block/replication.c @@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->replication_state = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, - replication_done, bs, errp, true); + NULL, replication_done, bs, errp, true); break; default: aio_context_release(aio_context); @@ -660,6 +660,7 @@ BlockDriver bdrv_replication = { .bdrv_open = replication_open, .bdrv_close = replication_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = replication_getlength, .bdrv_co_readv = replication_co_readv, diff --git a/block/sheepdog.c b/block/sheepdog.c index 860ba61502..743471043e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp) int ret; blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); if (blk == NULL) { ret = -EIO; goto out_with_err_set; diff --git a/block/stream.c b/block/stream.c index 1523ba7dfb..0113710845 100644 --- a/block/stream.c +++ b/block/stream.c @@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque) StreamCompleteData *data = opaque; BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; + Error *local_err = NULL; if (!block_job_is_cancelled(&s->common) && data->reached_end && data->ret == 0) { @@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque) } } data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); - bdrv_set_backing_hd(bs, base); + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + goto out; + } } +out: /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { + /* Give up write permissions before making it read-only */ + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); bdrv_reopen(bs, s->bs_flags, NULL); } @@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; int orig_bs_flags; - s = block_job_create(job_id, &stream_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); - if (!s) { - return; - } - /* Make sure that the image is opened in read-write mode */ orig_bs_flags = bdrv_get_flags(bs); if (!(orig_bs_flags & BDRV_O_RDWR)) { if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { - block_job_unref(&s->common); return; } } - /* Block all intermediate nodes between bs and base, because they - * will disappear from the chain after this operation */ + /* Prevent concurrent jobs trying to modify the graph structure here, we + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. */ + s = block_job_create(job_id, &stream_job_driver, bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); + if (!s) { + goto fail; + } + + /* Block all intermediate nodes between bs and base, because they will + * disappear from the chain after this operation. The streaming job reads + * every block only once, assuming that it doesn't change, so block writes + * and resizes. */ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, + &error_abort); } s->base = base; @@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, s->on_error = on_error; trace_stream_start(bs, base, s); block_job_start(&s->common); + return; + +fail: + if (orig_bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } } diff --git a/block/vdi.c b/block/vdi.c index 18b4773aac..9b4f70e977 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = vdi_co_get_block_status, diff --git a/block/vhdx.c b/block/vhdx.c index 9918ee98ff..052a753159 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = { .bdrv_open = vhdx_open, .bdrv_close = vhdx_close, .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, .bdrv_create = vhdx_create, diff --git a/block/vmdk.c b/block/vmdk.c index 9d68ec5a4e..a9bd22bf93 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } new_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (new_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_open = vmdk_open, .bdrv_check = vmdk_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = vmdk_co_preadv, .bdrv_co_pwritev = vmdk_co_pwritev, .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, diff --git a/block/vpc.c b/block/vpc.c index d0df2a1c54..f591d4be38 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = { .bdrv_open = vpc_open, .bdrv_close = vpc_close, .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vpc_create, .bdrv_co_preadv = vpc_co_preadv, diff --git a/block/vvfat.c b/block/vvfat.c index 7f230be006..aa61c329e7 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) &error_abort); *(void**) backing->opaque = s; - bdrv_set_backing_hd(s->bs, backing); + bdrv_set_backing_hd(s->bs, backing, &error_abort); bdrv_unref(backing); return 0; @@ -3052,6 +3052,27 @@ err: return ret; } +static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVVVFATState *s = bs->opaque; + + assert(c == s->qcow || role == &child_backing); + + if (c == s->qcow) { + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; + } else { + /* The backing file is there so 'commit' can use it. vvfat doesn't + * access it in any way. */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + } +} + static void vvfat_close(BlockDriverState *bs) { BDRVVVFATState *s = bs->opaque; @@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_file_open = vvfat_open, .bdrv_refresh_limits = vvfat_refresh_limits, .bdrv_close = vvfat_close, + .bdrv_child_perm = vvfat_child_perm, .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev = vvfat_co_pwritev, diff --git a/blockdev.c b/blockdev.c index 8682bd81d8..8eb4e84fe0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -558,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(); + blk = blk_new(0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -1768,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common, if (!state->new_bs->drv->supports_backing) { error_setg(errp, "The snapshot does not support backing images"); + return; + } + + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. */ + bdrv_ref(state->new_bs); + bdrv_append(state->new_bs, state->old_bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } } @@ -1778,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common) bdrv_set_aio_context(state->new_bs, state->aio_context); - /* This removes our old bs and adds the new bs */ - bdrv_append(state->new_bs, state->old_bs); /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ @@ -1794,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); if (state->new_bs) { - bdrv_unref(state->new_bs); + if (state->new_bs->backing) { + bdrv_replace_in_backing_chain(state->new_bs, state->old_bs); + } } } @@ -1805,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common) if (state->aio_context) { bdrv_drained_end(state->old_bs); aio_context_release(state->aio_context); + bdrv_unref(state->new_bs); } } @@ -2311,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id, } if (!locked || force) { - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } if (locked && !force) { @@ -2349,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, Error **errp) { BlockBackend *blk; + Error *local_err = NULL; device = has_device ? device : NULL; id = has_id ? id : NULL; @@ -2372,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, return; } - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } void qmp_x_blockdev_remove_medium(bool has_device, const char *device, @@ -2425,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device, * called at all); therefore, the medium needs to be ejected here. * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } out: @@ -2435,7 +2452,9 @@ out: static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + Error *local_err = NULL; bool has_device; + int ret; /* For BBs without a device, we can exchange the BDS tree at will */ has_device = blk_get_attached_dev(blk); @@ -2455,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, return; } - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + return; + } if (!blk_dev_has_tray(blk)) { /* For tray-less devices, blockdev-close-tray is a no-op (or may not be @@ -2463,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, * slot here. * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. true). */ - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_remove_bs(blk); + return; + } } } @@ -2890,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto out; + } /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); @@ -3014,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_top, const char *top, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, + bool has_filter_node_name, const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3029,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, if (!has_speed) { speed = 0; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } /* Important Note: * libvirt relies on the DeviceNotFound error class in order to probe for @@ -3103,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, goto out; } commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, - BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL, - &local_err, false); + BLOCK_JOB_DEFAULT, speed, on_error, + filter_node_name, NULL, NULL, &local_err, false); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { @@ -3112,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, on_error, has_backing_file ? backing_file : NULL, - &local_err); + filter_node_name, &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); @@ -3348,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_on_target_error, BlockdevOnError on_target_error, bool has_unmap, bool unmap, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { @@ -3369,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_unmap) { unmap = true; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3398,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, mirror_start(job_id, bs, target, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, errp); + on_source_error, on_target_error, unmap, filter_node_name, + errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3536,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_source_error, arg->on_source_error, arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, + false, NULL, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3554,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3585,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_source_error, on_source_error, has_on_target_error, on_target_error, true, true, + has_filter_node_name, filter_node_name, &local_err); error_propagate(errp, local_err); diff --git a/blockjob.c b/blockjob.c index abee11bb08..69126af97f 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,6 +55,19 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +void block_job_remove_all_bdrv(BlockJob *job) +{ + GSList *l; + for (l = job->nodes; l; l = l->next) { + BdrvChild *c = l->data; + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); + } + g_slist_free(job->nodes); + job->nodes = NULL; +} + +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) { - job->nodes = g_slist_prepend(job->nodes, bs); + BdrvChild *c; + + c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, + job, errp); + if (c == NULL) { + return -EPERM; + } + + job->nodes = g_slist_prepend(job->nodes, c); bdrv_ref(bs); bdrv_op_block_all(bs, job->blocker); + + return 0; } void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp) { BlockBackend *blk; BlockJob *job; + int ret; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); @@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(perm, shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, bs); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { - GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - for (l = job->nodes; l; l = l->next) { - bs = l->data; - bdrv_op_unblock_all(bs, job->blocker); - bdrv_unref(bs); - } - g_slist_free(job->nodes); + block_job_remove_all_bdrv(job); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index fdf40893aa..1e3bd2b8ca 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y CONFIG_A9MPCORE=y CONFIG_A15MPCORE=y +CONFIG_ARM_V7M=y + CONFIG_ARM_GIC=y CONFIG_ARM_GIC_KVM=$(CONFIG_KVM) CONFIG_ARM_TIMER=y @@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; + int ret; blk = blk_by_name(device); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk = local_blk = blk_new(); - blk_insert_bs(blk, bs); + blk = local_blk = blk_new(0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } } else { goto fail; } @@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) aio_context = blk_get_aio_context(blk); aio_context_acquire(aio_context); + /* + * Notably absent: Proper permission management. This is sad, but it seems + * almost impossible to achieve without changing the semantics and thereby + * limiting the use cases of the qemu-io HMP command. + * + * In an ideal world we would unconditionally create a new BlockBackend for + * qemuio_command(), but we have commands like 'reopen' and want them to + * take effect on the exact BlockBackend whose name the user passed instead + * of just on a temporary copy of it. + * + * Another problem is that deleting the temporary BlockBackend involves + * draining all requests on it first, but some qemu-iotests cases want to + * issue multiple aio_read/write requests and expect them to complete in + * the background while the monitor has already returned. + * + * This is also what prevents us from saving the original permissions and + * restoring them later: We can't revoke permissions until all requests + * have completed, and we don't know when that is nor can we really let + * anything else run before we have revoken them to avoid race conditions. + * + * What happens now is that command() in qemu-io-cmds.c can extend the + * permissions if necessary for the qemu-io command. And they simply stay + * extended, possibly resulting in a read-only guest device keeping write + * permissions. Ugly, but it appears to be the lesser evil. + */ qemuio_command(blk, command); aio_context_release(aio_context); diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 2369b918aa..f22a3c3654 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -13,7 +13,9 @@ #include "qemu/osdep.h" #include "9p.h" +#include "9p-local.h" #include "9p-xattr.h" +#include "9p-util.h" #include "fsdev/qemu-fsdev.h" /* local_ops */ #include <arpa/inet.h> #include <pwd.h> @@ -43,40 +45,62 @@ #define BTRFS_SUPER_MAGIC 0x9123683E #endif -#define VIRTFS_META_DIR ".virtfs_metadata" +typedef struct { + int mountfd; +} LocalData; -static char *local_mapped_attr_path(FsContext *ctx, const char *path) +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode) { - int dirlen; - const char *name = strrchr(path, '/'); - if (name) { - dirlen = name - path; - ++name; - } else { - name = path; - dirlen = 0; + LocalData *data = fs_ctx->private; + + /* All paths are relative to the path data->mountfd points to */ + while (*path == '/') { + path++; } - return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root, - dirlen, path, VIRTFS_META_DIR, name); + + return relative_openat_nofollow(data->mountfd, path, flags, mode); +} + +int local_opendir_nofollow(FsContext *fs_ctx, const char *path) +{ + return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0); +} + +static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd, + const char *npath) +{ + int serrno = errno; + renameat(odirfd, opath, ndirfd, npath); + errno = serrno; } -static FILE *local_fopen(const char *path, const char *mode) +static void unlinkat_preserve_errno(int dirfd, const char *path, int flags) +{ + int serrno = errno; + unlinkat(dirfd, path, flags); + errno = serrno; +} + +#define VIRTFS_META_DIR ".virtfs_metadata" + +static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { int fd, o_mode = 0; FILE *fp; - int flags = O_NOFOLLOW; + int flags; /* * only supports two modes */ if (mode[0] == 'r') { - flags |= O_RDONLY; + flags = O_RDONLY; } else if (mode[0] == 'w') { - flags |= O_WRONLY | O_TRUNC | O_CREAT; + flags = O_WRONLY | O_TRUNC | O_CREAT; o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; } else { return NULL; } - fd = open(path, flags, o_mode); + fd = openat_file(dirfd, name, flags, o_mode); if (fd == -1) { return NULL; } @@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode) } #define ATTR_MAX 100 -static void local_mapped_file_attr(FsContext *ctx, const char *path, +static void local_mapped_file_attr(int dirfd, const char *name, struct stat *stbuf) { FILE *fp; char buf[ATTR_MAX]; - char *attr_path; + int map_dirfd; - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); - g_free(attr_path); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } + + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); if (!fp) { return; } @@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path, static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) { - int err; - char *buffer; - char *path = fs_path->data; + int err = -1; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(fs_ctx, path); - err = lstat(buffer, stbuf); + err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW); if (err) { goto err_out; } @@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) gid_t tmp_gid; mode_t tmp_mode; dev_t tmp_dev; - if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (getxattr(buffer, "user.virtfs.mode", - &tmp_mode, sizeof(mode_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev, + sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - local_mapped_file_attr(fs_ctx, path, stbuf); + local_mapped_file_attr(dirfd, name, stbuf); } err_out: - g_free(buffer); - return err; -} - -static int local_create_mapped_attr_dir(FsContext *ctx, const char *path) -{ - int err; - char *attr_dir; - char *tmp_path = g_strdup(path); - - attr_dir = g_strdup_printf("%s/%s/%s", - ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR); - - err = mkdir(attr_dir, 0700); - if (err < 0 && errno == EEXIST) { - err = 0; - } - g_free(attr_dir); - g_free(tmp_path); + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } -static int local_set_mapped_file_attr(FsContext *ctx, - const char *path, FsCred *credp) +static int local_set_mapped_file_attrat(int dirfd, const char *name, + FsCred *credp) { FILE *fp; - int ret = 0; + int ret; char buf[ATTR_MAX]; - char *attr_path; int uid = -1, gid = -1, mode = -1, rdev = -1; + int map_dirfd; + + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return -1; + } + + fp = local_fopenat(map_dirfd, name, "r"); if (!fp) { - goto create_map_file; + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { if (!strncmp(buf, "virtfs.uid", 10)) { - uid = atoi(buf+11); + uid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.gid", 10)) { - gid = atoi(buf+11); + gid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.mode", 11)) { - mode = atoi(buf+12); + mode = atoi(buf + 12); } else if (!strncmp(buf, "virtfs.rdev", 11)) { - rdev = atoi(buf+12); + rdev = atoi(buf + 12); } memset(buf, 0, ATTR_MAX); } fclose(fp); - goto update_map_file; - -create_map_file: - ret = local_create_mapped_attr_dir(ctx, path); - if (ret < 0) { - goto err_out; - } update_map_file: - fp = local_fopen(attr_path, "w"); + fp = local_fopenat(map_dirfd, name, "w"); + close_preserve_errno(map_dirfd); if (!fp) { - ret = -1; - goto err_out; + return -1; } if (credp->fc_uid != -1) { @@ -230,7 +259,6 @@ update_map_file: rdev = credp->fc_rdev; } - if (uid != -1) { fprintf(fp, "virtfs.uid=%d\n", uid); } @@ -245,39 +273,71 @@ update_map_file: } fclose(fp); -err_out: - g_free(attr_path); + return 0; +} + +static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) +{ + int fd, ret; + + /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). + * Unfortunately, the linux kernel doesn't implement it yet. As an + * alternative, let's open the file and use fchmod() instead. This + * may fail depending on the permissions of the file, but it is the + * best we can do to avoid TOCTTOU. We first try to open read-only + * in case name points to a directory. If that fails, we try write-only + * in case name doesn't point to a directory. + */ + fd = openat_file(dirfd, name, O_RDONLY, 0); + if (fd == -1) { + /* In case the file is writable-only and isn't a directory. */ + if (errno == EACCES) { + fd = openat_file(dirfd, name, O_WRONLY, 0); + } + if (fd == -1 && errno == EISDIR) { + errno = EACCES; + } + } + if (fd == -1) { + return -1; + } + ret = fchmod(fd, mode); + close_preserve_errno(fd); return ret; } -static int local_set_xattr(const char *path, FsCred *credp) +static int local_set_xattrat(int dirfd, const char *path, FsCred *credp) { int err; if (credp->fc_uid != -1) { uint32_t tmp_uid = cpu_to_le32(credp->fc_uid); - err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t), 0); if (err) { return err; } } if (credp->fc_gid != -1) { uint32_t tmp_gid = cpu_to_le32(credp->fc_gid); - err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t), 0); if (err) { return err; } } if (credp->fc_mode != -1) { uint32_t tmp_mode = cpu_to_le32(credp->fc_mode); - err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t), 0); if (err) { return err; } } if (credp->fc_rdev != -1) { uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev); - err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev, + sizeof(dev_t), 0); if (err) { return err; } @@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp) return 0; } -static int local_post_create_passthrough(FsContext *fs_ctx, const char *path, - FsCred *credp) +static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd, + const char *name, FsCred *credp) { - char *buffer; - - buffer = rpath(fs_ctx, path); - if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) { + if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - goto err; + return -1; } } - if (chmod(buffer, credp->fc_mode & 07777) < 0) { - goto err; - } - - g_free(buffer); - return 0; -err: - g_free(buffer); - return -1; + return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777); } static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path, char *buf, size_t bufsz) { ssize_t tsize = -1; - char *buffer; - char *path = fs_path->data; if ((fs_ctx->export_flags & V9FS_SM_MAPPED) || (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) { int fd; - buffer = rpath(fs_ctx, path); - fd = open(buffer, O_RDONLY | O_NOFOLLOW); - g_free(buffer); + + fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0); if (fd == -1) { return -1; } do { tsize = read(fd, (void *)buf, bufsz); } while (tsize == -1 && errno == EINTR); - close(fd); + close_preserve_errno(fd); } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - tsize = readlink(buffer, buf, bufsz); - g_free(buffer); + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + tsize = readlinkat(dirfd, name, buf, bufsz); + close_preserve_errno(dirfd); + out: + g_free(name); + g_free(dirpath); } return tsize; } @@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs) static int local_open(FsContext *ctx, V9fsPath *fs_path, int flags, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int fd; - buffer = rpath(ctx, path); - fs->fd = open(buffer, flags | O_NOFOLLOW); - g_free(buffer); + fd = local_open_nofollow(ctx, fs_path->data, flags, 0); + if (fd == -1) { + return -1; + } + fs->fd = fd; return fs->fd; } static int local_opendir(FsContext *ctx, V9fsPath *fs_path, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int dirfd; + DIR *stream; + + dirfd = local_opendir_nofollow(ctx, fs_path->data); + if (dirfd == -1) { + return -1; + } - buffer = rpath(ctx, path); - fs->dir.stream = opendir(buffer); - g_free(buffer); - if (!fs->dir.stream) { + stream = fdopendir(dirfd); + if (!stream) { return -1; } + fs->dir.stream = stream; return 0; } @@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs, static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = chmod(buffer, credp->fc_mode); - g_free(buffer); + ret = local_set_mapped_file_attrat(dirfd, name, credp); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + ret = fchmodat_nofollow(dirfd, name, credp->fc_mode); } + close_preserve_errno(dirfd); + +out: + g_free(dirpath); + g_free(name); return ret; } static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); if (err == -1) { goto out; } - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); - if (err == -1) { - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, credp->fc_mode, credp->fc_rdev); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); if (err == -1) { goto out; } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); - if (err == -1) { - goto out; + credp->fc_mode = credp->fc_mode | S_IFDIR; + + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, credp->fc_mode); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mkdirat(dirfd, name, credp->fc_mode); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, int flags, FsCred *credp, V9fsFidOpenState *fs) { - char *path; int fd = -1; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; /* * Mark all the open to not follow symlinks */ flags |= O_NOFOLLOW; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set cleint credentials in xattr */ - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + /* Set cleint credentials in xattr */ + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set client credentials in .virtfs_metadata directory files */ - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, credp->fc_mode); + fd = openat_file(dirfd, name, flags, credp->fc_mode); if (fd == -1) { - err = fd; goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } @@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, goto out; err_end: - close(fd); - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, + flags & O_DIRECTORY ? AT_REMOVEDIR : 0); + close_preserve_errno(fd); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, V9fsPath *dir_path, const char *name, FsCred *credp) { int err = -1; - int serrno = 0; - char *newpath; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - newpath = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { int fd; ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); + + fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, + SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } /* Write the oldpath (target) to the file. */ @@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, do { write_size = write(fd, (void *)oldpath, oldpath_size); } while (write_size == -1 && errno == EINTR); + close_preserve_errno(fd); if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; goto err_end; } - close(fd); /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - int fd; - ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - /* Write the oldpath (target) to the file. */ - oldpath_size = strlen(oldpath); - do { - write_size = write(fd, (void *)oldpath, oldpath_size); - } while (write_size == -1 && errno == EINTR); + credp->fc_mode = credp->fc_mode | S_IFLNK; - if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - close(fd); - /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_mapped_file_attr(fs_ctx, newpath, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, newpath); - err = symlink(oldpath, buffer); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = symlinkat(oldpath, dirfd, name); if (err) { goto out; } - err = lchown(buffer, credp->fc_uid, credp->fc_gid); + err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); if (err == -1) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - serrno = errno; goto err_end; - } else + } else { err = 0; + } } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_link(FsContext *ctx, V9fsPath *oldpath, V9fsPath *dirpath, const char *name) { - int ret; - V9fsString newpath; - char *buffer, *buffer1; + char *odirpath = g_path_get_dirname(oldpath->data); + char *oname = g_path_get_basename(oldpath->data); + int ret = -1; + int odirfd, ndirfd; + + odirfd = local_opendir_nofollow(ctx, odirpath); + if (odirfd == -1) { + goto out; + } - v9fs_string_init(&newpath); - v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name); + ndirfd = local_opendir_nofollow(ctx, dirpath->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + goto out; + } - buffer = rpath(ctx, oldpath->data); - buffer1 = rpath(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + ret = linkat(odirfd, oname, ndirfd, name, 0); + if (ret < 0) { + goto out_close; + } /* now link the virtfs_metadata files */ - if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) { - /* Link the .virtfs_metadata files. Create the metada directory */ - ret = local_create_mapped_attr_dir(ctx, newpath.data); - if (ret < 0) { - goto err_out; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_link; } - buffer = local_mapped_attr_path(ctx, oldpath->data); - buffer1 = local_mapped_attr_path(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } + + ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); if (ret < 0 && errno != ENOENT) { - goto err_out; + goto err_undo_link; } - } -err_out: - v9fs_string_free(&newpath); - return ret; -} -static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) -{ - char *buffer; - int ret; - char *path = fs_path->data; + ret = 0; + } + goto out_close; - buffer = rpath(ctx, path); - ret = truncate(buffer, size); - g_free(buffer); +err: + ret = -1; +err_undo_link: + unlinkat_preserve_errno(ndirfd, name, 0); +out_close: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); +out: + g_free(oname); + g_free(odirpath); return ret; } -static int local_rename(FsContext *ctx, const char *oldpath, - const char *newpath) +static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) { - int err; - char *buffer, *buffer1; + int fd, ret; - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = local_create_mapped_attr_dir(ctx, newpath); - if (err < 0) { - return err; - } - /* rename the .virtfs_metadata files */ - buffer = local_mapped_attr_path(ctx, oldpath); - buffer1 = local_mapped_attr_path(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - if (err < 0 && errno != ENOENT) { - return err; - } + fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0); + if (fd == -1) { + return -1; } - - buffer = rpath(ctx, oldpath); - buffer1 = rpath(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - return err; + ret = ftruncate(fd, size); + close_preserve_errno(fd); + return ret; } static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if ((credp->fc_uid == -1 && credp->fc_gid == -1) || (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = lchown(buffer, credp->fc_uid, credp->fc_gid); - g_free(buffer); + ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); + ret = local_set_mapped_file_attrat(dirfd, name, credp); } + + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return ret; } static int local_utimensat(FsContext *s, V9fsPath *fs_path, const struct timespec *buf) { - char *buffer; - int ret; - char *path = fs_path->data; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd, ret = -1; + + dirfd = local_opendir_nofollow(s, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(s, path); - ret = qemu_utimens(buffer, buf); - g_free(buffer); + ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(name); return ret; } -static int local_remove(FsContext *ctx, const char *path) +static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, + int flags) { - int err; - struct stat stbuf; - char *buffer; + int ret = -1; if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(ctx, path); - err = lstat(buffer, &stbuf); - g_free(buffer); - if (err) { - goto err_out; - } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - if (S_ISDIR(stbuf.st_mode)) { - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - path, VIRTFS_META_DIR); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + int map_dirfd; + + if (flags == AT_REMOVEDIR) { + int fd; + + fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH); + if (fd == -1) { + goto err_out; + } + /* + * If directory remove .virtfs_metadata contained in the + * directory + */ + ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); + close_preserve_errno(fd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path) } /* * Now remove the name from parent directory - * .virtfs_metadata directory + * .virtfs_metadata directory. */ - buffer = local_mapped_attr_path(ctx, path); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path) } } - buffer = rpath(ctx, path); - err = remove(buffer); - g_free(buffer); + ret = unlinkat(dirfd, name, flags); +err_out: + return ret; +} + +static int local_remove(FsContext *ctx, const char *path) +{ + struct stat stbuf; + char *dirpath = g_path_get_dirname(path); + char *name = g_path_get_basename(path); + int flags = 0; + int dirfd; + int err = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd) { + goto out; + } + + if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + goto err_out; + } + + if (S_ISDIR(stbuf.st_mode)) { + flags |= AT_REMOVEDIR; + } + + err = local_unlinkat_common(ctx, dirfd, name, flags); err_out: + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } @@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type, static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf) { - char *buffer; - int ret; - char *path = fs_path->data; + int fd, ret; - buffer = rpath(s, path); - ret = statfs(buffer, stbuf); - g_free(buffer); + fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0); + ret = fstatfs(fd, stbuf); + close_preserve_errno(fd); return ret; } @@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir, const char *new_name) { int ret; - V9fsString old_full_name, new_full_name; + int odirfd, ndirfd; - v9fs_string_init(&old_full_name); - v9fs_string_init(&new_full_name); + odirfd = local_opendir_nofollow(ctx, olddir->data); + if (odirfd == -1) { + return -1; + } - v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name); - v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name); + ndirfd = local_opendir_nofollow(ctx, newdir->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + return -1; + } - ret = local_rename(ctx, old_full_name.data, new_full_name.data); - v9fs_string_free(&old_full_name); - v9fs_string_free(&new_full_name); + ret = renameat(odirfd, old_name, ndirfd, new_name); + if (ret < 0) { + goto out; + } + + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_rename; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } + + /* rename the .virtfs_metadata files */ + ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_undo_rename; + } + + ret = 0; + } + goto out; + +err: + ret = -1; +err_undo_rename: + renameat_preserve_errno(ndirfd, new_name, odirfd, old_name); +out: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); return ret; } +static void v9fs_path_init_dirname(V9fsPath *path, const char *str) +{ + path->data = g_path_get_dirname(str); + path->size = strlen(path->data) + 1; +} + +static int local_rename(FsContext *ctx, const char *oldpath, + const char *newpath) +{ + int err; + char *oname = g_path_get_basename(oldpath); + char *nname = g_path_get_basename(newpath); + V9fsPath olddir, newdir; + + v9fs_path_init_dirname(&olddir, oldpath); + v9fs_path_init_dirname(&newdir, newpath); + + err = local_renameat(ctx, &olddir, oname, &newdir, nname); + + v9fs_path_free(&newdir); + v9fs_path_free(&olddir); + g_free(nname); + g_free(oname); + + return err; +} + static int local_unlinkat(FsContext *ctx, V9fsPath *dir, const char *name, int flags) { int ret; - V9fsString fullname; - char *buffer; - - v9fs_string_init(&fullname); + int dirfd; - v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name); - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - if (flags == AT_REMOVEDIR) { - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - fullname.data, VIRTFS_META_DIR); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } - } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ - buffer = local_mapped_attr_path(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } + dirfd = local_opendir_nofollow(ctx, dir->data); + if (dirfd == -1) { + return -1; } - /* Remove the name finally */ - buffer = rpath(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); -err_out: - v9fs_string_free(&fullname); + ret = local_unlinkat_common(ctx, dirfd, name, flags); + close_preserve_errno(dirfd); return ret; } @@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, static int local_init(FsContext *ctx) { - int err = 0; struct statfs stbuf; + LocalData *data = g_malloc(sizeof(*data)); + + data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); + if (data->mountfd == -1) { + goto err; + } + +#ifdef FS_IOC_GETVERSION + /* + * use ioc_getversion only if the ioctl is definied + */ + if (fstatfs(data->mountfd, &stbuf) < 0) { + close_preserve_errno(data->mountfd); + goto err; + } + switch (stbuf.f_type) { + case EXT2_SUPER_MAGIC: + case BTRFS_SUPER_MAGIC: + case REISERFS_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + ctx->exops.get_st_gen = local_ioc_getversion; + break; + } +#endif if (ctx->export_flags & V9FS_SM_PASSTHROUGH) { ctx->xops = passthrough_xattr_ops; @@ -1185,23 +1277,21 @@ static int local_init(FsContext *ctx) ctx->xops = passthrough_xattr_ops; } ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT; -#ifdef FS_IOC_GETVERSION - /* - * use ioc_getversion only if the iocl is definied - */ - err = statfs(ctx->fs_root, &stbuf); - if (!err) { - switch (stbuf.f_type) { - case EXT2_SUPER_MAGIC: - case BTRFS_SUPER_MAGIC: - case REISERFS_SUPER_MAGIC: - case XFS_SUPER_MAGIC: - ctx->exops.get_st_gen = local_ioc_getversion; - break; - } - } -#endif - return err; + + ctx->private = data; + return 0; + +err: + g_free(data); + return -1; +} + +static void local_cleanup(FsContext *ctx) +{ + LocalData *data = ctx->private; + + close(data->mountfd); + g_free(data); } static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) @@ -1252,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) FileOperations local_ops = { .parse_opts = local_parse_opts, .init = local_init, + .cleanup = local_cleanup, .lstat = local_lstat, .readlink = local_readlink, .close = local_close, diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h new file mode 100644 index 0000000000..32c72749d9 --- /dev/null +++ b/hw/9pfs/9p-local.h @@ -0,0 +1,20 @@ +/* + * 9p local backend utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_LOCAL_H +#define QEMU_9P_LOCAL_H + +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode); +int local_opendir_nofollow(FsContext *fs_ctx, const char *path); + +#endif diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index ec003181cd..bbf89064f7 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -25,13 +25,7 @@ static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size); } static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, @@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size, + flags); } static int mp_pacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_ACCESS); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size); } static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, @@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size, + flags); } static int mp_dacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_DEFAULT); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c new file mode 100644 index 0000000000..fdb4d57376 --- /dev/null +++ b/hw/9pfs/9p-util.c @@ -0,0 +1,69 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode) +{ + int fd; + + fd = dup(dirfd); + if (fd == -1) { + return -1; + } + + while (*path) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(path[0] != '/'); + + head = g_strdup(path); + c = strchr(path, '/'); + if (c) { + head[c - path] = 0; + next_fd = openat_dir(fd, head); + } else { + next_fd = openat_file(fd, head, flags, mode); + } + g_free(head); + if (next_fd == -1) { + close_preserve_errno(fd); + return -1; + } + close(fd); + fd = next_fd; + + if (!c) { + break; + } + path = c + 1; + } + + return fd; +} + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lgetxattr(proc_path, name, value, size); + g_free(proc_path); + return ret; +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h new file mode 100644 index 0000000000..091f3ce88e --- /dev/null +++ b/hw/9pfs/9p-util.h @@ -0,0 +1,54 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_UTIL_H +#define QEMU_9P_UTIL_H + +static inline void close_preserve_errno(int fd) +{ + int serrno = errno; + close(fd); + errno = serrno; +} + +static inline int openat_dir(int dirfd, const char *name) +{ + return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH); +} + +static inline int openat_file(int dirfd, const char *name, int flags, + mode_t mode) +{ + int fd, serrno, ret; + + fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, + mode); + if (fd == -1) { + return -1; + } + + serrno = errno; + /* O_NONBLOCK was only needed to open the file. Let's drop it. */ + ret = fcntl(fd, F_SETFL, flags); + assert(!ret); + errno = serrno; + return fd; +} + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode); +ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size); +int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size, int flags); + +#endif diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index f87530c8b5..2c90817b75 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -20,9 +20,6 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, errno = ENOATTR; return -1; } - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, name, value, size); } static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, @@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, name, value, size, flags); } static int mp_user_removexattr(FsContext *ctx, const char *path, const char *name) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, name); - g_free(buffer); - return ret; + return local_removexattr_nofollow(ctx, path, name); } XattrOperations mapped_user_xattr = { diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index 5d8595ed93..eec160b3c2 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -15,6 +15,8 @@ #include "9p.h" #include "fsdev/file-op-9p.h" #include "9p-xattr.h" +#include "9p-util.h" +#include "9p-local.h" static XattrOperations *get_xattr_operations(XattrOperations **h, @@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path, return name_size; } +static ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = llistxattr(proc_path, list, size); + g_free(proc_path); + return ret; +} /* * Get the list and pass to each layer to find out whether @@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, size_t vsize) { ssize_t size = 0; - char *buffer; void *ovalue = value; XattrOperations *xops; char *orig_value, *orig_value_start; ssize_t xattr_len, parsed_len = 0, attr_len; + char *dirpath, *name; + int dirfd; /* Get the actual len */ - buffer = rpath(ctx, path); - xattr_len = llistxattr(buffer, value, 0); + dirpath = g_path_get_dirname(path); + dirfd = local_opendir_nofollow(ctx, dirpath); + g_free(dirpath); + if (dirfd == -1) { + return -1; + } + + name = g_path_get_basename(path); + xattr_len = flistxattrat_nofollow(dirfd, name, value, 0); if (xattr_len <= 0) { - g_free(buffer); + g_free(name); + close_preserve_errno(dirfd); return xattr_len; } /* Now fetch the xattr and find the actual size */ orig_value = g_malloc(xattr_len); - xattr_len = llistxattr(buffer, orig_value, xattr_len); - g_free(buffer); + xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len); + g_free(name); + close_preserve_errno(dirfd); + if (xattr_len < 0) { + return -1; + } /* store the orig pointer */ orig_value_start = orig_value; @@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx, } +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fgetxattrat_nofollow(dirfd, filename, name, value, size); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + return local_getxattr_nofollow(ctx, path, name, value, size); +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lsetxattr(proc_path, name, value, size, flags); + g_free(proc_path); + return ret; +} + +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags) +{ + return local_setxattr_nofollow(ctx, path, name, value, size, flags); +} + +static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lremovexattr(proc_path, name); + g_free(proc_path); + return ret; +} + +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fremovexattrat_nofollow(dirfd, filename, name); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_removexattr(FsContext *ctx, const char *path, const char *name) +{ + return local_removexattr_nofollow(ctx, path, name); +} + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + errno = ENOTSUP; + return -1; +} + +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags) +{ + errno = ENOTSUP; + return -1; +} + +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size) +{ + return 0; +} + +int notsup_removexattr(FsContext *ctx, const char *path, const char *name) +{ + errno = ENOTSUP; + return -1; +} + XattrOperations *mapped_xattr_ops[] = { &mapped_user_xattr, &mapped_pacl_xattr, diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h index a853ea641c..0d83996575 100644 --- a/hw/9pfs/9p-xattr.h +++ b/hw/9pfs/9p-xattr.h @@ -29,6 +29,13 @@ typedef struct xattr_operations const char *path, const char *name); } XattrOperations; +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size); +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags); +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name); extern XattrOperations mapped_user_xattr; extern XattrOperations passthrough_user_xattr; @@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags); int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name); + ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value, size_t size); - -static inline ssize_t pt_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, size_t size) -{ - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; -} - -static inline int pt_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; -} - -static inline int pt_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lremovexattr(path, name); - g_free(buffer); - return ret; -} - -static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size) -{ - errno = ENOTSUP; - return -1; -} - -static inline int notsup_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - errno = ENOTSUP; - return -1; -} - -static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path, - char *name, void *value, size_t size) -{ - return 0; -} - -static inline int notsup_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - errno = ENOTSUP; - return -1; -} +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags); +int pt_removexattr(FsContext *ctx, const char *path, const char *name); + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags); +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size); +int notsup_removexattr(FsContext *ctx, const char *path, const char *name); #endif diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index da0ae0cfdb..32197e6671 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y = 9p.o +common-obj-y = 9p.o 9p-util.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index 0c9ca7bfa0..c8a11f2b53 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "hw/arm/armv7m.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -17,147 +18,260 @@ #include "elf.h" #include "sysemu/qtest.h" #include "qemu/error-report.h" +#include "exec/address-spaces.h" /* Bitbanded IO. Each word corresponds to a single bit. */ /* Get the byte address of the real memory for a bitband access. */ -static inline uint32_t bitband_addr(void * opaque, uint32_t addr) +static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset) { - uint32_t res; - - res = *(uint32_t *)opaque; - res |= (addr & 0x1ffffff) >> 5; - return res; - + return s->base | (offset & 0x1ffffff) >> 5; } -static uint32_t bitband_readb(void *opaque, hwaddr offset) +static MemTxResult bitband_read(void *opaque, hwaddr offset, + uint64_t *data, unsigned size, MemTxAttrs attrs) { - uint8_t v; - cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1); - return (v & (1 << ((offset >> 2) & 7))) != 0; + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1; + *data = bit; + return MEMTX_OK; } -static void bitband_writeb(void *opaque, hwaddr offset, - uint32_t value) +static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size, MemTxAttrs attrs) { - uint32_t addr; - uint8_t mask; - uint8_t v; - addr = bitband_addr(opaque, offset); - mask = (1 << ((offset >> 2) & 7)); - cpu_physical_memory_read(addr, &v, 1); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 1); + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = 1 << (bitpos & 7); + if (value & 1) { + buf[bitpos >> 3] |= bit; + } else { + buf[bitpos >> 3] &= ~bit; + } + return address_space_write(s->source_as, addr, attrs, buf, size); } -static uint32_t bitband_readw(void *opaque, hwaddr offset) +static const MemoryRegionOps bitband_ops = { + .read_with_attrs = bitband_read, + .write_with_attrs = bitband_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 1, + .impl.max_access_size = 4, + .valid.min_access_size = 1, + .valid.max_access_size = 4, +}; + +static void bitband_init(Object *obj) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - return (v & mask) != 0; + BitBandState *s = BITBAND(obj); + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + + object_property_add_link(obj, "source-memory", + TYPE_MEMORY_REGION, + (Object **)&s->source_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init_io(&s->iomem, obj, &bitband_ops, s, + "bitband", 0x02000000); + sysbus_init_mmio(dev, &s->iomem); } -static void bitband_writew(void *opaque, hwaddr offset, - uint32_t value) +static void bitband_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 2); + BitBandState *s = BITBAND(dev); + + if (!s->source_memory) { + error_setg(errp, "source-memory property not set"); + return; + } + + s->source_as = address_space_init_shareable(s->source_memory, + "bitband-source"); } -static uint32_t bitband_readl(void *opaque, hwaddr offset) +/* Board init. */ + +static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = { + 0x20000000, 0x40000000 +}; + +static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = { + 0x22000000, 0x42000000 +}; + +static void armv7m_instance_init(Object *obj) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - return (v & mask) != 0; + ARMv7MState *s = ARMV7M(obj); + int i; + + /* Can't init the cpu here, we don't yet know which model to use */ + + object_property_add_link(obj, "memory", + TYPE_MEMORY_REGION, + (Object **)&s->board_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX); + + object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic"); + qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default()); + object_property_add_alias(obj, "num-irq", + OBJECT(&s->nvic), "num-irq", &error_abort); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND); + qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default()); + } } -static void bitband_writel(void *opaque, hwaddr offset, - uint32_t value) +static void armv7m_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 4); -} + ARMv7MState *s = ARMV7M(dev); + SysBusDevice *sbd; + Error *err = NULL; + int i; + char **cpustr; + ObjectClass *oc; + const char *typename; + CPUClass *cc; + + if (!s->board_memory) { + error_setg(errp, "memory property was not set"); + return; + } -static const MemoryRegionOps bitband_ops = { - .old_mmio = { - .read = { bitband_readb, bitband_readw, bitband_readl, }, - .write = { bitband_writeb, bitband_writew, bitband_writel, }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; + memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1); -#define TYPE_BITBAND "ARM,bitband-memory" -#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + cpustr = g_strsplit(s->cpu_model, ",", 2); -typedef struct { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ + oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]); + if (!oc) { + error_setg(errp, "Unknown CPU model %s", cpustr[0]); + g_strfreev(cpustr); + return; + } - MemoryRegion iomem; - uint32_t base; -} BitBandState; + cc = CPU_CLASS(oc); + typename = object_class_get_name(oc); + cc->parse_features(typename, cpustr[1], &err); + g_strfreev(cpustr); + if (err) { + error_propagate(errp, err); + return; + } -static void bitband_init(Object *obj) -{ - BitBandState *s = BITBAND(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + s->cpu = ARM_CPU(object_new(typename)); + if (!s->cpu) { + error_setg(errp, "Unknown CPU model %s", s->cpu_model); + return; + } - memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base, - "bitband", 0x02000000); - sysbus_init_mmio(dev, &s->iomem); + object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory", + &error_abort); + object_property_set_bool(OBJECT(s->cpu), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Note that we must realize the NVIC after the CPU */ + object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Alias the NVIC's input and output GPIOs as our own so the board + * code can wire them up. (We do this in realize because the + * NVIC doesn't create the input GPIO array until realize.) + */ + qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL); + qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ"); + + /* Wire the NVIC up to the CPU */ + sbd = SYS_BUS_DEVICE(&s->nvic); + sysbus_connect_irq(sbd, 0, + qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); + s->cpu->env.nvic = &s->nvic; + + memory_region_add_subregion(&s->container, 0xe000e000, + sysbus_mmio_get_region(sbd, 0)); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + Object *obj = OBJECT(&s->bitband[i]); + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]); + + object_property_set_int(obj, bitband_input_addr[i], "base", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + object_property_set_link(obj, OBJECT(s->board_memory), + "source-memory", &error_abort); + object_property_set_bool(obj, true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->container, bitband_output_addr[i], + sysbus_mmio_get_region(sbd, 0)); + } } -static void armv7m_bitband_init(void) -{ - DeviceState *dev; +static Property armv7m_properties[] = { + DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model), + DEFINE_PROP_END_OF_LIST(), +}; - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x20000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000); +static void armv7m_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x40000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000); + dc->realize = armv7m_realize; + dc->props = armv7m_properties; } -/* Board init. */ +static const TypeInfo armv7m_info = { + .name = TYPE_ARMV7M, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMv7MState), + .instance_init = armv7m_instance_init, + .class_init = armv7m_class_init, +}; static void armv7m_reset(void *opaque) { @@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque) /* Init CPU and memory for a v7-M based board. mem_size is in bytes. - Returns the NVIC array. */ + Returns the ARMv7M device. */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model) { - ARMCPU *cpu; - CPUARMState *env; - DeviceState *nvic; - int image_size; - uint64_t entry; - uint64_t lowaddr; - int big_endian; + DeviceState *armv7m; if (cpu_model == NULL) { - cpu_model = "cortex-m3"; - } - cpu = cpu_arm_init(cpu_model); - if (cpu == NULL) { - fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + cpu_model = "cortex-m3"; } - env = &cpu->env; - armv7m_bitband_init(); + armv7m = qdev_create(NULL, "armv7m"); + qdev_prop_set_uint32(armv7m, "num-irq", num_irq); + qdev_prop_set_string(armv7m, "cpu-model", cpu_model); + object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + /* This will exit with an error if the user passed us a bad cpu_model */ + qdev_init_nofail(armv7m); + + armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size); + return armv7m; +} - nvic = qdev_create(NULL, "armv7m_nvic"); - qdev_prop_set_uint32(nvic, "num-irq", num_irq); - env->nvic = nvic; - qdev_init_nofail(nvic); - sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0, - qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size) +{ + int image_size; + uint64_t entry; + uint64_t lowaddr; + int big_endian; #ifdef TARGET_WORDS_BIGENDIAN big_endian = 1; @@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, } } + /* CPU objects (unlike devices) are not automatically reset on system + * reset, so we must always register a handler to do so. Unlike + * A-profile CPUs, we don't need to do anything special in the + * handler to arrange that it starts correctly. + * This is arguably the wrong place to do this, but it matches the + * way A-profile does it. Note that this means that every M profile + * board must call this function! + */ qemu_register_reset(armv7m_reset, cpu); - return nvic; } static Property bitband_properties[] = { @@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = bitband_realize; dc->props = bitband_properties; } @@ -251,6 +371,7 @@ static const TypeInfo bitband_info = { static void armv7m_register_types(void) { type_register_static(&bitband_info); + type_register_static(&armv7m_info); } type_init(armv7m_register_types) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 9ed22d5bc8..369ef1e3bd 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -96,6 +96,11 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL); qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default()); + /* SDHOST */ + object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST); + object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL); + qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default()); + /* DMA Channels */ object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA); object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL); @@ -103,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->dma), "dma-mr", OBJECT(&s->gpu_bus_mr), &error_abort); + + /* GPIO */ + object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO); + object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL); + qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default()); + + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci", + OBJECT(&s->sdhci.sdbus), &error_abort); + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost", + OBJECT(&s->sdhost.sdbus), &error_abort); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -267,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0, qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_ARASANSDIO)); - object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus", - &err); + + /* SDHOST */ + object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err); if (err) { error_propagate(errp, err); return; } + memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_SDIO)); + /* DMA Channels */ object_property_set_bool(OBJECT(&s->dma), true, "realized", &err); if (err) { @@ -292,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) BCM2835_IC_GPU_IRQ, INTERRUPT_DMA0 + n)); } + + /* GPIO */ + object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0)); + + object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus", + &err); + if (err) { + error_propagate(errp, err); + return; + } } static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index 23d792837f..3cfe332dd1 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -27,17 +27,18 @@ #include "hw/boards.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" +#include "hw/arm/arm.h" static void netduino2_init(MachineState *machine) { DeviceState *dev; dev = qdev_create(NULL, TYPE_STM32F205_SOC); - if (machine->kernel_filename) { - qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename); - } qdev_prop_set_string(dev, "cpu-model", "cortex-m3"); object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, + FLASH_SIZE); } static void netduino2_machine_init(MachineClass *mc) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 38425bda6c..6e1260d2ed 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj) STM32F205State *s = STM32F205_SOC(obj); int i; + object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M); + qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default()); + object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG); qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default()); @@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj) static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) { STM32F205State *s = STM32F205_SOC(dev_soc); - DeviceState *dev, *nvic; + DeviceState *dev, *armv7m; SysBusDevice *busdev; Error *err = NULL; int i; @@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) vmstate_register_ram_global(sram); memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); - nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96, - s->kernel_filename, s->cpu_model); + armv7m = DEVICE(&s->armv7m); + qdev_prop_set_uint32(armv7m, "num-irq", 96); + qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model); + object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } /* System configuration controller */ dev = DEVICE(&s->syscfg); @@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, 0x40013800); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71)); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71)); /* Attach UART (uses USART registers) and USART controllers */ for (i = 0; i < STM_NUM_USARTS; i++) { @@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, usart_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i])); } /* Timer 2 to 5 */ @@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, timer_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i])); } /* ADC 1 to 3 */ @@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) return; } qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0, - qdev_get_gpio_in(nvic, ADC_IRQ)); + qdev_get_gpio_in(armv7m, ADC_IRQ)); for (i = 0; i < STM_NUM_ADCS; i++) { dev = DEVICE(&(s->adc[i])); @@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, spi_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i])); } } static Property stm32f205_soc_properties[] = { - DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename), DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/block.c b/hw/block/block.c index 8dc9d84a39..27878d0087 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf) } } -void blkconf_apply_backend_options(BlockConf *conf) +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp) { BlockBackend *blk = conf->blk; BlockdevOnError rerror, werror; + uint64_t perm, shared_perm; bool wce; + int ret; + + perm = BLK_PERM_CONSISTENT_READ; + if (!readonly) { + perm |= BLK_PERM_WRITE; + } + + shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD; + if (resizable) { + shared_perm |= BLK_PERM_RESIZE; + } + if (conf->share_rw) { + shared_perm |= BLK_PERM_WRITE; + } + + ret = blk_set_perm(blk, perm, shared_perm, errp); + if (ret < 0) { + return; + } switch (conf->wce) { case ON_OFF_AUTO_ON: wce = true; break; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 17d29e7bc5..a328693d15 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -186,6 +186,7 @@ typedef enum FDiskFlags { struct FDrive { FDCtrl *fdctrl; BlockBackend *blk; + BlockConf *conf; /* Drive status */ FloppyDriveType drive; /* CMOS drive type */ uint8_t perpendicular; /* 2.88 MB access mode */ @@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv) } } -static void fd_change_cb(void *opaque, bool load) +static void fd_change_cb(void *opaque, bool load, Error **errp) { FDrive *drive = opaque; + Error *local_err = NULL; + + if (!load) { + blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort); + } else { + blkconf_apply_backend_options(drive->conf, + blk_is_read_only(drive->blk), false, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } drive->media_changed = 1; drive->media_validated = false; @@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev) FloppyDrive *dev = FLOPPY_DRIVE(qdev); FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus); FDrive *drive; + Error *local_err = NULL; int ret; if (dev->unit == -1) { @@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } @@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev) * blkconf_apply_backend_options(). */ dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO; dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO; - blkconf_apply_backend_options(&dev->conf); + + blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us * for empty drives. */ @@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev) return -1; } + drive->conf = &dev->conf; drive->blk = dev->conf.blk; drive->fdctrl = bus->fdc; diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 2d6eb46a04..190573cefa 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp) { Flash *s = M25P80(ss); M25P80Class *mc = M25P80_GET_CLASS(s); + int ret; s->pi = mc->pi; @@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp) s->dirty_page = -1; if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + DB_PRINT_L(0, "Binding to IF_MTD drive\n"); s->storage = blk_blockalign(s->blk, s->size); diff --git a/hw/block/nand.c b/hw/block/nand.c index c69e6755d9..0d33ac281f 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp) { int pagesize; NANDFlashState *s = NAND(dev); + int ret; + s->buswidth = nand_flash_ids[s->chip_id].width >> 3; s->size = nand_flash_ids[s->chip_id].size << 20; @@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp) error_setg(errp, "Can't use a read-only drive"); return; } + ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } if (blk_getlength(s->blk) >= (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { pagesize = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae91a18f17..ae303d44e5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev) int i; int64_t bs_size; uint8_t *pci_conf; + Error *local_err = NULL; if (!n->conf.blk) { return -1; @@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev) return -1; } blkconf_blocksizes(&n->conf); - blkconf_apply_backend_options(&n->conf); + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } pci_conf = pci_dev->config; pci_conf[PCI_INTERRUPT_PIN] = 1; diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 8d8422739e..ddf5492426 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd) OneNANDState *s = ONE_NAND(dev); uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7)); void *ram; + Error *local_err = NULL; s->base = (hwaddr)-1; s->rdy = NULL; @@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd) error_report("Can't use a read-only drive"); return -1; } + blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } s->blk_cur = s->blk; } s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 71b98a3eef..594d4cf6fe 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, total_len); @@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) } } - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - /* Default to devices being used at their maximum device width. This was * assumed before the device_width support was added. */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index ef71322759..e6c5c6c25d 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); pfl->chip_len = chip_len; + + if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len); @@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) pfl->rom_mode = 1; sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->wcycle = 0; pfl->cmd = 0; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 843bd2fa73..98c16a7a9a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } blkconf_serial(&conf->conf, &conf->serial); - blkconf_apply_backend_options(&conf->conf); + blkconf_apply_backend_options(&conf->conf, + blk_is_read_only(conf->conf.blk), true, + &err); + if (err) { + error_propagate(errp, err); + return; + } s->original_wce = blk_enable_write_cache(conf->conf.blk); blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); if (err) { diff --git a/hw/core/loader.c b/hw/core/loader.c index 8b980e91fb..bf17b42cbe 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -435,6 +435,19 @@ int load_elf_as(const char *filename, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) { + return load_elf_ram(filename, translate_fn, translate_opaque, + pentry, lowaddr, highaddr, big_endian, elf_machine, + clear_lsb, data_swab, as, true); +} + +/* return < 0 if error, otherwise the number of bytes loaded in memory */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom) +{ int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; @@ -473,11 +486,11 @@ int load_elf_as(const char *filename, if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } fail: diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 94f4d8bde4..c34be1c1ba 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, { BlockBackend *blk; bool blk_created = false; + int ret; blk = blk_by_name(str); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(0, BLK_PERM_ALL); blk_created = true; + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } } } if (!blk) { diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 06ba02e2a3..923e626333 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -102,9 +102,23 @@ static void bus_add_child(BusState *bus, DeviceState *child) void qdev_set_parent_bus(DeviceState *dev, BusState *bus) { + bool replugging = dev->parent_bus != NULL; + + if (replugging) { + /* Keep a reference to the device while it's not plugged into + * any bus, to avoid it potentially evaporating when it is + * dereffed in bus_remove_child(). + */ + object_ref(OBJECT(dev)); + bus_remove_child(dev->parent_bus, dev); + object_unref(OBJECT(dev->parent_bus)); + } dev->parent_bus = bus; object_ref(OBJECT(bus)); bus_add_child(bus, dev); + if (replugging) { + object_unref(OBJECT(dev)); + } } /* Create a new device. This only initializes the device state diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs index a43c7cf442..fa0a72e6d0 100644 --- a/hw/gpio/Makefile.objs +++ b/hw/gpio/Makefile.objs @@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o obj-$(CONFIG_OMAP) += omap_gpio.o obj-$(CONFIG_IMX) += imx_gpio.o +obj-$(CONFIG_RASPI) += bcm2835_gpio.o diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c new file mode 100644 index 0000000000..acc2e3cf9e --- /dev/null +++ b/hw/gpio/bcm2835_gpio.c @@ -0,0 +1,353 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" +#include "hw/gpio/bcm2835_gpio.h" + +#define GPFSEL0 0x00 +#define GPFSEL1 0x04 +#define GPFSEL2 0x08 +#define GPFSEL3 0x0C +#define GPFSEL4 0x10 +#define GPFSEL5 0x14 +#define GPSET0 0x1C +#define GPSET1 0x20 +#define GPCLR0 0x28 +#define GPCLR1 0x2C +#define GPLEV0 0x34 +#define GPLEV1 0x38 +#define GPEDS0 0x40 +#define GPEDS1 0x44 +#define GPREN0 0x4C +#define GPREN1 0x50 +#define GPFEN0 0x58 +#define GPFEN1 0x5C +#define GPHEN0 0x64 +#define GPHEN1 0x68 +#define GPLEN0 0x70 +#define GPLEN1 0x74 +#define GPAREN0 0x7C +#define GPAREN1 0x80 +#define GPAFEN0 0x88 +#define GPAFEN1 0x8C +#define GPPUD 0x94 +#define GPPUDCLK0 0x98 +#define GPPUDCLK1 0x9C + +static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg) +{ + int i; + uint32_t value = 0; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + value |= (s->fsel[index] & 0x7) << (3 * i); + } + } + return value; +} + +static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value) +{ + int i; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + int fsel = (value >> (3 * i)) & 0x7; + s->fsel[index] = fsel; + } + } + + /* SD controller selection (48-53) */ + if (s->sd_fsel != 0 + && (s->fsel[48] == 0) /* SD_CLK_R */ + && (s->fsel[49] == 0) /* SD_CMD_R */ + && (s->fsel[50] == 0) /* SD_DATA0_R */ + && (s->fsel[51] == 0) /* SD_DATA1_R */ + && (s->fsel[52] == 0) /* SD_DATA2_R */ + && (s->fsel[53] == 0) /* SD_DATA3_R */ + ) { + /* SDHCI controller selected */ + sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci); + s->sd_fsel = 0; + } else if (s->sd_fsel != 4 + && (s->fsel[48] == 4) /* SD_CLK_R */ + && (s->fsel[49] == 4) /* SD_CMD_R */ + && (s->fsel[50] == 4) /* SD_DATA0_R */ + && (s->fsel[51] == 4) /* SD_DATA1_R */ + && (s->fsel[52] == 4) /* SD_DATA2_R */ + && (s->fsel[53] == 4) /* SD_DATA3_R */ + ) { + /* SDHost controller selected */ + sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost); + s->sd_fsel = 4; + } +} + +static int gpfsel_is_out(BCM2835GpioState *s, int index) +{ + if (index >= 0 && index < 54) { + return s->fsel[index] == 1; + } + return 0; +} + +static void gpset(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & ~*lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 1); + } + cur <<= 1; + } + + *lev |= val; +} + +static void gpclr(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & *lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 0); + } + cur <<= 1; + } + + *lev &= ~val; +} + +static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + return gpfsel_get(s, offset / 4); + case GPSET0: + case GPSET1: + /* Write Only */ + return 0; + case GPCLR0: + case GPCLR1: + /* Write Only */ + return 0; + case GPLEV0: + return s->lev0; + case GPLEV1: + return s->lev1; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + break; + } + + return 0; +} + +static void bcm2835_gpio_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + gpfsel_set(s, offset / 4, value); + break; + case GPSET0: + gpset(s, value, 0, 32, &s->lev0); + break; + case GPSET1: + gpset(s, value, 32, 22, &s->lev1); + break; + case GPCLR0: + gpclr(s, value, 0, 32, &s->lev0); + break; + case GPCLR1: + gpclr(s, value, 32, 22, &s->lev1); + break; + case GPLEV0: + case GPLEV1: + /* Read Only */ + break; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + break; + default: + goto err_out; + } + return; + +err_out: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); +} + +static void bcm2835_gpio_reset(DeviceState *dev) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + + int i; + for (i = 0; i < 6; i++) { + gpfsel_set(s, i, 0); + } + + s->sd_fsel = 0; + + /* SDHCI is selected by default */ + sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci); + + s->lev0 = 0; + s->lev1 = 0; +} + +static const MemoryRegionOps bcm2835_gpio_ops = { + .read = bcm2835_gpio_read, + .write = bcm2835_gpio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_gpio = { + .name = "bcm2835_gpio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54), + VMSTATE_UINT32(lev0, BCM2835GpioState), + VMSTATE_UINT32(lev1, BCM2835GpioState), + VMSTATE_UINT8(sd_fsel, BCM2835GpioState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_gpio_init(Object *obj) +{ + BCM2835GpioState *s = BCM2835_GPIO(obj); + DeviceState *dev = DEVICE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), + TYPE_SD_BUS, DEVICE(s), "sd-bus"); + + memory_region_init_io(&s->iomem, obj, + &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + qdev_init_gpio_out(dev, s->out, 54); +} + +static void bcm2835_gpio_realize(DeviceState *dev, Error **errp) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhci link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhci = SD_BUS(obj); + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhost link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhost = SD_BUS(obj); +} + +static void bcm2835_gpio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_bcm2835_gpio; + dc->realize = &bcm2835_gpio_realize; + dc->reset = &bcm2835_gpio_reset; +} + +static const TypeInfo bcm2835_gpio_info = { + .name = TYPE_BCM2835_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835GpioState), + .instance_init = bcm2835_gpio_init, + .class_init = bcm2835_gpio_class_init, +}; + +static void bcm2835_gpio_register_types(void) +{ + type_register_static(&bcm2835_gpio_info); +} + +type_init(bcm2835_gpio_register_types) diff --git a/hw/ide/core.c b/hw/ide/core.c index cfa5de6ebf..db509b3e15 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s) } /* called when the inserted state of the media has changed */ -static void ide_cd_change_cb(void *opaque, bool load) +static void ide_cd_change_cb(void *opaque, bool load, Error **errp) { IDEState *s = opaque; uint64_t nb_sectors; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index dbaa75cf59..4383cd111d 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } } @@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD, + &err); + if (err) { + error_report_err(err); + return -1; + } if (ide_init_drive(s, dev->conf.blk, kind, dev->version, dev->serial, dev->model, dev->wwn, diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 8948106ac4..adedd0da5f 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o -obj-$(CONFIG_STELLARIS) += armv7m_nvic.o +obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o obj-$(CONFIG_GRLIB) += grlib_irqmp.o obj-$(CONFIG_IOAPIC) += ioapic.o diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 16b9b0f7eb..c6493d6c07 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = { } }; +static int icc_sre_el1_reg_pre_load(void *opaque) +{ + GICv3CPUState *cs = opaque; + + /* + * If the sre_el1 subsection is not transferred this + * means SRE_EL1 is 0x7 (which might not be the same as + * our reset value). + */ + cs->icc_sre_el1 = 0x7; + return 0; +} + +static bool icc_sre_el1_reg_needed(void *opaque) +{ + GICv3CPUState *cs = opaque; + + return cs->icc_sre_el1 != 7; +} + +const VMStateDescription vmstate_gicv3_cpu_sre_el1 = { + .name = "arm_gicv3_cpu/sre_el1", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = icc_sre_el1_reg_pre_load, + .needed = icc_sre_el1_reg_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(icc_sre_el1, GICv3CPUState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_gicv3_cpu = { .name = "arm_gicv3_cpu", .version_id = 1, @@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = { .subsections = (const VMStateDescription * []) { &vmstate_gicv3_cpu_virt, NULL + }, + .subsections = (const VMStateDescription * []) { + &vmstate_gicv3_cpu_sre_el1, + NULL } }; @@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) s->cpu[i].cpu = cpu; s->cpu[i].gic = s; + /* Store GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); /* Pre-construct the GICR_TYPER: * For our implementation: diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index f775aba507..0b208560bd 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -19,6 +19,14 @@ #include "gicv3_internal.h" #include "cpu.h" +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + + env->gicv3state = (void *)s; +}; + static GICv3CPUState *icc_cs_from_env(CPUARMState *env) { /* Given the CPU, find the right GICv3CPUState struct. diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index d69dc47370..81f0403117 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -23,8 +23,10 @@ #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" #include "hw/sysbus.h" +#include "qemu/error-report.h" #include "sysemu/kvm.h" #include "kvm_arm.h" +#include "gicv3_internal.h" #include "vgic_common.h" #include "migration/migration.h" @@ -44,6 +46,32 @@ #define KVM_ARM_GICV3_GET_CLASS(obj) \ OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3) +#define KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \ + (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ICC_PMR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0) +#define ICC_BPR0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3) +#define ICC_AP0R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n) +#define ICC_AP1R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n) +#define ICC_BPR1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3) +#define ICC_CTLR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5) +#define ICC_IGRPEN0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6) +#define ICC_IGRPEN1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7) + typedef struct KVMARMGICv3Class { ARMGICv3CommonClass parent_class; DeviceRealize parent_realize; @@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) kvm_arm_gic_set_irq(s->num_irq, irq, level); } +#define KVM_VGIC_ATTR(reg, typer) \ + ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg)) + +static inline void kvm_gicd_access(GICv3State *s, int offset, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + KVM_VGIC_ATTR(offset, 0), + val, write); +} + +static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu, + uint64_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) | + (VGIC_LEVEL_INFO_LINE_LEVEL << + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT), + val, write); +} + +/* Loop through each distributor IRQ related register; since bits + * corresponding to SPIs and PPIs are RAZ/WI when affinity routing + * is enabled, we skip those. + */ +#define for_each_dist_irq_reg(_irq, _max, _field_width) \ + for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width)) + +static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + kvm_gicd_access(s, offset, ®, false); + *field = reg; + offset += 4; + field++; + } +} + +static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + reg = *field; + kvm_gicd_access(s, offset, ®, true); + offset += 4; + field++; + } +} + +static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + kvm_gicd_access(s, offset, ®, false); + reg = half_unshuffle32(reg >> 1); + if (irq % 32 != 0) { + reg = (reg << 16); + } + *gic_bmp_ptr32(bmp, irq) |= reg; + offset += 4; + } +} + +static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + reg = *gic_bmp_ptr32(bmp, irq); + if (irq % 32 != 0) { + reg = (reg & 0xffff0000) >> 16; + } else { + reg = reg & 0xffff; + } + reg = half_shuffle32(reg) << 1; + kvm_gicd_access(s, offset, ®, true); + offset += 4; + } +} + +static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gic_line_level_access(s, irq, 0, ®, false); + *gic_bmp_ptr32(bmp, irq) = reg; + } +} + +static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gic_line_level_access(s, irq, 0, ®, true); + } +} + +/* Read a bitmap register group from the kernel VGIC. */ +static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gicd_access(s, offset, ®, false); + *gic_bmp_ptr32(bmp, irq) = reg; + offset += 4; + } +} + +static void kvm_dist_putbmp(GICv3State *s, uint32_t offset, + uint32_t clroffset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + /* If this bitmap is a set/clear register pair, first write to the + * clear-reg to clear all bits before using the set-reg to write + * the 1 bits. + */ + if (clroffset != 0) { + reg = 0; + kvm_gicd_access(s, clroffset, ®, true); + } + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gicd_access(s, offset, ®, true); + offset += 4; + } +} + +static void kvm_arm_gicv3_check(GICv3State *s) +{ + uint32_t reg; + uint32_t num_irq; + + /* Sanity checking s->num_irq */ + kvm_gicd_access(s, GICD_TYPER, ®, false); + num_irq = ((reg & 0x1f) + 1) * 32; + + if (num_irq < s->num_irq) { + error_report("Model requests %u IRQs, but kernel supports max %u", + s->num_irq, num_irq); + abort(); + } +} + static void kvm_arm_gicv3_put(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + reg = s->gicd_ctlr; + kvm_gicd_access(s, GICD_CTLR, ®, true); + + if (redist_typer & GICR_TYPER_PLPIS) { + /* Set base addresses before LPIs are enabled by GICR_CTLR write */ + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg64 = c->gicr_propbaser; + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, true); + + reg64 = c->gicr_pendbaser; + if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) { + /* Setting PTZ is advised if LPIs are disabled, to reduce + * GIC initialization time. + */ + reg64 |= GICR_PENDBASER_PTZ; + } + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, true); + } + } + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg = c->gicr_ctlr; + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, true); + + reg = c->gicr_statusr[GICV3_NS]; + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, true); + + reg = c->gicr_waker; + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, true); + + reg = c->gicr_igroupr0; + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICENABLER0, ncpu, ®, true); + reg = c->gicr_ienabler0; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, true); + + /* Restore config before pending so we treat level/edge correctly */ + reg = half_shuffle32(c->edge_trigger >> 16) << 1; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, true); + + reg = c->level; + kvm_gic_line_level_access(s, 0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICPENDR0, ncpu, ®, true); + reg = c->gicr_ipendr0; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, ®, true); + reg = c->gicr_iactiver0; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, true); + + for (i = 0; i < GIC_INTERNAL; i += 4) { + reg = c->gicr_ipriorityr[i] | + (c->gicr_ipriorityr[i + 1] << 8) | + (c->gicr_ipriorityr[i + 2] << 16) | + (c->gicr_ipriorityr[i + 3] << 24); + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, true); + } + } + + /* Distributor state (shared between all CPUs */ + reg = s->gicd_statusr[GICV3_NS]; + kvm_gicd_access(s, GICD_STATUSR, ®, true); + + /* s->enable bitmap -> GICD_ISENABLERn */ + kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled); + + /* s->group bitmap -> GICD_IGROUPRn */ + kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group); + + /* Restore targets before pending to ensure the pending state is set on + * the appropriate CPU interfaces in the kernel + */ + + /* s->gicd_irouter[irq] -> GICD_IROUTERn + * We can't use kvm_dist_put() here because the registers are 64-bit + */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + reg = (uint32_t)s->gicd_irouter[i]; + kvm_gicd_access(s, offset, ®, true); + + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + reg = (uint32_t)(s->gicd_irouter[i] >> 32); + kvm_gicd_access(s, offset, ®, true); + } + + /* s->trigger bitmap -> GICD_ICFGRn + * (restore configuration registers before pending IRQs so we treat + * level/edge correctly) + */ + kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* s->level bitmap -> line_level */ + kvm_gic_put_line_level_bmp(s, s->level); + + /* s->pending bitmap -> GICD_ISPENDRn */ + kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending); + + /* s->active bitmap -> GICD_ISACTIVERn */ + kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active); + + /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */ + kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* CPU Interface state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], true); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], true); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true); + + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G0][3]; + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G0][2]; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G0][1]; + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G0][0]; + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, true); + } + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G1NS][3]; + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G1NS][2]; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G1NS][1]; + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G1NS][0]; + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, true); + } + } } static void kvm_arm_gicv3_get(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot get kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + kvm_gicd_access(s, GICD_CTLR, ®, false); + s->gicd_ctlr = reg; + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, false); + c->gicr_ctlr = reg; + + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, false); + c->gicr_statusr[GICV3_NS] = reg; + + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, false); + c->gicr_waker = reg; + + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, false); + c->gicr_igroupr0 = reg; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, false); + c->gicr_ienabler0 = reg; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, false); + c->edge_trigger = half_unshuffle32(reg >> 1) << 16; + kvm_gic_line_level_access(s, 0, ncpu, ®, false); + c->level = reg; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, false); + c->gicr_ipendr0 = reg; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, false); + c->gicr_iactiver0 = reg; + + for (i = 0; i < GIC_INTERNAL; i += 4) { + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, false); + c->gicr_ipriorityr[i] = extract32(reg, 0, 8); + c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8); + c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8); + c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8); + } + } + + if (redist_typer & GICR_TYPER_PLPIS) { + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, false); + c->gicr_propbaser = ((uint64_t)regh << 32) | regl; + + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, false); + c->gicr_pendbaser = ((uint64_t)regh << 32) | regl; + } + } + + /* Distributor state (shared between all CPUs */ + + kvm_gicd_access(s, GICD_STATUSR, ®, false); + s->gicd_statusr[GICV3_NS] = reg; + + /* GICD_IGROUPRn -> s->group bitmap */ + kvm_dist_getbmp(s, GICD_IGROUPR, s->group); + + /* GICD_ISENABLERn -> s->enabled bitmap */ + kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled); + + /* Line level of irq */ + kvm_gic_get_line_level_bmp(s, s->level); + /* GICD_ISPENDRn -> s->pending bitmap */ + kvm_dist_getbmp(s, GICD_ISPENDR, s->pending); + + /* GICD_ISACTIVERn -> s->active bitmap */ + kvm_dist_getbmp(s, GICD_ISACTIVER, s->active); + + /* GICD_ICFGRn -> s->trigger bitmap */ + kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */ + kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* GICD_IROUTERn -> s->gicd_irouter[irq] */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + kvm_gicd_access(s, offset, ®l, false); + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + kvm_gicd_access(s, offset, ®h, false); + s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl; + } + + /***************************************************************** + * CPU Interface(s) State + */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], false); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], false); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false); + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G0][3] = reg64; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G0][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G0][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G0][0] = reg64; + } + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][3] = reg64; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][0] = reg64; + } + } +} + +static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu; + GICv3State *s; + GICv3CPUState *c; + + c = (GICv3CPUState *)env->gicv3state; + s = c->gic; + cpu = ARM_CPU(c->cpu); + + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), + &c->icc_ctlr_el1[GICV3_NS], false); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + c->icc_pmr_el1 = 0; + c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR; + + c->icc_sre_el1 = 0x7; + memset(c->icc_apr, 0, sizeof(c->icc_apr)); + memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen)); } static void kvm_arm_gicv3_reset(DeviceState *dev) @@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev) DPRINTF("Reset\n"); kgc->parent_reset(dev); + + if (s->migration_blocker) { + DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + return; + } + kvm_arm_gicv3_put(s); } +/* + * CPU interface registers of GIC needs to be reset on CPU reset. + * For the calling arm_gicv3_icc_reset() on CPU reset, we register + * below ARMCPRegInfo. As we reset the whole cpu interface under single + * register reset, we define only one register of CPU interface instead + * of defining all the registers. + */ +static const ARMCPRegInfo gicv3_cpuif_reginfo[] = { + { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4, + /* + * If ARM_CP_NOP is used, resetfn is not called, + * So ARM_CP_NO_RAW is appropriate type. + */ + .type = ARM_CP_NO_RAW, + .access = PL1_RW, + .readfn = arm_cp_read_zero, + .writefn = arm_cp_write_ignore, + /* + * We hang the whole cpu interface reset routine off here + * rather than parcelling it out into one little function + * per register + */ + .resetfn = arm_gicv3_icc_reset, + }, + REGINFO_SENTINEL +}; + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) { GICv3State *s = KVM_ARM_GICV3(dev); @@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); - /* Block migration of a KVM GICv3 device: the API for saving and restoring - * the state in the kernel is not yet finalised in the kernel or - * implemented in QEMU. - */ - error_setg(&s->migration_blocker, "vGICv3 migration is not implemented"); - migrate_add_blocker(s->migration_blocker, &local_err); - if (local_err) { - error_propagate(errp, local_err); - error_free(s->migration_blocker); - return; + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); + + define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); } /* Try to create the device via the device control API */ @@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) kvm_irqchip_commit_routes(kvm_state); } + + if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_CTLR)) { + error_setg(&s->migration_blocker, "This operating system kernel does " + "not support vGICv3 migration"); + migrate_add_blocker(s->migration_blocker, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_free(s->migration_blocker); + return; + } + } } static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 76097b4830..32ffa0bf35 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -17,8 +17,8 @@ #include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/arm/arm.h" +#include "hw/arm/armv7m_nvic.h" #include "target/arm/cpu.h" -#include "exec/address-spaces.h" #include "qemu/log.h" #include "trace.h" @@ -47,7 +47,6 @@ * "exception" more or less interchangeably. */ #define NVIC_FIRST_IRQ 16 -#define NVIC_MAX_VECTORS 512 #define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ) /* Effective running priority of the CPU when no exception is active @@ -55,116 +54,10 @@ */ #define NVIC_NOEXC_PRIO 0x100 -typedef struct VecInfo { - /* Exception priorities can range from -3 to 255; only the unmodifiable - * priority values for RESET, NMI and HardFault can be negative. - */ - int16_t prio; - uint8_t enabled; - uint8_t pending; - uint8_t active; - uint8_t level; /* exceptions <=15 never set level */ -} VecInfo; - -typedef struct NVICState { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - - ARMCPU *cpu; - - VecInfo vectors[NVIC_MAX_VECTORS]; - uint32_t prigroup; - - /* vectpending and exception_prio are both cached state that can - * be recalculated from the vectors[] array and the prigroup field. - */ - unsigned int vectpending; /* highest prio pending enabled exception */ - int exception_prio; /* group prio of the highest prio active exception */ - - struct { - uint32_t control; - uint32_t reload; - int64_t tick; - QEMUTimer *timer; - } systick; - - MemoryRegion sysregmem; - MemoryRegion container; - - uint32_t num_irq; - qemu_irq excpout; - qemu_irq sysresetreq; -} NVICState; - -#define TYPE_NVIC "armv7m_nvic" - -#define NVIC(obj) \ - OBJECT_CHECK(NVICState, (obj), TYPE_NVIC) - static const uint8_t nvic_id[] = { 0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1 }; -/* qemu timers run at 1GHz. We want something closer to 1MHz. */ -#define SYSTICK_SCALE 1000ULL - -#define SYSTICK_ENABLE (1 << 0) -#define SYSTICK_TICKINT (1 << 1) -#define SYSTICK_CLKSOURCE (1 << 2) -#define SYSTICK_COUNTFLAG (1 << 16) - -int system_clock_scale; - -/* Conversion factor from qemu timer to SysTick frequencies. */ -static inline int64_t systick_scale(NVICState *s) -{ - if (s->systick.control & SYSTICK_CLKSOURCE) - return system_clock_scale; - else - return 1000; -} - -static void systick_reload(NVICState *s, int reset) -{ - /* The Cortex-M3 Devices Generic User Guide says that "When the - * ENABLE bit is set to 1, the counter loads the RELOAD value from the - * SYST RVR register and then counts down". So, we need to check the - * ENABLE bit before reloading the value. - */ - if ((s->systick.control & SYSTICK_ENABLE) == 0) { - return; - } - - if (reset) - s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->systick.tick += (s->systick.reload + 1) * systick_scale(s); - timer_mod(s->systick.timer, s->systick.tick); -} - -static void systick_timer_tick(void * opaque) -{ - NVICState *s = (NVICState *)opaque; - s->systick.control |= SYSTICK_COUNTFLAG; - if (s->systick.control & SYSTICK_TICKINT) { - /* Trigger the interrupt. */ - armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); - } - if (s->systick.reload == 0) { - s->systick.control &= ~SYSTICK_ENABLE; - } else { - systick_reload(s, 0); - } -} - -static void systick_reset(NVICState *s) -{ - s->systick.control = 0; - s->systick.reload = 0; - s->systick.tick = 0; - timer_del(s->systick.timer); -} - static int nvic_pending_prio(NVICState *s) { /* return the priority of the current pending interrupt, @@ -510,30 +403,6 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset) switch (offset) { case 4: /* Interrupt Control Type. */ return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1; - case 0x10: /* SysTick Control and Status. */ - val = s->systick.control; - s->systick.control &= ~SYSTICK_COUNTFLAG; - return val; - case 0x14: /* SysTick Reload Value. */ - return s->systick.reload; - case 0x18: /* SysTick Current Value. */ - { - int64_t t; - if ((s->systick.control & SYSTICK_ENABLE) == 0) - return 0; - t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (t >= s->systick.tick) - return 0; - val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1; - /* The interrupt in triggered when the timer reaches zero. - However the counter is not reloaded until the next clock - tick. This is a hack to return zero during the first tick. */ - if (val > s->systick.reload) - val = 0; - return val; - } - case 0x1c: /* SysTick Calibration Value. */ - return 10000; case 0xd00: /* CPUID Base. */ return cpu->midr; case 0xd04: /* Interrupt Control State. */ @@ -668,40 +537,8 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset) static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value) { ARMCPU *cpu = s->cpu; - uint32_t oldval; + switch (offset) { - case 0x10: /* SysTick Control and Status. */ - oldval = s->systick.control; - s->systick.control &= 0xfffffff8; - s->systick.control |= value & 7; - if ((oldval ^ value) & SYSTICK_ENABLE) { - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (value & SYSTICK_ENABLE) { - if (s->systick.tick) { - s->systick.tick += now; - timer_mod(s->systick.timer, s->systick.tick); - } else { - systick_reload(s, 1); - } - } else { - timer_del(s->systick.timer); - s->systick.tick -= now; - if (s->systick.tick < 0) - s->systick.tick = 0; - } - } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { - /* This is a hack. Force the timer to be reloaded - when the reference clock is changed. */ - systick_reload(s, 1); - } - break; - case 0x14: /* SysTick Reload Value. */ - s->systick.reload = value; - break; - case 0x18: /* SysTick Current Value. Writes reload the timer. */ - systick_reload(s, 1); - s->systick.control &= ~SYSTICK_COUNTFLAG; - break; case 0xd04: /* Interrupt Control State. */ if (value & (1 << 31)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI); @@ -1000,16 +837,12 @@ static const VMStateDescription vmstate_VecInfo = { static const VMStateDescription vmstate_nvic = { .name = "armv7m_nvic", - .version_id = 3, - .minimum_version_id = 3, + .version_id = 4, + .minimum_version_id = 4, .post_load = &nvic_post_load, .fields = (VMStateField[]) { VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1, vmstate_VecInfo, VecInfo), - VMSTATE_UINT32(systick.control, NVICState), - VMSTATE_UINT32(systick.reload, NVICState), - VMSTATE_INT64(systick.tick, NVICState), - VMSTATE_TIMER_PTR(systick.timer, NVICState), VMSTATE_UINT32(prigroup, NVICState), VMSTATE_END_OF_LIST() } @@ -1047,13 +880,26 @@ static void armv7m_nvic_reset(DeviceState *dev) s->exception_prio = NVIC_NOEXC_PRIO; s->vectpending = 0; +} - systick_reset(s); +static void nvic_systick_trigger(void *opaque, int n, int level) +{ + NVICState *s = opaque; + + if (level) { + /* SysTick just asked us to pend its exception. + * (This is different from an external interrupt line's + * behaviour.) + */ + armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); + } } static void armv7m_nvic_realize(DeviceState *dev, Error **errp) { NVICState *s = NVIC(dev); + SysBusDevice *systick_sbd; + Error *err = NULL; s->cpu = ARM_CPU(qemu_get_cpu(0)); assert(s->cpu); @@ -1068,10 +914,19 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) /* include space for internal exception vectors */ s->num_irq += NVIC_FIRST_IRQ; + object_property_set_bool(OBJECT(&s->systick), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + systick_sbd = SYS_BUS_DEVICE(&s->systick); + sysbus_connect_irq(systick_sbd, 0, + qdev_get_gpio_in_named(dev, "systick-trigger", 0)); + /* The NVIC and System Control Space (SCS) starts at 0xe000e000 * and looks like this: * 0x004 - ICTR - * 0x010 - 0x1c - systick + * 0x010 - 0xff - systick * 0x100..0x7ec - NVIC * 0x7f0..0xcff - Reserved * 0xd00..0xd3c - SCS registers @@ -1089,12 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s, "nvic_sysregs", 0x1000); memory_region_add_subregion(&s->container, 0, &s->sysregmem); + memory_region_add_subregion_overlap(&s->container, 0x10, + sysbus_mmio_get_region(systick_sbd, 0), + 1); - /* Map the whole thing into system memory at the location required - * by the v7M architecture. - */ - memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container); - s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); } static void armv7m_nvic_instance_init(Object *obj) @@ -1109,8 +963,12 @@ static void armv7m_nvic_instance_init(Object *obj) NVICState *nvic = NVIC(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK); + qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default()); + sysbus_init_irq(sbd, &nvic->excpout); qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); + qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1); } static void armv7m_nvic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index aeb801d133..05303a55c8 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -138,6 +138,7 @@ #define ICC_CTLR_EL1_EOIMODE (1U << 1) #define ICC_CTLR_EL1_PMHE (1U << 6) #define ICC_CTLR_EL1_PRIBITS_SHIFT 8 +#define ICC_CTLR_EL1_PRIBITS_MASK (7U << ICC_CTLR_EL1_PRIBITS_SHIFT) #define ICC_CTLR_EL1_IDBITS_SHIFT 11 #define ICC_CTLR_EL1_SEIS (1U << 14) #define ICC_CTLR_EL1_A3V (1U << 15) @@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) } } +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 65ba188555..aa5d2c1f5f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) { sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev); + int ret; if (nvram->blk) { nvram->size = blk_getlength(nvram->blk); + + ret = blk_set_perm(nvram->blk, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } } else { nvram->size = DEFAULT_NVRAM_SIZE; } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 2e2664f22e..7978c7d52a 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -20,6 +20,7 @@ #include "hw/s390x/virtio-ccw.h" #include "hw/s390x/css.h" #include "ipl.h" +#include "qemu/error-report.h" #define KERN_IMAGE_START 0x010000UL #define KERN_PARM_AREA 0x010480UL @@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = { DEFINE_PROP_STRING("initrd", S390IPLState, initrd), DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline), DEFINE_PROP_STRING("firmware", S390IPLState, firmware), + DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw), DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false), DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration, true), @@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) TYPE_VIRTIO_CCW_DEVICE); SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), TYPE_SCSI_DEVICE); + VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st), + TYPE_VIRTIO_NET); + + if (vn) { + ipl->netboot = true; + } if (virtio_ccw_dev) { CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev); @@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) return false; } +static int load_netboot_image(Error **errp) +{ + S390IPLState *ipl = get_ipl_device(); + char *netboot_filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = NULL; + void *ram_ptr = NULL; + int img_size = -1; + + mr = memory_region_find(sysmem, 0, 1).mr; + if (!mr) { + error_setg(errp, "Failed to find memory region at address 0"); + return -1; + } + + ram_ptr = memory_region_get_ram_ptr(mr); + if (!ram_ptr) { + error_setg(errp, "No RAM found"); + goto unref_mr; + } + + netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw); + if (netboot_filename == NULL) { + error_setg(errp, "Could not find network bootloader"); + goto unref_mr; + } + + img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr, + NULL, NULL, 1, EM_S390, 0, 0, NULL, false); + + if (img_size < 0) { + img_size = load_image_size(netboot_filename, ram_ptr, ram_size); + ipl->start_addr = KERN_IMAGE_START; + } + + if (img_size < 0) { + error_setg(errp, "Failed to load network bootloader"); + } + + g_free(netboot_filename); + +unref_mr: + memory_region_unref(mr); + return img_size; +} + +static bool is_virtio_net_device(IplParameterBlock *iplb) +{ + uint8_t cssid; + uint8_t ssid; + uint16_t devno; + uint16_t schid; + SubchDev *sch = NULL; + + if (iplb->pbt != S390_IPL_TYPE_CCW) { + return false; + } + + devno = be16_to_cpu(iplb->ccw.devno); + ssid = iplb->ccw.ssid & 3; + + for (schid = 0; schid < MAX_SCHID; schid++) { + for (cssid = 0; cssid < MAX_CSSID; cssid++) { + sch = css_find_subch(1, cssid, ssid, schid); + + if (sch && sch->devno == devno) { + return sch->id.cu_model == VIRTIO_ID_NET; + } + } + } + return false; +} + void s390_ipl_update_diag308(IplParameterBlock *iplb) { S390IPLState *ipl = get_ipl_device(); ipl->iplb = *iplb; ipl->iplb_valid = true; + ipl->netboot = is_virtio_net_device(iplb); } IplParameterBlock *s390_ipl_get_iplb(void) @@ -287,6 +369,7 @@ void s390_reipl_request(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); + Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) ipl->iplb_valid = s390_gen_initial_iplb(ipl); } } + if (ipl->netboot) { + if (load_netboot_image(&err) < 0) { + error_report_err(err); + vm_stop(RUN_STATE_INTERNAL_ERROR); + } + ipl->iplb.ccw.netboot_start_addr = ipl->start_addr; + } } static void s390_ipl_reset(DeviceState *dev) diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index c89109585a..46930e4c64 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -16,7 +16,8 @@ #include "cpu.h" struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; @@ -100,12 +101,14 @@ struct S390IPLState { IplParameterBlock iplb; bool iplb_valid; bool reipl_requested; + bool netboot; /*< public >*/ char *kernel; char *initrd; char *cmdline; char *firmware; + char *netboot_fw; uint8_t cssid; uint8_t ssid; uint16_t devno; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 4f0d62b2d8..40914fde6f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine) /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, "s390-ccw.img", true); + machine->initrd_filename, "s390-ccw.img", + "s390-netboot.img", true); s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 9cfb09057e..afa4148e6b 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios) { Object *new = object_new(TYPE_S390_IPL); @@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename, } qdev_prop_set_string(dev, "cmdline", kernel_cmdline); qdev_prop_set_string(dev, "firmware", firmware); + qdev_prop_set_string(dev, "netboot_fw", netboot_fw); qdev_prop_set_bit(dev, "enforce_bios", enforce_bios); object_property_add_child(qdev_get_machine(), TYPE_S390_IPL, new, NULL); diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index f588b80a6e..f2377a3e0e 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios); void s390_create_virtio_net(BusState *bus, const char *name); void s390_nmi(NMIState *n, int cpu_index, Error **errp); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bbfb5dc289..a53f058621 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque) } } -static void scsi_cd_change_media_cb(void *opaque, bool load) +static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState *s = opaque; @@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, + blk_is_read_only(s->qdev.conf.blk), + dev->type == TYPE_DISK, &err); + if (err) { + error_propagate(errp, err); + return; + } if (s->qdev.conf.discard_granularity == -1) { s->qdev.conf.discard_granularity = @@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); if (!dev->conf.blk) { - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } s->qdev.blocksize = 2048; diff --git a/hw/sd/core.c b/hw/sd/core.c index 14c2bdf27b..295dc44ab7 100644 --- a/hw/sd/core.c +++ b/hw/sd/core.c @@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly) } } +void sdbus_reparent_card(SDBus *from, SDBus *to) +{ + SDState *card = get_card(from); + SDCardClass *sc; + bool readonly; + + /* We directly reparent the card object rather than implementing this + * as a hotpluggable connection because we don't want to expose SD cards + * to users as being hotpluggable, and we can get away with it in this + * limited use case. This could perhaps be implemented more cleanly in + * future by adding support to the hotplug infrastructure for "device + * can be hotplugged only via code, not by user". + */ + + if (!card) { + return; + } + + sc = SD_CARD_GET_CLASS(card); + readonly = sc->get_readonly(card); + + sdbus_set_inserted(from, false); + qdev_set_parent_bus(DEVICE(card), &to->qbus); + sdbus_set_inserted(to, true); + sdbus_set_readonly(to, readonly); +} + static const TypeInfo sd_bus_info = { .name = TYPE_SD_BUS, .parent = TYPE_BUS, diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 8e88e8311a..ba47bff4db 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd) return sd->wp_switch; } -static void sd_cardchange(void *opaque, bool load) +static void sd_cardchange(void *opaque, bool load, Error **errp) { SDState *sd = opaque; DeviceState *dev = DEVICE(sd); @@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj) static void sd_realize(DeviceState *dev, Error **errp) { SDState *sd = SD_CARD(dev); + int ret; if (sd->blk && blk_is_read_only(sd->blk)) { error_setg(errp, "Cannot use read-only drive as SD card"); @@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp) } if (sd->blk) { + ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } blk_set_dev_ops(sd->blk, &sd_block_ops, sd); } } diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs index fc9966880f..dd6f27e2a3 100644 --- a/hw/timer/Makefile.objs +++ b/hw/timer/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o +common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o common-obj-$(CONFIG_CADENCE) += cadence_ttc.o common-obj-$(CONFIG_DS1338) += ds1338.o diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c new file mode 100644 index 0000000000..df8d2804b3 --- /dev/null +++ b/hw/timer/armv7m_systick.c @@ -0,0 +1,240 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#include "qemu/osdep.h" +#include "hw/timer/armv7m_systick.h" +#include "qemu-common.h" +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "trace.h" + +/* qemu timers run at 1GHz. We want something closer to 1MHz. */ +#define SYSTICK_SCALE 1000ULL + +#define SYSTICK_ENABLE (1 << 0) +#define SYSTICK_TICKINT (1 << 1) +#define SYSTICK_CLKSOURCE (1 << 2) +#define SYSTICK_COUNTFLAG (1 << 16) + +int system_clock_scale; + +/* Conversion factor from qemu timer to SysTick frequencies. */ +static inline int64_t systick_scale(SysTickState *s) +{ + if (s->control & SYSTICK_CLKSOURCE) { + return system_clock_scale; + } else { + return 1000; + } +} + +static void systick_reload(SysTickState *s, int reset) +{ + /* The Cortex-M3 Devices Generic User Guide says that "When the + * ENABLE bit is set to 1, the counter loads the RELOAD value from the + * SYST RVR register and then counts down". So, we need to check the + * ENABLE bit before reloading the value. + */ + trace_systick_reload(); + + if ((s->control & SYSTICK_ENABLE) == 0) { + return; + } + + if (reset) { + s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + s->tick += (s->reload + 1) * systick_scale(s); + timer_mod(s->timer, s->tick); +} + +static void systick_timer_tick(void *opaque) +{ + SysTickState *s = (SysTickState *)opaque; + + trace_systick_timer_tick(); + + s->control |= SYSTICK_COUNTFLAG; + if (s->control & SYSTICK_TICKINT) { + /* Tell the NVIC to pend the SysTick exception */ + qemu_irq_pulse(s->irq); + } + if (s->reload == 0) { + s->control &= ~SYSTICK_ENABLE; + } else { + systick_reload(s, 0); + } +} + +static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size) +{ + SysTickState *s = opaque; + uint32_t val; + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + val = s->control; + s->control &= ~SYSTICK_COUNTFLAG; + break; + case 0x4: /* SysTick Reload Value. */ + val = s->reload; + break; + case 0x8: /* SysTick Current Value. */ + { + int64_t t; + + if ((s->control & SYSTICK_ENABLE) == 0) { + val = 0; + break; + } + t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (t >= s->tick) { + val = 0; + break; + } + val = ((s->tick - (t + 1)) / systick_scale(s)) + 1; + /* The interrupt in triggered when the timer reaches zero. + However the counter is not reloaded until the next clock + tick. This is a hack to return zero during the first tick. */ + if (val > s->reload) { + val = 0; + } + break; + } + case 0xc: /* SysTick Calibration Value. */ + val = 10000; + break; + default: + val = 0; + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr); + break; + } + + trace_systick_read(addr, val, size); + return val; +} + +static void systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + SysTickState *s = opaque; + + trace_systick_write(addr, value, size); + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + { + uint32_t oldval = s->control; + + s->control &= 0xfffffff8; + s->control |= value & 7; + if ((oldval ^ value) & SYSTICK_ENABLE) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (value & SYSTICK_ENABLE) { + if (s->tick) { + s->tick += now; + timer_mod(s->timer, s->tick); + } else { + systick_reload(s, 1); + } + } else { + timer_del(s->timer); + s->tick -= now; + if (s->tick < 0) { + s->tick = 0; + } + } + } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { + /* This is a hack. Force the timer to be reloaded + when the reference clock is changed. */ + systick_reload(s, 1); + } + break; + } + case 0x4: /* SysTick Reload Value. */ + s->reload = value; + break; + case 0x8: /* SysTick Current Value. Writes reload the timer. */ + systick_reload(s, 1); + s->control &= ~SYSTICK_COUNTFLAG; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps systick_ops = { + .read = systick_read, + .write = systick_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static void systick_reset(DeviceState *dev) +{ + SysTickState *s = SYSTICK(dev); + + s->control = 0; + s->reload = 0; + s->tick = 0; + timer_del(s->timer); +} + +static void systick_instance_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + SysTickState *s = SYSTICK(obj); + + memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); +} + +static const VMStateDescription vmstate_systick = { + .name = "armv7m_systick", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(control, SysTickState), + VMSTATE_UINT32(reload, SysTickState), + VMSTATE_INT64(tick, SysTickState), + VMSTATE_TIMER_PTR(timer, SysTickState), + VMSTATE_END_OF_LIST() + } +}; + +static void systick_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_systick; + dc->reset = systick_reset; +} + +static const TypeInfo armv7m_systick_info = { + .name = TYPE_SYSTICK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = systick_instance_init, + .instance_size = sizeof(SysTickState), + .class_init = systick_class_init, +}; + +static void armv7m_systick_register_types(void) +{ + type_register_static(&armv7m_systick_info); +} + +type_init(armv7m_systick_register_types) diff --git a/hw/timer/trace-events b/hw/timer/trace-events index 3495c41c18..d17cfe6b39 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32 aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32 aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64 + +# hw/timer/armv7m_systick.c +systick_reload(void) "systick reload" +systick_timer_tick(void) "systick reload" +systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" +systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c607f7606d..a71b354fa6 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -603,7 +603,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) blkconf_serial(&s->conf, &dev->serial); blkconf_blocksizes(&s->conf); - blkconf_apply_backend_options(&s->conf); + blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err); + if (err) { + error_propagate(errp, err); + return; + } /* * Hack alert: this pretends to be a block device, but it's really diff --git a/include/block/block.h b/include/block/block.h index bde5ebda18..c7c4a3ac3a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -82,6 +82,7 @@ typedef struct HDGeometry { } HDGeometry; #define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ @@ -187,6 +188,42 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MAX, } BlockOpType; +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * This permission is required to change the node that this BdrvChild + * points to. + */ + BLK_PERM_GRAPH_MOD = 0x10, + + BLK_PERM_ALL = 0x1f, +}; + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new); @@ -210,7 +248,8 @@ BdrvChild *bdrv_open_child(const char *filename, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, @@ -484,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role); + const BdrvChildRole *child_role, + Error **errp); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); diff --git a/include/block/block_int.h b/include/block/block_int.h index 1670941da9..a57c0bfb55 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -320,6 +320,59 @@ struct BlockDriver { void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, Error **errp); + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @role. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + QLIST_ENTRY(BlockDriver) list; }; @@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier { } BdrvAioNotifier; struct BdrvChildRole { + /* If true, bdrv_replace_in_backing_chain() doesn't change the node this + * BdrvChild points to. */ + bool stay_at_node; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); @@ -399,6 +456,12 @@ struct BdrvChildRole { * name), or NULL if the parent can't provide a better name. */ const char* (*get_name)(BdrvChild *child); + /* Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. */ + char* (*get_parent_desc)(BdrvChild *child); + /* * If this pair of functions is implemented, the parent doesn't issue new * requests after returning from .drained_begin() until .drained_end() is @@ -409,16 +472,32 @@ struct BdrvChildRole { */ void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); }; extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; +extern const BdrvChildRole child_backing; struct BdrvChild { BlockDriverState *bs; char *name; const BdrvChildRole *role; void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; }; @@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs, * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. * @errp: Error object. * */ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp); + const char *filter_node_name, Error **errp); /** * commit_active_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, * See @BlockJobCreateFlags * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, - void *opaque, Error **errp, bool auto_complete); + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, Error **errp, + bool auto_complete); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @errp: Error object. * * Start a mirroring operation on @bs. Clusters that are allocated @@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp); + bool unmap, const char *filter_node_name, Error **errp); /* * backup_job_create: @@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque); + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +void bdrv_child_abort_perm_update(BdrvChild *c); +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to + * all children */ +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * (non-raw) image formats: Like above for bs->backing, but for bs->file it + * requires WRITE | RESIZE for read-write images, always requires + * CONSISTENT_READ and doesn't share WRITE. */ +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load); +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); bool blk_dev_has_tray(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 1acb256223..9e906f7d7e 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id); /** * block_job_add_bdrv: * @job: A block job + * @name: The name to assign to the new BdrvChild * @bs: A BlockDriverState that is involved in @job + * @perm, @shared_perm: Permissions to request on the node * * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. */ -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp); + +/** + * block_job_remove_all_bdrv: + * @job: The block job + * + * Remove all BlockDriverStates from the list of nodes that are involved in the + * job. This removes the blockers added with block_job_add_bdrv(). + */ +void block_job_remove_all_bdrv(BlockJob *job); /** * block_job_set_speed: diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 82238229c6..3f86cc5acc 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -119,6 +119,7 @@ struct BlockJobDriver { * generated automatically. * @job_type: The class object for the newly-created job. * @bs: The block + * @perm, @shared_perm: Permissions to request for @bs * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. @@ -134,7 +135,8 @@ struct BlockJobDriver { * called from a wrapper that is specific to the job type. */ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp); /** diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index c175c0e999..a3f79d3379 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -26,6 +26,18 @@ typedef enum { /* armv7m.c */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model); +/** + * armv7m_load_kernel: + * @cpu: CPU + * @kernel_filename: file to load + * @mem_size: mem_size: maximum image size to load + * + * Load the guest image for an ARMv7M system. This must be called by + * any ARMv7M board, either directly or via armv7m_init(). (This is + * necessary to ensure that the CPU resets correctly on system reset, + * as well as for kernel loading.) + */ +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size); /* * struct used as a parameter of the arm_load_kernel machine init diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h new file mode 100644 index 0000000000..a9b3f2ab35 --- /dev/null +++ b/include/hw/arm/armv7m.h @@ -0,0 +1,63 @@ +/* + * ARMv7M CPU object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_H +#define HW_ARM_ARMV7M_H + +#include "hw/sysbus.h" +#include "hw/arm/armv7m_nvic.h" + +#define TYPE_BITBAND "ARM,bitband-memory" +#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + AddressSpace *source_as; + MemoryRegion iomem; + uint32_t base; + MemoryRegion *source_memory; +} BitBandState; + +#define TYPE_ARMV7M "armv7m" +#define ARMV7M(obj) OBJECT_CHECK(ARMv7MState, (obj), TYPE_ARMV7M) + +#define ARMV7M_NUM_BITBANDS 2 + +/* ARMv7M container object. + * + Unnamed GPIO input lines: external IRQ lines for the NVIC + * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ + * + Property "cpu-model": CPU model to instantiate + * + Property "num-irq": number of external IRQ lines + * + Property "memory": MemoryRegion defining the physical address space + * that CPU accesses see. (The NVIC, bitbanding and other CPU-internal + * devices will be automatically layered on top of this view.) + */ +typedef struct ARMv7MState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + NVICState nvic; + BitBandState bitband[ARMV7M_NUM_BITBANDS]; + ARMCPU *cpu; + + /* MemoryRegion we pass to the CPU, with our devices layered on + * top of the ones the board provides in board_memory. + */ + MemoryRegion container; + + /* Properties */ + char *cpu_model; + /* MemoryRegion the board provides to us (with its devices, RAM, etc) */ + MemoryRegion *board_memory; +} ARMv7MState; + +#endif diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/arm/armv7m_nvic.h new file mode 100644 index 0000000000..1d145fb75f --- /dev/null +++ b/include/hw/arm/armv7m_nvic.h @@ -0,0 +1,62 @@ +/* + * ARMv7M NVIC object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_NVIC_H +#define HW_ARM_ARMV7M_NVIC_H + +#include "target/arm/cpu.h" +#include "hw/sysbus.h" +#include "hw/timer/armv7m_systick.h" + +#define TYPE_NVIC "armv7m_nvic" + +#define NVIC(obj) \ + OBJECT_CHECK(NVICState, (obj), TYPE_NVIC) + +/* Highest permitted number of exceptions (architectural limit) */ +#define NVIC_MAX_VECTORS 512 + +typedef struct VecInfo { + /* Exception priorities can range from -3 to 255; only the unmodifiable + * priority values for RESET, NMI and HardFault can be negative. + */ + int16_t prio; + uint8_t enabled; + uint8_t pending; + uint8_t active; + uint8_t level; /* exceptions <=15 never set level */ +} VecInfo; + +typedef struct NVICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + ARMCPU *cpu; + + VecInfo vectors[NVIC_MAX_VECTORS]; + uint32_t prigroup; + + /* vectpending and exception_prio are both cached state that can + * be recalculated from the vectors[] array and the prigroup field. + */ + unsigned int vectpending; /* highest prio pending enabled exception */ + int exception_prio; /* group prio of the highest prio active exception */ + + MemoryRegion sysregmem; + MemoryRegion container; + + uint32_t num_irq; + qemu_irq excpout; + qemu_irq sysresetreq; + + SysTickState systick; +} NVICState; + +#endif diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 31241c799d..122b286de7 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -22,6 +22,8 @@ #include "hw/misc/bcm2835_rng.h" #include "hw/misc/bcm2835_mbox.h" #include "hw/sd/sdhci.h" +#include "hw/sd/bcm2835_sdhost.h" +#include "hw/gpio/bcm2835_gpio.h" #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" #define BCM2835_PERIPHERALS(obj) \ @@ -45,6 +47,8 @@ typedef struct BCM2835PeripheralState { BCM2835RngState rng; BCM2835MboxState mboxes; SDHCIState sdhci; + BCM2835SDHostState sdhost; + BCM2835GpioState gpio; } BCM2835PeripheralState; #endif /* BCM2835_PERIPHERALS_H */ diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 133214195b..e2dce1122e 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -31,6 +31,7 @@ #include "hw/adc/stm32f2xx_adc.h" #include "hw/or-irq.h" #include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" #define TYPE_STM32F205_SOC "stm32f205-soc" #define STM32F205_SOC(obj) \ @@ -51,9 +52,10 @@ typedef struct STM32F205State { SysBusDevice parent_obj; /*< public >*/ - char *kernel_filename; char *cpu_model; + ARMv7MState armv7m; + STM32F2XXSyscfgState syscfg; STM32F2XXUsartState usart[STM_NUM_USARTS]; STM32F2XXTimerState timer[STM_NUM_TIMERS]; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index df9d207d81..f3f6e8ef02 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -26,6 +26,7 @@ typedef struct BlockConf { /* geometry, not all devices use this */ uint32_t cyls, heads, secs; OnOffAuto wce; + bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; } BlockConf; @@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_UINT32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ - DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO) + DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ + ON_OFF_AUTO_AUTO), \ + DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ @@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); void blkconf_blocksizes(BlockConf *conf); -void blkconf_apply_backend_options(BlockConf *conf); +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp); /* Hard disk geometry */ diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 25659b93be..a172a6068a 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as) + AddressSpace *as, bool load_rom) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; @@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd, *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } - snprintf(label, sizeof(label), "phdr #%d: %s", i, name); + if (load_rom) { + snprintf(label, sizeof(label), "phdr #%d: %s", i, name); - /* rom_add_elf_program() seize the ownership of 'data' */ - rom_add_elf_program(label, data, file_size, mem_size, addr, as); + /* rom_add_elf_program() seize the ownership of 'data' */ + rom_add_elf_program(label, data, file_size, mem_size, addr, as); + } else { + cpu_physical_memory_write(addr, data, file_size); + g_free(data); + } total_size += mem_size; if (addr < low) diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h new file mode 100644 index 0000000000..9f8e0c720c --- /dev/null +++ b/include/hw/gpio/bcm2835_gpio.h @@ -0,0 +1,39 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_GPIO_H +#define BCM2835_GPIO_H + +#include "hw/sd/sd.h" + +typedef struct BCM2835GpioState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + + /* SDBus selector */ + SDBus sdbus; + SDBus *sdbus_sdhci; + SDBus *sdbus_sdhost; + + uint8_t fsel[54]; + uint32_t lev0, lev1; + uint8_t sd_fsel; + qemu_irq out[54]; +} BCM2835GpioState; + +#define TYPE_BCM2835_GPIO "bcm2835_gpio" +#define BCM2835_GPIO(obj) \ + OBJECT_CHECK(BCM2835GpioState, (obj), TYPE_BCM2835_GPIO) + +#endif diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 4156051d98..bccdfe17c6 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -172,6 +172,7 @@ struct GICv3CPUState { uint8_t gicr_ipriorityr[GIC_INTERNAL]; /* CPU interface */ + uint64_t icc_sre_el1; uint64_t icc_ctlr_el1[2]; uint64_t icc_pmr_el1; uint64_t icc_bpr[3]; diff --git a/include/hw/loader.h b/include/hw/loader.h index 40c4153e58..490c9ff8e6 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ENDIAN -4 const char *load_elf_strerror(int error); -/** load_elf_as: +/** load_elf_ram: * @filename: Path of ELF file * @translate_fn: optional function to translate load addresses * @translate_opaque: opaque data passed to @translate_fn @@ -81,6 +81,7 @@ const char *load_elf_strerror(int error); * words and 3 for within doublewords. * @as: The AddressSpace to load the ELF to. The value of address_space_memory * is used if nothing is supplied here. + * @load_rom : Load ELF binary as ROM * * Load an ELF file's contents to the emulated system's address space. * Clients may optionally specify a callback to perform address @@ -93,6 +94,16 @@ const char *load_elf_strerror(int error); * If @elf_machine is EM_NONE then the machine type will be read from the * ELF header and no checks will be carried out against the machine type. */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom); + +/** load_elf_as: + * Same as load_elf_ram(), but always loads the elf as ROM + */ int load_elf_as(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h index 79909b2478..96caefe373 100644 --- a/include/hw/sd/sd.h +++ b/include/hw/sd/sd.h @@ -140,6 +140,17 @@ uint8_t sdbus_read_data(SDBus *sd); bool sdbus_data_ready(SDBus *sd); bool sdbus_get_inserted(SDBus *sd); bool sdbus_get_readonly(SDBus *sd); +/** + * sdbus_reparent_card: Reparent an SD card from one controller to another + * @from: controller bus to remove card from + * @to: controller bus to move card to + * + * Reparent an SD card, effectively unplugging it from one controller + * and inserting it into another. This is useful for SoCs like the + * bcm2835 which have two SD controllers and connect a single SD card + * to them, selected by the guest reprogramming GPIO line routing. + */ +void sdbus_reparent_card(SDBus *from, SDBus *to); /* Functions to be used by SD devices to report back to qdevified controllers */ void sdbus_set_inserted(SDBus *sd, bool inserted); diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h new file mode 100644 index 0000000000..cca04defd8 --- /dev/null +++ b/include/hw/timer/armv7m_systick.h @@ -0,0 +1,34 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#ifndef HW_TIMER_ARMV7M_SYSTICK_H +#define HW_TIMER_ARMV7M_SYSTICK_H + +#include "hw/sysbus.h" + +#define TYPE_SYSTICK "armv7m_systick" + +#define SYSTICK(obj) OBJECT_CHECK(SysTickState, (obj), TYPE_SYSTICK) + +typedef struct SysTickState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t control; + uint32_t reload; + int64_t tick; + QEMUTimer *timer; + MemoryRegion iomem; + qemu_irq irq; +} SysTickState; + +#endif diff --git a/include/qemu-io.h b/include/qemu-io.h index 4d402b9b01..196fde0f3a 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -36,6 +36,7 @@ typedef struct cmdinfo { const char *args; const char *oneline; helpfunc_t help; + uint64_t perm; } cmdinfo_t; extern bool qemuio_misalign; diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index 47b38fb816..eca9a2ca22 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -73,6 +73,9 @@ */ #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -144,6 +147,11 @@ */ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) +/* + * Crash notification flag. + */ +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 5c10f7e25d..c8b3338375 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -640,7 +640,7 @@ * Control a data application associated with the currently viewed channel, * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ -#define KEY_DATA 0x275 +#define KEY_DATA 0x277 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index e5a2e68b22..634c9c44ed 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -23,6 +23,14 @@ #define LINUX_PCI_REGS_H /* + * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of + * configuration space. PCI-X Mode 2 and PCIe devices have 4096 bytes of + * configuration space. + */ +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 + +/* * Under PCI, each device has 256 bytes of configuration address space, * of which the first 64 bytes are standardized as follows: */ @@ -674,6 +682,7 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ +#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM @@ -965,6 +974,7 @@ #define PCI_EXP_DPC_STATUS 8 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x01 /* Trigger Status */ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x08 /* Interrupt Status */ +#define PCI_EXP_DPC_RP_BUSY 0x10 /* Root Port Busy */ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ @@ -977,4 +987,19 @@ #define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ #define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ +/* L1 PM Substates */ +#define PCI_L1SS_CAP 4 /* capability register */ +#define PCI_L1SS_CAP_PCIPM_L1_2 1 /* PCI PM L1.2 Support */ +#define PCI_L1SS_CAP_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CAP_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CAP_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CAP_L1_PM_SS 16 /* L1 PM Substates Support */ +#define PCI_L1SS_CTL1 8 /* Control Register 1 */ +#define PCI_L1SS_CTL1_PCIPM_L1_2 1 /* PCI PM L1.2 Enable */ +#define PCI_L1SS_CTL1_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000F +#define PCI_L1SS_CTL2 0xC /* Control Register 2 */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index fe74e422d4..6d5c3b2d4f 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -43,4 +43,5 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ + #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f365a51acf..096c17fce0 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -34,7 +34,7 @@ typedef struct BlockDevOps { * changes. Sure would be useful if it did. * Device models with removable media must implement this callback. */ - void (*change_media_cb)(void *opaque, bool load); + void (*change_media_cb)(void *opaque, bool load, Error **errp); /* * Runs when an eject request is issued from the monitor, the tray * is closed, and the medium is locked. @@ -84,7 +84,7 @@ typedef struct BlockBackendPublic { QLIST_ENTRY(BlockBackendPublic) round_robin; } BlockBackendPublic; -BlockBackend *blk_new(void); +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); @@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 2fb7859465..1101d55d2f 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ @@ -179,10 +181,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h new file mode 100644 index 0000000000..13a74afd02 --- /dev/null +++ b/linux-headers/asm-arm/unistd-common.h @@ -0,0 +1,357 @@ +#ifndef _ASM_ARM_UNISTD_COMMON_H +#define _ASM_ARM_UNISTD_COMMON_H 1 + +#define __NR_restart_syscall (__NR_SYSCALL_BASE + 0) +#define __NR_exit (__NR_SYSCALL_BASE + 1) +#define __NR_fork (__NR_SYSCALL_BASE + 2) +#define __NR_read (__NR_SYSCALL_BASE + 3) +#define __NR_write (__NR_SYSCALL_BASE + 4) +#define __NR_open (__NR_SYSCALL_BASE + 5) +#define __NR_close (__NR_SYSCALL_BASE + 6) +#define __NR_creat (__NR_SYSCALL_BASE + 8) +#define __NR_link (__NR_SYSCALL_BASE + 9) +#define __NR_unlink (__NR_SYSCALL_BASE + 10) +#define __NR_execve (__NR_SYSCALL_BASE + 11) +#define __NR_chdir (__NR_SYSCALL_BASE + 12) +#define __NR_mknod (__NR_SYSCALL_BASE + 14) +#define __NR_chmod (__NR_SYSCALL_BASE + 15) +#define __NR_lchown (__NR_SYSCALL_BASE + 16) +#define __NR_lseek (__NR_SYSCALL_BASE + 19) +#define __NR_getpid (__NR_SYSCALL_BASE + 20) +#define __NR_mount (__NR_SYSCALL_BASE + 21) +#define __NR_setuid (__NR_SYSCALL_BASE + 23) +#define __NR_getuid (__NR_SYSCALL_BASE + 24) +#define __NR_ptrace (__NR_SYSCALL_BASE + 26) +#define __NR_pause (__NR_SYSCALL_BASE + 29) +#define __NR_access (__NR_SYSCALL_BASE + 33) +#define __NR_nice (__NR_SYSCALL_BASE + 34) +#define __NR_sync (__NR_SYSCALL_BASE + 36) +#define __NR_kill (__NR_SYSCALL_BASE + 37) +#define __NR_rename (__NR_SYSCALL_BASE + 38) +#define __NR_mkdir (__NR_SYSCALL_BASE + 39) +#define __NR_rmdir (__NR_SYSCALL_BASE + 40) +#define __NR_dup (__NR_SYSCALL_BASE + 41) +#define __NR_pipe (__NR_SYSCALL_BASE + 42) +#define __NR_times (__NR_SYSCALL_BASE + 43) +#define __NR_brk (__NR_SYSCALL_BASE + 45) +#define __NR_setgid (__NR_SYSCALL_BASE + 46) +#define __NR_getgid (__NR_SYSCALL_BASE + 47) +#define __NR_geteuid (__NR_SYSCALL_BASE + 49) +#define __NR_getegid (__NR_SYSCALL_BASE + 50) +#define __NR_acct (__NR_SYSCALL_BASE + 51) +#define __NR_umount2 (__NR_SYSCALL_BASE + 52) +#define __NR_ioctl (__NR_SYSCALL_BASE + 54) +#define __NR_fcntl (__NR_SYSCALL_BASE + 55) +#define __NR_setpgid (__NR_SYSCALL_BASE + 57) +#define __NR_umask (__NR_SYSCALL_BASE + 60) +#define __NR_chroot (__NR_SYSCALL_BASE + 61) +#define __NR_ustat (__NR_SYSCALL_BASE + 62) +#define __NR_dup2 (__NR_SYSCALL_BASE + 63) +#define __NR_getppid (__NR_SYSCALL_BASE + 64) +#define __NR_getpgrp (__NR_SYSCALL_BASE + 65) +#define __NR_setsid (__NR_SYSCALL_BASE + 66) +#define __NR_sigaction (__NR_SYSCALL_BASE + 67) +#define __NR_setreuid (__NR_SYSCALL_BASE + 70) +#define __NR_setregid (__NR_SYSCALL_BASE + 71) +#define __NR_sigsuspend (__NR_SYSCALL_BASE + 72) +#define __NR_sigpending (__NR_SYSCALL_BASE + 73) +#define __NR_sethostname (__NR_SYSCALL_BASE + 74) +#define __NR_setrlimit (__NR_SYSCALL_BASE + 75) +#define __NR_getrusage (__NR_SYSCALL_BASE + 77) +#define __NR_gettimeofday (__NR_SYSCALL_BASE + 78) +#define __NR_settimeofday (__NR_SYSCALL_BASE + 79) +#define __NR_getgroups (__NR_SYSCALL_BASE + 80) +#define __NR_setgroups (__NR_SYSCALL_BASE + 81) +#define __NR_symlink (__NR_SYSCALL_BASE + 83) +#define __NR_readlink (__NR_SYSCALL_BASE + 85) +#define __NR_uselib (__NR_SYSCALL_BASE + 86) +#define __NR_swapon (__NR_SYSCALL_BASE + 87) +#define __NR_reboot (__NR_SYSCALL_BASE + 88) +#define __NR_munmap (__NR_SYSCALL_BASE + 91) +#define __NR_truncate (__NR_SYSCALL_BASE + 92) +#define __NR_ftruncate (__NR_SYSCALL_BASE + 93) +#define __NR_fchmod (__NR_SYSCALL_BASE + 94) +#define __NR_fchown (__NR_SYSCALL_BASE + 95) +#define __NR_getpriority (__NR_SYSCALL_BASE + 96) +#define __NR_setpriority (__NR_SYSCALL_BASE + 97) +#define __NR_statfs (__NR_SYSCALL_BASE + 99) +#define __NR_fstatfs (__NR_SYSCALL_BASE + 100) +#define __NR_syslog (__NR_SYSCALL_BASE + 103) +#define __NR_setitimer (__NR_SYSCALL_BASE + 104) +#define __NR_getitimer (__NR_SYSCALL_BASE + 105) +#define __NR_stat (__NR_SYSCALL_BASE + 106) +#define __NR_lstat (__NR_SYSCALL_BASE + 107) +#define __NR_fstat (__NR_SYSCALL_BASE + 108) +#define __NR_vhangup (__NR_SYSCALL_BASE + 111) +#define __NR_wait4 (__NR_SYSCALL_BASE + 114) +#define __NR_swapoff (__NR_SYSCALL_BASE + 115) +#define __NR_sysinfo (__NR_SYSCALL_BASE + 116) +#define __NR_fsync (__NR_SYSCALL_BASE + 118) +#define __NR_sigreturn (__NR_SYSCALL_BASE + 119) +#define __NR_clone (__NR_SYSCALL_BASE + 120) +#define __NR_setdomainname (__NR_SYSCALL_BASE + 121) +#define __NR_uname (__NR_SYSCALL_BASE + 122) +#define __NR_adjtimex (__NR_SYSCALL_BASE + 124) +#define __NR_mprotect (__NR_SYSCALL_BASE + 125) +#define __NR_sigprocmask (__NR_SYSCALL_BASE + 126) +#define __NR_init_module (__NR_SYSCALL_BASE + 128) +#define __NR_delete_module (__NR_SYSCALL_BASE + 129) +#define __NR_quotactl (__NR_SYSCALL_BASE + 131) +#define __NR_getpgid (__NR_SYSCALL_BASE + 132) +#define __NR_fchdir (__NR_SYSCALL_BASE + 133) +#define __NR_bdflush (__NR_SYSCALL_BASE + 134) +#define __NR_sysfs (__NR_SYSCALL_BASE + 135) +#define __NR_personality (__NR_SYSCALL_BASE + 136) +#define __NR_setfsuid (__NR_SYSCALL_BASE + 138) +#define __NR_setfsgid (__NR_SYSCALL_BASE + 139) +#define __NR__llseek (__NR_SYSCALL_BASE + 140) +#define __NR_getdents (__NR_SYSCALL_BASE + 141) +#define __NR__newselect (__NR_SYSCALL_BASE + 142) +#define __NR_flock (__NR_SYSCALL_BASE + 143) +#define __NR_msync (__NR_SYSCALL_BASE + 144) +#define __NR_readv (__NR_SYSCALL_BASE + 145) +#define __NR_writev (__NR_SYSCALL_BASE + 146) +#define __NR_getsid (__NR_SYSCALL_BASE + 147) +#define __NR_fdatasync (__NR_SYSCALL_BASE + 148) +#define __NR__sysctl (__NR_SYSCALL_BASE + 149) +#define __NR_mlock (__NR_SYSCALL_BASE + 150) +#define __NR_munlock (__NR_SYSCALL_BASE + 151) +#define __NR_mlockall (__NR_SYSCALL_BASE + 152) +#define __NR_munlockall (__NR_SYSCALL_BASE + 153) +#define __NR_sched_setparam (__NR_SYSCALL_BASE + 154) +#define __NR_sched_getparam (__NR_SYSCALL_BASE + 155) +#define __NR_sched_setscheduler (__NR_SYSCALL_BASE + 156) +#define __NR_sched_getscheduler (__NR_SYSCALL_BASE + 157) +#define __NR_sched_yield (__NR_SYSCALL_BASE + 158) +#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE + 159) +#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE + 160) +#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE + 161) +#define __NR_nanosleep (__NR_SYSCALL_BASE + 162) +#define __NR_mremap (__NR_SYSCALL_BASE + 163) +#define __NR_setresuid (__NR_SYSCALL_BASE + 164) +#define __NR_getresuid (__NR_SYSCALL_BASE + 165) +#define __NR_poll (__NR_SYSCALL_BASE + 168) +#define __NR_nfsservctl (__NR_SYSCALL_BASE + 169) +#define __NR_setresgid (__NR_SYSCALL_BASE + 170) +#define __NR_getresgid (__NR_SYSCALL_BASE + 171) +#define __NR_prctl (__NR_SYSCALL_BASE + 172) +#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173) +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE + 177) +#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE + 178) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#define __NR_chown (__NR_SYSCALL_BASE + 182) +#define __NR_getcwd (__NR_SYSCALL_BASE + 183) +#define __NR_capget (__NR_SYSCALL_BASE + 184) +#define __NR_capset (__NR_SYSCALL_BASE + 185) +#define __NR_sigaltstack (__NR_SYSCALL_BASE + 186) +#define __NR_sendfile (__NR_SYSCALL_BASE + 187) +#define __NR_vfork (__NR_SYSCALL_BASE + 190) +#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) +#define __NR_mmap2 (__NR_SYSCALL_BASE + 192) +#define __NR_truncate64 (__NR_SYSCALL_BASE + 193) +#define __NR_ftruncate64 (__NR_SYSCALL_BASE + 194) +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#define __NR_lstat64 (__NR_SYSCALL_BASE + 196) +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#define __NR_lchown32 (__NR_SYSCALL_BASE + 198) +#define __NR_getuid32 (__NR_SYSCALL_BASE + 199) +#define __NR_getgid32 (__NR_SYSCALL_BASE + 200) +#define __NR_geteuid32 (__NR_SYSCALL_BASE + 201) +#define __NR_getegid32 (__NR_SYSCALL_BASE + 202) +#define __NR_setreuid32 (__NR_SYSCALL_BASE + 203) +#define __NR_setregid32 (__NR_SYSCALL_BASE + 204) +#define __NR_getgroups32 (__NR_SYSCALL_BASE + 205) +#define __NR_setgroups32 (__NR_SYSCALL_BASE + 206) +#define __NR_fchown32 (__NR_SYSCALL_BASE + 207) +#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) +#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209) +#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) +#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211) +#define __NR_chown32 (__NR_SYSCALL_BASE + 212) +#define __NR_setuid32 (__NR_SYSCALL_BASE + 213) +#define __NR_setgid32 (__NR_SYSCALL_BASE + 214) +#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) +#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#define __NR_pivot_root (__NR_SYSCALL_BASE + 218) +#define __NR_mincore (__NR_SYSCALL_BASE + 219) +#define __NR_madvise (__NR_SYSCALL_BASE + 220) +#define __NR_fcntl64 (__NR_SYSCALL_BASE + 221) +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#define __NR_readahead (__NR_SYSCALL_BASE + 225) +#define __NR_setxattr (__NR_SYSCALL_BASE + 226) +#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) +#define __NR_fsetxattr (__NR_SYSCALL_BASE + 228) +#define __NR_getxattr (__NR_SYSCALL_BASE + 229) +#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) +#define __NR_fgetxattr (__NR_SYSCALL_BASE + 231) +#define __NR_listxattr (__NR_SYSCALL_BASE + 232) +#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) +#define __NR_flistxattr (__NR_SYSCALL_BASE + 234) +#define __NR_removexattr (__NR_SYSCALL_BASE + 235) +#define __NR_lremovexattr (__NR_SYSCALL_BASE + 236) +#define __NR_fremovexattr (__NR_SYSCALL_BASE + 237) +#define __NR_tkill (__NR_SYSCALL_BASE + 238) +#define __NR_sendfile64 (__NR_SYSCALL_BASE + 239) +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) +#define __NR_io_setup (__NR_SYSCALL_BASE + 243) +#define __NR_io_destroy (__NR_SYSCALL_BASE + 244) +#define __NR_io_getevents (__NR_SYSCALL_BASE + 245) +#define __NR_io_submit (__NR_SYSCALL_BASE + 246) +#define __NR_io_cancel (__NR_SYSCALL_BASE + 247) +#define __NR_exit_group (__NR_SYSCALL_BASE + 248) +#define __NR_lookup_dcookie (__NR_SYSCALL_BASE + 249) +#define __NR_epoll_create (__NR_SYSCALL_BASE + 250) +#define __NR_epoll_ctl (__NR_SYSCALL_BASE + 251) +#define __NR_epoll_wait (__NR_SYSCALL_BASE + 252) +#define __NR_remap_file_pages (__NR_SYSCALL_BASE + 253) +#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) +#define __NR_timer_create (__NR_SYSCALL_BASE + 257) +#define __NR_timer_settime (__NR_SYSCALL_BASE + 258) +#define __NR_timer_gettime (__NR_SYSCALL_BASE + 259) +#define __NR_timer_getoverrun (__NR_SYSCALL_BASE + 260) +#define __NR_timer_delete (__NR_SYSCALL_BASE + 261) +#define __NR_clock_settime (__NR_SYSCALL_BASE + 262) +#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) +#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) +#define __NR_clock_nanosleep (__NR_SYSCALL_BASE + 265) +#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) +#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) +#define __NR_tgkill (__NR_SYSCALL_BASE + 268) +#define __NR_utimes (__NR_SYSCALL_BASE + 269) +#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE + 270) +#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE + 271) +#define __NR_pciconfig_read (__NR_SYSCALL_BASE + 272) +#define __NR_pciconfig_write (__NR_SYSCALL_BASE + 273) +#define __NR_mq_open (__NR_SYSCALL_BASE + 274) +#define __NR_mq_unlink (__NR_SYSCALL_BASE + 275) +#define __NR_mq_timedsend (__NR_SYSCALL_BASE + 276) +#define __NR_mq_timedreceive (__NR_SYSCALL_BASE + 277) +#define __NR_mq_notify (__NR_SYSCALL_BASE + 278) +#define __NR_mq_getsetattr (__NR_SYSCALL_BASE + 279) +#define __NR_waitid (__NR_SYSCALL_BASE + 280) +#define __NR_socket (__NR_SYSCALL_BASE + 281) +#define __NR_bind (__NR_SYSCALL_BASE + 282) +#define __NR_connect (__NR_SYSCALL_BASE + 283) +#define __NR_listen (__NR_SYSCALL_BASE + 284) +#define __NR_accept (__NR_SYSCALL_BASE + 285) +#define __NR_getsockname (__NR_SYSCALL_BASE + 286) +#define __NR_getpeername (__NR_SYSCALL_BASE + 287) +#define __NR_socketpair (__NR_SYSCALL_BASE + 288) +#define __NR_send (__NR_SYSCALL_BASE + 289) +#define __NR_sendto (__NR_SYSCALL_BASE + 290) +#define __NR_recv (__NR_SYSCALL_BASE + 291) +#define __NR_recvfrom (__NR_SYSCALL_BASE + 292) +#define __NR_shutdown (__NR_SYSCALL_BASE + 293) +#define __NR_setsockopt (__NR_SYSCALL_BASE + 294) +#define __NR_getsockopt (__NR_SYSCALL_BASE + 295) +#define __NR_sendmsg (__NR_SYSCALL_BASE + 296) +#define __NR_recvmsg (__NR_SYSCALL_BASE + 297) +#define __NR_semop (__NR_SYSCALL_BASE + 298) +#define __NR_semget (__NR_SYSCALL_BASE + 299) +#define __NR_semctl (__NR_SYSCALL_BASE + 300) +#define __NR_msgsnd (__NR_SYSCALL_BASE + 301) +#define __NR_msgrcv (__NR_SYSCALL_BASE + 302) +#define __NR_msgget (__NR_SYSCALL_BASE + 303) +#define __NR_msgctl (__NR_SYSCALL_BASE + 304) +#define __NR_shmat (__NR_SYSCALL_BASE + 305) +#define __NR_shmdt (__NR_SYSCALL_BASE + 306) +#define __NR_shmget (__NR_SYSCALL_BASE + 307) +#define __NR_shmctl (__NR_SYSCALL_BASE + 308) +#define __NR_add_key (__NR_SYSCALL_BASE + 309) +#define __NR_request_key (__NR_SYSCALL_BASE + 310) +#define __NR_keyctl (__NR_SYSCALL_BASE + 311) +#define __NR_semtimedop (__NR_SYSCALL_BASE + 312) +#define __NR_vserver (__NR_SYSCALL_BASE + 313) +#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) +#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) +#define __NR_inotify_init (__NR_SYSCALL_BASE + 316) +#define __NR_inotify_add_watch (__NR_SYSCALL_BASE + 317) +#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE + 318) +#define __NR_mbind (__NR_SYSCALL_BASE + 319) +#define __NR_get_mempolicy (__NR_SYSCALL_BASE + 320) +#define __NR_set_mempolicy (__NR_SYSCALL_BASE + 321) +#define __NR_openat (__NR_SYSCALL_BASE + 322) +#define __NR_mkdirat (__NR_SYSCALL_BASE + 323) +#define __NR_mknodat (__NR_SYSCALL_BASE + 324) +#define __NR_fchownat (__NR_SYSCALL_BASE + 325) +#define __NR_futimesat (__NR_SYSCALL_BASE + 326) +#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327) +#define __NR_unlinkat (__NR_SYSCALL_BASE + 328) +#define __NR_renameat (__NR_SYSCALL_BASE + 329) +#define __NR_linkat (__NR_SYSCALL_BASE + 330) +#define __NR_symlinkat (__NR_SYSCALL_BASE + 331) +#define __NR_readlinkat (__NR_SYSCALL_BASE + 332) +#define __NR_fchmodat (__NR_SYSCALL_BASE + 333) +#define __NR_faccessat (__NR_SYSCALL_BASE + 334) +#define __NR_pselect6 (__NR_SYSCALL_BASE + 335) +#define __NR_ppoll (__NR_SYSCALL_BASE + 336) +#define __NR_unshare (__NR_SYSCALL_BASE + 337) +#define __NR_set_robust_list (__NR_SYSCALL_BASE + 338) +#define __NR_get_robust_list (__NR_SYSCALL_BASE + 339) +#define __NR_splice (__NR_SYSCALL_BASE + 340) +#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE + 341) +#define __NR_tee (__NR_SYSCALL_BASE + 342) +#define __NR_vmsplice (__NR_SYSCALL_BASE + 343) +#define __NR_move_pages (__NR_SYSCALL_BASE + 344) +#define __NR_getcpu (__NR_SYSCALL_BASE + 345) +#define __NR_epoll_pwait (__NR_SYSCALL_BASE + 346) +#define __NR_kexec_load (__NR_SYSCALL_BASE + 347) +#define __NR_utimensat (__NR_SYSCALL_BASE + 348) +#define __NR_signalfd (__NR_SYSCALL_BASE + 349) +#define __NR_timerfd_create (__NR_SYSCALL_BASE + 350) +#define __NR_eventfd (__NR_SYSCALL_BASE + 351) +#define __NR_fallocate (__NR_SYSCALL_BASE + 352) +#define __NR_timerfd_settime (__NR_SYSCALL_BASE + 353) +#define __NR_timerfd_gettime (__NR_SYSCALL_BASE + 354) +#define __NR_signalfd4 (__NR_SYSCALL_BASE + 355) +#define __NR_eventfd2 (__NR_SYSCALL_BASE + 356) +#define __NR_epoll_create1 (__NR_SYSCALL_BASE + 357) +#define __NR_dup3 (__NR_SYSCALL_BASE + 358) +#define __NR_pipe2 (__NR_SYSCALL_BASE + 359) +#define __NR_inotify_init1 (__NR_SYSCALL_BASE + 360) +#define __NR_preadv (__NR_SYSCALL_BASE + 361) +#define __NR_pwritev (__NR_SYSCALL_BASE + 362) +#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE + 363) +#define __NR_perf_event_open (__NR_SYSCALL_BASE + 364) +#define __NR_recvmmsg (__NR_SYSCALL_BASE + 365) +#define __NR_accept4 (__NR_SYSCALL_BASE + 366) +#define __NR_fanotify_init (__NR_SYSCALL_BASE + 367) +#define __NR_fanotify_mark (__NR_SYSCALL_BASE + 368) +#define __NR_prlimit64 (__NR_SYSCALL_BASE + 369) +#define __NR_name_to_handle_at (__NR_SYSCALL_BASE + 370) +#define __NR_open_by_handle_at (__NR_SYSCALL_BASE + 371) +#define __NR_clock_adjtime (__NR_SYSCALL_BASE + 372) +#define __NR_syncfs (__NR_SYSCALL_BASE + 373) +#define __NR_sendmmsg (__NR_SYSCALL_BASE + 374) +#define __NR_setns (__NR_SYSCALL_BASE + 375) +#define __NR_process_vm_readv (__NR_SYSCALL_BASE + 376) +#define __NR_process_vm_writev (__NR_SYSCALL_BASE + 377) +#define __NR_kcmp (__NR_SYSCALL_BASE + 378) +#define __NR_finit_module (__NR_SYSCALL_BASE + 379) +#define __NR_sched_setattr (__NR_SYSCALL_BASE + 380) +#define __NR_sched_getattr (__NR_SYSCALL_BASE + 381) +#define __NR_renameat2 (__NR_SYSCALL_BASE + 382) +#define __NR_seccomp (__NR_SYSCALL_BASE + 383) +#define __NR_getrandom (__NR_SYSCALL_BASE + 384) +#define __NR_memfd_create (__NR_SYSCALL_BASE + 385) +#define __NR_bpf (__NR_SYSCALL_BASE + 386) +#define __NR_execveat (__NR_SYSCALL_BASE + 387) +#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388) +#define __NR_membarrier (__NR_SYSCALL_BASE + 389) +#define __NR_mlock2 (__NR_SYSCALL_BASE + 390) +#define __NR_copy_file_range (__NR_SYSCALL_BASE + 391) +#define __NR_preadv2 (__NR_SYSCALL_BASE + 392) +#define __NR_pwritev2 (__NR_SYSCALL_BASE + 393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) +#define __NR_pkey_free (__NR_SYSCALL_BASE + 396) + +#endif /* _ASM_ARM_UNISTD_COMMON_H */ diff --git a/linux-headers/asm-arm/unistd-eabi.h b/linux-headers/asm-arm/unistd-eabi.h new file mode 100644 index 0000000000..266f1fcdfb --- /dev/null +++ b/linux-headers/asm-arm/unistd-eabi.h @@ -0,0 +1,5 @@ +#ifndef _ASM_ARM_UNISTD_EABI_H +#define _ASM_ARM_UNISTD_EABI_H 1 + + +#endif /* _ASM_ARM_UNISTD_EABI_H */ diff --git a/linux-headers/asm-arm/unistd-oabi.h b/linux-headers/asm-arm/unistd-oabi.h new file mode 100644 index 0000000000..47d9afb96d --- /dev/null +++ b/linux-headers/asm-arm/unistd-oabi.h @@ -0,0 +1,17 @@ +#ifndef _ASM_ARM_UNISTD_OABI_H +#define _ASM_ARM_UNISTD_OABI_H 1 + +#define __NR_time (__NR_SYSCALL_BASE + 13) +#define __NR_umount (__NR_SYSCALL_BASE + 22) +#define __NR_stime (__NR_SYSCALL_BASE + 25) +#define __NR_alarm (__NR_SYSCALL_BASE + 27) +#define __NR_utime (__NR_SYSCALL_BASE + 30) +#define __NR_getrlimit (__NR_SYSCALL_BASE + 76) +#define __NR_select (__NR_SYSCALL_BASE + 82) +#define __NR_readdir (__NR_SYSCALL_BASE + 89) +#define __NR_mmap (__NR_SYSCALL_BASE + 90) +#define __NR_socketcall (__NR_SYSCALL_BASE + 102) +#define __NR_syscall (__NR_SYSCALL_BASE + 113) +#define __NR_ipc (__NR_SYSCALL_BASE + 117) + +#endif /* _ASM_ARM_UNISTD_OABI_H */ diff --git a/linux-headers/asm-arm/unistd.h b/linux-headers/asm-arm/unistd.h index ceb5450c81..155571b874 100644 --- a/linux-headers/asm-arm/unistd.h +++ b/linux-headers/asm-arm/unistd.h @@ -17,409 +17,14 @@ #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 +#include <asm/unistd-eabi.h> #else #define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE +#include <asm/unistd-oabi.h> #endif -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall (__NR_SYSCALL_BASE+ 0) -#define __NR_exit (__NR_SYSCALL_BASE+ 1) -#define __NR_fork (__NR_SYSCALL_BASE+ 2) -#define __NR_read (__NR_SYSCALL_BASE+ 3) -#define __NR_write (__NR_SYSCALL_BASE+ 4) -#define __NR_open (__NR_SYSCALL_BASE+ 5) -#define __NR_close (__NR_SYSCALL_BASE+ 6) - /* 7 was sys_waitpid */ -#define __NR_creat (__NR_SYSCALL_BASE+ 8) -#define __NR_link (__NR_SYSCALL_BASE+ 9) -#define __NR_unlink (__NR_SYSCALL_BASE+ 10) -#define __NR_execve (__NR_SYSCALL_BASE+ 11) -#define __NR_chdir (__NR_SYSCALL_BASE+ 12) -#define __NR_time (__NR_SYSCALL_BASE+ 13) -#define __NR_mknod (__NR_SYSCALL_BASE+ 14) -#define __NR_chmod (__NR_SYSCALL_BASE+ 15) -#define __NR_lchown (__NR_SYSCALL_BASE+ 16) - /* 17 was sys_break */ - /* 18 was sys_stat */ -#define __NR_lseek (__NR_SYSCALL_BASE+ 19) -#define __NR_getpid (__NR_SYSCALL_BASE+ 20) -#define __NR_mount (__NR_SYSCALL_BASE+ 21) -#define __NR_umount (__NR_SYSCALL_BASE+ 22) -#define __NR_setuid (__NR_SYSCALL_BASE+ 23) -#define __NR_getuid (__NR_SYSCALL_BASE+ 24) -#define __NR_stime (__NR_SYSCALL_BASE+ 25) -#define __NR_ptrace (__NR_SYSCALL_BASE+ 26) -#define __NR_alarm (__NR_SYSCALL_BASE+ 27) - /* 28 was sys_fstat */ -#define __NR_pause (__NR_SYSCALL_BASE+ 29) -#define __NR_utime (__NR_SYSCALL_BASE+ 30) - /* 31 was sys_stty */ - /* 32 was sys_gtty */ -#define __NR_access (__NR_SYSCALL_BASE+ 33) -#define __NR_nice (__NR_SYSCALL_BASE+ 34) - /* 35 was sys_ftime */ -#define __NR_sync (__NR_SYSCALL_BASE+ 36) -#define __NR_kill (__NR_SYSCALL_BASE+ 37) -#define __NR_rename (__NR_SYSCALL_BASE+ 38) -#define __NR_mkdir (__NR_SYSCALL_BASE+ 39) -#define __NR_rmdir (__NR_SYSCALL_BASE+ 40) -#define __NR_dup (__NR_SYSCALL_BASE+ 41) -#define __NR_pipe (__NR_SYSCALL_BASE+ 42) -#define __NR_times (__NR_SYSCALL_BASE+ 43) - /* 44 was sys_prof */ -#define __NR_brk (__NR_SYSCALL_BASE+ 45) -#define __NR_setgid (__NR_SYSCALL_BASE+ 46) -#define __NR_getgid (__NR_SYSCALL_BASE+ 47) - /* 48 was sys_signal */ -#define __NR_geteuid (__NR_SYSCALL_BASE+ 49) -#define __NR_getegid (__NR_SYSCALL_BASE+ 50) -#define __NR_acct (__NR_SYSCALL_BASE+ 51) -#define __NR_umount2 (__NR_SYSCALL_BASE+ 52) - /* 53 was sys_lock */ -#define __NR_ioctl (__NR_SYSCALL_BASE+ 54) -#define __NR_fcntl (__NR_SYSCALL_BASE+ 55) - /* 56 was sys_mpx */ -#define __NR_setpgid (__NR_SYSCALL_BASE+ 57) - /* 58 was sys_ulimit */ - /* 59 was sys_olduname */ -#define __NR_umask (__NR_SYSCALL_BASE+ 60) -#define __NR_chroot (__NR_SYSCALL_BASE+ 61) -#define __NR_ustat (__NR_SYSCALL_BASE+ 62) -#define __NR_dup2 (__NR_SYSCALL_BASE+ 63) -#define __NR_getppid (__NR_SYSCALL_BASE+ 64) -#define __NR_getpgrp (__NR_SYSCALL_BASE+ 65) -#define __NR_setsid (__NR_SYSCALL_BASE+ 66) -#define __NR_sigaction (__NR_SYSCALL_BASE+ 67) - /* 68 was sys_sgetmask */ - /* 69 was sys_ssetmask */ -#define __NR_setreuid (__NR_SYSCALL_BASE+ 70) -#define __NR_setregid (__NR_SYSCALL_BASE+ 71) -#define __NR_sigsuspend (__NR_SYSCALL_BASE+ 72) -#define __NR_sigpending (__NR_SYSCALL_BASE+ 73) -#define __NR_sethostname (__NR_SYSCALL_BASE+ 74) -#define __NR_setrlimit (__NR_SYSCALL_BASE+ 75) -#define __NR_getrlimit (__NR_SYSCALL_BASE+ 76) /* Back compat 2GB limited rlimit */ -#define __NR_getrusage (__NR_SYSCALL_BASE+ 77) -#define __NR_gettimeofday (__NR_SYSCALL_BASE+ 78) -#define __NR_settimeofday (__NR_SYSCALL_BASE+ 79) -#define __NR_getgroups (__NR_SYSCALL_BASE+ 80) -#define __NR_setgroups (__NR_SYSCALL_BASE+ 81) -#define __NR_select (__NR_SYSCALL_BASE+ 82) -#define __NR_symlink (__NR_SYSCALL_BASE+ 83) - /* 84 was sys_lstat */ -#define __NR_readlink (__NR_SYSCALL_BASE+ 85) -#define __NR_uselib (__NR_SYSCALL_BASE+ 86) -#define __NR_swapon (__NR_SYSCALL_BASE+ 87) -#define __NR_reboot (__NR_SYSCALL_BASE+ 88) -#define __NR_readdir (__NR_SYSCALL_BASE+ 89) -#define __NR_mmap (__NR_SYSCALL_BASE+ 90) -#define __NR_munmap (__NR_SYSCALL_BASE+ 91) -#define __NR_truncate (__NR_SYSCALL_BASE+ 92) -#define __NR_ftruncate (__NR_SYSCALL_BASE+ 93) -#define __NR_fchmod (__NR_SYSCALL_BASE+ 94) -#define __NR_fchown (__NR_SYSCALL_BASE+ 95) -#define __NR_getpriority (__NR_SYSCALL_BASE+ 96) -#define __NR_setpriority (__NR_SYSCALL_BASE+ 97) - /* 98 was sys_profil */ -#define __NR_statfs (__NR_SYSCALL_BASE+ 99) -#define __NR_fstatfs (__NR_SYSCALL_BASE+100) - /* 101 was sys_ioperm */ -#define __NR_socketcall (__NR_SYSCALL_BASE+102) -#define __NR_syslog (__NR_SYSCALL_BASE+103) -#define __NR_setitimer (__NR_SYSCALL_BASE+104) -#define __NR_getitimer (__NR_SYSCALL_BASE+105) -#define __NR_stat (__NR_SYSCALL_BASE+106) -#define __NR_lstat (__NR_SYSCALL_BASE+107) -#define __NR_fstat (__NR_SYSCALL_BASE+108) - /* 109 was sys_uname */ - /* 110 was sys_iopl */ -#define __NR_vhangup (__NR_SYSCALL_BASE+111) - /* 112 was sys_idle */ -#define __NR_syscall (__NR_SYSCALL_BASE+113) /* syscall to call a syscall! */ -#define __NR_wait4 (__NR_SYSCALL_BASE+114) -#define __NR_swapoff (__NR_SYSCALL_BASE+115) -#define __NR_sysinfo (__NR_SYSCALL_BASE+116) -#define __NR_ipc (__NR_SYSCALL_BASE+117) -#define __NR_fsync (__NR_SYSCALL_BASE+118) -#define __NR_sigreturn (__NR_SYSCALL_BASE+119) -#define __NR_clone (__NR_SYSCALL_BASE+120) -#define __NR_setdomainname (__NR_SYSCALL_BASE+121) -#define __NR_uname (__NR_SYSCALL_BASE+122) - /* 123 was sys_modify_ldt */ -#define __NR_adjtimex (__NR_SYSCALL_BASE+124) -#define __NR_mprotect (__NR_SYSCALL_BASE+125) -#define __NR_sigprocmask (__NR_SYSCALL_BASE+126) - /* 127 was sys_create_module */ -#define __NR_init_module (__NR_SYSCALL_BASE+128) -#define __NR_delete_module (__NR_SYSCALL_BASE+129) - /* 130 was sys_get_kernel_syms */ -#define __NR_quotactl (__NR_SYSCALL_BASE+131) -#define __NR_getpgid (__NR_SYSCALL_BASE+132) -#define __NR_fchdir (__NR_SYSCALL_BASE+133) -#define __NR_bdflush (__NR_SYSCALL_BASE+134) -#define __NR_sysfs (__NR_SYSCALL_BASE+135) -#define __NR_personality (__NR_SYSCALL_BASE+136) - /* 137 was sys_afs_syscall */ -#define __NR_setfsuid (__NR_SYSCALL_BASE+138) -#define __NR_setfsgid (__NR_SYSCALL_BASE+139) -#define __NR__llseek (__NR_SYSCALL_BASE+140) -#define __NR_getdents (__NR_SYSCALL_BASE+141) -#define __NR__newselect (__NR_SYSCALL_BASE+142) -#define __NR_flock (__NR_SYSCALL_BASE+143) -#define __NR_msync (__NR_SYSCALL_BASE+144) -#define __NR_readv (__NR_SYSCALL_BASE+145) -#define __NR_writev (__NR_SYSCALL_BASE+146) -#define __NR_getsid (__NR_SYSCALL_BASE+147) -#define __NR_fdatasync (__NR_SYSCALL_BASE+148) -#define __NR__sysctl (__NR_SYSCALL_BASE+149) -#define __NR_mlock (__NR_SYSCALL_BASE+150) -#define __NR_munlock (__NR_SYSCALL_BASE+151) -#define __NR_mlockall (__NR_SYSCALL_BASE+152) -#define __NR_munlockall (__NR_SYSCALL_BASE+153) -#define __NR_sched_setparam (__NR_SYSCALL_BASE+154) -#define __NR_sched_getparam (__NR_SYSCALL_BASE+155) -#define __NR_sched_setscheduler (__NR_SYSCALL_BASE+156) -#define __NR_sched_getscheduler (__NR_SYSCALL_BASE+157) -#define __NR_sched_yield (__NR_SYSCALL_BASE+158) -#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE+159) -#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE+160) -#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE+161) -#define __NR_nanosleep (__NR_SYSCALL_BASE+162) -#define __NR_mremap (__NR_SYSCALL_BASE+163) -#define __NR_setresuid (__NR_SYSCALL_BASE+164) -#define __NR_getresuid (__NR_SYSCALL_BASE+165) - /* 166 was sys_vm86 */ - /* 167 was sys_query_module */ -#define __NR_poll (__NR_SYSCALL_BASE+168) -#define __NR_nfsservctl (__NR_SYSCALL_BASE+169) -#define __NR_setresgid (__NR_SYSCALL_BASE+170) -#define __NR_getresgid (__NR_SYSCALL_BASE+171) -#define __NR_prctl (__NR_SYSCALL_BASE+172) -#define __NR_rt_sigreturn (__NR_SYSCALL_BASE+173) -#define __NR_rt_sigaction (__NR_SYSCALL_BASE+174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE+175) -#define __NR_rt_sigpending (__NR_SYSCALL_BASE+176) -#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE+177) -#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE+178) -#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE+179) -#define __NR_pread64 (__NR_SYSCALL_BASE+180) -#define __NR_pwrite64 (__NR_SYSCALL_BASE+181) -#define __NR_chown (__NR_SYSCALL_BASE+182) -#define __NR_getcwd (__NR_SYSCALL_BASE+183) -#define __NR_capget (__NR_SYSCALL_BASE+184) -#define __NR_capset (__NR_SYSCALL_BASE+185) -#define __NR_sigaltstack (__NR_SYSCALL_BASE+186) -#define __NR_sendfile (__NR_SYSCALL_BASE+187) - /* 188 reserved */ - /* 189 reserved */ -#define __NR_vfork (__NR_SYSCALL_BASE+190) -#define __NR_ugetrlimit (__NR_SYSCALL_BASE+191) /* SuS compliant getrlimit */ -#define __NR_mmap2 (__NR_SYSCALL_BASE+192) -#define __NR_truncate64 (__NR_SYSCALL_BASE+193) -#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194) -#define __NR_stat64 (__NR_SYSCALL_BASE+195) -#define __NR_lstat64 (__NR_SYSCALL_BASE+196) -#define __NR_fstat64 (__NR_SYSCALL_BASE+197) -#define __NR_lchown32 (__NR_SYSCALL_BASE+198) -#define __NR_getuid32 (__NR_SYSCALL_BASE+199) -#define __NR_getgid32 (__NR_SYSCALL_BASE+200) -#define __NR_geteuid32 (__NR_SYSCALL_BASE+201) -#define __NR_getegid32 (__NR_SYSCALL_BASE+202) -#define __NR_setreuid32 (__NR_SYSCALL_BASE+203) -#define __NR_setregid32 (__NR_SYSCALL_BASE+204) -#define __NR_getgroups32 (__NR_SYSCALL_BASE+205) -#define __NR_setgroups32 (__NR_SYSCALL_BASE+206) -#define __NR_fchown32 (__NR_SYSCALL_BASE+207) -#define __NR_setresuid32 (__NR_SYSCALL_BASE+208) -#define __NR_getresuid32 (__NR_SYSCALL_BASE+209) -#define __NR_setresgid32 (__NR_SYSCALL_BASE+210) -#define __NR_getresgid32 (__NR_SYSCALL_BASE+211) -#define __NR_chown32 (__NR_SYSCALL_BASE+212) -#define __NR_setuid32 (__NR_SYSCALL_BASE+213) -#define __NR_setgid32 (__NR_SYSCALL_BASE+214) -#define __NR_setfsuid32 (__NR_SYSCALL_BASE+215) -#define __NR_setfsgid32 (__NR_SYSCALL_BASE+216) -#define __NR_getdents64 (__NR_SYSCALL_BASE+217) -#define __NR_pivot_root (__NR_SYSCALL_BASE+218) -#define __NR_mincore (__NR_SYSCALL_BASE+219) -#define __NR_madvise (__NR_SYSCALL_BASE+220) -#define __NR_fcntl64 (__NR_SYSCALL_BASE+221) - /* 222 for tux */ - /* 223 is unused */ -#define __NR_gettid (__NR_SYSCALL_BASE+224) -#define __NR_readahead (__NR_SYSCALL_BASE+225) -#define __NR_setxattr (__NR_SYSCALL_BASE+226) -#define __NR_lsetxattr (__NR_SYSCALL_BASE+227) -#define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -#define __NR_getxattr (__NR_SYSCALL_BASE+229) -#define __NR_lgetxattr (__NR_SYSCALL_BASE+230) -#define __NR_fgetxattr (__NR_SYSCALL_BASE+231) -#define __NR_listxattr (__NR_SYSCALL_BASE+232) -#define __NR_llistxattr (__NR_SYSCALL_BASE+233) -#define __NR_flistxattr (__NR_SYSCALL_BASE+234) -#define __NR_removexattr (__NR_SYSCALL_BASE+235) -#define __NR_lremovexattr (__NR_SYSCALL_BASE+236) -#define __NR_fremovexattr (__NR_SYSCALL_BASE+237) -#define __NR_tkill (__NR_SYSCALL_BASE+238) -#define __NR_sendfile64 (__NR_SYSCALL_BASE+239) -#define __NR_futex (__NR_SYSCALL_BASE+240) -#define __NR_sched_setaffinity (__NR_SYSCALL_BASE+241) -#define __NR_sched_getaffinity (__NR_SYSCALL_BASE+242) -#define __NR_io_setup (__NR_SYSCALL_BASE+243) -#define __NR_io_destroy (__NR_SYSCALL_BASE+244) -#define __NR_io_getevents (__NR_SYSCALL_BASE+245) -#define __NR_io_submit (__NR_SYSCALL_BASE+246) -#define __NR_io_cancel (__NR_SYSCALL_BASE+247) -#define __NR_exit_group (__NR_SYSCALL_BASE+248) -#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249) -#define __NR_epoll_create (__NR_SYSCALL_BASE+250) -#define __NR_epoll_ctl (__NR_SYSCALL_BASE+251) -#define __NR_epoll_wait (__NR_SYSCALL_BASE+252) -#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253) - /* 254 for set_thread_area */ - /* 255 for get_thread_area */ -#define __NR_set_tid_address (__NR_SYSCALL_BASE+256) -#define __NR_timer_create (__NR_SYSCALL_BASE+257) -#define __NR_timer_settime (__NR_SYSCALL_BASE+258) -#define __NR_timer_gettime (__NR_SYSCALL_BASE+259) -#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+260) -#define __NR_timer_delete (__NR_SYSCALL_BASE+261) -#define __NR_clock_settime (__NR_SYSCALL_BASE+262) -#define __NR_clock_gettime (__NR_SYSCALL_BASE+263) -#define __NR_clock_getres (__NR_SYSCALL_BASE+264) -#define __NR_clock_nanosleep (__NR_SYSCALL_BASE+265) -#define __NR_statfs64 (__NR_SYSCALL_BASE+266) -#define __NR_fstatfs64 (__NR_SYSCALL_BASE+267) -#define __NR_tgkill (__NR_SYSCALL_BASE+268) -#define __NR_utimes (__NR_SYSCALL_BASE+269) -#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE+270) -#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE+271) -#define __NR_pciconfig_read (__NR_SYSCALL_BASE+272) -#define __NR_pciconfig_write (__NR_SYSCALL_BASE+273) -#define __NR_mq_open (__NR_SYSCALL_BASE+274) -#define __NR_mq_unlink (__NR_SYSCALL_BASE+275) -#define __NR_mq_timedsend (__NR_SYSCALL_BASE+276) -#define __NR_mq_timedreceive (__NR_SYSCALL_BASE+277) -#define __NR_mq_notify (__NR_SYSCALL_BASE+278) -#define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) -#define __NR_waitid (__NR_SYSCALL_BASE+280) -#define __NR_socket (__NR_SYSCALL_BASE+281) -#define __NR_bind (__NR_SYSCALL_BASE+282) -#define __NR_connect (__NR_SYSCALL_BASE+283) -#define __NR_listen (__NR_SYSCALL_BASE+284) -#define __NR_accept (__NR_SYSCALL_BASE+285) -#define __NR_getsockname (__NR_SYSCALL_BASE+286) -#define __NR_getpeername (__NR_SYSCALL_BASE+287) -#define __NR_socketpair (__NR_SYSCALL_BASE+288) -#define __NR_send (__NR_SYSCALL_BASE+289) -#define __NR_sendto (__NR_SYSCALL_BASE+290) -#define __NR_recv (__NR_SYSCALL_BASE+291) -#define __NR_recvfrom (__NR_SYSCALL_BASE+292) -#define __NR_shutdown (__NR_SYSCALL_BASE+293) -#define __NR_setsockopt (__NR_SYSCALL_BASE+294) -#define __NR_getsockopt (__NR_SYSCALL_BASE+295) -#define __NR_sendmsg (__NR_SYSCALL_BASE+296) -#define __NR_recvmsg (__NR_SYSCALL_BASE+297) -#define __NR_semop (__NR_SYSCALL_BASE+298) -#define __NR_semget (__NR_SYSCALL_BASE+299) -#define __NR_semctl (__NR_SYSCALL_BASE+300) -#define __NR_msgsnd (__NR_SYSCALL_BASE+301) -#define __NR_msgrcv (__NR_SYSCALL_BASE+302) -#define __NR_msgget (__NR_SYSCALL_BASE+303) -#define __NR_msgctl (__NR_SYSCALL_BASE+304) -#define __NR_shmat (__NR_SYSCALL_BASE+305) -#define __NR_shmdt (__NR_SYSCALL_BASE+306) -#define __NR_shmget (__NR_SYSCALL_BASE+307) -#define __NR_shmctl (__NR_SYSCALL_BASE+308) -#define __NR_add_key (__NR_SYSCALL_BASE+309) -#define __NR_request_key (__NR_SYSCALL_BASE+310) -#define __NR_keyctl (__NR_SYSCALL_BASE+311) -#define __NR_semtimedop (__NR_SYSCALL_BASE+312) -#define __NR_vserver (__NR_SYSCALL_BASE+313) -#define __NR_ioprio_set (__NR_SYSCALL_BASE+314) -#define __NR_ioprio_get (__NR_SYSCALL_BASE+315) -#define __NR_inotify_init (__NR_SYSCALL_BASE+316) -#define __NR_inotify_add_watch (__NR_SYSCALL_BASE+317) -#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE+318) -#define __NR_mbind (__NR_SYSCALL_BASE+319) -#define __NR_get_mempolicy (__NR_SYSCALL_BASE+320) -#define __NR_set_mempolicy (__NR_SYSCALL_BASE+321) -#define __NR_openat (__NR_SYSCALL_BASE+322) -#define __NR_mkdirat (__NR_SYSCALL_BASE+323) -#define __NR_mknodat (__NR_SYSCALL_BASE+324) -#define __NR_fchownat (__NR_SYSCALL_BASE+325) -#define __NR_futimesat (__NR_SYSCALL_BASE+326) -#define __NR_fstatat64 (__NR_SYSCALL_BASE+327) -#define __NR_unlinkat (__NR_SYSCALL_BASE+328) -#define __NR_renameat (__NR_SYSCALL_BASE+329) -#define __NR_linkat (__NR_SYSCALL_BASE+330) -#define __NR_symlinkat (__NR_SYSCALL_BASE+331) -#define __NR_readlinkat (__NR_SYSCALL_BASE+332) -#define __NR_fchmodat (__NR_SYSCALL_BASE+333) -#define __NR_faccessat (__NR_SYSCALL_BASE+334) -#define __NR_pselect6 (__NR_SYSCALL_BASE+335) -#define __NR_ppoll (__NR_SYSCALL_BASE+336) -#define __NR_unshare (__NR_SYSCALL_BASE+337) -#define __NR_set_robust_list (__NR_SYSCALL_BASE+338) -#define __NR_get_robust_list (__NR_SYSCALL_BASE+339) -#define __NR_splice (__NR_SYSCALL_BASE+340) -#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341) +#include <asm/unistd-common.h> #define __NR_sync_file_range2 __NR_arm_sync_file_range -#define __NR_tee (__NR_SYSCALL_BASE+342) -#define __NR_vmsplice (__NR_SYSCALL_BASE+343) -#define __NR_move_pages (__NR_SYSCALL_BASE+344) -#define __NR_getcpu (__NR_SYSCALL_BASE+345) -#define __NR_epoll_pwait (__NR_SYSCALL_BASE+346) -#define __NR_kexec_load (__NR_SYSCALL_BASE+347) -#define __NR_utimensat (__NR_SYSCALL_BASE+348) -#define __NR_signalfd (__NR_SYSCALL_BASE+349) -#define __NR_timerfd_create (__NR_SYSCALL_BASE+350) -#define __NR_eventfd (__NR_SYSCALL_BASE+351) -#define __NR_fallocate (__NR_SYSCALL_BASE+352) -#define __NR_timerfd_settime (__NR_SYSCALL_BASE+353) -#define __NR_timerfd_gettime (__NR_SYSCALL_BASE+354) -#define __NR_signalfd4 (__NR_SYSCALL_BASE+355) -#define __NR_eventfd2 (__NR_SYSCALL_BASE+356) -#define __NR_epoll_create1 (__NR_SYSCALL_BASE+357) -#define __NR_dup3 (__NR_SYSCALL_BASE+358) -#define __NR_pipe2 (__NR_SYSCALL_BASE+359) -#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360) -#define __NR_preadv (__NR_SYSCALL_BASE+361) -#define __NR_pwritev (__NR_SYSCALL_BASE+362) -#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) -#define __NR_recvmmsg (__NR_SYSCALL_BASE+365) -#define __NR_accept4 (__NR_SYSCALL_BASE+366) -#define __NR_fanotify_init (__NR_SYSCALL_BASE+367) -#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) -#define __NR_prlimit64 (__NR_SYSCALL_BASE+369) -#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370) -#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371) -#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372) -#define __NR_syncfs (__NR_SYSCALL_BASE+373) -#define __NR_sendmmsg (__NR_SYSCALL_BASE+374) -#define __NR_setns (__NR_SYSCALL_BASE+375) -#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) -#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) -#define __NR_kcmp (__NR_SYSCALL_BASE+378) -#define __NR_finit_module (__NR_SYSCALL_BASE+379) -#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) -#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) -#define __NR_renameat2 (__NR_SYSCALL_BASE+382) -#define __NR_seccomp (__NR_SYSCALL_BASE+383) -#define __NR_getrandom (__NR_SYSCALL_BASE+384) -#define __NR_memfd_create (__NR_SYSCALL_BASE+385) -#define __NR_bpf (__NR_SYSCALL_BASE+386) -#define __NR_execveat (__NR_SYSCALL_BASE+387) -#define __NR_userfaultfd (__NR_SYSCALL_BASE+388) -#define __NR_membarrier (__NR_SYSCALL_BASE+389) -#define __NR_mlock2 (__NR_SYSCALL_BASE+390) -#define __NR_copy_file_range (__NR_SYSCALL_BASE+391) -#define __NR_preadv2 (__NR_SYSCALL_BASE+392) -#define __NR_pwritev2 (__NR_SYSCALL_BASE+393) /* * The following SWIs are ARM private. @@ -431,22 +36,4 @@ #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) -/* - * The following syscalls are obsolete and no longer available for EABI. - */ -#if defined(__ARM_EABI__) -#undef __NR_time -#undef __NR_umount -#undef __NR_stime -#undef __NR_alarm -#undef __NR_utime -#undef __NR_getrlimit -#undef __NR_select -#undef __NR_readdir -#undef __NR_mmap -#undef __NR_socketcall -#undef __NR_syscall -#undef __NR_ipc -#endif - #endif /* __ASM_ARM_UNISTD_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index fd5a2761a5..651ec30040 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index c93cf35ce3..4edbe4bb0e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -573,6 +593,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ @@ -596,6 +620,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ @@ -608,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h index 1e66eba4c6..598043c7b6 100644 --- a/linux-headers/asm-powerpc/unistd.h +++ b/linux-headers/asm-powerpc/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index e41c5c1a28..3a5397988e 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -45,7 +45,18 @@ struct kvm_steal_time { __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; +}; + +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 +struct kvm_clock_pairing { + __s64 sec; + __s64 nsec; + __u64 tsc; + __u32 flags; + __u32 pad[9]; }; #define KVM_STEAL_ALIGNMENT_BITS 5 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index bb0ed71223..4e082a81b4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 immediate_exit; + __u8 padding1[6]; /* out */ __u32 exit_reason; @@ -651,6 +652,9 @@ struct kvm_enable_cap { }; /* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + struct kvm_ppc_pvinfo { /* out */ __u32 flags; @@ -682,7 +686,12 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; #define KVMIO 0xAE @@ -870,6 +879,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_SPAPR_RESIZE_HPT 133 +#define KVM_CAP_PPC_MMU_RADIX 134 +#define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 #ifdef KVM_CAP_IRQ_ROUTING @@ -1186,6 +1199,13 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* Available with KVM_CAP_PPC_RTAS */ #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) +/* Available with KVM_CAP_SPAPR_RESIZE_HPT */ +#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) +#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) +/* Available with KVM_CAP_PPC_RADIX_MMU */ +#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h index e61661edf3..15b24ff6cf 100644 --- a/linux-headers/linux/kvm_para.h +++ b/linux-headers/linux/kvm_para.h @@ -14,6 +14,7 @@ #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG #define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 @@ -23,6 +24,7 @@ #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 #define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 /* * hypercalls use architecture specific diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 19e8453249..2ed5dc3775 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -11,13 +11,18 @@ #include <linux/types.h> -#define UFFD_API ((__u64)0xAA) /* - * After implementing the respective features it will become: - * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ - * UFFD_FEATURE_EVENT_FORK) + * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In + * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ + * means the userland is reading). */ -#define UFFD_API_FEATURES (0) +#define UFFD_API ((__u64)0xAA) +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ + UFFD_FEATURE_EVENT_MADVDONTNEED | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -26,6 +31,9 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE) +#define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY) /* * Valid ioctl command number range with this API is from 0x00 to @@ -72,6 +80,21 @@ struct uffd_msg { } pagefault; struct { + __u32 ufd; + } fork; + + struct { + __u64 from; + __u64 to; + __u64 len; + } remap; + + struct { + __u64 start; + __u64 end; + } madv_dn; + + struct { /* unused reserved fields */ __u64 reserved1; __u64 reserved2; @@ -84,9 +107,9 @@ struct uffd_msg { * Start at 0x12 and not at 0 to be more strict against bugs. */ #define UFFD_EVENT_PAGEFAULT 0x12 -#if 0 /* not available yet */ #define UFFD_EVENT_FORK 0x13 -#endif +#define UFFD_EVENT_REMAP 0x14 +#define UFFD_EVENT_MADVDONTNEED 0x15 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -104,11 +127,37 @@ struct uffdio_api { * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE * are to be considered implicitly always enabled in all kernels as * long as the uffdio_api.api requested matches UFFD_API. + * + * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER + * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on + * hugetlbfs virtual memory ranges. Adding or not adding + * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has + * no real functional effect after UFFDIO_API returns, but + * it's only useful for an initial feature set probe at + * UFFDIO_API time. There are two ways to use it: + * + * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the + * uffdio_api.features before calling UFFDIO_API, an error + * will be returned by UFFDIO_API on a kernel without + * hugetlbfs missing support + * + * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in + * uffdio_api.features and instead it will be set by the + * kernel in the uffdio_api.features if the kernel supports + * it, so userland can later check if the feature flag is + * present in uffdio_api.features after UFFDIO_API + * succeeded. + * + * UFFD_FEATURE_MISSING_SHMEM works the same as + * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem + * (i.e. tmpfs and other shmem based APIs). */ -#if 0 /* not available yet */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) -#endif +#define UFFD_FEATURE_EVENT_REMAP (1<<2) +#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3) +#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) +#define UFFD_FEATURE_MISSING_SHMEM (1<<5) __u64 features; __u64 ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 759b850a3e..531cb2eda9 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -203,6 +203,16 @@ struct vfio_device_info { }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) diff --git a/migration/block.c b/migration/block.c index ebc10e628d..1941bc2402 100644 --- a/migration/block.c +++ b/migration/block.c @@ -379,7 +379,7 @@ static void unset_dirty_tracking(void) } } -static void init_blk_migration(QEMUFile *f) +static int init_blk_migration(QEMUFile *f) { BlockDriverState *bs; BlkMigDevState *bmds; @@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f) BlkMigDevState *bmds; BlockDriverState *bs; } *bmds_bs; + Error *local_err = NULL; + int ret; block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f) sectors = bdrv_nb_sectors(bs); if (sectors <= 0) { + ret = sectors; goto out; } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(); + bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; @@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs = bmds_bs[i].bs; if (bmds) { - blk_insert_bs(bmds->blk, bs); + ret = blk_insert_bs(bmds->blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto out; + } alloc_aio_bitmap(bmds); error_setg(&bmds->blocker, "block device is in use by migration"); @@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f) } } + ret = 0; out: g_free(bmds_bs); + return ret; } /* Called with no lock taken. */ @@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque) block_mig_state.submitted, block_mig_state.transferred); qemu_mutex_lock_iothread(); - init_blk_migration(f); + ret = init_blk_migration(f); + if (ret < 0) { + qemu_mutex_unlock_iothread(); + return ret; + } /* start track dirty blocks */ ret = set_dirty_tracking(); diff --git a/migration/colo.c b/migration/colo.c index 712308ed5e..c19eb3f073 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -19,6 +19,8 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "migration/failover.h" +#include "replication.h" +#include "qmp-commands.h" static bool vmstate_loading; @@ -147,6 +149,53 @@ void colo_do_failover(MigrationState *s) } } +void qmp_xen_set_replication(bool enable, bool primary, + bool has_failover, bool failover, + Error **errp) +{ + ReplicationMode mode = primary ? + REPLICATION_MODE_PRIMARY : + REPLICATION_MODE_SECONDARY; + + if (has_failover && enable) { + error_setg(errp, "Parameter 'failover' is only for" + " stopping replication"); + return; + } + + if (enable) { + replication_start_all(mode, errp); + } else { + if (!has_failover) { + failover = NULL; + } + replication_stop_all(failover, failover ? NULL : errp); + } +} + +ReplicationStatus *qmp_query_xen_replication_status(Error **errp) +{ + Error *err = NULL; + ReplicationStatus *s = g_new0(ReplicationStatus, 1); + + replication_get_error_all(&err); + if (err) { + s->error = true; + s->has_desc = true; + s->desc = g_strdup(error_get_pretty(err)); + } else { + s->error = false; + } + + error_free(err); + return s; +} + +void qmp_xen_colo_do_checkpoint(Error **errp) +{ + replication_do_checkpoint_all(errp); +} + static void colo_send_message(QEMUFile *f, COLOMessage msg, Error **errp) { @@ -984,8 +984,10 @@ static void qmp_unregister_commands_hack(void) #ifndef TARGET_ARM qmp_unregister_command("query-gic-capabilities"); #endif -#if !defined(TARGET_S390X) +#if !defined(TARGET_S390X) && !defined(TARGET_I386) qmp_unregister_command("query-cpu-model-expansion"); +#endif +#if !defined(TARGET_S390X) qmp_unregister_command("query-cpu-model-baseline"); qmp_unregister_command("query-cpu-model-comparison"); #endif diff --git a/nbd/server.c b/nbd/server.c index ac92fa0727..924a1fe2db 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, { BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + uint64_t perm; + int ret; - blk = blk_new(); - blk_insert_bs(blk, bs); + /* Don't allow resize while the NBD server is running, otherwise we don't + * care what happens with the node. */ + perm = BLK_PERM_CONSISTENT_READ; + if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { + perm |= BLK_PERM_WRITE; + } + blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } blk_set_enable_write_cache(blk, !writethrough); exp->refcount = 1; diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin Binary files differindex 229b5af986..18666c9f2f 100644 --- a/pc-bios/bios-256k.bin +++ b/pc-bios/bios-256k.bin diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex 9a9b0f0106..a394411fe5 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex cf05bf0be2..2a4adfa654 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 611102e3ef..b21c877b53 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -724,11 +724,17 @@ static void zipl_load_vscsi(void) void zipl_load(void) { - if (virtio_get_device()->is_cdrom) { + VDev *vdev = virtio_get_device(); + + if (vdev->is_cdrom) { ipl_iso_el_torito(); panic("\n! Cannot IPL this ISO image !\n"); } + if (virtio_get_device_type() == VIRTIO_ID_NET) { + jump_to_IPL_code(vdev->netboot_start_addr); + } + ipl_scsi(); switch (virtio_get_device_type()) { diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h index 86abc56a90..890aed9ece 100644 --- a/pc-bios/s390-ccw/iplb.h +++ b/pc-bios/s390-ccw/iplb.h @@ -13,7 +13,8 @@ #define IPLB_H struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 345b848752..0946766d86 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no) if (!virtio_is_supported(blk_schid)) { continue; } + /* Skip net devices since no IPLB is created and therefore no + * no network bootloader has been loaded + */ + if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) { + continue; + } if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) { return true; } @@ -67,6 +73,7 @@ static void virtio_setup(void) int ssid; bool found = false; uint16_t dev_no; + VDev *vdev = virtio_get_device(); /* * We unconditionally enable mss support. In every sane configuration, @@ -85,9 +92,6 @@ static void virtio_setup(void) found = find_dev(&schib, dev_no); break; case S390_IPL_TYPE_QEMU_SCSI: - { - VDev *vdev = virtio_get_device(); - vdev->scsi_device_selected = true; vdev->selected_scsi_device.channel = iplb.scsi.channel; vdev->selected_scsi_device.target = iplb.scsi.target; @@ -95,7 +99,6 @@ static void virtio_setup(void) blk_schid.ssid = iplb.scsi.ssid & 0x3; found = find_dev(&schib, iplb.scsi.devno); break; - } default: panic("List-directed IPL not supported yet!\n"); } @@ -111,9 +114,14 @@ static void virtio_setup(void) IPL_assert(found, "No virtio device found"); - virtio_setup_device(blk_schid); + if (virtio_get_device_type() == VIRTIO_ID_NET) { + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = iplb.ccw.netboot_start_addr; + } else { + virtio_setup_device(blk_schid); - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } } int main(void) diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index b333734955..6ee93d56db 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid) switch (vdev.senseid.cu_model) { case VIRTIO_ID_BLOCK: case VIRTIO_ID_SCSI: + case VIRTIO_ID_NET: return true; } } diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index eb35ea5faf..3388a423e5 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -276,6 +276,7 @@ struct VDev { uint8_t scsi_dev_heads; bool scsi_device_selected; ScsiDevice selected_scsi_device; + uint64_t netboot_start_addr; }; typedef struct VDev VDev; diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin Binary files differindex 9dadce2345..e6c42bd3c3 100644 --- a/pc-bios/vgabios-cirrus.bin +++ b/pc-bios/vgabios-cirrus.bin diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin Binary files differindex a89725c81c..915eba7c81 100644 --- a/pc-bios/vgabios-qxl.bin +++ b/pc-bios/vgabios-qxl.bin diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin Binary files differindex ea041412a2..40eca8c6d1 100644 --- a/pc-bios/vgabios-stdvga.bin +++ b/pc-bios/vgabios-stdvga.bin diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin Binary files differindex 71e22fc868..8b3abfa003 100644 --- a/pc-bios/vgabios-virtio.bin +++ b/pc-bios/vgabios-virtio.bin diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin Binary files differindex ad239cbfe8..6a90b945bd 100644 --- a/pc-bios/vgabios-vmware.bin +++ b/pc-bios/vgabios-vmware.bin diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin Binary files differindex 9947c2c26f..d3ed89d94b 100644 --- a/pc-bios/vgabios.bin +++ b/pc-bios/vgabios.bin diff --git a/qapi-schema.json b/qapi-schema.json index 150ee98e9e..5eb1b8b628 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -4274,6 +4274,15 @@ # migration-safe, but allows tooling to get an insight and work with # model details. # +# Note: When a non-migration-safe CPU model is expanded in static mode, some +# features enabled by the CPU model may be omitted, because they can't be +# implemented by a static CPU model definition (e.g. cache info passthrough and +# PMU passthrough in x86). If you need an accurate representation of the +# features enabled by a non-migration-safe CPU model, use @full. If you need a +# static representation that will keep ABI compatibility even when changing QEMU +# version or machine-type, use @static (but keep in mind that some features may +# be omitted). +# # Since: 2.8.0 ## { 'enum': 'CpuModelExpansionType', @@ -5990,6 +5999,79 @@ { 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} } ## +# @xen-set-replication: +# +# Enable or disable replication. +# +# @enable: true to enable, false to disable. +# +# @primary: true for primary or false for secondary. +# +# @failover: #optional true to do failover, false to stop. but cannot be +# specified if 'enable' is true. default value is false. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-set-replication", +# "arguments": {"enable": true, "primary": false} } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-set-replication', + 'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' } } + +## +# @ReplicationStatus: +# +# The result format for 'query-xen-replication-status'. +# +# @error: true if an error happened, false if replication is normal. +# +# @desc: #optional the human readable error description string, when +# @error is 'true'. +# +# Since: 2.9 +## +{ 'struct': 'ReplicationStatus', + 'data': { 'error': 'bool', '*desc': 'str' } } + +## +# @query-xen-replication-status: +# +# Query replication status while the vm is running. +# +# Returns: A @ReplicationResult object showing the status. +# +# Example: +# +# -> { "execute": "query-xen-replication-status" } +# <- { "return": { "error": false } } +# +# Since: 2.9 +## +{ 'command': 'query-xen-replication-status', + 'returns': 'ReplicationStatus' } + +## +# @xen-colo-do-checkpoint: +# +# Xen uses this command to notify replication to trigger a checkpoint. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-colo-do-checkpoint" } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-colo-do-checkpoint' } + +## # @GICCapability: # # The struct describes capability for a specific GIC (Generic diff --git a/qapi/block-core.json b/qapi/block-core.json index cf24c04242..5cc992fb8f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1304,6 +1304,11 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the commit job inserts into the graph +# above @top. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: Nothing on success # If commit or stream is already active on this device, DeviceInUse # If @device does not exist, DeviceNotFound @@ -1323,7 +1328,8 @@ ## { 'command': 'block-commit', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int' } } + '*backing-file': 'str', '*speed': 'int', + '*filter-node-name': 'str' } } ## # @drive-backup: @@ -1671,6 +1677,11 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the mirror job inserts into the graph +# above @device. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: nothing on success. # # Since: 2.6 @@ -1690,7 +1701,8 @@ 'sync': 'MirrorSyncMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } + '*on-target-error': 'BlockdevOnError', + '*filter-node-name': 'str' } } ## # @block_set_io_throttle: diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index f054599a91..9c9702cc62 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -40,9 +40,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, diff --git a/qemu-img.c b/qemu-img.c index df3aefd35a..98b836b030 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void) " kinds of errors, with a higher risk of choosing the wrong fix or\n" " hiding corruption that has already occurred.\n" "\n" + "Parameters to convert subcommand:\n" + " '-m' specifies how many coroutines work in parallel during the convert\n" + " process (defaults to 8)\n" + " '-W' allow to write to the target out of order rather than sequential\n" + "\n" "Parameters to snapshot subcommand:\n" " 'snapshot' is the name of the snapshot to create, apply or delete\n" " '-a' applies a snapshot (revert disk to saved state)\n" @@ -809,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + /* FIXME In error cases, the job simply goes away and we access a dangling + * pointer below. */ aio_context_acquire(aio_context); do { aio_poll(aio_context, true); @@ -830,6 +837,7 @@ static int img_commit(int argc, char **argv) const char *filename, *fmt, *cache, *base; BlockBackend *blk; BlockDriverState *bs, *base_bs; + BlockJob *job; bool progress = false, quiet = false, drop = false; bool writethrough; Error *local_err = NULL; @@ -950,8 +958,8 @@ static int img_commit(int argc, char **argv) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, - BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi, - &local_err, false); + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, &local_err, false); aio_context_release(aio_context); if (local_err) { goto done; @@ -965,7 +973,8 @@ static int img_commit(int argc, char **argv) bdrv_ref(bs); } - run_block_job(bs->job, &local_err); + job = block_job_get("commit"); + run_block_job(job, &local_err); if (local_err) { goto unref_backing; } @@ -1462,48 +1471,61 @@ enum ImgConvertBlockStatus { BLK_BACKING_FILE, }; +#define MAX_COROUTINES 16 + typedef struct ImgConvertState { BlockBackend **src; int64_t *src_sectors; - int src_cur, src_num; - int64_t src_cur_offset; + int src_num; int64_t total_sectors; int64_t allocated_sectors; + int64_t allocated_done; + int64_t sector_num; + int64_t wr_offs; enum ImgConvertBlockStatus status; int64_t sector_next_status; BlockBackend *target; bool has_zero_init; bool compressed; bool target_has_backing; + bool wr_in_order; int min_sparse; size_t cluster_sectors; size_t buf_sectors; + int num_coroutines; + int running_coroutines; + Coroutine *co[MAX_COROUTINES]; + int64_t wait_sector_num[MAX_COROUTINES]; + CoMutex lock; + int ret; } ImgConvertState; -static void convert_select_part(ImgConvertState *s, int64_t sector_num) +static void convert_select_part(ImgConvertState *s, int64_t sector_num, + int *src_cur, int64_t *src_cur_offset) { - assert(sector_num >= s->src_cur_offset); - while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) { - s->src_cur_offset += s->src_sectors[s->src_cur]; - s->src_cur++; - assert(s->src_cur < s->src_num); + *src_cur = 0; + *src_cur_offset = 0; + while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) { + *src_cur_offset += s->src_sectors[*src_cur]; + (*src_cur)++; + assert(*src_cur < s->src_num); } } static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) { - int64_t ret; - int n; + int64_t ret, src_cur_offset; + int n, src_cur; - convert_select_part(s, sector_num); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); assert(s->total_sectors > sector_num); n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); if (s->sector_next_status <= sector_num) { BlockDriverState *file; - ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]), - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status(blk_bs(s->src[src_cur]), + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1519,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) /* Check block status of the backing file chain to avoid * needlessly reading zeroes and limiting the iteration to the * buffer size */ - ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL, - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1558,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) return n; } -static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, - uint8_t *buf) +static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf) { - int n; - int ret; + int n, ret; + QEMUIOVector qiov; + struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { BlockBackend *blk; - int64_t bs_sectors; + int src_cur; + int64_t bs_sectors, src_cur_offset; /* In the case of compression with multiple source files, we can get a * nb_sectors that spreads into the next part. So we must be able to * read across multiple BDSes for one convert_read() call. */ - convert_select_part(s, sector_num); - blk = s->src[s->src_cur]; - bs_sectors = s->src_sectors[s->src_cur]; - - n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset)); - ret = blk_pread(blk, - (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); + blk = s->src[src_cur]; + bs_sectors = s->src_sectors[src_cur]; + + n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_preadv( + blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1592,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, - const uint8_t *buf) + +static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf, + enum ImgConvertBlockStatus status) { int ret; + QEMUIOVector qiov; + struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; - - switch (s->status) { + switch (status) { case BLK_BACKING_FILE: /* If we have a backing file, leave clusters unallocated that are * unallocated in the source image, so that the backing file is @@ -1621,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, break; } - ret = blk_pwrite_compressed(s->target, - sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, + BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { return ret; } @@ -1636,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (!s->min_sparse || is_allocated_sectors_min(buf, n, &n, s->min_sparse)) { - ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS, 0); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1649,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (s->has_zero_init) { break; } - ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, 0); + ret = blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } @@ -1665,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_do_copy(ImgConvertState *s) +static void coroutine_fn convert_co_do_copy(void *opaque) { + ImgConvertState *s = opaque; uint8_t *buf = NULL; - int64_t sector_num, allocated_done; - int ret; - int n; + int ret, i; + int index = -1; + + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] == qemu_coroutine_self()) { + index = i; + break; + } + } + assert(index >= 0); + + s->running_coroutines++; + buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); + + while (1) { + int n; + int64_t sector_num; + enum ImgConvertBlockStatus status; + + qemu_co_mutex_lock(&s->lock); + if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { + qemu_co_mutex_unlock(&s->lock); + goto out; + } + n = convert_iteration_sectors(s, s->sector_num); + if (n < 0) { + qemu_co_mutex_unlock(&s->lock); + s->ret = n; + goto out; + } + /* save current sector and allocation status to local variables */ + sector_num = s->sector_num; + status = s->status; + if (!s->min_sparse && s->status == BLK_ZERO) { + n = MIN(n, s->buf_sectors); + } + /* increment global sector counter so that other coroutines can + * already continue reading beyond this request */ + s->sector_num += n; + qemu_co_mutex_unlock(&s->lock); + + if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) { + s->allocated_done += n; + qemu_progress_print(100.0 * s->allocated_done / + s->allocated_sectors, 0); + } + + if (status == BLK_DATA) { + ret = convert_co_read(s, sector_num, n, buf); + if (ret < 0) { + error_report("error while reading sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + } else if (!s->min_sparse && status == BLK_ZERO) { + status = BLK_DATA; + memset(buf, 0x00, n * BDRV_SECTOR_SIZE); + } + + if (s->wr_in_order) { + /* keep writes in order */ + while (s->wr_offs != sector_num) { + if (s->ret != -EINPROGRESS) { + goto out; + } + s->wait_sector_num[index] = sector_num; + qemu_coroutine_yield(); + } + s->wait_sector_num[index] = -1; + } + + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + + if (s->wr_in_order) { + /* reenter the coroutine that might have waited + * for this write to complete */ + s->wr_offs = sector_num + n; + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) { + /* + * A -> B -> A cannot occur because A has + * s->wait_sector_num[i] == -1 during A -> B. Therefore + * B will never enter A during this time window. + */ + qemu_coroutine_enter(s->co[i]); + break; + } + } + } + } + +out: + qemu_vfree(buf); + s->co[index] = NULL; + s->running_coroutines--; + if (!s->running_coroutines && s->ret == -EINPROGRESS) { + /* the convert job finished successfully */ + s->ret = 0; + } +} + +static int convert_do_copy(ImgConvertState *s) +{ + int ret, i, n; + int64_t sector_num = 0; /* Check whether we have zero initialisation or can get it efficiently */ s->has_zero_init = s->min_sparse && !s->target_has_backing @@ -1691,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s) if (s->compressed) { if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) { error_report("invalid cluster size"); - ret = -EINVAL; - goto fail; + return -EINVAL; } s->buf_sectors = s->cluster_sectors; } - buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); - /* Calculate allocated sectors for progress */ - s->allocated_sectors = 0; - sector_num = 0; while (sector_num < s->total_sectors) { n = convert_iteration_sectors(s, sector_num); if (n < 0) { - ret = n; - goto fail; + return n; } if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) { @@ -1715,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s) } /* Do the copy */ - s->src_cur = 0; - s->src_cur_offset = 0; s->sector_next_status = 0; + s->ret = -EINPROGRESS; - sector_num = 0; - allocated_done = 0; - - while (sector_num < s->total_sectors) { - n = convert_iteration_sectors(s, sector_num); - if (n < 0) { - ret = n; - goto fail; - } - if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) - { - allocated_done += n; - qemu_progress_print(100.0 * allocated_done / s->allocated_sectors, - 0); - } - - if (s->status == BLK_DATA) { - ret = convert_read(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while reading sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } - } else if (!s->min_sparse && s->status == BLK_ZERO) { - n = MIN(n, s->buf_sectors); - memset(buf, 0, n * BDRV_SECTOR_SIZE); - s->status = BLK_DATA; - } - - ret = convert_write(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } + qemu_co_mutex_init(&s->lock); + for (i = 0; i < s->num_coroutines; i++) { + s->co[i] = qemu_coroutine_create(convert_co_do_copy, s); + s->wait_sector_num[i] = -1; + qemu_coroutine_enter(s->co[i]); + } - sector_num += n; + while (s->ret == -EINPROGRESS) { + main_loop_wait(false); } - if (s->compressed) { + if (s->compressed && !s->ret) { /* signal EOF to align */ ret = blk_pwrite_compressed(s->target, 0, NULL, 0); if (ret < 0) { - goto fail; + return ret; } } - ret = 0; -fail: - qemu_vfree(buf); - return ret; + return s->ret; } static int img_convert(int argc, char **argv) @@ -1797,6 +1909,8 @@ static int img_convert(int argc, char **argv) QemuOpts *sn_opts = NULL; ImgConvertState state; bool image_opts = false; + bool wr_in_order = true; + long num_coroutines = 8; fmt = NULL; out_fmt = "raw"; @@ -1812,7 +1926,7 @@ static int img_convert(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn", + c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W", long_options, NULL); if (c == -1) { break; @@ -1904,6 +2018,18 @@ static int img_convert(int argc, char **argv) case 'n': skip_create = 1; break; + case 'm': + if (qemu_strtol(optarg, NULL, 0, &num_coroutines) || + num_coroutines < 1 || num_coroutines > MAX_COROUTINES) { + error_report("Invalid number of coroutines. Allowed number of" + " coroutines is between 1 and %d", MAX_COROUTINES); + ret = -1; + goto fail_getopt; + } + break; + case 'W': + wr_in_order = false; + break; case OPTION_OBJECT: opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); @@ -1923,6 +2049,12 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (!wr_in_order && compress) { + error_report("Out of order write and compress are mutually exclusive"); + ret = -1; + goto fail_getopt; + } + /* Initialize before goto out */ if (quiet) { progress = 0; @@ -2163,6 +2295,8 @@ static int img_convert(int argc, char **argv) .min_sparse = min_sparse, .cluster_sectors = cluster_sectors, .buf_sectors = bufsectors, + .wr_in_order = wr_in_order, + .num_coroutines = num_coroutines, }; ret = convert_do_copy(&state); @@ -3289,7 +3423,7 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); if (!blk) { ret = -1; goto out; diff --git a/qemu-img.texi b/qemu-img.texi index 174aae38b7..c81db3e81c 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -137,6 +137,12 @@ Parameters to convert subcommand: @item -n Skip the creation of the target volume +@item -m +Number of parallel coroutines for the convert process +@item -W +Allow out-of-order writes to the destination. This option improves performance, +but is only recommended for preallocated devices like host devices or other +raw block devices. @end table Parameters to dd subcommand: @@ -296,7 +302,7 @@ Error on reading data @end table -@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated) to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot be supplied through qemu-img. +Out of order writes can be enabled with @code{-W} to improve performance. +This is only recommended for preallocated devices like host devices or other +raw block devices. Out of order write does not work in combination with +creating compressed images. + +@var{num_coroutines} specifies how many coroutines work in parallel during +the convert process (defaults to 8). + @item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} Dd copies from @var{input} file to @var{output} file converting it from diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 7ac1576d4c..2c48f9ce1a 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, } return 0; } + + /* Request additional permissions if necessary for this command. The caller + * is responsible for restoring the original permissions afterwards if this + * is what it wants. */ + if (ct->perm && blk_is_available(blk)) { + uint64_t orig_perm, orig_shared_perm; + blk_get_perm(blk, &orig_perm, &orig_shared_perm); + + if (ct->perm & ~orig_perm) { + uint64_t new_perm; + Error *local_err = NULL; + int ret; + + new_perm = orig_perm | ct->perm; + + ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err); + if (ret < 0) { + error_report_err(local_err); + return 0; + } + } + } + optind = 0; return ct->cfunc(blk, argc, argv); } @@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = { .name = "write", .altname = "w", .cfunc = write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-bcCfquz] [-P pattern] off len", @@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t writev_cmd = { .name = "writev", .cfunc = writev_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfq] [-P pattern] off len [len..]", @@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_write_cmd = { .name = "aio_write", .cfunc = aio_write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfiquz] [-P pattern] off len [len..]", @@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = { .name = "truncate", .altname = "t", .cfunc = truncate_f, + .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE, .argmin = 1, .argmax = 1, .args = "off", @@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = { .name = "discard", .altname = "d", .cfunc = discard_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cq] off len", diff --git a/roms/seabios b/roms/seabios -Subproject 8891697e3f7d84355420573efd98e94f1473676 +Subproject 5f4c7b13cdf9c450eb55645f4362ea58fa61b79 diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 72cf1fbf0a..6a370a8669 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -75,7 +75,13 @@ for arch in $ARCHLIST; do continue fi - make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install + if [ "$arch" = x86 ]; then + arch_var=SRCARCH + else + arch_var=ARCH + fi + + make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" @@ -92,6 +98,11 @@ for arch in $ARCHLIST; do cp_portable "$tmpdir/include/asm/kvm_virtio.h" "$output/include/standard-headers/asm-s390/" cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" fi + if [ $arch = arm ]; then + cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi if [ $arch = x86 ]; then cp_portable "$tmpdir/include/asm/hyperv.h" "$output/include/standard-headers/asm-x86/" cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 9e7b2dfc83..25ceaabb5d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -521,6 +521,8 @@ typedef struct CPUARMState { void *nvic; const struct arm_boot_info *boot_info; + /* Store GICv3CPUState to access from this struct */ + void *gicv3state; } CPUARMState; /** diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h index 8cd607e9a2..c2205e6077 100644 --- a/target/i386/cpu-qom.h +++ b/target/i386/cpu-qom.h @@ -48,7 +48,9 @@ typedef struct X86CPUDefinition X86CPUDefinition; * X86CPUClass: * @cpu_def: CPU model definition * @kvm_required: Whether CPU model requires KVM to be enabled. + * @ordering: Ordering on the "-cpu help" CPU model list. * @migration_safe: See CpuDefinitionInfo::migration_safe + * @static_model: See CpuDefinitionInfo::static * @parent_realize: The parent class' realize handler. * @parent_reset: The parent class' reset handler. * @@ -59,11 +61,15 @@ typedef struct X86CPUClass { CPUClass parent_class; /*< public >*/ - /* Should be eventually replaced by subclass-specific property defaults. */ + /* CPU definition, automatically loaded by instance_init if not NULL. + * Should be eventually replaced by subclass-specific property defaults. + */ X86CPUDefinition *cpu_def; bool kvm_required; + int ordering; bool migration_safe; + bool static_model; /* Optional description of CPU model. * If unavailable, cpu_def->model_id is used */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index b6f157dca3..89421c893b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -29,10 +29,16 @@ #include "qemu/option.h" #include "qemu/config-file.h" #include "qapi/qmp/qerror.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qbool.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qfloat.h" #include "qapi-types.h" #include "qapi-visit.h" #include "qapi/visitor.h" +#include "qom/qom-qobject.h" #include "sysemu/arch_init.h" #if defined(CONFIG_KVM) @@ -1503,15 +1509,15 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only); -#ifdef CONFIG_KVM - static bool lmce_supported(void) { - uint64_t mce_cap; + uint64_t mce_cap = 0; +#ifdef CONFIG_KVM if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) { return false; } +#endif return !!(mce_cap & MCG_LMCE_P); } @@ -1531,51 +1537,28 @@ static int cpu_x86_fill_model_id(char *str) return 0; } -static X86CPUDefinition host_cpudef; - -static Property host_x86_cpu_properties[] = { +static Property max_x86_cpu_properties[] = { DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true), DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false), DEFINE_PROP_END_OF_LIST() }; -/* class_init for the "host" CPU model - * - * This function may be called before KVM is initialized. - */ -static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +static void max_x86_cpu_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); X86CPUClass *xcc = X86_CPU_CLASS(oc); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - - xcc->kvm_required = true; - - host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); - - host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); - host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); - host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); - host_cpudef.stepping = eax & 0x0F; - cpu_x86_fill_model_id(host_cpudef.model_id); + xcc->ordering = 9; - xcc->cpu_def = &host_cpudef; xcc->model_description = - "KVM processor with all supported host features " - "(only available in KVM mode)"; - - /* level, xlevel, xlevel2, and the feature words are initialized on - * instance_init, because they require KVM to be initialized. - */ + "Enables all features supported by the accelerator in the current host"; - dc->props = host_x86_cpu_properties; - /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */ - dc->cannot_destroy_with_object_finalize_yet = true; + dc->props = max_x86_cpu_properties; } -static void host_x86_cpu_initfn(Object *obj) +static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp); + +static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); CPUX86State *env = &cpu->env; @@ -1584,10 +1567,24 @@ static void host_x86_cpu_initfn(Object *obj) /* We can't fill the features array here because we don't know yet if * "migratable" is true or false. */ - cpu->host_features = true; + cpu->max_features = true; - /* If KVM is disabled, x86_cpu_realizefn() will report an error later */ if (kvm_enabled()) { + X86CPUDefinition host_cpudef = { }; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + + host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); + + host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); + host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + host_cpudef.stepping = eax & 0x0F; + + cpu_x86_fill_model_id(host_cpudef.model_id); + + x86_cpu_load_def(cpu, &host_cpudef, &error_abort); + env->cpuid_min_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); env->cpuid_min_xlevel = @@ -1598,15 +1595,44 @@ static void host_x86_cpu_initfn(Object *obj) if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort); } + } else { + object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD, + "vendor", &error_abort); + object_property_set_int(OBJECT(cpu), 6, "family", &error_abort); + object_property_set_int(OBJECT(cpu), 6, "model", &error_abort); + object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort); + object_property_set_str(OBJECT(cpu), + "QEMU TCG CPU version " QEMU_HW_VERSION, + "model-id", &error_abort); } object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort); } +static const TypeInfo max_x86_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("max"), + .parent = TYPE_X86_CPU, + .instance_init = max_x86_cpu_initfn, + .class_init = max_x86_cpu_class_init, +}; + +#ifdef CONFIG_KVM + +static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->kvm_required = true; + xcc->ordering = 8; + + xcc->model_description = + "KVM processor with all supported host features " + "(only available in KVM mode)"; +} + static const TypeInfo host_x86_cpu_type_info = { .name = X86_CPU_TYPE_NAME("host"), - .parent = TYPE_X86_CPU, - .instance_init = host_x86_cpu_initfn, + .parent = X86_CPU_TYPE_NAME("max"), .class_init = host_x86_cpu_class_init, }; @@ -2060,7 +2086,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_load_features(X86CPU *cpu, Error **errp); +static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); static int x86_cpu_filter_features(X86CPU *cpu); /* Check for missing features that may prevent the CPU class from @@ -2083,9 +2109,9 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); - x86_cpu_load_features(xc, &err); + x86_cpu_expand_features(xc, &err); if (err) { - /* Errors at x86_cpu_load_features should never happen, + /* Errors at x86_cpu_expand_features should never happen, * but in case it does, just report the model as not * runnable at all using the "type" property. */ @@ -2128,7 +2154,7 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset) } } -/* Sort alphabetically by type name, listing kvm_required models last. */ +/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) { ObjectClass *class_a = (ObjectClass *)a; @@ -2137,9 +2163,8 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) X86CPUClass *cc_b = X86_CPU_CLASS(class_b); const char *name_a, *name_b; - if (cc_a->kvm_required != cc_b->kvm_required) { - /* kvm_required items go last */ - return cc_a->kvm_required ? 1 : -1; + if (cc_a->ordering != cc_b->ordering) { + return cc_a->ordering - cc_b->ordering; } else { name_a = object_class_get_name(class_a); name_b = object_class_get_name(class_b); @@ -2161,7 +2186,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data) CPUListState *s = user_data; char *name = x86_cpu_class_get_model_name(cc); const char *desc = cc->model_description; - if (!desc) { + if (!desc && cc->cpu_def) { desc = cc->cpu_def->model_id; } @@ -2210,6 +2235,7 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data) info->q_typename = g_strdup(object_class_get_name(oc)); info->migration_safe = cc->migration_safe; info->has_migration_safe = true; + info->q_static = cc->static_model; entry = g_malloc0(sizeof(*entry)); entry->value = info; @@ -2247,31 +2273,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, return r; } -/* - * Filters CPU feature words based on host availability of each feature. - * - * Returns: 0 if all flags are supported by the host, non-zero otherwise. - */ -static int x86_cpu_filter_features(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - FeatureWord w; - int rv = 0; - - for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t host_feat = - x86_cpu_get_supported_feature_word(w, false); - uint32_t requested_features = env->features[w]; - env->features[w] &= host_feat; - cpu->filtered_features[w] = requested_features & ~env->features[w]; - if (cpu->filtered_features[w]) { - rv = 1; - } - } - - return rv; -} - static void x86_cpu_report_filtered_features(X86CPU *cpu) { FeatureWord w; @@ -2293,7 +2294,7 @@ static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) } } -/* Load data from X86CPUDefinition +/* Load data from X86CPUDefinition into a X86CPU object */ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) { @@ -2302,6 +2303,11 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) char host_vendor[CPUID_VENDOR_SZ + 1]; FeatureWord w; + /*NOTE: any property set by this function should be returned by + * x86_cpu_static_props(), so static expansion of + * query-cpu-model-expansion is always complete. + */ + /* CPU models only set _minimum_ values for level/xlevel: */ object_property_set_int(OBJECT(cpu), def->level, "min-level", errp); object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp); @@ -2346,6 +2352,212 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) } +/* Return a QDict containing keys for all properties that can be included + * in static expansion of CPU models. All properties set by x86_cpu_load_def() + * must be included in the dictionary. + */ +static QDict *x86_cpu_static_props(void) +{ + FeatureWord w; + int i; + static const char *props[] = { + "min-level", + "min-xlevel", + "family", + "model", + "stepping", + "model-id", + "vendor", + "lmce", + NULL, + }; + static QDict *d; + + if (d) { + return d; + } + + d = qdict_new(); + for (i = 0; props[i]; i++) { + qdict_put_obj(d, props[i], qnull()); + } + + for (w = 0; w < FEATURE_WORDS; w++) { + FeatureWordInfo *fi = &feature_word_info[w]; + int bit; + for (bit = 0; bit < 32; bit++) { + if (!fi->feat_names[bit]) { + continue; + } + qdict_put_obj(d, fi->feat_names[bit], qnull()); + } + } + + return d; +} + +/* Add an entry to @props dict, with the value for property. */ +static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop) +{ + QObject *value = object_property_get_qobject(OBJECT(cpu), prop, + &error_abort); + + qdict_put_obj(props, prop, value); +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model. + */ +static void x86_cpu_to_dict(X86CPU *cpu, QDict *props) +{ + QDict *sprops = x86_cpu_static_props(); + const QDictEntry *e; + + for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) { + const char *prop = qdict_entry_key(e); + x86_cpu_expand_prop(cpu, props, prop); + } +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model, including every + * writeable QOM property. + */ +static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props) +{ + ObjectPropertyIterator iter; + ObjectProperty *prop; + + object_property_iter_init(&iter, OBJECT(cpu)); + while ((prop = object_property_iter_next(&iter))) { + /* skip read-only or write-only properties */ + if (!prop->get || !prop->set) { + continue; + } + + /* "hotplugged" is the only property that is configurable + * on the command-line but will be set differently on CPUs + * created using "-cpu ... -smp ..." and by CPUs created + * on the fly by x86_cpu_from_model() for querying. Skip it. + */ + if (!strcmp(prop->name, "hotplugged")) { + continue; + } + x86_cpu_expand_prop(cpu, props, prop->name); + } +} + +static void object_apply_props(Object *obj, QDict *props, Error **errp) +{ + const QDictEntry *prop; + Error *err = NULL; + + for (prop = qdict_first(props); prop; prop = qdict_next(props, prop)) { + object_property_set_qobject(obj, qdict_entry_value(prop), + qdict_entry_key(prop), &err); + if (err) { + break; + } + } + + error_propagate(errp, err); +} + +/* Create X86CPU object according to model+props specification */ +static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp) +{ + X86CPU *xc = NULL; + X86CPUClass *xcc; + Error *err = NULL; + + xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model)); + if (xcc == NULL) { + error_setg(&err, "CPU model '%s' not found", model); + goto out; + } + + xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); + if (props) { + object_apply_props(OBJECT(xc), props, &err); + if (err) { + goto out; + } + } + + x86_cpu_expand_features(xc, &err); + if (err) { + goto out; + } + +out: + if (err) { + error_propagate(errp, err); + object_unref(OBJECT(xc)); + xc = NULL; + } + return xc; +} + +CpuModelExpansionInfo * +arch_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + X86CPU *xc = NULL; + Error *err = NULL; + CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1); + QDict *props = NULL; + const char *base_name; + + xc = x86_cpu_from_model(model->name, + model->has_props ? + qobject_to_qdict(model->props) : + NULL, &err); + if (err) { + goto out; + } + + props = qdict_new(); + + switch (type) { + case CPU_MODEL_EXPANSION_TYPE_STATIC: + /* Static expansion will be based on "base" only */ + base_name = "base"; + x86_cpu_to_dict(xc, props); + break; + case CPU_MODEL_EXPANSION_TYPE_FULL: + /* As we don't return every single property, full expansion needs + * to keep the original model name+props, and add extra + * properties on top of that. + */ + base_name = model->name; + x86_cpu_to_dict_full(xc, props); + break; + default: + error_setg(&err, "Unsupportted expansion type"); + goto out; + } + + if (!props) { + props = qdict_new(); + } + x86_cpu_to_dict(xc, props); + + ret->model = g_new0(CpuModelInfo, 1); + ret->model->name = g_strdup(base_name); + ret->model->props = QOBJECT(props); + ret->model->has_props = true; + +out: + object_unref(OBJECT(xc)); + if (err) { + error_propagate(errp, err); + qapi_free_CpuModelExpansionInfo(ret); + ret = NULL; + } + return ret; +} + X86CPU *cpu_x86_init(const char *cpu_model) { return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model)); @@ -3095,20 +3307,59 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; } -/* Load CPUID data based on configured features */ -static void x86_cpu_load_features(X86CPU *cpu, Error **errp) +/***** Steps involved on loading and filtering CPUID data + * + * When initializing and realizing a CPU object, the steps + * involved in setting up CPUID data are: + * + * 1) Loading CPU model definition (X86CPUDefinition). This is + * implemented by x86_cpu_load_def() and should be completely + * transparent, as it is done automatically by instance_init. + * No code should need to look at X86CPUDefinition structs + * outside instance_init. + * + * 2) CPU expansion. This is done by realize before CPUID + * filtering, and will make sure host/accelerator data is + * loaded for CPU models that depend on host capabilities + * (e.g. "host"). Done by x86_cpu_expand_features(). + * + * 3) CPUID filtering. This initializes extra data related to + * CPUID, and checks if the host supports all capabilities + * required by the CPU. Runnability of a CPU model is + * determined at this step. Done by x86_cpu_filter_features(). + * + * Some operations don't require all steps to be performed. + * More precisely: + * + * - CPU instance creation (instance_init) will run only CPU + * model loading. CPU expansion can't run at instance_init-time + * because host/accelerator data may be not available yet. + * - CPU realization will perform both CPU model expansion and CPUID + * filtering, and return an error in case one of them fails. + * - query-cpu-definitions needs to run all 3 steps. It needs + * to run CPUID filtering, as the 'unavailable-features' + * field is set based on the filtering results. + * - The query-cpu-model-expansion QMP command only needs to run + * CPU model loading and CPU expansion. It should not filter + * any CPUID data based on host capabilities. + */ + +/* Expand CPU configuration data, based on configured features + * and host/accelerator capabilities when appropriate. + */ +static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { CPUX86State *env = &cpu->env; FeatureWord w; GList *l; Error *local_err = NULL; - /*TODO: cpu->host_features incorrectly overwrites features + /*TODO: cpu->max_features incorrectly overwrites features * set using "feat=on|off". Once we fix this, we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->host_features) { + if (cpu->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { env->features[w] = x86_cpu_get_supported_feature_word(w, cpu->migratable); @@ -3173,6 +3424,32 @@ out: } } +/* + * Finishes initialization of CPUID data, filters CPU feature + * words based on host availability of each feature. + * + * Returns: 0 if all flags are supported by the host, non-zero otherwise. + */ +static int x86_cpu_filter_features(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + FeatureWord w; + int rv = 0; + + for (w = 0; w < FEATURE_WORDS; w++) { + uint32_t host_feat = + x86_cpu_get_supported_feature_word(w, false); + uint32_t requested_features = env->features[w]; + env->features[w] &= host_feat; + cpu->filtered_features[w] = requested_features & ~env->features[w]; + if (cpu->filtered_features[w]) { + rv = 1; + } + } + + return rv; +} + #define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3) @@ -3200,7 +3477,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) return; } - x86_cpu_load_features(cpu, &local_err); + x86_cpu_expand_features(cpu, &local_err); if (local_err) { goto out; } @@ -3619,7 +3896,9 @@ static void x86_cpu_initfn(Object *obj) object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort); object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort); - x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + if (xcc->cpu_def) { + x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + } } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -3774,6 +4053,24 @@ static const TypeInfo x86_cpu_type_info = { .class_init = x86_cpu_common_class_init, }; + +/* "base" CPU model, used by query-cpu-model-expansion */ +static void x86_cpu_base_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->static_model = true; + xcc->migration_safe = true; + xcc->model_description = "base CPU model type with no features enabled"; + xcc->ordering = 8; +} + +static const TypeInfo x86_base_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("base"), + .parent = TYPE_X86_CPU, + .class_init = x86_cpu_base_class_init, +}; + static void x86_cpu_register_types(void) { int i; @@ -3782,6 +4079,8 @@ static void x86_cpu_register_types(void) for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { x86_register_cpudef_type(&builtin_x86_defs[i]); } + type_register_static(&max_x86_cpu_type_info); + type_register_static(&x86_base_cpu_type_info); #ifdef CONFIG_KVM type_register_static(&host_x86_cpu_type_info); #endif diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 573f2aa988..12a39d590f 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1211,7 +1211,7 @@ struct X86CPU { bool enforce_cpuid; bool expose_kvm; bool migratable; - bool host_features; + bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 6d227a5a6a..290de6dae6 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) } } -static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, +static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, TCGArg b, bool b_const, TCGLabel *l) { intptr_t offset; @@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, } } -static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, +static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, tcg_target_long bl, tcg_target_long bh, bool const_bl, bool const_bh, bool sub) diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker new file mode 100644 index 0000000000..bbb21ed088 --- /dev/null +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -0,0 +1,22 @@ +# +# Docker s390 cross-compiler target +# +# This docker target is based on stretch (testing) as the stable build +# doesn't have the cross compiler available. +# +FROM debian:testing-slim + +# Duplicate deb line as deb-src +RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list + +# Add the s390x architecture +RUN dpkg --add-architecture s390x + +# Grab the updated list of packages +RUN apt update +RUN apt dist-upgrade -yy +RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install +RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu- diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 4673b67f37..34e66db691 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1024' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1k' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte @@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a size -You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index e206ad6c29..c6f4eef215 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -179,7 +179,7 @@ q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: Can't use a read-only drive +(qemu) QEMU_PROG: Block node is read-only QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on @@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 1d3fd04b65..aafcd249f6 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") self.vm.launch() def tearDown(self): @@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase): qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt) + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 08e4bb7218..182acb42cf 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === Invalid command - snapshot node used as backing hd === -{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}} +{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} === Invalid command - snapshot node has a backing image === diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 index 3ba79f027a..6d8f0a1a84 100755 --- a/tests/qemu-iotests/141 +++ b/tests/qemu-iotests/141 @@ -67,7 +67,7 @@ test_blockjob() _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'x-blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ - 'error' + 'error' | _filter_generated_node_ids _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'block-job-cancel', diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 195ca1a604..82e763b68d 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} @@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 6b7edaf28f..54b53293d7 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -28,6 +28,7 @@ Testing: opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 @@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 @@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 @@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 @@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 @@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 @@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 @@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 @@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 @@ -964,6 +1009,7 @@ Testing: -device floppy opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -device floppy,drive-type=120 @@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -device floppy,drive-type=144 @@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -device floppy,drive-type=288 @@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288 @@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512 @@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index f6dfd08746..4ccbda14af 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations, g_assert_nonnull(bs); snprintf(job_id, sizeof(job_id), "job%u", counter++); - s = block_job_create(job_id, &test_block_job_driver, bs, 0, - BLOCK_JOB_DEFAULT, test_block_job_cb, - data, &error_abort); + s = block_job_create(job_id, &test_block_job_driver, bs, + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, + test_block_job_cb, data, &error_abort); s->iterations = iterations; s->use_timer = use_timer; s->rc = rc; diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 068c9e419b..740e740398 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, BlockJob *job; Error *errp = NULL; - job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, - BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); + job = block_job_create(id, &test_block_job_driver, blk_bs(blk), + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb, + NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); @@ -53,13 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, * BlockDriverState inserted. */ static BlockBackend *create_blk(const char *name) { - BlockBackend *blk = blk_new(); + /* No I/O is performed on this device */ + BlockBackend *blk = blk_new(0, BLK_PERM_ALL); BlockDriverState *bs; bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); g_assert_nonnull(bs); - blk_insert_bs(blk, bs); + blk_insert_bs(blk, bs, &error_abort); bdrv_unref(bs); if (name) { diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 363b59a38f..bd7c501b2e 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -593,9 +593,10 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - blk1 = blk_new(); - blk2 = blk_new(); - blk3 = blk_new(); + /* No actual I/O is performed on these devices */ + blk1 = blk_new(0, BLK_PERM_ALL); + blk2 = blk_new(0, BLK_PERM_ALL); + blk3 = blk_new(0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); diff --git a/util/qemu-option.c b/util/qemu-option.c index 419f2528b8..5ce1b5c246 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value, err = qemu_strtosz(value, NULL, &size); if (err == -ERANGE) { - error_setg(errp, "Value '%s' is too large for parameter '%s'", + error_setg(errp, "Value '%s' is out of range for parameter '%s'", value, name); return; } |