diff options
199 files changed, 8435 insertions, 2250 deletions
diff --git a/.shippable.yml b/.shippable.yml index 1a1fd7a91d..653bd750fe 100644 --- a/.shippable.yml +++ b/.shippable.yml @@ -5,6 +5,8 @@ env: TARGET_LIST=arm-softmmu,arm-linux-user - IMAGE=debian-arm64-cross TARGET_LIST=aarch64-softmmu,aarch64-linux-user + - IMAGE=debian-s390x-cross + TARGET_LIST=s390x-softmmu,s390x-linux-user build: pre_ci: - make docker-image-${IMAGE} diff --git a/CODING_STYLE b/CODING_STYLE index f53180bf3f..2fa0c0b65b 100644 --- a/CODING_STYLE +++ b/CODING_STYLE @@ -116,3 +116,10 @@ if (a == 1) { Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read. Besides, good compilers already warn users when '==' is mis-typed as '=', even when the constant is on the right. + +7. Comment style + +We use traditional C-style /* */ comments and avoid // comments. + +Rationale: The // form is valid in C99, so this is purely a matter of +consistency of style. The checkpatch script will warn you about this. @@ -707,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) return 0; } +static char *bdrv_child_get_parent_desc(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + return g_strdup(bdrv_get_device_or_node_name(parent)); +} + static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -774,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -794,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, }; +static void bdrv_backing_attach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + BlockDriverState *backing_hd = c->bs; + + assert(!parent->backing_blocker); + error_setg(&parent->backing_blocker, + "node is used as backing hd of '%s'", + bdrv_get_device_or_node_name(parent)); + + parent->open_flags &= ~BDRV_O_NO_BACKING; + pstrcpy(parent->backing_file, sizeof(parent->backing_file), + backing_hd->filename); + pstrcpy(parent->backing_format, sizeof(parent->backing_format), + backing_hd->drv ? backing_hd->drv->format_name : ""); + + bdrv_op_block_all(backing_hd, parent->backing_blocker); + /* Otherwise we won't be able to commit or stream */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + parent->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. + */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + parent->backing_blocker); +} + +static void bdrv_backing_detach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + + assert(parent->backing_blocker); + bdrv_op_unblock_all(c->bs, parent->backing_blocker); + error_free(parent->backing_blocker); + parent->backing_blocker = NULL; +} + /* * Returns the options and flags that bs->backing should get, based on the * given options and flags for the parent BDS @@ -823,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, *child_flags = flags; } -static const BdrvChildRole child_backing = { +const BdrvChildRole child_backing = { + .get_parent_desc = bdrv_child_get_parent_desc, + .attach = bdrv_backing_attach, + .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -1326,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +/* + * Check whether permissions on this node can be changed in a way that + * @cumulative_perms and @cumulative_shared_perms are the new cumulative + * permissions of all its parents. This involves checking whether all necessary + * permission changes to child nodes can be performed. + * + * A call to this function must always be followed by a call to bdrv_set_perm() + * or bdrv_abort_perm_update(). + */ +static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + int ret; + + /* Write permissions never work with read-only images */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + bdrv_is_read_only(bs)) + { + error_setg(errp, "Block node is read-only"); + return -EPERM; + } + + /* Check this node */ + if (!drv) { + return 0; + } + + if (drv->bdrv_check_perm) { + return drv->bdrv_check_perm(bs, cumulative_perms, + cumulative_shared_perms, errp); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return 0; + } + + /* Check all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * Notifies drivers that after a previous bdrv_check_perm() call, the + * permission update is not performed and any preparations made for it (e.g. + * taken file locks) need to be undone. + * + * This function recursively notifies all child nodes. + */ +static void bdrv_abort_perm_update(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + if (drv->bdrv_abort_perm_update) { + drv->bdrv_abort_perm_update(bs); + } + + QLIST_FOREACH(c, &bs->children, next) { + bdrv_child_abort_perm_update(c); + } +} + +static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + /* Update this node */ + if (drv->bdrv_set_perm) { + drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return; + } + + /* Update all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + bdrv_child_set_perm(c, cur_perm, cur_shared); + } +} + +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm) +{ + BdrvChild *c; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + *perm = cumulative_perms; + *shared_perm = cumulative_shared_perms; +} + +static char *bdrv_child_user_desc(BdrvChild *c) +{ + if (c->role->get_parent_desc) { + return c->role->get_parent_desc(c); + } + + return g_strdup("another user"); +} + +static char *bdrv_perm_names(uint64_t perm) +{ + struct perm_name { + uint64_t perm; + const char *name; + } permissions[] = { + { BLK_PERM_CONSISTENT_READ, "consistent read" }, + { BLK_PERM_WRITE, "write" }, + { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, + { BLK_PERM_RESIZE, "resize" }, + { BLK_PERM_GRAPH_MOD, "change children" }, + { 0, NULL } + }; + + char *result = g_strdup(""); + struct perm_name *p; + + for (p = permissions; p->name; p++) { + if (perm & p->perm) { + char *old = result; + result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name); + g_free(old); + } + } + + return result; +} + +/* + * Checks whether a new reference to @bs can be added if the new user requires + * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set, + * this old reference is ignored in the calculations; this allows checking + * permission updates for an existing reference. + * + * Needs to be followed by a call to either bdrv_set_perm() or + * bdrv_abort_perm_update(). */ +static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, + uint64_t new_shared_perm, + BdrvChild *ignore_child, Error **errp) +{ + BdrvChild *c; + uint64_t cumulative_perms = new_used_perm; + uint64_t cumulative_shared_perms = new_shared_perm; + + /* There is no reason why anyone couldn't tolerate write_unchanged */ + assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c == ignore_child) { + continue; + } + + if ((new_used_perm & c->shared_perm) != new_used_perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which does not " + "allow '%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + if ((c->perm & new_shared_perm) != c->perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which uses " + "'%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp); +} + +/* Needs to be followed by a call to either bdrv_child_set_perm() or + * bdrv_child_abort_perm_update(). */ +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + return bdrv_check_update_perm(c->bs, perm, shared, c, errp); +} + +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) +{ + uint64_t cumulative_perms, cumulative_shared_perms; + + c->perm = perm; + c->shared_perm = shared; + + bdrv_get_cumulative_perm(c->bs, &cumulative_perms, + &cumulative_shared_perms); + bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); +} + +void bdrv_child_abort_perm_update(BdrvChild *c) +{ + bdrv_abort_perm_update(c->bs); +} + +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + int ret; + + ret = bdrv_child_check_perm(c, perm, shared, errp); + if (ret < 0) { + bdrv_child_abort_perm_update(c); + return ret; + } + + bdrv_child_set_perm(c, perm, shared); + + return 0; +} + +#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_WRITE_UNCHANGED \ + | BLK_PERM_RESIZE) +#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) + +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (c == NULL) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | + (c->perm & DEFAULT_PERM_UNCHANGED); + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | + (c->shared_perm & DEFAULT_PERM_UNCHANGED); +} + +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + bool backing = (role == &child_backing); + assert(role == &child_backing || role == &child_file); + + if (!backing) { + /* Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters */ + bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + + /* Format drivers may touch metadata even if the guest doesn't write */ + if (!bdrv_is_read_only(bs)) { + perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + /* bs->file always needs to be consistent because of the metadata. We + * can never allow other users to resize or write to it. */ + perm |= BLK_PERM_CONSISTENT_READ; + shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } else { + /* We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. */ + perm &= BLK_PERM_CONSISTENT_READ; + + /* If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. */ + /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */ + if (shared & BLK_PERM_WRITE) { + shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; + } else { + shared = 0; + } + + shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + } + + *nperm = perm; + *nshared = shared; +} + +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, + bool check_new_perm) { BlockDriverState *old_bs = child->bs; + uint64_t perm, shared_perm; if (old_bs) { if (old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); } + if (child->role->detach) { + child->role->detach(child); + } QLIST_REMOVE(child, next_parent); + + /* Update permissions for old node. This is guaranteed to succeed + * because we're just taking a parent away, so we're loosening + * restrictions. */ + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); + bdrv_check_perm(old_bs, perm, shared_perm, &error_abort); + bdrv_set_perm(old_bs, perm, shared_perm); } child->bs = new_bs; @@ -1344,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) if (new_bs->quiesce_counter && child->role->drained_begin) { child->role->drained_begin(child); } + + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + if (check_new_perm) { + bdrv_check_perm(new_bs, perm, shared_perm, &error_abort); + } + bdrv_set_perm(new_bs, perm, shared_perm); + + if (child->role->attach) { + child->role->attach(child); + } } } BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque) + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp) { - BdrvChild *child = g_new(BdrvChild, 1); + BdrvChild *child; + int ret; + + ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); + if (ret < 0) { + bdrv_abort_perm_update(child_bs); + return NULL; + } + + child = g_new(BdrvChild, 1); *child = (BdrvChild) { - .bs = NULL, - .name = g_strdup(child_name), - .role = child_role, - .opaque = opaque, + .bs = NULL, + .name = g_strdup(child_name), + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, }; - bdrv_replace_child(child, child_bs); + /* This performs the matching bdrv_set_perm() for the above check. */ + bdrv_replace_child(child, child_bs, false); return child; } @@ -1368,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role) + const BdrvChildRole *child_role, + Error **errp) { - BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, - parent_bs); + BdrvChild *child; + uint64_t perm, shared_perm; + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + + assert(parent_bs->drv); + parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); + + child = bdrv_root_attach_child(child_bs, child_name, child_role, + perm, shared_perm, parent_bs, errp); + if (child == NULL) { + return NULL; + } + QLIST_INSERT_HEAD(&parent_bs->children, child, next); return child; } @@ -1383,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child) child->next.le_prev = NULL; } - bdrv_replace_child(child, NULL); + bdrv_replace_child(child, NULL, false); g_free(child->name); g_free(child); @@ -1447,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). */ -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) { if (backing_hd) { bdrv_ref(backing_hd); } if (bs->backing) { - assert(bs->backing_blocker); - bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); bdrv_unref_child(bs, bs->backing); - } else if (backing_hd) { - error_setg(&bs->backing_blocker, - "node is used as backing hd of '%s'", - bdrv_get_device_or_node_name(bs)); } if (!backing_hd) { - error_free(bs->backing_blocker); - bs->backing_blocker = NULL; bs->backing = NULL; goto out; } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing); - bs->open_flags &= ~BDRV_O_NO_BACKING; - pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); - pstrcpy(bs->backing_format, sizeof(bs->backing_format), - backing_hd->drv ? backing_hd->drv->format_name : ""); - bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit or stream */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, - bs->backing_blocker); - /* - * We do backup in 3 ways: - * 1. drive backup - * The target bs is new opened, and the source is top BDS - * 2. blockdev backup - * Both the source and the target are top BDSes. - * 3. internal backup(used for block replication) - * Both the source and the target are backing file - * - * In case 1 and 2, neither the source nor the target is the backing file. - * In case 3, we will block the top BDS, so there is only one block job - * for the top BDS and its backing chain. - */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, - bs->backing_blocker); + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, + errp); + if (!bs->backing) { + bdrv_unref(backing_hd); + } + out: bdrv_refresh_limits(bs, NULL); } @@ -1580,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, /* Hook up the backing file link; drop our reference, bs owns the * backing_hd reference now */ - bdrv_set_backing_hd(bs, backing_hd); + bdrv_set_backing_hd(bs, backing_hd, &local_err); bdrv_unref(backing_hd); + if (local_err) { + ret = -EINVAL; + goto free_exit; + } qdict_del(parent_options, bdref_key); @@ -1648,6 +2059,7 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { + BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -1656,7 +2068,13 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - return bdrv_attach_child(parent, bs, bdref_key, child_role); + c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); + if (!c) { + bdrv_unref(bs); + return NULL; + } + + return c; } static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, @@ -1669,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1718,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, * call bdrv_unref() on it), so in order to be able to return one, we have * to increase bs_snapshot's refcount here */ bdrv_ref(bs_snapshot); - bdrv_append(bs_snapshot, bs); + bdrv_append(bs_snapshot, bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } g_free(tmp_filename); return bs_snapshot; @@ -1862,9 +2286,12 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } if (file_bs != NULL) { - file = blk_new(); - blk_insert_bs(file, file_bs); + file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); + if (local_err) { + goto fail; + } qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file_bs))); @@ -2405,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv->bdrv_close(bs); bs->drv = NULL; - bdrv_set_backing_hd(bs, NULL); + bdrv_set_backing_hd(bs, NULL, &error_abort); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); @@ -2465,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from, BdrvChild *c, *next, *to_c; QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (c->role->stay_at_node) { + continue; + } if (c->role == &child_backing) { - /* @from is generally not allowed to be a backing file, except for - * when @to is the overlay. In that case, @from may not be replaced - * by @to as @to's backing node. */ + /* If @from is a backing file of @to, ignore the child to avoid + * creating a loop. We only want to change the pointer of other + * parents. */ QLIST_FOREACH(to_c, &to->children, next) { if (to_c == c) { break; @@ -2479,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from, } } - assert(c->role != &child_backing); bdrv_ref(to); - bdrv_replace_child(c, to); + /* FIXME Are we sure that bdrv_replace_child() can't run into + * &error_abort because of permissions? */ + bdrv_replace_child(c, to, true); bdrv_unref(from); } } @@ -2502,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from, * parents of bs_top after bdrv_append() returns. If the caller needs to keep a * reference of its own, it must call bdrv_ref(). */ -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) { - assert(!bdrv_requests_pending(bs_top)); - assert(!bdrv_requests_pending(bs_new)); + Error *local_err = NULL; - bdrv_ref(bs_top); + assert(!atomic_read(&bs_top->in_flight)); + assert(!atomic_read(&bs_new->in_flight)); + + bdrv_set_backing_hd(bs_new, bs_top, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } change_parent_backing_link(bs_top, bs_new); - bdrv_set_backing_hd(bs_new, bs_top); - bdrv_unref(bs_top); /* bs_new is now referenced by its new parents, we don't need the * additional reference any more. */ +out: bdrv_unref(bs_new); } @@ -2658,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base, const char *backing_file_str) { BlockDriverState *new_top_bs = NULL; + Error *local_err = NULL; int ret = -EIO; if (!top->drv || !base->drv) { @@ -2690,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, if (ret) { goto exit; } - bdrv_set_backing_hd(new_top_bs, base); + + bdrv_set_backing_hd(new_top_bs, base, &local_err); + if (local_err) { + ret = -EPERM; + error_report_err(local_err); + goto exit; + } ret = 0; exit: @@ -2705,6 +3149,9 @@ int bdrv_truncate(BdrvChild *child, int64_t offset) BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; int ret; + + assert(child->perm & BLK_PERM_RESIZE); + if (!drv) return -ENOMEDIUM; if (!drv->bdrv_truncate) diff --git a/block/backup.c b/block/backup.c index fe010e78e3..d1ab617c7e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - job = block_job_create(job_id, &backup_job_driver, bs, speed, - creation_flags, cb, opaque, errp); + /* job->common.len is fixed, so we can't allow resize */ + job = block_job_create(job_id, &backup_job_driver, bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, + speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - job->target = blk_new(); - blk_insert_bs(job->target, target); + /* The target must match the source in size, so no resize here either */ + job->target = blk_new(BLK_PERM_WRITE, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(job->target, target, errp); + if (ret < 0) { + goto error; + } job->on_source_error = on_source_error; job->on_target_error = on_target_error; @@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - block_job_add_bdrv(&job->common, target); + /* Required permissions are already taken with target's blk_new() */ + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); job->common.len = len; block_job_txn_add_job(txn, &job->common); diff --git a/block/blkdebug.c b/block/blkdebug.c index 6117ce5fca..67e8024e36 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, .bdrv_reopen_prepare = blkdebug_reopen_prepare, + .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkreplay.c b/block/blkreplay.c index cfc8c5be02..e1102119fb 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = { .bdrv_file_open = blkreplay_open, .bdrv_close = blkreplay_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkreplay_getlength, .bdrv_co_preadv = blkreplay_co_preadv, diff --git a/block/blkverify.c b/block/blkverify.c index 43a940c2f5..9a1e21c6ad 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_parse_filename = blkverify_parse_filename, .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename = blkverify_refresh_filename, diff --git a/block/block-backend.c b/block/block-backend.c index 492e71e41f..daa7908d01 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -59,6 +59,9 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + uint64_t perm; + uint64_t shared_perm; + bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); +static char *blk_get_attached_dev_id(BlockBackend *blk); /* All BlockBackends */ static QTAILQ_HEAD(, BlockBackend) block_backends = @@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static char *blk_root_get_parent_desc(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + char *dev_id; + + if (blk->name) { + return g_strdup(blk->name); + } + + dev_id = blk_get_attached_dev_id(blk); + if (*dev_id) { + return dev_id; + } else { + /* TODO Callback into the BB owner for something more detailed */ + g_free(dev_id); + return g_strdup("a block device"); + } +} + static const char *blk_root_get_name(BdrvChild *child) { return blk_name(child->opaque); @@ -110,6 +133,7 @@ static const BdrvChildRole child_root = { .change_media = blk_root_change_media, .resize = blk_root_resize, .get_name = blk_root_get_name, + .get_parent_desc = blk_root_get_parent_desc, .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, @@ -117,15 +141,23 @@ static const BdrvChildRole child_root = { /* * Create a new BlockBackend with a reference count of one. - * Store an error through @errp on failure, unless it's null. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions + * to request for a block driver node that is attached to this BlockBackend. + * @shared_perm is a bitmask which describes which permissions may be granted + * to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(void) +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->perm = perm; + blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); qemu_co_queue_init(&blk->public.throttled_reqs[0]); @@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, { BlockBackend *blk; BlockDriverState *bs; + uint64_t perm; + + /* blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a concern because the BDS stays private, so we + * just request permission according to the flags. + * + * The exceptions are xen_disk and blockdev_init(); in these cases, the + * caller of blk_new_open() doesn't make use of the permissions, but they + * shouldn't hurt either. We can still share everything here because the + * guest devices will add their own blockers if they can't share. */ + perm = BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } + if (flags & BDRV_O_RESIZE) { + perm |= BLK_PERM_RESIZE; + } - blk = blk_new(); + blk = blk_new(perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + perm, BLK_PERM_ALL, blk, &error_abort); return blk; } @@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk) /* * Associates a new BlockDriverState with @blk. */ -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->perm, blk->shared_perm, blk, errp); + if (blk->root == NULL) { + return -EPERM; + } bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { throttle_timers_attach_aio_context( &blk->public.throttle_timers, bdrv_get_aio_context(bs)); } + + return 0; +} + +/* + * Sets the permission bitmasks that the user of the BlockBackend needs. + */ +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + + if (blk->root) { + ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + blk->perm = perm; + blk->shared_perm = shared_perm; + + return 0; +} + +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) +{ + *perm = blk->perm; + *shared_perm = blk->shared_perm; } static int blk_do_attach_dev(BlockBackend *blk, void *dev) @@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; blk->guest_block_size = 512; + blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } @@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, /* * Notify @blk's attached device model of media change. - * If @load is true, notify of media load. - * Else, notify of media eject. + * + * If @load is true, notify of media load. This action can fail, meaning that + * the medium cannot be loaded. @errp is set then. + * + * If @load is false, notify of media eject. This can never fail. + * * Also send DEVICE_TRAY_MOVED events as appropriate. */ -void blk_dev_change_media_cb(BlockBackend *blk, bool load) +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; + Error *local_err = NULL; assert(!blk->legacy_dev); tray_was_open = blk_dev_is_tray_open(blk); - blk->dev_ops->change_media_cb(blk->dev_opaque, load); + blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); + if (local_err) { + assert(load == true); + error_propagate(errp, local_err); + return; + } tray_is_open = blk_dev_is_tray_open(blk); if (tray_was_open != tray_is_open) { @@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load) static void blk_root_change_media(BdrvChild *child, bool load) { - blk_dev_change_media_cb(child->opaque, load); + blk_dev_change_media_cb(child->opaque, load, NULL); } /* diff --git a/block/bochs.c b/block/bochs.c index 7dd2ac4f51..516da56c3b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -293,6 +293,7 @@ static BlockDriver bdrv_bochs = { .instance_size = sizeof(BDRVBochsState), .bdrv_probe = bochs_probe, .bdrv_open = bochs_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = bochs_refresh_limits, .bdrv_co_preadv = bochs_co_preadv, .bdrv_close = bochs_close, diff --git a/block/cloop.c b/block/cloop.c index 877c9b0d1b..a6c7b9dbe6 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -290,6 +290,7 @@ static BlockDriver bdrv_cloop = { .instance_size = sizeof(BDRVCloopState), .bdrv_probe = cloop_probe, .bdrv_open = cloop_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = cloop_refresh_limits, .bdrv_co_preadv = cloop_co_preadv, .bdrv_close = cloop_close, diff --git a/block/commit.c b/block/commit.c index c284e8535d..22a0a4db98 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,6 +36,7 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; + BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; @@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque) BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, top); + BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); int ret = data->ret; + bool remove_commit_top_bs = false; + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. */ + blk_unref(s->base); if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); + ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + s->backing_file_str); + } else if (overlay_bs) { + /* XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + remove_commit_top_bs = true; } /* restore base open flags here if appropriate (e.g., change the base back @@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque) } g_free(s->backing_file_str); blk_unref(s->top); - blk_unref(s->base); block_job_completed(&s->common, ret); g_free(data); + + /* If bdrv_drop_intermediate() didn't already do that, remove the commit + * filter driver from the backing chain. Do this as the final step so that + * the 'consistent read' permission can be granted. */ + if (remove_commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } } static void coroutine_fn commit_run(void *opaque) @@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = { .start = commit_run, }; +static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static void bdrv_commit_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_commit_top = { + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_close = bdrv_commit_top_close, + .bdrv_child_perm = bdrv_commit_top_child_perm, +}; + void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp) + const char *filter_node_name, Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; @@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, int orig_base_flags; BlockDriverState *iter; BlockDriverState *overlay_bs; + BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; + int ret; assert(top != bs); if (top == base) { @@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - s = block_job_create(job_id, &commit_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); + s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } @@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs, bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); - block_job_unref(&s->common); - return; + goto fail; } } + /* Insert commit_top block node above top, so we can block consistent read + * on the backing chain below it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, + errp); + if (commit_top_bs == NULL) { + goto fail; + } + + bdrv_set_backing_hd(commit_top_bs, top, &error_abort); + bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort); + + s->commit_top_bs = commit_top_bs; + bdrv_unref(commit_top_bs); /* Block all nodes between top and base, because they will * disappear from the chain after this operation. */ assert(bdrv_chain_contains(top, base)); - for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + for (iter = top; iter != base; iter = backing_bs(iter)) { + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves + * at s->base (if writes are blocked for a node, they are also blocked + * for its backing file). The other options would be a second filter + * driver above s->base. */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } + } + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } + /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string, but if it's the root node then - * don't block it again */ - if (bs != overlay_bs) { - block_job_add_bdrv(&s->common, overlay_bs); + * update the backing image string. */ + ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, + BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } - s->base = blk_new(); - blk_insert_bs(s->base, base); + s->base = blk_new(BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_RESIZE, + BLK_PERM_CONSISTENT_READ + | BLK_PERM_GRAPH_MOD + | BLK_PERM_WRITE_UNCHANGED); + ret = blk_insert_bs(s->base, base, errp); + if (ret < 0) { + goto fail; + } - s->top = blk_new(); - blk_insert_bs(s->top, top); + /* Required permissions are already taken with block_job_add_bdrv() */ + s->top = blk_new(0, BLK_PERM_ALL); + blk_insert_bs(s->top, top, errp); + if (ret < 0) { + goto fail; + } s->active = bs; @@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, trace_commit_start(bs, base, top, s); block_job_start(&s->common); + return; + +fail: + if (s->base) { + blk_unref(s->base); + } + if (s->top) { + blk_unref(s->top); + } + if (commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } + block_job_unref(&s->common); } @@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs, int bdrv_commit(BlockDriverState *bs) { BlockBackend *src, *backing; + BlockDriverState *backing_file_bs = NULL; + BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; uint8_t *buf = NULL; + Error *local_err = NULL; if (!drv) return -ENOMEDIUM; @@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(); - blk_insert_bs(src, bs); + src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); - backing = blk_new(); - blk_insert_bs(backing, bs->backing->bs); + ret = blk_insert_bs(src, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + + /* Insert commit_top block node above backing, so we can write to it */ + backing_file_bs = backing_bs(bs); + + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, + &local_err); + if (commit_top_bs == NULL) { + error_report_err(local_err); + goto ro_cleanup; + } + + bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); + bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); + + ret = blk_insert_bs(backing, backing_file_bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } length = blk_getlength(src); if (length < 0) { @@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs) ro_cleanup: qemu_vfree(buf); - blk_unref(src); blk_unref(backing); + if (backing_file_bs) { + bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); + } + bdrv_unref(commit_top_bs); + blk_unref(src); if (ro) { /* ignoring error return here */ diff --git a/block/crypto.c b/block/crypto.c index 7cb2ff2946..4a2038888d 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -628,6 +628,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_probe = block_crypto_probe_luks, .bdrv_open = block_crypto_open_luks, .bdrv_close = block_crypto_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = block_crypto_create_luks, .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, diff --git a/block/curl.c b/block/curl.c index 2939cc77e9..e83dcd8f50 100644 --- a/block/curl.c +++ b/block/curl.c @@ -135,6 +135,7 @@ typedef struct BDRVCURLState { char *cookie; bool accept_range; AioContext *aio_context; + QemuMutex mutex; char *username; char *password; char *proxyusername; @@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, return FIND_RET_NONE; } +/* Called with s->mutex held. */ static void curl_multi_check_completion(BDRVCURLState *s) { int msgs_in_queue; @@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s) continue; } + qemu_mutex_unlock(&s->mutex); acb->common.cb(acb->common.opaque, -EPROTO); + qemu_mutex_lock(&s->mutex); qemu_aio_unref(acb); state->acb[i] = NULL; } @@ -386,6 +390,7 @@ static void curl_multi_check_completion(BDRVCURLState *s) } } +/* Called with s->mutex held. */ static void curl_multi_do_locked(CURLState *s) { CURLSocket *socket, *next_socket; @@ -409,19 +414,19 @@ static void curl_multi_do(void *arg) { CURLState *s = (CURLState *)arg; - aio_context_acquire(s->s->aio_context); + qemu_mutex_lock(&s->s->mutex); curl_multi_do_locked(s); - aio_context_release(s->s->aio_context); + qemu_mutex_unlock(&s->s->mutex); } static void curl_multi_read(void *arg) { CURLState *s = (CURLState *)arg; - aio_context_acquire(s->s->aio_context); + qemu_mutex_lock(&s->s->mutex); curl_multi_do_locked(s); curl_multi_check_completion(s->s); - aio_context_release(s->s->aio_context); + qemu_mutex_unlock(&s->s->mutex); } static void curl_multi_timeout_do(void *arg) @@ -434,11 +439,11 @@ static void curl_multi_timeout_do(void *arg) return; } - aio_context_acquire(s->aio_context); + qemu_mutex_lock(&s->mutex); curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); curl_multi_check_completion(s); - aio_context_release(s->aio_context); + qemu_mutex_unlock(&s->mutex); #else abort(); #endif @@ -771,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_easy_cleanup(state->curl); state->curl = NULL; + qemu_mutex_init(&s->mutex); curl_attach_aio_context(bs, bdrv_get_aio_context(bs)); qemu_opts_del(opts); @@ -801,12 +807,11 @@ static void curl_readv_bh_cb(void *p) CURLAIOCB *acb = p; BlockDriverState *bs = acb->common.bs; BDRVCURLState *s = bs->opaque; - AioContext *ctx = bdrv_get_aio_context(bs); size_t start = acb->sector_num * BDRV_SECTOR_SIZE; size_t end; - aio_context_acquire(ctx); + qemu_mutex_lock(&s->mutex); // In case we have the requested data already (e.g. read-ahead), // we can just call the callback and be done. @@ -854,7 +859,7 @@ static void curl_readv_bh_cb(void *p) curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); out: - aio_context_release(ctx); + qemu_mutex_unlock(&s->mutex); if (ret != -EINPROGRESS) { acb->common.cb(acb->common.opaque, ret); qemu_aio_unref(acb); @@ -883,6 +888,7 @@ static void curl_close(BlockDriverState *bs) DPRINTF("CURL: Close\n"); curl_detach_aio_context(bs); + qemu_mutex_destroy(&s->mutex); g_free(s->cookie); g_free(s->url); diff --git a/block/dmg.c b/block/dmg.c index 8e387cdfe5..a7d25fc47b 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -697,6 +697,7 @@ static BlockDriver bdrv_dmg = { .bdrv_probe = dmg_probe, .bdrv_open = dmg_open, .bdrv_refresh_limits = dmg_refresh_limits, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = dmg_co_preadv, .bdrv_close = dmg_close, }; diff --git a/block/io.c b/block/io.c index d5c45447fd..8f38d46de0 100644 --- a/block/io.c +++ b/block/io.c @@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); } -static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { + BlockDriverState *bs = child->bs; + /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up * modifying the image file. This is critical for zero-copy guest I/O @@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, size_t skip_bytes; int ret; + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ @@ -1001,10 +1005,11 @@ err: * handles copy on read, zeroing after EOF, and fragmentation of large * reads; any other features must be implemented by the caller. */ -static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; int ret = 0; uint64_t bytes_remaining = bytes; @@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov); goto out; } } @@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); @@ -1306,10 +1311,11 @@ fail: * Forwards an already correctly aligned write request to the BlockDriver, * after possibly fragmenting it. */ -static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; bool waited; int ret; @@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(child->perm & BLK_PERM_WRITE); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); @@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } -static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, BdrvRequestFlags flags, BdrvTrackedRequest *req) { + BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; @@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); if (ret < 0) { @@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, if (bytes >= align) { /* Write the aligned part in the middle. */ uint64_t aligned_bytes = bytes & ~(align - 1); - ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, NULL, flags); if (ret < 0) { goto fail; @@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, req, offset, align, + ret = bdrv_aligned_preadv(child, req, offset, align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); memset(buf, 0, bytes); - ret = bdrv_aligned_pwritev(bs, req, offset, align, align, + ret = bdrv_aligned_pwritev(child, req, offset, align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); } fail: @@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { - ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); goto out; } @@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&head_qiov, &head_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, align, &head_qiov, 0); if (ret < 0) { goto fail; @@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, - align, &tail_qiov, 0); + ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), + align, align, &tail_qiov, 0); if (ret < 0) { goto fail; } @@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, bytes = ROUND_UP(bytes, align); } - ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); diff --git a/block/iscsi.c b/block/iscsi.c index c4f813bfd2..76319a1a6e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -58,6 +58,7 @@ typedef struct IscsiLun { int events; QEMUTimer *nop_timer; QEMUTimer *event_timer; + QemuMutex mutex; struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; unsigned char *zeroblock; @@ -252,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense) return ret; } +/* Called (via iscsi_service) with QemuMutex held. */ static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -352,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = { static void iscsi_process_read(void *arg); static void iscsi_process_write(void *arg); +/* Called with QemuMutex held. */ static void iscsi_set_events(IscsiLun *iscsilun) { @@ -395,10 +398,10 @@ iscsi_process_read(void *arg) IscsiLun *iscsilun = arg; struct iscsi_context *iscsi = iscsilun->iscsi; - aio_context_acquire(iscsilun->aio_context); + qemu_mutex_lock(&iscsilun->mutex); iscsi_service(iscsi, POLLIN); iscsi_set_events(iscsilun); - aio_context_release(iscsilun->aio_context); + qemu_mutex_unlock(&iscsilun->mutex); } static void @@ -407,10 +410,10 @@ iscsi_process_write(void *arg) IscsiLun *iscsilun = arg; struct iscsi_context *iscsi = iscsilun->iscsi; - aio_context_acquire(iscsilun->aio_context); + qemu_mutex_lock(&iscsilun->mutex); iscsi_service(iscsi, POLLOUT); iscsi_set_events(iscsilun); - aio_context_release(iscsilun->aio_context); + qemu_mutex_unlock(&iscsilun->mutex); } static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun) @@ -589,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, uint64_t lba; uint32_t num_sectors; bool fua = flags & BDRV_REQ_FUA; + int r = 0; if (fua) { assert(iscsilun->dpofua); @@ -604,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, lba = sector_qemu2lun(sector_num, iscsilun); num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsilun->use_16_for_rw) { #if LIBISCSI_API_VERSION >= (20160603) @@ -640,7 +645,9 @@ retry: #endif while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -655,12 +662,15 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors); - return iTask.err_code; + r = iTask.err_code; + goto out_unlock; } iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors); - return 0; +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + return r; } @@ -693,18 +703,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, goto out; } + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun, sector_qemu2lun(sector_num, iscsilun), 8 + 16, iscsi_co_generic_cb, &iTask) == NULL) { ret = -ENOMEM; - goto out; + goto out_unlock; } while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.do_retry) { @@ -721,20 +734,20 @@ retry: * because the device is busy or the cmd is not * supported) we pretend all blocks are allocated * for backwards compatibility */ - goto out; + goto out_unlock; } lbas = scsi_datain_unmarshall(iTask.task); if (lbas == NULL) { ret = -EIO; - goto out; + goto out_unlock; } lbasd = &lbas->descriptors[0]; if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) { ret = -EIO; - goto out; + goto out_unlock; } *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun); @@ -756,6 +769,8 @@ retry: if (*pnum > nb_sectors) { *pnum = nb_sectors; } +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); out: if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); @@ -818,6 +833,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsilun->use_16_for_rw) { #if LIBISCSI_API_VERSION >= (20160603) @@ -855,7 +871,9 @@ retry: #endif while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -867,6 +885,7 @@ retry: iTask.complete = 0; goto retry; } + qemu_mutex_unlock(&iscsilun->mutex); if (iTask.status != SCSI_STATUS_GOOD) { return iTask.err_code; @@ -881,6 +900,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) struct IscsiTask iTask; iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { @@ -889,7 +909,9 @@ retry: while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -901,6 +923,7 @@ retry: iTask.complete = 0; goto retry; } + qemu_mutex_unlock(&iscsilun->mutex); if (iTask.status != SCSI_STATUS_GOOD) { return iTask.err_code; @@ -910,6 +933,7 @@ retry: } #ifdef __linux__ +/* Called (via iscsi_service) with QemuMutex held. */ static void iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -1034,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, acb->task->expxferlen = acb->ioh->dxfer_len; data.size = 0; + qemu_mutex_lock(&iscsilun->mutex); if (acb->task->xfer_dir == SCSI_XFER_WRITE) { if (acb->ioh->iovec_count == 0) { data.data = acb->ioh->dxferp; @@ -1049,6 +1074,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, iscsi_aio_ioctl_cb, (data.size > 0) ? &data : NULL, acb) != 0) { + qemu_mutex_unlock(&iscsilun->mutex); scsi_free_scsi_task(acb->task); qemu_aio_unref(acb); return NULL; @@ -1068,6 +1094,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, } iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); return &acb->common; } @@ -1092,6 +1119,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; struct unmap_list list; + int r = 0; if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { return -ENOTSUP; @@ -1106,15 +1134,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) list.num = count / iscsilun->block_size; iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, iscsi_co_generic_cb, &iTask) == NULL) { - return -ENOMEM; + r = -ENOMEM; + goto out_unlock; } while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -1131,17 +1163,20 @@ retry: /* the target might fail with a check condition if it is not happy with the alignment of the UNMAP request we silently fail in this case */ - return 0; + goto out_unlock; } if (iTask.status != SCSI_STATUS_GOOD) { - return iTask.err_code; + r = iTask.err_code; + goto out_unlock; } iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); - return 0; +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + return r; } static int @@ -1153,6 +1188,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, uint64_t lba; uint32_t nb_blocks; bool use_16_for_ws = iscsilun->use_16_for_rw; + int r = 0; if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { return -ENOTSUP; @@ -1186,6 +1222,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, } } + qemu_mutex_lock(&iscsilun->mutex); iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (use_16_for_ws) { @@ -1205,7 +1242,9 @@ retry: while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.status == SCSI_STATUS_CHECK_CONDITION && @@ -1215,7 +1254,8 @@ retry: /* WRITE SAME is not supported by the target */ iscsilun->has_write_same = false; scsi_free_scsi_task(iTask.task); - return -ENOTSUP; + r = -ENOTSUP; + goto out_unlock; } if (iTask.task != NULL) { @@ -1231,7 +1271,8 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); - return iTask.err_code; + r = iTask.err_code; + goto out_unlock; } if (flags & BDRV_REQ_MAY_UNMAP) { @@ -1242,7 +1283,9 @@ retry: count >> BDRV_SECTOR_BITS); } - return 0; +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + return r; } static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts, @@ -1331,7 +1374,7 @@ static void iscsi_nop_timed_event(void *opaque) { IscsiLun *iscsilun = opaque; - aio_context_acquire(iscsilun->aio_context); + qemu_mutex_lock(&iscsilun->mutex); if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) { error_report("iSCSI: NOP timeout. Reconnecting..."); iscsilun->request_timed_out = true; @@ -1344,7 +1387,7 @@ static void iscsi_nop_timed_event(void *opaque) iscsi_set_events(iscsilun); out: - aio_context_release(iscsilun->aio_context); + qemu_mutex_unlock(&iscsilun->mutex); } static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) @@ -1890,6 +1933,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, scsi_free_scsi_task(task); task = NULL; + qemu_mutex_init(&iscsilun->mutex); iscsi_attach_aio_context(bs, iscsilun->aio_context); /* Guess the internal cluster (page) size of the iscsi target by the means @@ -1935,6 +1979,7 @@ static void iscsi_close(BlockDriverState *bs) iscsi_destroy_context(iscsi); g_free(iscsilun->zeroblock); iscsi_allocmap_free(iscsilun); + qemu_mutex_destroy(&iscsilun->mutex); memset(iscsilun, 0, sizeof(IscsiLun)); } diff --git a/block/mirror.c b/block/mirror.c index 3d50857300..57f26c33a4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -38,7 +38,10 @@ typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; BlockBackend *target; + BlockDriverState *mirror_top_bs; + BlockDriverState *source; BlockDriverState *base; + /* The name of the graph node to replace */ char *replaces; /* The BDS to replace */ @@ -327,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = blk_bs(s->common.blk); + BlockDriverState *source = s->source; int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ @@ -386,7 +389,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) nb_chunks * sectors_per_chunk); bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); while (nb_chunks > 0 && sector_num < end) { - int ret; + int64_t ret; int io_sectors, io_sectors_acct; BlockDriverState *file; enum MirrorMethod { @@ -497,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = blk_bs(s->common.blk); + BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); + bdrv_ref(mirror_top_bs); + + /* We don't access the source any more. Dropping any WRITE/RESIZE is + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + } + } + } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); @@ -524,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_drained_begin(target_bs); bdrv_replace_in_backing_chain(to_replace, target_bs); bdrv_drained_end(target_bs); - - /* We just changed the BDS the job BB refers to */ - blk_remove_bs(job->blk); - blk_insert_bs(job->blk, src); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -540,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque) g_free(s->replaces); blk_unref(s->target); s->target = NULL; + + /* Remove the mirror filter driver from the graph. Before this, get rid of + * the blockers on the intermediate nodes so that the resulting state is + * valid. */ + block_job_remove_all_bdrv(job); + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); + + /* We just changed the BDS the job BB refers to (with either or both of the + * bdrv_replace_in_backing_chain() calls), so switch the BB back so the + * cleanup does the right thing. We don't need any permissions any more + * now. */ + blk_remove_bs(job->blk); + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(job->blk, mirror_top_bs, &error_abort); + block_job_completed(&s->common, data->ret); + g_free(data); bdrv_drained_end(src); + bdrv_unref(mirror_top_bs); bdrv_unref(src); } @@ -562,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t sector_num, end; BlockDriverState *base = s->base; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret, n; @@ -644,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; @@ -876,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - BlockDriverState *src, *target; + BlockDriverState *target; - src = blk_bs(job->blk); target = blk_bs(s->target); if (!s->synced) { @@ -910,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp) replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); + /* TODO Translate this into permission system. Current definition of + * GRAPH_MOD would require to request it for the parents; they might + * not even be BlockDriverStates, however, so a BdrvChild can't address + * them. May need redefinition of GRAPH_MOD. */ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); @@ -918,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } - if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; - if (backing_bs(target) != backing) { - bdrv_set_backing_hd(target, backing); - } - } - s->should_complete = true; block_job_enter(&s->common); } @@ -980,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = { .drain = mirror_drain, }; +static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) +{ + return bdrv_co_flush(bs->backing->bs); +} + +static int64_t coroutine_fn bdrv_mirror_top_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->backing->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); +} + +static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags); +} + +static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + return bdrv_co_pdiscard(bs->backing->bs, offset, count); +} + +static void bdrv_mirror_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* Must be able to forward guest writes to the real image */ + *nperm = 0; + if (perm & BLK_PERM_WRITE) { + *nperm |= BLK_PERM_WRITE; + } + + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. */ +static BlockDriver bdrv_mirror_top = { + .format_name = "mirror_top", + .bdrv_co_preadv = bdrv_mirror_top_preadv, + .bdrv_co_pwritev = bdrv_mirror_top_pwritev, + .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, + .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, + .bdrv_co_flush = bdrv_mirror_top_flush, + .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, + .bdrv_close = bdrv_mirror_top_close, + .bdrv_child_perm = bdrv_mirror_top_child_perm, +}; + static void mirror_start_job(const char *job_id, BlockDriverState *bs, int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, @@ -992,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, void *opaque, Error **errp, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, - bool auto_complete) + bool auto_complete, const char *filter_node_name) { MirrorBlockJob *s; + BlockDriverState *mirror_top_bs; + bool target_graph_mod; + bool target_is_backing; + Error *local_err = NULL; + int ret; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -1011,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - s = block_job_create(job_id, driver, bs, speed, creation_flags, - cb, opaque, errp); - if (!s) { + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. */ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, + BDRV_O_RDWR, errp); + if (mirror_top_bs == NULL) { return; } + mirror_top_bs->total_sectors = bs->total_sectors; - s->target = blk_new(); - blk_insert_bs(s->target, target); + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep + * it alive until block_job_create() even if bs has no parent. */ + bdrv_ref(mirror_top_bs); + bdrv_drained_begin(bs); + bdrv_append(mirror_top_bs, bs, &local_err); + bdrv_drained_end(bs); + + if (local_err) { + bdrv_unref(mirror_top_bs); + error_propagate(errp, local_err); + return; + } + + /* Make sure that the source is not resized while the job is running */ + s = block_job_create(job_id, driver, mirror_top_bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + creation_flags, cb, opaque, errp); + bdrv_unref(mirror_top_bs); + if (!s) { + goto fail; + } + s->source = bs; + s->mirror_top_bs = mirror_top_bs; + + /* No resize for the target either; while the mirror is still running, a + * consistent read isn't necessarily possible. We could possibly allow + * writes and graph modifications, though it would likely defeat the + * purpose of a mirror, so leave them blocked for now. + * + * In the case of active commit, things look a bit different, though, + * because the target is an already populated backing file in active use. + * We can allow anything except resize there.*/ + target_is_backing = bdrv_chain_contains(bs, target); + target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); + s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), + BLK_PERM_WRITE_UNCHANGED | + (target_is_backing ? BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE | + BLK_PERM_GRAPH_MOD : 0)); + ret = blk_insert_bs(s->target, target, errp); + if (ret < 0) { + goto fail; + } s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; @@ -1041,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - block_job_add_bdrv(&s->common, target); + /* Required permissions are already taken with blk_new() */ + block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ - if (bdrv_chain_contains(bs, target)) { + if (target_is_backing) { BlockDriverState *iter; for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } } } trace_mirror_start(bs, s, opaque); block_job_start(&s->common); + return; + +fail: + if (s) { + g_free(s->replaces); + blk_unref(s->target); + block_job_unref(&s->common); + } + + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); } void mirror_start(const char *job_id, BlockDriverState *bs, @@ -1061,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp) + bool unmap, const char *filter_node_name, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1075,12 +1251,14 @@ void mirror_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, errp, - &mirror_job_driver, is_none_mode, base, false); + &mirror_job_driver, is_none_mode, base, false, + filter_node_name); } void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, BlockCompletionFunc *cb, void *opaque, Error **errp, bool auto_complete) { @@ -1096,7 +1274,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &local_err, - &commit_active_job_driver, false, base, auto_complete); + &commit_active_job_driver, false, base, auto_complete, + filter_node_name); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/nfs.c b/block/nfs.c index ffb54be065..890d5d4aff 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -54,6 +54,7 @@ typedef struct NFSClient { int events; bool has_zero_init; AioContext *aio_context; + QemuMutex mutex; blkcnt_t st_blocks; bool cache_used; NFSServer *server; @@ -191,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options, static void nfs_process_read(void *arg); static void nfs_process_write(void *arg); +/* Called with QemuMutex held. */ static void nfs_set_events(NFSClient *client) { int ev = nfs_which_events(client->context); @@ -209,20 +211,20 @@ static void nfs_process_read(void *arg) { NFSClient *client = arg; - aio_context_acquire(client->aio_context); + qemu_mutex_lock(&client->mutex); nfs_service(client->context, POLLIN); nfs_set_events(client); - aio_context_release(client->aio_context); + qemu_mutex_unlock(&client->mutex); } static void nfs_process_write(void *arg) { NFSClient *client = arg; - aio_context_acquire(client->aio_context); + qemu_mutex_lock(&client->mutex); nfs_service(client->context, POLLOUT); nfs_set_events(client); - aio_context_release(client->aio_context); + qemu_mutex_unlock(&client->mutex); } static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task) @@ -242,6 +244,7 @@ static void nfs_co_generic_bh_cb(void *opaque) aio_co_wake(task->co); } +/* Called (via nfs_service) with QemuMutex held. */ static void nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, void *private_data) @@ -273,12 +276,15 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, nfs_co_init_task(bs, &task); task.iov = iov; + qemu_mutex_lock(&client->mutex); if (nfs_pread_async(client->context, client->fh, offset, bytes, nfs_co_generic_cb, &task) != 0) { + qemu_mutex_unlock(&client->mutex); return -ENOMEM; } nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); while (!task.complete) { qemu_coroutine_yield(); } @@ -317,9 +323,11 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset, buf = iov->iov[0].iov_base; } + qemu_mutex_lock(&client->mutex); if (nfs_pwrite_async(client->context, client->fh, offset, bytes, buf, nfs_co_generic_cb, &task) != 0) { + qemu_mutex_unlock(&client->mutex); if (my_buffer) { g_free(buf); } @@ -327,6 +335,7 @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset, } nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); while (!task.complete) { qemu_coroutine_yield(); } @@ -349,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs) nfs_co_init_task(bs, &task); + qemu_mutex_lock(&client->mutex); if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &task) != 0) { + qemu_mutex_unlock(&client->mutex); return -ENOMEM; } nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); while (!task.complete) { qemu_coroutine_yield(); } @@ -440,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs) { NFSClient *client = bs->opaque; nfs_client_close(client); + qemu_mutex_destroy(&client->mutex); } static NFSServer *nfs_config(QDict *options, Error **errp) @@ -647,6 +660,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, if (ret < 0) { return ret; } + qemu_mutex_init(&client->mutex); bs->total_sectors = ret; ret = 0; return ret; @@ -702,6 +716,7 @@ static int nfs_has_zero_init(BlockDriverState *bs) return client->has_zero_init; } +/* Called (via nfs_service) with QemuMutex held. */ static void nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data, void *private_data) diff --git a/block/parallels.c b/block/parallels.c index b2ec09f7e6..19935e29a9 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) } file = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (file == NULL) { error_propagate(errp, local_err); return -EIO; @@ -762,6 +763,7 @@ static BlockDriver bdrv_parallels = { .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_flush_to_os = parallels_co_flush_to_os, diff --git a/block/qcow.c b/block/qcow.c index 038b05ab1b..9d6ac83959 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -823,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } qcow_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (qcow_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1052,6 +1053,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/qcow2.c b/block/qcow2.c index 21e61427eb..6a92d2ef3f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2202,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -2266,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -3113,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, uint64_t cluster_size = s->cluster_size; bool encrypt; int refcount_bits = s->refcount_bits; + Error *local_err = NULL; int ret; QemuOptDesc *desc = opts->list->desc; Qcow2AmendHelperCBInfo helper_cb_info; @@ -3262,11 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - BlockBackend *blk = blk_new(); - blk_insert_bs(blk, bs); + BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + blk_unref(blk); + return ret; + } + ret = blk_truncate(blk, new_size); blk_unref(blk); - if (ret < 0) { return ret; } @@ -3403,6 +3411,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, diff --git a/block/qed.c b/block/qed.c index 62a0a09326..5ec7fd83f2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -625,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -1704,6 +1705,7 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, diff --git a/block/quorum.c b/block/quorum.c index 86e2072dce..40205fb1b3 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, /* We can safely add the child now */ bdrv_ref(child_bs); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); + + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); + if (child == NULL) { + s->next_child_index--; + bdrv_unref(child_bs); + goto out; + } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; +out: bdrv_drained_end(bs); } @@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, + .bdrv_child_perm = bdrv_filter_default_perms, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/block/raw-format.c b/block/raw-format.c index ce34d1b1cd..86fbc657eb 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -467,6 +467,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_create = &raw_create, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/replication.c b/block/replication.c index eff85c77ba..22f170fd33 100644 --- a/block/replication.c +++ b/block/replication.c @@ -644,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->replication_state = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, - replication_done, bs, errp, true); + NULL, replication_done, bs, errp, true); break; default: aio_context_release(aio_context); @@ -660,6 +660,7 @@ BlockDriver bdrv_replication = { .bdrv_open = replication_open, .bdrv_close = replication_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = replication_getlength, .bdrv_co_readv = replication_co_readv, diff --git a/block/sheepdog.c b/block/sheepdog.c index 860ba61502..743471043e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp) int ret; blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); if (blk == NULL) { ret = -EIO; goto out_with_err_set; diff --git a/block/stream.c b/block/stream.c index 1523ba7dfb..0113710845 100644 --- a/block/stream.c +++ b/block/stream.c @@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque) StreamCompleteData *data = opaque; BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; + Error *local_err = NULL; if (!block_job_is_cancelled(&s->common) && data->reached_end && data->ret == 0) { @@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque) } } data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); - bdrv_set_backing_hd(bs, base); + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + goto out; + } } +out: /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { + /* Give up write permissions before making it read-only */ + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); bdrv_reopen(bs, s->bs_flags, NULL); } @@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; int orig_bs_flags; - s = block_job_create(job_id, &stream_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); - if (!s) { - return; - } - /* Make sure that the image is opened in read-write mode */ orig_bs_flags = bdrv_get_flags(bs); if (!(orig_bs_flags & BDRV_O_RDWR)) { if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { - block_job_unref(&s->common); return; } } - /* Block all intermediate nodes between bs and base, because they - * will disappear from the chain after this operation */ + /* Prevent concurrent jobs trying to modify the graph structure here, we + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. */ + s = block_job_create(job_id, &stream_job_driver, bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); + if (!s) { + goto fail; + } + + /* Block all intermediate nodes between bs and base, because they will + * disappear from the chain after this operation. The streaming job reads + * every block only once, assuming that it doesn't change, so block writes + * and resizes. */ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, + &error_abort); } s->base = base; @@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, s->on_error = on_error; trace_stream_start(bs, base, s); block_job_start(&s->common); + return; + +fail: + if (orig_bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } } diff --git a/block/vdi.c b/block/vdi.c index 18b4773aac..9b4f70e977 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -763,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -891,6 +892,7 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = vdi_co_get_block_status, diff --git a/block/vhdx.c b/block/vhdx.c index 9918ee98ff..052a753159 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1859,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1983,6 +1984,7 @@ static BlockDriver bdrv_vhdx = { .bdrv_open = vhdx_open, .bdrv_close = vhdx_close, .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, .bdrv_create = vhdx_create, diff --git a/block/vmdk.c b/block/vmdk.c index 9d68ec5a4e..a9bd22bf93 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1703,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2071,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } new_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (new_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2359,6 +2361,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_open = vmdk_open, .bdrv_check = vmdk_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = vmdk_co_preadv, .bdrv_co_pwritev = vmdk_co_pwritev, .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, diff --git a/block/vpc.c b/block/vpc.c index d0df2a1c54..f591d4be38 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -915,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1067,6 +1068,7 @@ static BlockDriver bdrv_vpc = { .bdrv_open = vpc_open, .bdrv_close = vpc_close, .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vpc_create, .bdrv_co_preadv = vpc_co_preadv, diff --git a/block/vvfat.c b/block/vvfat.c index 7f230be006..aa61c329e7 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3041,7 +3041,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) &error_abort); *(void**) backing->opaque = s; - bdrv_set_backing_hd(s->bs, backing); + bdrv_set_backing_hd(s->bs, backing, &error_abort); bdrv_unref(backing); return 0; @@ -3052,6 +3052,27 @@ err: return ret; } +static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVVVFATState *s = bs->opaque; + + assert(c == s->qcow || role == &child_backing); + + if (c == s->qcow) { + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; + } else { + /* The backing file is there so 'commit' can use it. vvfat doesn't + * access it in any way. */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + } +} + static void vvfat_close(BlockDriverState *bs) { BDRVVVFATState *s = bs->opaque; @@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_file_open = vvfat_open, .bdrv_refresh_limits = vvfat_refresh_limits, .bdrv_close = vvfat_close, + .bdrv_child_perm = vvfat_child_perm, .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev = vvfat_co_pwritev, diff --git a/blockdev.c b/blockdev.c index 2b2f6ceef0..8eb4e84fe0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -52,6 +52,7 @@ #include "sysemu/arch_init.h" #include "qemu/cutils.h" #include "qemu/help_option.h" +#include "qemu/throttle-options.h" static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); @@ -557,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(); + blk = blk_new(0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -1767,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common, if (!state->new_bs->drv->supports_backing) { error_setg(errp, "The snapshot does not support backing images"); + return; + } + + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. */ + bdrv_ref(state->new_bs); + bdrv_append(state->new_bs, state->old_bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } } @@ -1777,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common) bdrv_set_aio_context(state->new_bs, state->aio_context); - /* This removes our old bs and adds the new bs */ - bdrv_append(state->new_bs, state->old_bs); /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ @@ -1793,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); if (state->new_bs) { - bdrv_unref(state->new_bs); + if (state->new_bs->backing) { + bdrv_replace_in_backing_chain(state->new_bs, state->old_bs); + } } } @@ -1804,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common) if (state->aio_context) { bdrv_drained_end(state->old_bs); aio_context_release(state->aio_context); + bdrv_unref(state->new_bs); } } @@ -2310,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id, } if (!locked || force) { - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } if (locked && !force) { @@ -2348,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, Error **errp) { BlockBackend *blk; + Error *local_err = NULL; device = has_device ? device : NULL; id = has_id ? id : NULL; @@ -2371,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, return; } - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } void qmp_x_blockdev_remove_medium(bool has_device, const char *device, @@ -2424,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device, * called at all); therefore, the medium needs to be ejected here. * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } out: @@ -2434,7 +2452,9 @@ out: static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + Error *local_err = NULL; bool has_device; + int ret; /* For BBs without a device, we can exchange the BDS tree at will */ has_device = blk_get_attached_dev(blk); @@ -2454,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, return; } - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + return; + } if (!blk_dev_has_tray(blk)) { /* For tray-less devices, blockdev-close-tray is a no-op (or may not be @@ -2462,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, * slot here. * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. true). */ - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_remove_bs(blk); + return; + } } } @@ -2889,8 +2917,11 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto out; + } /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); @@ -3013,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_top, const char *top, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, + bool has_filter_node_name, const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3028,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, if (!has_speed) { speed = 0; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } /* Important Note: * libvirt relies on the DeviceNotFound error class in order to probe for @@ -3102,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, goto out; } commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, - BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL, - &local_err, false); + BLOCK_JOB_DEFAULT, speed, on_error, + filter_node_name, NULL, NULL, &local_err, false); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { @@ -3111,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, on_error, has_backing_file ? backing_file : NULL, - &local_err); + filter_node_name, &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); @@ -3347,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_on_target_error, BlockdevOnError on_target_error, bool has_unmap, bool unmap, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { @@ -3368,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_unmap) { unmap = true; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3397,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, mirror_start(job_id, bs, target, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, errp); + on_source_error, on_target_error, unmap, filter_node_name, + errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3535,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_source_error, arg->on_source_error, arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, + false, NULL, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3553,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3584,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_source_error, on_source_error, has_on_target_error, on_target_error, true, true, + has_filter_node_name, filter_node_name, &local_err); error_propagate(errp, local_err); @@ -4007,83 +4052,11 @@ QemuOptsList qemu_common_drive_opts = { .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, .help = "open drive file as read-only", - },{ - .name = "throttling.iops-total", - .type = QEMU_OPT_NUMBER, - .help = "limit total I/O operations per second", - },{ - .name = "throttling.iops-read", - .type = QEMU_OPT_NUMBER, - .help = "limit read operations per second", - },{ - .name = "throttling.iops-write", - .type = QEMU_OPT_NUMBER, - .help = "limit write operations per second", - },{ - .name = "throttling.bps-total", - .type = QEMU_OPT_NUMBER, - .help = "limit total bytes per second", - },{ - .name = "throttling.bps-read", - .type = QEMU_OPT_NUMBER, - .help = "limit read bytes per second", - },{ - .name = "throttling.bps-write", - .type = QEMU_OPT_NUMBER, - .help = "limit write bytes per second", - },{ - .name = "throttling.iops-total-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations burst", - },{ - .name = "throttling.iops-read-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations read burst", - },{ - .name = "throttling.iops-write-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations write burst", - },{ - .name = "throttling.bps-total-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes burst", - },{ - .name = "throttling.bps-read-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes read burst", - },{ - .name = "throttling.bps-write-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes write burst", - },{ - .name = "throttling.iops-total-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-total-max burst period, in seconds", - },{ - .name = "throttling.iops-read-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-read-max burst period, in seconds", - },{ - .name = "throttling.iops-write-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-write-max burst period, in seconds", - },{ - .name = "throttling.bps-total-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-total-max burst period, in seconds", - },{ - .name = "throttling.bps-read-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-read-max burst period, in seconds", - },{ - .name = "throttling.bps-write-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-write-max burst period, in seconds", - },{ - .name = "throttling.iops-size", - .type = QEMU_OPT_NUMBER, - .help = "when limiting by iops max size of an I/O in bytes", - },{ + }, + + THROTTLE_OPTS, + + { .name = "throttling.group", .type = QEMU_OPT_STRING, .help = "name of the block throttling group", diff --git a/blockjob.c b/blockjob.c index abee11bb08..69126af97f 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,6 +55,19 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +void block_job_remove_all_bdrv(BlockJob *job) +{ + GSList *l; + for (l = job->nodes; l; l = l->next) { + BdrvChild *c = l->data; + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); + } + g_slist_free(job->nodes); + job->nodes = NULL; +} + +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) { - job->nodes = g_slist_prepend(job->nodes, bs); + BdrvChild *c; + + c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, + job, errp); + if (c == NULL) { + return -EPERM; + } + + job->nodes = g_slist_prepend(job->nodes, c); bdrv_ref(bs); bdrv_op_block_all(bs, job->blocker); + + return 0; } void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp) { BlockBackend *blk; BlockJob *job; + int ret; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); @@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(perm, shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, bs); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { - GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - for (l = job->nodes; l; l = l->next) { - bs = l->data; - bdrv_op_unblock_all(bs, job->blocker); - bdrv_unref(bs); - } - g_slist_free(job->nodes); + block_job_remove_all_bdrv(job); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); @@ -5894,6 +5894,7 @@ case "$target_name" in TARGET_BASE_ARCH=i386 ;; alpha) + mttcg="yes" ;; arm|armeb) TARGET_ARCH=arm @@ -769,14 +769,13 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) pd = iotlbentry->addr & ~TARGET_PAGE_MASK; mr = iotlb_to_region(cpu, pd, iotlbentry->attrs); if (memory_region_is_unassigned(mr)) { - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->do_unassigned_access) { - cc->do_unassigned_access(cpu, addr, false, true, 0, 4); - } else { - report_bad_exec(cpu, addr); - exit(1); - } + cpu_unassigned_access(cpu, addr, false, true, 0, 4); + /* The CPU's unassigned access hook might have longjumped out + * with an exception. If it didn't (or there was no hook) then + * we can't proceed further. + */ + report_bad_exec(cpu, addr); + exit(1); } p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend); return qemu_ram_addr_from_host_nofail(p); diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index fdf40893aa..1e3bd2b8ca 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y CONFIG_A9MPCORE=y CONFIG_A15MPCORE=y +CONFIG_ARM_V7M=y + CONFIG_ARM_GIC=y CONFIG_ARM_GIC_KVM=$(CONFIG_KVM) CONFIG_ARM_TIMER=y diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 485a006aa7..7af14e29aa 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -7492,7 +7492,7 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status) { signed char current_rounding_mode = status->float_rounding_mode; set_float_rounding_mode(float_round_to_zero, status); - int64_t v = float64_to_uint64(a, status); + uint64_t v = float64_to_uint64(a, status); set_float_rounding_mode(current_rounding_mode, status); return v; } diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs index 1b120a4a7d..659df6e187 100644 --- a/fsdev/Makefile.objs +++ b/fsdev/Makefile.objs @@ -5,7 +5,7 @@ common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o else common-obj-y = qemu-fsdev-dummy.o endif -common-obj-y += qemu-fsdev-opts.o +common-obj-y += qemu-fsdev-opts.o qemu-fsdev-throttle.o # Toplevel always builds this; targets without virtio will put it in # common-obj-y diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index a56dc8488d..0844a403dc 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -17,6 +17,7 @@ #include <dirent.h> #include <utime.h> #include <sys/vfs.h> +#include "qemu-fsdev-throttle.h" #define SM_LOCAL_MODE_BITS 0600 #define SM_LOCAL_DIR_MODE_BITS 0700 @@ -74,6 +75,7 @@ typedef struct FsDriverEntry { char *path; int export_flags; FileOperations *ops; + FsThrottle fst; } FsDriverEntry; typedef struct FsContext @@ -83,6 +85,7 @@ typedef struct FsContext int export_flags; struct xattr_operations **xops; struct extended_ops exops; + FsThrottle *fst; /* fs driver specific data */ void *private; } FsContext; diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c index 1dd8c7a24c..bf5713008a 100644 --- a/fsdev/qemu-fsdev-opts.c +++ b/fsdev/qemu-fsdev-opts.c @@ -9,6 +9,7 @@ #include "qemu/config-file.h" #include "qemu/option.h" #include "qemu/module.h" +#include "qemu/throttle-options.h" static QemuOptsList qemu_fsdev_opts = { .name = "fsdev", @@ -39,6 +40,8 @@ static QemuOptsList qemu_fsdev_opts = { .type = QEMU_OPT_NUMBER, }, + THROTTLE_OPTS, + { /*End of list */ } }, }; diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c new file mode 100644 index 0000000000..7ae4e86646 --- /dev/null +++ b/fsdev/qemu-fsdev-throttle.c @@ -0,0 +1,118 @@ +/* + * Fsdev Throttle + * + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH + * + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu-fsdev-throttle.h" +#include "qemu/iov.h" + +static void fsdev_throttle_read_timer_cb(void *opaque) +{ + FsThrottle *fst = opaque; + qemu_co_enter_next(&fst->throttled_reqs[false]); +} + +static void fsdev_throttle_write_timer_cb(void *opaque) +{ + FsThrottle *fst = opaque; + qemu_co_enter_next(&fst->throttled_reqs[true]); +} + +void fsdev_throttle_parse_opts(QemuOpts *opts, FsThrottle *fst, Error **errp) +{ + throttle_config_init(&fst->cfg); + fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.bps-total", 0); + fst->cfg.buckets[THROTTLE_BPS_READ].avg = + qemu_opt_get_number(opts, "throttling.bps-read", 0); + fst->cfg.buckets[THROTTLE_BPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.bps-write", 0); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.iops-total", 0); + fst->cfg.buckets[THROTTLE_OPS_READ].avg = + qemu_opt_get_number(opts, "throttling.iops-read", 0); + fst->cfg.buckets[THROTTLE_OPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.iops-write", 0); + + fst->cfg.buckets[THROTTLE_BPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.bps-total-max", 0); + fst->cfg.buckets[THROTTLE_BPS_READ].max = + qemu_opt_get_number(opts, "throttling.bps-read-max", 0); + fst->cfg.buckets[THROTTLE_BPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.bps-write-max", 0); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.iops-total-max", 0); + fst->cfg.buckets[THROTTLE_OPS_READ].max = + qemu_opt_get_number(opts, "throttling.iops-read-max", 0); + fst->cfg.buckets[THROTTLE_OPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.iops-write-max", 0); + + fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1); + fst->cfg.buckets[THROTTLE_BPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1); + fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1); + fst->cfg.op_size = + qemu_opt_get_number(opts, "throttling.iops-size", 0); + + throttle_is_valid(&fst->cfg, errp); +} + +void fsdev_throttle_init(FsThrottle *fst) +{ + if (throttle_enabled(&fst->cfg)) { + throttle_init(&fst->ts); + throttle_timers_init(&fst->tt, + qemu_get_aio_context(), + QEMU_CLOCK_REALTIME, + fsdev_throttle_read_timer_cb, + fsdev_throttle_write_timer_cb, + fst); + throttle_config(&fst->ts, &fst->tt, &fst->cfg); + qemu_co_queue_init(&fst->throttled_reqs[0]); + qemu_co_queue_init(&fst->throttled_reqs[1]); + } +} + +void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write, + struct iovec *iov, int iovcnt) +{ + if (throttle_enabled(&fst->cfg)) { + if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) || + !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) { + qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL); + } + + throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt)); + + if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) && + !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) { + qemu_co_queue_next(&fst->throttled_reqs[is_write]); + } + } +} + +void fsdev_throttle_cleanup(FsThrottle *fst) +{ + if (throttle_enabled(&fst->cfg)) { + throttle_timers_destroy(&fst->tt); + } +} diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h new file mode 100644 index 0000000000..e418643ccb --- /dev/null +++ b/fsdev/qemu-fsdev-throttle.h @@ -0,0 +1,39 @@ +/* + * Fsdev Throttle + * + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH + * + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ + +#ifndef _FSDEV_THROTTLE_H +#define _FSDEV_THROTTLE_H + +#include "block/aio.h" +#include "qemu/main-loop.h" +#include "qemu/coroutine.h" +#include "qapi/error.h" +#include "qemu/throttle.h" + +typedef struct FsThrottle { + ThrottleState ts; + ThrottleTimers tt; + ThrottleConfig cfg; + CoQueue throttled_reqs[2]; +} FsThrottle; + +void fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **); + +void fsdev_throttle_init(FsThrottle *); + +void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool , + struct iovec *, int); + +void fsdev_throttle_cleanup(FsThrottle *); +#endif /* _FSDEV_THROTTLE_H */ @@ -2045,13 +2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; + int ret; blk = blk_by_name(device); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk = local_blk = blk_new(); - blk_insert_bs(blk, bs); + blk = local_blk = blk_new(0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } } else { goto fail; } @@ -2060,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) aio_context = blk_get_aio_context(blk); aio_context_acquire(aio_context); + /* + * Notably absent: Proper permission management. This is sad, but it seems + * almost impossible to achieve without changing the semantics and thereby + * limiting the use cases of the qemu-io HMP command. + * + * In an ideal world we would unconditionally create a new BlockBackend for + * qemuio_command(), but we have commands like 'reopen' and want them to + * take effect on the exact BlockBackend whose name the user passed instead + * of just on a temporary copy of it. + * + * Another problem is that deleting the temporary BlockBackend involves + * draining all requests on it first, but some qemu-iotests cases want to + * issue multiple aio_read/write requests and expect them to complete in + * the background while the monitor has already returned. + * + * This is also what prevents us from saving the original permissions and + * restoring them later: We can't revoke permissions until all requests + * have completed, and we don't know when that is nor can we really let + * anything else run before we have revoken them to avoid race conditions. + * + * What happens now is that command() in qemu-io-cmds.c can extend the + * permissions if necessary for the qemu-io command. And they simply stay + * extended, possibly resulting in a read-only guest device keeping write + * permissions. Ugly, but it appears to be the lesser evil. + */ qemuio_command(blk, command); aio_context_release(aio_context); diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 7de07e1ba6..f22a3c3654 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -13,7 +13,9 @@ #include "qemu/osdep.h" #include "9p.h" +#include "9p-local.h" #include "9p-xattr.h" +#include "9p-util.h" #include "fsdev/qemu-fsdev.h" /* local_ops */ #include <arpa/inet.h> #include <pwd.h> @@ -43,40 +45,62 @@ #define BTRFS_SUPER_MAGIC 0x9123683E #endif -#define VIRTFS_META_DIR ".virtfs_metadata" +typedef struct { + int mountfd; +} LocalData; -static char *local_mapped_attr_path(FsContext *ctx, const char *path) +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode) { - int dirlen; - const char *name = strrchr(path, '/'); - if (name) { - dirlen = name - path; - ++name; - } else { - name = path; - dirlen = 0; + LocalData *data = fs_ctx->private; + + /* All paths are relative to the path data->mountfd points to */ + while (*path == '/') { + path++; } - return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root, - dirlen, path, VIRTFS_META_DIR, name); + + return relative_openat_nofollow(data->mountfd, path, flags, mode); +} + +int local_opendir_nofollow(FsContext *fs_ctx, const char *path) +{ + return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0); +} + +static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd, + const char *npath) +{ + int serrno = errno; + renameat(odirfd, opath, ndirfd, npath); + errno = serrno; } -static FILE *local_fopen(const char *path, const char *mode) +static void unlinkat_preserve_errno(int dirfd, const char *path, int flags) +{ + int serrno = errno; + unlinkat(dirfd, path, flags); + errno = serrno; +} + +#define VIRTFS_META_DIR ".virtfs_metadata" + +static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { int fd, o_mode = 0; FILE *fp; - int flags = O_NOFOLLOW; + int flags; /* * only supports two modes */ if (mode[0] == 'r') { - flags |= O_RDONLY; + flags = O_RDONLY; } else if (mode[0] == 'w') { - flags |= O_WRONLY | O_TRUNC | O_CREAT; + flags = O_WRONLY | O_TRUNC | O_CREAT; o_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; } else { return NULL; } - fd = open(path, flags, o_mode); + fd = openat_file(dirfd, name, flags, o_mode); if (fd == -1) { return NULL; } @@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode) } #define ATTR_MAX 100 -static void local_mapped_file_attr(FsContext *ctx, const char *path, +static void local_mapped_file_attr(int dirfd, const char *name, struct stat *stbuf) { FILE *fp; char buf[ATTR_MAX]; - char *attr_path; + int map_dirfd; - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); - g_free(attr_path); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } + + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); if (!fp) { return; } @@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path, static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) { - int err; - char *buffer; - char *path = fs_path->data; + int err = -1; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; - buffer = rpath(fs_ctx, path); - err = lstat(buffer, stbuf); + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW); if (err) { goto err_out; } @@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) gid_t tmp_gid; mode_t tmp_mode; dev_t tmp_dev; - if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (getxattr(buffer, "user.virtfs.mode", - &tmp_mode, sizeof(mode_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev, + sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - local_mapped_file_attr(fs_ctx, path, stbuf); + local_mapped_file_attr(dirfd, name, stbuf); } err_out: - g_free(buffer); - return err; -} - -static int local_create_mapped_attr_dir(FsContext *ctx, const char *path) -{ - int err; - char *attr_dir; - char *tmp_path = g_strdup(path); - - attr_dir = g_strdup_printf("%s/%s/%s", - ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR); - - err = mkdir(attr_dir, 0700); - if (err < 0 && errno == EEXIST) { - err = 0; - } - g_free(attr_dir); - g_free(tmp_path); + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } -static int local_set_mapped_file_attr(FsContext *ctx, - const char *path, FsCred *credp) +static int local_set_mapped_file_attrat(int dirfd, const char *name, + FsCred *credp) { FILE *fp; - int ret = 0; + int ret; char buf[ATTR_MAX]; - char *attr_path; int uid = -1, gid = -1, mode = -1, rdev = -1; + int map_dirfd; + + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return -1; + } + + fp = local_fopenat(map_dirfd, name, "r"); if (!fp) { - goto create_map_file; + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { if (!strncmp(buf, "virtfs.uid", 10)) { - uid = atoi(buf+11); + uid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.gid", 10)) { - gid = atoi(buf+11); + gid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.mode", 11)) { - mode = atoi(buf+12); + mode = atoi(buf + 12); } else if (!strncmp(buf, "virtfs.rdev", 11)) { - rdev = atoi(buf+12); + rdev = atoi(buf + 12); } memset(buf, 0, ATTR_MAX); } fclose(fp); - goto update_map_file; - -create_map_file: - ret = local_create_mapped_attr_dir(ctx, path); - if (ret < 0) { - goto err_out; - } update_map_file: - fp = local_fopen(attr_path, "w"); + fp = local_fopenat(map_dirfd, name, "w"); + close_preserve_errno(map_dirfd); if (!fp) { - ret = -1; - goto err_out; + return -1; } if (credp->fc_uid != -1) { @@ -230,7 +259,6 @@ update_map_file: rdev = credp->fc_rdev; } - if (uid != -1) { fprintf(fp, "virtfs.uid=%d\n", uid); } @@ -245,39 +273,71 @@ update_map_file: } fclose(fp); -err_out: - g_free(attr_path); + return 0; +} + +static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) +{ + int fd, ret; + + /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). + * Unfortunately, the linux kernel doesn't implement it yet. As an + * alternative, let's open the file and use fchmod() instead. This + * may fail depending on the permissions of the file, but it is the + * best we can do to avoid TOCTTOU. We first try to open read-only + * in case name points to a directory. If that fails, we try write-only + * in case name doesn't point to a directory. + */ + fd = openat_file(dirfd, name, O_RDONLY, 0); + if (fd == -1) { + /* In case the file is writable-only and isn't a directory. */ + if (errno == EACCES) { + fd = openat_file(dirfd, name, O_WRONLY, 0); + } + if (fd == -1 && errno == EISDIR) { + errno = EACCES; + } + } + if (fd == -1) { + return -1; + } + ret = fchmod(fd, mode); + close_preserve_errno(fd); return ret; } -static int local_set_xattr(const char *path, FsCred *credp) +static int local_set_xattrat(int dirfd, const char *path, FsCred *credp) { int err; if (credp->fc_uid != -1) { uint32_t tmp_uid = cpu_to_le32(credp->fc_uid); - err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t), 0); if (err) { return err; } } if (credp->fc_gid != -1) { uint32_t tmp_gid = cpu_to_le32(credp->fc_gid); - err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t), 0); if (err) { return err; } } if (credp->fc_mode != -1) { uint32_t tmp_mode = cpu_to_le32(credp->fc_mode); - err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t), 0); if (err) { return err; } } if (credp->fc_rdev != -1) { uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev); - err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev, + sizeof(dev_t), 0); if (err) { return err; } @@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp) return 0; } -static int local_post_create_passthrough(FsContext *fs_ctx, const char *path, - FsCred *credp) +static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd, + const char *name, FsCred *credp) { - char *buffer; - - buffer = rpath(fs_ctx, path); - if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) { + if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - goto err; + return -1; } } - if (chmod(buffer, credp->fc_mode & 07777) < 0) { - goto err; - } - - g_free(buffer); - return 0; -err: - g_free(buffer); - return -1; + return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777); } static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path, char *buf, size_t bufsz) { ssize_t tsize = -1; - char *buffer; - char *path = fs_path->data; if ((fs_ctx->export_flags & V9FS_SM_MAPPED) || (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) { int fd; - buffer = rpath(fs_ctx, path); - fd = open(buffer, O_RDONLY | O_NOFOLLOW); - g_free(buffer); + + fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0); if (fd == -1) { return -1; } do { tsize = read(fd, (void *)buf, bufsz); } while (tsize == -1 && errno == EINTR); - close(fd); + close_preserve_errno(fd); } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - tsize = readlink(buffer, buf, bufsz); - g_free(buffer); + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + tsize = readlinkat(dirfd, name, buf, bufsz); + close_preserve_errno(dirfd); + out: + g_free(name); + g_free(dirpath); } return tsize; } @@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs) static int local_open(FsContext *ctx, V9fsPath *fs_path, int flags, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int fd; - buffer = rpath(ctx, path); - fs->fd = open(buffer, flags | O_NOFOLLOW); - g_free(buffer); + fd = local_open_nofollow(ctx, fs_path->data, flags, 0); + if (fd == -1) { + return -1; + } + fs->fd = fd; return fs->fd; } static int local_opendir(FsContext *ctx, V9fsPath *fs_path, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int dirfd; + DIR *stream; + + dirfd = local_opendir_nofollow(ctx, fs_path->data); + if (dirfd == -1) { + return -1; + } - buffer = rpath(ctx, path); - fs->dir.stream = opendir(buffer); - g_free(buffer); - if (!fs->dir.stream) { + stream = fdopendir(dirfd); + if (!stream) { return -1; } + fs->dir.stream = stream; return 0; } @@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs, static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = chmod(buffer, credp->fc_mode); - g_free(buffer); + ret = local_set_mapped_file_attrat(dirfd, name, credp); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + ret = fchmodat_nofollow(dirfd, name, credp->fc_mode); } + close_preserve_errno(dirfd); + +out: + g_free(dirpath); + g_free(name); return ret; } static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); if (err == -1) { goto out; } - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); - if (err == -1) { - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, credp->fc_mode, credp->fc_rdev); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); if (err == -1) { goto out; } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); - if (err == -1) { - goto out; + credp->fc_mode = credp->fc_mode | S_IFDIR; + + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, credp->fc_mode); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mkdirat(dirfd, name, credp->fc_mode); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, int flags, FsCred *credp, V9fsFidOpenState *fs) { - char *path; int fd = -1; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; /* * Mark all the open to not follow symlinks */ flags |= O_NOFOLLOW; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set cleint credentials in xattr */ - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + /* Set cleint credentials in xattr */ + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set client credentials in .virtfs_metadata directory files */ - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, credp->fc_mode); + fd = openat_file(dirfd, name, flags, credp->fc_mode); if (fd == -1) { - err = fd; goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } @@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, goto out; err_end: - close(fd); - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, + flags & O_DIRECTORY ? AT_REMOVEDIR : 0); + close_preserve_errno(fd); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, V9fsPath *dir_path, const char *name, FsCred *credp) { int err = -1; - int serrno = 0; - char *newpath; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - newpath = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { int fd; ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); + + fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, + SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } /* Write the oldpath (target) to the file. */ @@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, do { write_size = write(fd, (void *)oldpath, oldpath_size); } while (write_size == -1 && errno == EINTR); + close_preserve_errno(fd); if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; goto err_end; } - close(fd); /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - int fd; - ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - /* Write the oldpath (target) to the file. */ - oldpath_size = strlen(oldpath); - do { - write_size = write(fd, (void *)oldpath, oldpath_size); - } while (write_size == -1 && errno == EINTR); + credp->fc_mode = credp->fc_mode | S_IFLNK; - if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - close(fd); - /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_mapped_file_attr(fs_ctx, newpath, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, newpath); - err = symlink(oldpath, buffer); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = symlinkat(oldpath, dirfd, name); if (err) { goto out; } - err = lchown(buffer, credp->fc_uid, credp->fc_gid); + err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); if (err == -1) { /* * If we fail to change ownership and if we are * using security model none. Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - serrno = errno; goto err_end; - } else + } else { err = 0; + } } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_link(FsContext *ctx, V9fsPath *oldpath, V9fsPath *dirpath, const char *name) { - int ret; - V9fsString newpath; - char *buffer, *buffer1; + char *odirpath = g_path_get_dirname(oldpath->data); + char *oname = g_path_get_basename(oldpath->data); + int ret = -1; + int odirfd, ndirfd; - v9fs_string_init(&newpath); - v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name); + odirfd = local_opendir_nofollow(ctx, odirpath); + if (odirfd == -1) { + goto out; + } + + ndirfd = local_opendir_nofollow(ctx, dirpath->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + goto out; + } - buffer = rpath(ctx, oldpath->data); - buffer1 = rpath(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + ret = linkat(odirfd, oname, ndirfd, name, 0); + if (ret < 0) { + goto out_close; + } /* now link the virtfs_metadata files */ - if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) { - /* Link the .virtfs_metadata files. Create the metada directory */ - ret = local_create_mapped_attr_dir(ctx, newpath.data); - if (ret < 0) { - goto err_out; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_link; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; } - buffer = local_mapped_attr_path(ctx, oldpath->data); - buffer1 = local_mapped_attr_path(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + + ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); if (ret < 0 && errno != ENOENT) { - goto err_out; + goto err_undo_link; } - } -err_out: - v9fs_string_free(&newpath); - return ret; -} -static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) -{ - char *buffer; - int ret; - char *path = fs_path->data; + ret = 0; + } + goto out_close; - buffer = rpath(ctx, path); - ret = truncate(buffer, size); - g_free(buffer); +err: + ret = -1; +err_undo_link: + unlinkat_preserve_errno(ndirfd, name, 0); +out_close: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); +out: + g_free(oname); + g_free(odirpath); return ret; } -static int local_rename(FsContext *ctx, const char *oldpath, - const char *newpath) +static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) { - int err; - char *buffer, *buffer1; + int fd, ret; - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = local_create_mapped_attr_dir(ctx, newpath); - if (err < 0) { - return err; - } - /* rename the .virtfs_metadata files */ - buffer = local_mapped_attr_path(ctx, oldpath); - buffer1 = local_mapped_attr_path(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - if (err < 0 && errno != ENOENT) { - return err; - } + fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0); + if (fd == -1) { + return -1; } - - buffer = rpath(ctx, oldpath); - buffer1 = rpath(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - return err; + ret = ftruncate(fd, size); + close_preserve_errno(fd); + return ret; } static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if ((credp->fc_uid == -1 && credp->fc_gid == -1) || (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = lchown(buffer, credp->fc_uid, credp->fc_gid); - g_free(buffer); + ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); + ret = local_set_mapped_file_attrat(dirfd, name, credp); } + + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return ret; } static int local_utimensat(FsContext *s, V9fsPath *fs_path, const struct timespec *buf) { - char *buffer; - int ret; - char *path = fs_path->data; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd, ret = -1; + + dirfd = local_opendir_nofollow(s, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(s, path); - ret = qemu_utimens(buffer, buf); - g_free(buffer); + ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(name); return ret; } -static int local_remove(FsContext *ctx, const char *path) +static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, + int flags) { - int err; - struct stat stbuf; - char *buffer; + int ret = -1; if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(ctx, path); - err = lstat(buffer, &stbuf); - g_free(buffer); - if (err) { - goto err_out; - } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - if (S_ISDIR(stbuf.st_mode)) { - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - path, VIRTFS_META_DIR); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + int map_dirfd; + + if (flags == AT_REMOVEDIR) { + int fd; + + fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH); + if (fd == -1) { + goto err_out; + } + /* + * If directory remove .virtfs_metadata contained in the + * directory + */ + ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); + close_preserve_errno(fd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path) } /* * Now remove the name from parent directory - * .virtfs_metadata directory + * .virtfs_metadata directory. */ - buffer = local_mapped_attr_path(ctx, path); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path) } } - buffer = rpath(ctx, path); - err = remove(buffer); - g_free(buffer); + ret = unlinkat(dirfd, name, flags); +err_out: + return ret; +} + +static int local_remove(FsContext *ctx, const char *path) +{ + struct stat stbuf; + char *dirpath = g_path_get_dirname(path); + char *name = g_path_get_basename(path); + int flags = 0; + int dirfd; + int err = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd) { + goto out; + } + + if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + goto err_out; + } + + if (S_ISDIR(stbuf.st_mode)) { + flags |= AT_REMOVEDIR; + } + + err = local_unlinkat_common(ctx, dirfd, name, flags); err_out: + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } @@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type, static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf) { - char *buffer; - int ret; - char *path = fs_path->data; + int fd, ret; - buffer = rpath(s, path); - ret = statfs(buffer, stbuf); - g_free(buffer); + fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0); + ret = fstatfs(fd, stbuf); + close_preserve_errno(fd); return ret; } @@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir, const char *new_name) { int ret; - V9fsString old_full_name, new_full_name; + int odirfd, ndirfd; + + odirfd = local_opendir_nofollow(ctx, olddir->data); + if (odirfd == -1) { + return -1; + } + + ndirfd = local_opendir_nofollow(ctx, newdir->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + return -1; + } + + ret = renameat(odirfd, old_name, ndirfd, new_name); + if (ret < 0) { + goto out; + } + + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; - v9fs_string_init(&old_full_name); - v9fs_string_init(&new_full_name); + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_rename; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } - v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name); - v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name); + /* rename the .virtfs_metadata files */ + ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_undo_rename; + } - ret = local_rename(ctx, old_full_name.data, new_full_name.data); - v9fs_string_free(&old_full_name); - v9fs_string_free(&new_full_name); + ret = 0; + } + goto out; + +err: + ret = -1; +err_undo_rename: + renameat_preserve_errno(ndirfd, new_name, odirfd, old_name); +out: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); return ret; } +static void v9fs_path_init_dirname(V9fsPath *path, const char *str) +{ + path->data = g_path_get_dirname(str); + path->size = strlen(path->data) + 1; +} + +static int local_rename(FsContext *ctx, const char *oldpath, + const char *newpath) +{ + int err; + char *oname = g_path_get_basename(oldpath); + char *nname = g_path_get_basename(newpath); + V9fsPath olddir, newdir; + + v9fs_path_init_dirname(&olddir, oldpath); + v9fs_path_init_dirname(&newdir, newpath); + + err = local_renameat(ctx, &olddir, oname, &newdir, nname); + + v9fs_path_free(&newdir); + v9fs_path_free(&olddir); + g_free(nname); + g_free(oname); + + return err; +} + static int local_unlinkat(FsContext *ctx, V9fsPath *dir, const char *name, int flags) { int ret; - V9fsString fullname; - char *buffer; + int dirfd; - v9fs_string_init(&fullname); - - v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name); - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - if (flags == AT_REMOVEDIR) { - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - fullname.data, VIRTFS_META_DIR); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } - } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ - buffer = local_mapped_attr_path(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } + dirfd = local_opendir_nofollow(ctx, dir->data); + if (dirfd == -1) { + return -1; } - /* Remove the name finally */ - buffer = rpath(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); -err_out: - v9fs_string_free(&fullname); + ret = local_unlinkat_common(ctx, dirfd, name, flags); + close_preserve_errno(dirfd); return ret; } @@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, static int local_init(FsContext *ctx) { - int err = 0; struct statfs stbuf; + LocalData *data = g_malloc(sizeof(*data)); + + data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); + if (data->mountfd == -1) { + goto err; + } + +#ifdef FS_IOC_GETVERSION + /* + * use ioc_getversion only if the ioctl is definied + */ + if (fstatfs(data->mountfd, &stbuf) < 0) { + close_preserve_errno(data->mountfd); + goto err; + } + switch (stbuf.f_type) { + case EXT2_SUPER_MAGIC: + case BTRFS_SUPER_MAGIC: + case REISERFS_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + ctx->exops.get_st_gen = local_ioc_getversion; + break; + } +#endif if (ctx->export_flags & V9FS_SM_PASSTHROUGH) { ctx->xops = passthrough_xattr_ops; @@ -1185,29 +1277,28 @@ static int local_init(FsContext *ctx) ctx->xops = passthrough_xattr_ops; } ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT; -#ifdef FS_IOC_GETVERSION - /* - * use ioc_getversion only if the iocl is definied - */ - err = statfs(ctx->fs_root, &stbuf); - if (!err) { - switch (stbuf.f_type) { - case EXT2_SUPER_MAGIC: - case BTRFS_SUPER_MAGIC: - case REISERFS_SUPER_MAGIC: - case XFS_SUPER_MAGIC: - ctx->exops.get_st_gen = local_ioc_getversion; - break; - } - } -#endif - return err; + + ctx->private = data; + return 0; + +err: + g_free(data); + return -1; +} + +static void local_cleanup(FsContext *ctx) +{ + LocalData *data = ctx->private; + + close(data->mountfd); + g_free(data); } static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) { const char *sec_model = qemu_opt_get(opts, "security_model"); const char *path = qemu_opt_get(opts, "path"); + Error *err = NULL; if (!sec_model) { error_report("Security model not specified, local fs needs security model"); @@ -1236,6 +1327,13 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) error_report("fsdev: No path specified"); return -1; } + + fsdev_throttle_parse_opts(opts, &fse->fst, &err); + if (err) { + error_reportf_err(err, "Throttle configuration is not valid: "); + return -1; + } + fse->path = g_strdup(path); return 0; @@ -1244,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) FileOperations local_ops = { .parse_opts = local_parse_opts, .init = local_init, + .cleanup = local_cleanup, .lstat = local_lstat, .readlink = local_readlink, .close = local_close, diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h new file mode 100644 index 0000000000..32c72749d9 --- /dev/null +++ b/hw/9pfs/9p-local.h @@ -0,0 +1,20 @@ +/* + * 9p local backend utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_LOCAL_H +#define QEMU_9P_LOCAL_H + +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode); +int local_opendir_nofollow(FsContext *fs_ctx, const char *path); + +#endif diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index ec003181cd..bbf89064f7 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -25,13 +25,7 @@ static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size); } static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, @@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size, + flags); } static int mp_pacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_ACCESS); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size); } static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, @@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size, + flags); } static int mp_dacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_DEFAULT); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c new file mode 100644 index 0000000000..fdb4d57376 --- /dev/null +++ b/hw/9pfs/9p-util.c @@ -0,0 +1,69 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode) +{ + int fd; + + fd = dup(dirfd); + if (fd == -1) { + return -1; + } + + while (*path) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(path[0] != '/'); + + head = g_strdup(path); + c = strchr(path, '/'); + if (c) { + head[c - path] = 0; + next_fd = openat_dir(fd, head); + } else { + next_fd = openat_file(fd, head, flags, mode); + } + g_free(head); + if (next_fd == -1) { + close_preserve_errno(fd); + return -1; + } + close(fd); + fd = next_fd; + + if (!c) { + break; + } + path = c + 1; + } + + return fd; +} + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lgetxattr(proc_path, name, value, size); + g_free(proc_path); + return ret; +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h new file mode 100644 index 0000000000..091f3ce88e --- /dev/null +++ b/hw/9pfs/9p-util.h @@ -0,0 +1,54 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_UTIL_H +#define QEMU_9P_UTIL_H + +static inline void close_preserve_errno(int fd) +{ + int serrno = errno; + close(fd); + errno = serrno; +} + +static inline int openat_dir(int dirfd, const char *name) +{ + return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH); +} + +static inline int openat_file(int dirfd, const char *name, int flags, + mode_t mode) +{ + int fd, serrno, ret; + + fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, + mode); + if (fd == -1) { + return -1; + } + + serrno = errno; + /* O_NONBLOCK was only needed to open the file. Let's drop it. */ + ret = fcntl(fd, F_SETFL, flags); + assert(!ret); + errno = serrno; + return fd; +} + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode); +ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size); +int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size, int flags); + +#endif diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index f87530c8b5..2c90817b75 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -20,9 +20,6 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, errno = ENOATTR; return -1; } - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, name, value, size); } static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, @@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, name, value, size, flags); } static int mp_user_removexattr(FsContext *ctx, const char *path, const char *name) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, name); - g_free(buffer); - return ret; + return local_removexattr_nofollow(ctx, path, name); } XattrOperations mapped_user_xattr = { diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index 5d8595ed93..eec160b3c2 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -15,6 +15,8 @@ #include "9p.h" #include "fsdev/file-op-9p.h" #include "9p-xattr.h" +#include "9p-util.h" +#include "9p-local.h" static XattrOperations *get_xattr_operations(XattrOperations **h, @@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path, return name_size; } +static ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = llistxattr(proc_path, list, size); + g_free(proc_path); + return ret; +} /* * Get the list and pass to each layer to find out whether @@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, size_t vsize) { ssize_t size = 0; - char *buffer; void *ovalue = value; XattrOperations *xops; char *orig_value, *orig_value_start; ssize_t xattr_len, parsed_len = 0, attr_len; + char *dirpath, *name; + int dirfd; /* Get the actual len */ - buffer = rpath(ctx, path); - xattr_len = llistxattr(buffer, value, 0); + dirpath = g_path_get_dirname(path); + dirfd = local_opendir_nofollow(ctx, dirpath); + g_free(dirpath); + if (dirfd == -1) { + return -1; + } + + name = g_path_get_basename(path); + xattr_len = flistxattrat_nofollow(dirfd, name, value, 0); if (xattr_len <= 0) { - g_free(buffer); + g_free(name); + close_preserve_errno(dirfd); return xattr_len; } /* Now fetch the xattr and find the actual size */ orig_value = g_malloc(xattr_len); - xattr_len = llistxattr(buffer, orig_value, xattr_len); - g_free(buffer); + xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len); + g_free(name); + close_preserve_errno(dirfd); + if (xattr_len < 0) { + return -1; + } /* store the orig pointer */ orig_value_start = orig_value; @@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx, } +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fgetxattrat_nofollow(dirfd, filename, name, value, size); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + return local_getxattr_nofollow(ctx, path, name, value, size); +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lsetxattr(proc_path, name, value, size, flags); + g_free(proc_path); + return ret; +} + +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags) +{ + return local_setxattr_nofollow(ctx, path, name, value, size, flags); +} + +static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lremovexattr(proc_path, name); + g_free(proc_path); + return ret; +} + +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fremovexattrat_nofollow(dirfd, filename, name); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_removexattr(FsContext *ctx, const char *path, const char *name) +{ + return local_removexattr_nofollow(ctx, path, name); +} + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + errno = ENOTSUP; + return -1; +} + +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags) +{ + errno = ENOTSUP; + return -1; +} + +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size) +{ + return 0; +} + +int notsup_removexattr(FsContext *ctx, const char *path, const char *name) +{ + errno = ENOTSUP; + return -1; +} + XattrOperations *mapped_xattr_ops[] = { &mapped_user_xattr, &mapped_pacl_xattr, diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h index a853ea641c..0d83996575 100644 --- a/hw/9pfs/9p-xattr.h +++ b/hw/9pfs/9p-xattr.h @@ -29,6 +29,13 @@ typedef struct xattr_operations const char *path, const char *name); } XattrOperations; +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size); +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags); +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name); extern XattrOperations mapped_user_xattr; extern XattrOperations passthrough_user_xattr; @@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags); int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name); + ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value, size_t size); - -static inline ssize_t pt_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, size_t size) -{ - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; -} - -static inline int pt_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; -} - -static inline int pt_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lremovexattr(path, name); - g_free(buffer); - return ret; -} - -static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size) -{ - errno = ENOTSUP; - return -1; -} - -static inline int notsup_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - errno = ENOTSUP; - return -1; -} - -static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path, - char *name, void *value, size_t size) -{ - return 0; -} - -static inline int notsup_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - errno = ENOTSUP; - return -1; -} +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags); +int pt_removexattr(FsContext *ctx, const char *path, const char *name); + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags); +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size); +int notsup_removexattr(FsContext *ctx, const char *path, const char *name); #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 3af1c93dc8..76c9247c77 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -3010,7 +3010,6 @@ out_nofid: */ static void coroutine_fn v9fs_lock(void *opaque) { - int8_t status; V9fsFlock flock; size_t offset = 7; struct stat stbuf; @@ -3018,7 +3017,6 @@ static void coroutine_fn v9fs_lock(void *opaque) int32_t fid, err = 0; V9fsPDU *pdu = opaque; - status = P9_LOCK_ERROR; v9fs_string_init(&flock.client_id); err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type, &flock.flags, &flock.start, &flock.length, @@ -3044,15 +3042,15 @@ static void coroutine_fn v9fs_lock(void *opaque) if (err < 0) { goto out; } - status = P9_LOCK_SUCCESS; + err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS); + if (err < 0) { + goto out; + } + err += offset; + trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS); out: put_fid(pdu, fidp); out_nofid: - err = pdu_marshal(pdu, offset, "b", status); - if (err > 0) { - err += offset; - } - trace_v9fs_lock_return(pdu->tag, pdu->id, status); pdu_complete(pdu, err); v9fs_string_free(&flock.client_id); } @@ -3531,6 +3529,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) error_setg(errp, "share path %s is not a directory", fse->path); goto out; } + + s->ctx.fst = &fse->fst; + fsdev_throttle_init(s->ctx.fst); + v9fs_path_free(&path); rc = 0; @@ -3551,6 +3553,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) if (s->ops->cleanup) { s->ops->cleanup(&s->ctx); } + fsdev_throttle_cleanup(s->ctx.fst); g_free(s->tag); g_free(s->ctx.fs_root); } diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index da0ae0cfdb..32197e6671 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y = 9p.o +common-obj-y = 9p.o 9p-util.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 120e267108..88791bc327 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -247,6 +247,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } + fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset); @@ -266,6 +267,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } + fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset); diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c index 8ce7daf23a..b4adac88cd 100644 --- a/hw/acpi/tco.c +++ b/hw/acpi/tco.c @@ -49,6 +49,7 @@ static inline void tco_timer_reload(TCOIORegs *tr) static inline void tco_timer_stop(TCOIORegs *tr) { tr->expire_time = -1; + timer_del(tr->tco_timer); } static void tco_timer_expired(void *opaque) diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index 0c9ca7bfa0..c8a11f2b53 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "hw/arm/armv7m.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -17,147 +18,260 @@ #include "elf.h" #include "sysemu/qtest.h" #include "qemu/error-report.h" +#include "exec/address-spaces.h" /* Bitbanded IO. Each word corresponds to a single bit. */ /* Get the byte address of the real memory for a bitband access. */ -static inline uint32_t bitband_addr(void * opaque, uint32_t addr) +static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset) { - uint32_t res; - - res = *(uint32_t *)opaque; - res |= (addr & 0x1ffffff) >> 5; - return res; - + return s->base | (offset & 0x1ffffff) >> 5; } -static uint32_t bitband_readb(void *opaque, hwaddr offset) +static MemTxResult bitband_read(void *opaque, hwaddr offset, + uint64_t *data, unsigned size, MemTxAttrs attrs) { - uint8_t v; - cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1); - return (v & (1 << ((offset >> 2) & 7))) != 0; + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1; + *data = bit; + return MEMTX_OK; } -static void bitband_writeb(void *opaque, hwaddr offset, - uint32_t value) +static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size, MemTxAttrs attrs) { - uint32_t addr; - uint8_t mask; - uint8_t v; - addr = bitband_addr(opaque, offset); - mask = (1 << ((offset >> 2) & 7)); - cpu_physical_memory_read(addr, &v, 1); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 1); + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = 1 << (bitpos & 7); + if (value & 1) { + buf[bitpos >> 3] |= bit; + } else { + buf[bitpos >> 3] &= ~bit; + } + return address_space_write(s->source_as, addr, attrs, buf, size); } -static uint32_t bitband_readw(void *opaque, hwaddr offset) +static const MemoryRegionOps bitband_ops = { + .read_with_attrs = bitband_read, + .write_with_attrs = bitband_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 1, + .impl.max_access_size = 4, + .valid.min_access_size = 1, + .valid.max_access_size = 4, +}; + +static void bitband_init(Object *obj) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - return (v & mask) != 0; + BitBandState *s = BITBAND(obj); + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + + object_property_add_link(obj, "source-memory", + TYPE_MEMORY_REGION, + (Object **)&s->source_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init_io(&s->iomem, obj, &bitband_ops, s, + "bitband", 0x02000000); + sysbus_init_mmio(dev, &s->iomem); } -static void bitband_writew(void *opaque, hwaddr offset, - uint32_t value) +static void bitband_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 2); + BitBandState *s = BITBAND(dev); + + if (!s->source_memory) { + error_setg(errp, "source-memory property not set"); + return; + } + + s->source_as = address_space_init_shareable(s->source_memory, + "bitband-source"); } -static uint32_t bitband_readl(void *opaque, hwaddr offset) +/* Board init. */ + +static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = { + 0x20000000, 0x40000000 +}; + +static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = { + 0x22000000, 0x42000000 +}; + +static void armv7m_instance_init(Object *obj) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - return (v & mask) != 0; + ARMv7MState *s = ARMV7M(obj); + int i; + + /* Can't init the cpu here, we don't yet know which model to use */ + + object_property_add_link(obj, "memory", + TYPE_MEMORY_REGION, + (Object **)&s->board_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX); + + object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic"); + qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default()); + object_property_add_alias(obj, "num-irq", + OBJECT(&s->nvic), "num-irq", &error_abort); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND); + qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default()); + } } -static void bitband_writel(void *opaque, hwaddr offset, - uint32_t value) +static void armv7m_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 4); -} + ARMv7MState *s = ARMV7M(dev); + SysBusDevice *sbd; + Error *err = NULL; + int i; + char **cpustr; + ObjectClass *oc; + const char *typename; + CPUClass *cc; + + if (!s->board_memory) { + error_setg(errp, "memory property was not set"); + return; + } -static const MemoryRegionOps bitband_ops = { - .old_mmio = { - .read = { bitband_readb, bitband_readw, bitband_readl, }, - .write = { bitband_writeb, bitband_writew, bitband_writel, }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; + memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1); -#define TYPE_BITBAND "ARM,bitband-memory" -#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + cpustr = g_strsplit(s->cpu_model, ",", 2); -typedef struct { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ + oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]); + if (!oc) { + error_setg(errp, "Unknown CPU model %s", cpustr[0]); + g_strfreev(cpustr); + return; + } - MemoryRegion iomem; - uint32_t base; -} BitBandState; + cc = CPU_CLASS(oc); + typename = object_class_get_name(oc); + cc->parse_features(typename, cpustr[1], &err); + g_strfreev(cpustr); + if (err) { + error_propagate(errp, err); + return; + } -static void bitband_init(Object *obj) -{ - BitBandState *s = BITBAND(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + s->cpu = ARM_CPU(object_new(typename)); + if (!s->cpu) { + error_setg(errp, "Unknown CPU model %s", s->cpu_model); + return; + } - memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base, - "bitband", 0x02000000); - sysbus_init_mmio(dev, &s->iomem); + object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory", + &error_abort); + object_property_set_bool(OBJECT(s->cpu), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Note that we must realize the NVIC after the CPU */ + object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Alias the NVIC's input and output GPIOs as our own so the board + * code can wire them up. (We do this in realize because the + * NVIC doesn't create the input GPIO array until realize.) + */ + qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL); + qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ"); + + /* Wire the NVIC up to the CPU */ + sbd = SYS_BUS_DEVICE(&s->nvic); + sysbus_connect_irq(sbd, 0, + qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); + s->cpu->env.nvic = &s->nvic; + + memory_region_add_subregion(&s->container, 0xe000e000, + sysbus_mmio_get_region(sbd, 0)); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + Object *obj = OBJECT(&s->bitband[i]); + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]); + + object_property_set_int(obj, bitband_input_addr[i], "base", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + object_property_set_link(obj, OBJECT(s->board_memory), + "source-memory", &error_abort); + object_property_set_bool(obj, true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->container, bitband_output_addr[i], + sysbus_mmio_get_region(sbd, 0)); + } } -static void armv7m_bitband_init(void) -{ - DeviceState *dev; +static Property armv7m_properties[] = { + DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model), + DEFINE_PROP_END_OF_LIST(), +}; - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x20000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000); +static void armv7m_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x40000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000); + dc->realize = armv7m_realize; + dc->props = armv7m_properties; } -/* Board init. */ +static const TypeInfo armv7m_info = { + .name = TYPE_ARMV7M, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMv7MState), + .instance_init = armv7m_instance_init, + .class_init = armv7m_class_init, +}; static void armv7m_reset(void *opaque) { @@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque) /* Init CPU and memory for a v7-M based board. mem_size is in bytes. - Returns the NVIC array. */ + Returns the ARMv7M device. */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model) { - ARMCPU *cpu; - CPUARMState *env; - DeviceState *nvic; - int image_size; - uint64_t entry; - uint64_t lowaddr; - int big_endian; + DeviceState *armv7m; if (cpu_model == NULL) { - cpu_model = "cortex-m3"; - } - cpu = cpu_arm_init(cpu_model); - if (cpu == NULL) { - fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + cpu_model = "cortex-m3"; } - env = &cpu->env; - armv7m_bitband_init(); + armv7m = qdev_create(NULL, "armv7m"); + qdev_prop_set_uint32(armv7m, "num-irq", num_irq); + qdev_prop_set_string(armv7m, "cpu-model", cpu_model); + object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + /* This will exit with an error if the user passed us a bad cpu_model */ + qdev_init_nofail(armv7m); + + armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size); + return armv7m; +} - nvic = qdev_create(NULL, "armv7m_nvic"); - qdev_prop_set_uint32(nvic, "num-irq", num_irq); - env->nvic = nvic; - qdev_init_nofail(nvic); - sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0, - qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size) +{ + int image_size; + uint64_t entry; + uint64_t lowaddr; + int big_endian; #ifdef TARGET_WORDS_BIGENDIAN big_endian = 1; @@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, } } + /* CPU objects (unlike devices) are not automatically reset on system + * reset, so we must always register a handler to do so. Unlike + * A-profile CPUs, we don't need to do anything special in the + * handler to arrange that it starts correctly. + * This is arguably the wrong place to do this, but it matches the + * way A-profile does it. Note that this means that every M profile + * board must call this function! + */ qemu_register_reset(armv7m_reset, cpu); - return nvic; } static Property bitband_properties[] = { @@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = bitband_realize; dc->props = bitband_properties; } @@ -251,6 +371,7 @@ static const TypeInfo bitband_info = { static void armv7m_register_types(void) { type_register_static(&bitband_info); + type_register_static(&armv7m_info); } type_init(armv7m_register_types) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 2e641a3989..369ef1e3bd 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -86,11 +86,21 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->property), "dma-mr", OBJECT(&s->gpu_bus_mr), &error_abort); + /* Random Number Generator */ + object_initialize(&s->rng, sizeof(s->rng), TYPE_BCM2835_RNG); + object_property_add_child(obj, "rng", OBJECT(&s->rng), NULL); + qdev_set_parent_bus(DEVICE(&s->rng), sysbus_get_default()); + /* Extended Mass Media Controller */ object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI); object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL); qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default()); + /* SDHOST */ + object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST); + object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL); + qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default()); + /* DMA Channels */ object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA); object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL); @@ -98,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->dma), "dma-mr", OBJECT(&s->gpu_bus_mr), &error_abort); + + /* GPIO */ + object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO); + object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL); + qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default()); + + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci", + OBJECT(&s->sdhci.sdbus), &error_abort); + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost", + OBJECT(&s->sdhost.sdbus), &error_abort); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -226,6 +246,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->property), 0, qdev_get_gpio_in(DEVICE(&s->mboxes), MBOX_CHAN_PROPERTY)); + /* Random Number Generator */ + object_property_set_bool(OBJECT(&s->rng), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, RNG_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rng), 0)); + /* Extended Mass Media Controller */ object_property_set_int(OBJECT(&s->sdhci), BCM2835_SDHC_CAPAREG, "capareg", &err); @@ -252,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0, qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_ARASANSDIO)); - object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus", - &err); + + /* SDHOST */ + object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err); if (err) { error_propagate(errp, err); return; } + memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_SDIO)); + /* DMA Channels */ object_property_set_bool(OBJECT(&s->dma), true, "realized", &err); if (err) { @@ -277,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) BCM2835_IC_GPU_IRQ, INTERRUPT_DMA0 + n)); } + + /* GPIO */ + object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0)); + + object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus", + &err); + if (err) { + error_propagate(errp, err); + return; + } } static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index be3c96d21e..1d2b50cc4e 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/log.h" #include "cpu.h" #include "hw/boards.h" #include "sysemu/sysemu.h" @@ -74,6 +75,9 @@ /* PMU SFR base address */ #define EXYNOS4210_PMU_BASE_ADDR 0x10020000 +/* Clock controller SFR base address */ +#define EXYNOS4210_CLK_BASE_ADDR 0x10030000 + /* Display controllers (FIMD) */ #define EXYNOS4210_FIMD0_BASE_ADDR 0x11C00000 @@ -138,6 +142,16 @@ void exynos4210_write_secondary(ARMCPU *cpu, info->smp_loader_start); } +static uint64_t exynos4210_calc_affinity(int cpu) +{ + uint64_t mp_affinity; + + /* Exynos4210 has 0x9 as cluster ID */ + mp_affinity = (0x9 << ARM_AFF1_SHIFT) | cpu; + + return mp_affinity; +} + Exynos4210State *exynos4210_init(MemoryRegion *system_mem, unsigned long ram_size) { @@ -163,6 +177,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem, } s->cpu[n] = ARM_CPU(cpuobj); + object_property_set_int(cpuobj, exynos4210_calc_affinity(n), + "mp-affinity", &error_abort); object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR, "reset-cbar", &error_abort); object_property_set_bool(cpuobj, true, "realized", &error_fatal); @@ -297,6 +313,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem, */ sysbus_create_simple("exynos4210.pmu", EXYNOS4210_PMU_BASE_ADDR, NULL); + sysbus_create_simple("exynos4210.clk", EXYNOS4210_CLK_BASE_ADDR, NULL); + /* PWM */ sysbus_create_varargs("exynos4210.pwm", EXYNOS4210_PWM_BASE_ADDR, s->irq_table[exynos4210_get_irq(22, 0)], diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index 23d792837f..3cfe332dd1 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -27,17 +27,18 @@ #include "hw/boards.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" +#include "hw/arm/arm.h" static void netduino2_init(MachineState *machine) { DeviceState *dev; dev = qdev_create(NULL, TYPE_STM32F205_SOC); - if (machine->kernel_filename) { - qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename); - } qdev_prop_set_string(dev, "cpu-model", "cortex-m3"); object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, + FLASH_SIZE); } static void netduino2_machine_init(MachineClass *mc) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 38425bda6c..6e1260d2ed 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj) STM32F205State *s = STM32F205_SOC(obj); int i; + object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M); + qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default()); + object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG); qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default()); @@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj) static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) { STM32F205State *s = STM32F205_SOC(dev_soc); - DeviceState *dev, *nvic; + DeviceState *dev, *armv7m; SysBusDevice *busdev; Error *err = NULL; int i; @@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) vmstate_register_ram_global(sram); memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); - nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96, - s->kernel_filename, s->cpu_model); + armv7m = DEVICE(&s->armv7m); + qdev_prop_set_uint32(armv7m, "num-irq", 96); + qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model); + object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } /* System configuration controller */ dev = DEVICE(&s->syscfg); @@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, 0x40013800); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71)); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71)); /* Attach UART (uses USART registers) and USART controllers */ for (i = 0; i < STM_NUM_USARTS; i++) { @@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, usart_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i])); } /* Timer 2 to 5 */ @@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, timer_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i])); } /* ADC 1 to 3 */ @@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) return; } qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0, - qdev_get_gpio_in(nvic, ADC_IRQ)); + qdev_get_gpio_in(armv7m, ADC_IRQ)); for (i = 0; i < STM_NUM_ADCS; i++) { dev = DEVICE(&(s->adc[i])); @@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, spi_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i])); } } static Property stm32f205_soc_properties[] = { - DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename), DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index f3440f2ccb..5f62a0321e 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -535,7 +535,6 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic) static void create_gic(VirtMachineState *vms, qemu_irq *pic) { /* We create a standalone GIC */ - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); DeviceState *gicdev; SysBusDevice *gicbusdev; const char *gictype; @@ -605,7 +604,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) fdt_add_gic_node(vms); - if (type == 3 && !vmc->no_its) { + if (type == 3 && vms->its) { create_its(vms, gicdev); } else if (type == 2) { create_v2m(vms, pic); @@ -1378,6 +1377,7 @@ static void machvirt_init(MachineState *machine) } object_property_set_bool(cpuobj, true, "realized", NULL); + object_unref(cpuobj); } fdt_add_timer_nodes(vms); fdt_add_cpu_nodes(vms); @@ -1480,6 +1480,20 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } +static bool virt_get_its(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->its; +} + +static void virt_set_its(Object *obj, bool value, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->its = value; +} + static char *virt_get_gic_version(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); @@ -1540,6 +1554,7 @@ type_init(machvirt_machine_init); static void virt_2_9_instance_init(Object *obj) { VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); /* EL3 is disabled by default on virt: this makes us consistent * between KVM and TCG for this board, and it also allows us to @@ -1579,6 +1594,19 @@ static void virt_2_9_instance_init(Object *obj) "Set GIC version. " "Valid values are 2, 3 and host", NULL); + if (vmc->no_its) { + vms->its = false; + } else { + /* Default allows ITS instantiation */ + vms->its = true; + object_property_add_bool(obj, "its", virt_get_its, + virt_set_its, NULL); + object_property_set_description(obj, "its", + "Set on/off to enable/disable " + "ITS instantiation", + NULL); + } + vms->memmap = a15memmap; vms->irqmap = a15irqmap; } diff --git a/hw/block/block.c b/hw/block/block.c index 8dc9d84a39..27878d0087 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf) } } -void blkconf_apply_backend_options(BlockConf *conf) +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp) { BlockBackend *blk = conf->blk; BlockdevOnError rerror, werror; + uint64_t perm, shared_perm; bool wce; + int ret; + + perm = BLK_PERM_CONSISTENT_READ; + if (!readonly) { + perm |= BLK_PERM_WRITE; + } + + shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD; + if (resizable) { + shared_perm |= BLK_PERM_RESIZE; + } + if (conf->share_rw) { + shared_perm |= BLK_PERM_WRITE; + } + + ret = blk_set_perm(blk, perm, shared_perm, errp); + if (ret < 0) { + return; + } switch (conf->wce) { case ON_OFF_AUTO_ON: wce = true; break; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 17d29e7bc5..a328693d15 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -186,6 +186,7 @@ typedef enum FDiskFlags { struct FDrive { FDCtrl *fdctrl; BlockBackend *blk; + BlockConf *conf; /* Drive status */ FloppyDriveType drive; /* CMOS drive type */ uint8_t perpendicular; /* 2.88 MB access mode */ @@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv) } } -static void fd_change_cb(void *opaque, bool load) +static void fd_change_cb(void *opaque, bool load, Error **errp) { FDrive *drive = opaque; + Error *local_err = NULL; + + if (!load) { + blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort); + } else { + blkconf_apply_backend_options(drive->conf, + blk_is_read_only(drive->blk), false, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } drive->media_changed = 1; drive->media_validated = false; @@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev) FloppyDrive *dev = FLOPPY_DRIVE(qdev); FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus); FDrive *drive; + Error *local_err = NULL; int ret; if (dev->unit == -1) { @@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } @@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev) * blkconf_apply_backend_options(). */ dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO; dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO; - blkconf_apply_backend_options(&dev->conf); + + blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us * for empty drives. */ @@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev) return -1; } + drive->conf = &dev->conf; drive->blk = dev->conf.blk; drive->fdctrl = bus->fdc; diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 2d6eb46a04..190573cefa 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp) { Flash *s = M25P80(ss); M25P80Class *mc = M25P80_GET_CLASS(s); + int ret; s->pi = mc->pi; @@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp) s->dirty_page = -1; if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + DB_PRINT_L(0, "Binding to IF_MTD drive\n"); s->storage = blk_blockalign(s->blk, s->size); diff --git a/hw/block/nand.c b/hw/block/nand.c index c69e6755d9..0d33ac281f 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp) { int pagesize; NANDFlashState *s = NAND(dev); + int ret; + s->buswidth = nand_flash_ids[s->chip_id].width >> 3; s->size = nand_flash_ids[s->chip_id].size << 20; @@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp) error_setg(errp, "Can't use a read-only drive"); return; } + ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } if (blk_getlength(s->blk) >= (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { pagesize = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae91a18f17..ae303d44e5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev) int i; int64_t bs_size; uint8_t *pci_conf; + Error *local_err = NULL; if (!n->conf.blk) { return -1; @@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev) return -1; } blkconf_blocksizes(&n->conf); - blkconf_apply_backend_options(&n->conf); + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } pci_conf = pci_dev->config; pci_conf[PCI_INTERRUPT_PIN] = 1; diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 8d8422739e..ddf5492426 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd) OneNANDState *s = ONE_NAND(dev); uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7)); void *ram; + Error *local_err = NULL; s->base = (hwaddr)-1; s->rdy = NULL; @@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd) error_report("Can't use a read-only drive"); return -1; } + blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } s->blk_cur = s->blk; } s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 71b98a3eef..594d4cf6fe 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, total_len); @@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) } } - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - /* Default to devices being used at their maximum device width. This was * assumed before the device_width support was added. */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index ef71322759..e6c5c6c25d 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); pfl->chip_len = chip_len; + + if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len); @@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) pfl->rom_mode = 1; sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->wcycle = 0; pfl->cmd = 0; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 843bd2fa73..98c16a7a9a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } blkconf_serial(&conf->conf, &conf->serial); - blkconf_apply_backend_options(&conf->conf); + blkconf_apply_backend_options(&conf->conf, + blk_is_read_only(conf->conf.blk), true, + &err); + if (err) { + error_propagate(errp, err); + return; + } s->original_wce = blk_enable_write_cache(conf->conf.blk); blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); if (err) { diff --git a/hw/core/loader.c b/hw/core/loader.c index 8b980e91fb..bf17b42cbe 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -435,6 +435,19 @@ int load_elf_as(const char *filename, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) { + return load_elf_ram(filename, translate_fn, translate_opaque, + pentry, lowaddr, highaddr, big_endian, elf_machine, + clear_lsb, data_swab, as, true); +} + +/* return < 0 if error, otherwise the number of bytes loaded in memory */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom) +{ int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; @@ -473,11 +486,11 @@ int load_elf_as(const char *filename, if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } fail: diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c index 1ac090d1a4..1485d5b285 100644 --- a/hw/core/or-irq.c +++ b/hw/core/or-irq.c @@ -89,6 +89,9 @@ static void or_irq_class_init(ObjectClass *klass, void *data) dc->props = or_irq_properties; dc->realize = or_irq_realize; dc->vmsd = &vmstate_or_irq; + + /* Reason: Needs to be wired up to work, e.g. see stm32f205_soc.c */ + dc->cannot_instantiate_with_device_add_yet = true; } static const TypeInfo or_irq_type_info = { diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 94f4d8bde4..c34be1c1ba 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, { BlockBackend *blk; bool blk_created = false; + int ret; blk = blk_by_name(str); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(0, BLK_PERM_ALL); blk_created = true; + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } } } if (!blk) { diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 06ba02e2a3..923e626333 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -102,9 +102,23 @@ static void bus_add_child(BusState *bus, DeviceState *child) void qdev_set_parent_bus(DeviceState *dev, BusState *bus) { + bool replugging = dev->parent_bus != NULL; + + if (replugging) { + /* Keep a reference to the device while it's not plugged into + * any bus, to avoid it potentially evaporating when it is + * dereffed in bus_remove_child(). + */ + object_ref(OBJECT(dev)); + bus_remove_child(dev->parent_bus, dev); + object_unref(OBJECT(dev->parent_bus)); + } dev->parent_bus = bus; object_ref(OBJECT(bus)); bus_add_child(bus, dev); + if (replugging) { + object_unref(OBJECT(dev)); + } } /* Create a new device. This only initializes the device state diff --git a/hw/core/register.c b/hw/core/register.c index 4bfbc508de..dc335a79a9 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -59,6 +59,15 @@ static inline uint64_t register_read_val(RegisterInfo *reg) return 0; /* unreachable */ } +static inline uint64_t register_enabled_mask(int data_size, unsigned size) +{ + if (data_size < size) { + size = data_size; + } + + return MAKE_64BIT_MASK(0, size * 8); +} + void register_write(RegisterInfo *reg, uint64_t val, uint64_t we, const char *prefix, bool debug) { @@ -192,11 +201,7 @@ void register_write_memory(void *opaque, hwaddr addr, } /* Generate appropriate write enable mask */ - if (reg->data_size < size) { - we = MAKE_64BIT_MASK(0, reg->data_size * 8); - } else { - we = MAKE_64BIT_MASK(0, size * 8); - } + we = register_enabled_mask(reg->data_size, size); register_write(reg, value, we, reg_array->prefix, reg_array->debug); @@ -208,6 +213,7 @@ uint64_t register_read_memory(void *opaque, hwaddr addr, RegisterInfoArray *reg_array = opaque; RegisterInfo *reg = NULL; uint64_t read_val; + uint64_t re; int i; for (i = 0; i < reg_array->num_elements; i++) { @@ -223,7 +229,10 @@ uint64_t register_read_memory(void *opaque, hwaddr addr, return 0; } - read_val = register_read(reg, size * 8, reg_array->prefix, + /* Generate appropriate read enable mask */ + re = register_enabled_mask(reg->data_size, size); + + read_val = register_read(reg, re, reg_array->prefix, reg_array->debug); return extract64(read_val, 0, size * 8); @@ -274,9 +283,18 @@ void register_finalize_block(RegisterInfoArray *r_array) g_free(r_array); } +static void register_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + /* Reason: needs to be wired up to work */ + dc->cannot_instantiate_with_device_add_yet = true; +} + static const TypeInfo register_info = { .name = TYPE_REGISTER, .parent = TYPE_DEVICE, + .class_init = register_class_init, }; static void register_register_types(void) diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c index 7528665510..59120ddb67 100644 --- a/hw/display/milkymist-tmu2.c +++ b/hw/display/milkymist-tmu2.c @@ -293,7 +293,7 @@ static void tmu2_start(MilkymistTMU2State *s) cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len); /* Write back the OpenGL framebuffer to the QEMU framebuffer */ - fb_len = 2 * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; + fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, 1); if (fb == NULL) { glDeleteTextures(1, &texture); diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs index a43c7cf442..fa0a72e6d0 100644 --- a/hw/gpio/Makefile.objs +++ b/hw/gpio/Makefile.objs @@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o obj-$(CONFIG_OMAP) += omap_gpio.o obj-$(CONFIG_IMX) += imx_gpio.o +obj-$(CONFIG_RASPI) += bcm2835_gpio.o diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c new file mode 100644 index 0000000000..acc2e3cf9e --- /dev/null +++ b/hw/gpio/bcm2835_gpio.c @@ -0,0 +1,353 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" +#include "hw/gpio/bcm2835_gpio.h" + +#define GPFSEL0 0x00 +#define GPFSEL1 0x04 +#define GPFSEL2 0x08 +#define GPFSEL3 0x0C +#define GPFSEL4 0x10 +#define GPFSEL5 0x14 +#define GPSET0 0x1C +#define GPSET1 0x20 +#define GPCLR0 0x28 +#define GPCLR1 0x2C +#define GPLEV0 0x34 +#define GPLEV1 0x38 +#define GPEDS0 0x40 +#define GPEDS1 0x44 +#define GPREN0 0x4C +#define GPREN1 0x50 +#define GPFEN0 0x58 +#define GPFEN1 0x5C +#define GPHEN0 0x64 +#define GPHEN1 0x68 +#define GPLEN0 0x70 +#define GPLEN1 0x74 +#define GPAREN0 0x7C +#define GPAREN1 0x80 +#define GPAFEN0 0x88 +#define GPAFEN1 0x8C +#define GPPUD 0x94 +#define GPPUDCLK0 0x98 +#define GPPUDCLK1 0x9C + +static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg) +{ + int i; + uint32_t value = 0; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + value |= (s->fsel[index] & 0x7) << (3 * i); + } + } + return value; +} + +static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value) +{ + int i; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + int fsel = (value >> (3 * i)) & 0x7; + s->fsel[index] = fsel; + } + } + + /* SD controller selection (48-53) */ + if (s->sd_fsel != 0 + && (s->fsel[48] == 0) /* SD_CLK_R */ + && (s->fsel[49] == 0) /* SD_CMD_R */ + && (s->fsel[50] == 0) /* SD_DATA0_R */ + && (s->fsel[51] == 0) /* SD_DATA1_R */ + && (s->fsel[52] == 0) /* SD_DATA2_R */ + && (s->fsel[53] == 0) /* SD_DATA3_R */ + ) { + /* SDHCI controller selected */ + sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci); + s->sd_fsel = 0; + } else if (s->sd_fsel != 4 + && (s->fsel[48] == 4) /* SD_CLK_R */ + && (s->fsel[49] == 4) /* SD_CMD_R */ + && (s->fsel[50] == 4) /* SD_DATA0_R */ + && (s->fsel[51] == 4) /* SD_DATA1_R */ + && (s->fsel[52] == 4) /* SD_DATA2_R */ + && (s->fsel[53] == 4) /* SD_DATA3_R */ + ) { + /* SDHost controller selected */ + sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost); + s->sd_fsel = 4; + } +} + +static int gpfsel_is_out(BCM2835GpioState *s, int index) +{ + if (index >= 0 && index < 54) { + return s->fsel[index] == 1; + } + return 0; +} + +static void gpset(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & ~*lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 1); + } + cur <<= 1; + } + + *lev |= val; +} + +static void gpclr(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & *lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 0); + } + cur <<= 1; + } + + *lev &= ~val; +} + +static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + return gpfsel_get(s, offset / 4); + case GPSET0: + case GPSET1: + /* Write Only */ + return 0; + case GPCLR0: + case GPCLR1: + /* Write Only */ + return 0; + case GPLEV0: + return s->lev0; + case GPLEV1: + return s->lev1; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + break; + } + + return 0; +} + +static void bcm2835_gpio_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + gpfsel_set(s, offset / 4, value); + break; + case GPSET0: + gpset(s, value, 0, 32, &s->lev0); + break; + case GPSET1: + gpset(s, value, 32, 22, &s->lev1); + break; + case GPCLR0: + gpclr(s, value, 0, 32, &s->lev0); + break; + case GPCLR1: + gpclr(s, value, 32, 22, &s->lev1); + break; + case GPLEV0: + case GPLEV1: + /* Read Only */ + break; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + break; + default: + goto err_out; + } + return; + +err_out: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); +} + +static void bcm2835_gpio_reset(DeviceState *dev) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + + int i; + for (i = 0; i < 6; i++) { + gpfsel_set(s, i, 0); + } + + s->sd_fsel = 0; + + /* SDHCI is selected by default */ + sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci); + + s->lev0 = 0; + s->lev1 = 0; +} + +static const MemoryRegionOps bcm2835_gpio_ops = { + .read = bcm2835_gpio_read, + .write = bcm2835_gpio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_gpio = { + .name = "bcm2835_gpio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54), + VMSTATE_UINT32(lev0, BCM2835GpioState), + VMSTATE_UINT32(lev1, BCM2835GpioState), + VMSTATE_UINT8(sd_fsel, BCM2835GpioState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_gpio_init(Object *obj) +{ + BCM2835GpioState *s = BCM2835_GPIO(obj); + DeviceState *dev = DEVICE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), + TYPE_SD_BUS, DEVICE(s), "sd-bus"); + + memory_region_init_io(&s->iomem, obj, + &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + qdev_init_gpio_out(dev, s->out, 54); +} + +static void bcm2835_gpio_realize(DeviceState *dev, Error **errp) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhci link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhci = SD_BUS(obj); + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhost link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhost = SD_BUS(obj); +} + +static void bcm2835_gpio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_bcm2835_gpio; + dc->realize = &bcm2835_gpio_realize; + dc->reset = &bcm2835_gpio_reset; +} + +static const TypeInfo bcm2835_gpio_info = { + .name = TYPE_BCM2835_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835GpioState), + .instance_init = bcm2835_gpio_init, + .class_init = bcm2835_gpio_class_init, +}; + +static void bcm2835_gpio_register_types(void) +{ + type_register_static(&bcm2835_gpio_info); +} + +type_init(bcm2835_gpio_register_types) diff --git a/hw/ide/core.c b/hw/ide/core.c index cfa5de6ebf..db509b3e15 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s) } /* called when the inserted state of the media has changed */ -static void ide_cd_change_cb(void *opaque, bool load) +static void ide_cd_change_cb(void *opaque, bool load, Error **errp) { IDEState *s = opaque; uint64_t nb_sectors; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index dbaa75cf59..4383cd111d 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } } @@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD, + &err); + if (err) { + error_report_err(err); + return -1; + } if (ide_init_drive(s, dev->conf.blk, kind, dev->version, dev->serial, dev->model, dev->wwn, diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 8948106ac4..adedd0da5f 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o -obj-$(CONFIG_STELLARIS) += armv7m_nvic.o +obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o obj-$(CONFIG_GRLIB) += grlib_irqmp.o obj-$(CONFIG_IOAPIC) += ioapic.o diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 521aac3cc6..8e5a9d8a3e 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -156,17 +156,6 @@ static void gic_set_irq_11mpcore(GICState *s, int irq, int level, } } -static void gic_set_irq_nvic(GICState *s, int irq, int level, - int cm, int target) -{ - if (level) { - GIC_SET_LEVEL(irq, cm); - GIC_SET_PENDING(irq, target); - } else { - GIC_CLEAR_LEVEL(irq, cm); - } -} - static void gic_set_irq_generic(GICState *s, int irq, int level, int cm, int target) { @@ -214,8 +203,6 @@ static void gic_set_irq(void *opaque, int irq, int level) if (s->revision == REV_11MPCORE) { gic_set_irq_11mpcore(s, irq, level, cm, target); - } else if (s->revision == REV_NVIC) { - gic_set_irq_nvic(s, irq, level, cm, target); } else { gic_set_irq_generic(s, irq, level, cm, target); } @@ -367,7 +354,7 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs) return 1023; } - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { /* Clear pending flags for both level and edge triggered interrupts. * Level triggered IRQs will be reasserted once they become inactive. */ @@ -589,11 +576,6 @@ void gic_complete_irq(GICState *s, int cpu, int irq, MemTxAttrs attrs) DPRINTF("Set %d pending mask %x\n", irq, cm); GIC_SET_PENDING(irq, cm); } - } else if (s->revision == REV_NVIC) { - if (GIC_TEST_LEVEL(irq, cm)) { - DPRINTF("Set nvic %d pending mask %x\n", irq, cm); - GIC_SET_PENDING(irq, cm); - } } group = gic_has_groups(s) && GIC_TEST_GROUP(irq, cm); @@ -768,7 +750,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs) } else if (offset < 0xf10) { goto bad_reg; } else if (offset < 0xf30) { - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { goto bad_reg; } @@ -802,9 +784,6 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs) case 2: res = gic_id_gicv2[(offset - 0xfd0) >> 2]; break; - case REV_NVIC: - /* Shouldn't be able to get here */ - abort(); default: res = 0; } @@ -1028,7 +1007,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, continue; /* Ignore Non-secure access of Group0 IRQ */ } - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { if (value & (1 << (i * 2))) { GIC_SET_MODEL(irq + i); } else { @@ -1046,7 +1025,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, goto bad_reg; } else if (offset < 0xf20) { /* GICD_CPENDSGIRn */ - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { goto bad_reg; } irq = (offset - 0xf10); @@ -1060,7 +1039,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, } } else if (offset < 0xf30) { /* GICD_SPENDSGIRn */ - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { goto bad_reg; } irq = (offset - 0xf20); diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c index 4a8df44fb1..70f1134823 100644 --- a/hw/intc/arm_gic_common.c +++ b/hw/intc/arm_gic_common.c @@ -99,9 +99,7 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler, * [N+32..N+63] PPIs for CPU 1 * ... */ - if (s->revision != REV_NVIC) { - i += (GIC_INTERNAL * s->num_cpu); - } + i += (GIC_INTERNAL * s->num_cpu); qdev_init_gpio_in(DEVICE(s), handler, i); for (i = 0; i < s->num_cpu; i++) { @@ -121,16 +119,12 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler, memory_region_init_io(&s->iomem, OBJECT(s), ops, s, "gic_dist", 0x1000); sysbus_init_mmio(sbd, &s->iomem); - if (s->revision != REV_NVIC) { - /* This is the main CPU interface "for this core". It is always - * present because it is required by both software emulation and KVM. - * NVIC is not handled here because its CPU interface is different, - * neither it can use KVM. - */ - memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL, - s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100); - sysbus_init_mmio(sbd, &s->cpuiomem[0]); - } + /* This is the main CPU interface "for this core". It is always + * present because it is required by both software emulation and KVM. + */ + memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL, + s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100); + sysbus_init_mmio(sbd, &s->cpuiomem[0]); } static void arm_gic_common_realize(DeviceState *dev, Error **errp) @@ -162,7 +156,7 @@ static void arm_gic_common_realize(DeviceState *dev, Error **errp) } if (s->security_extn && - (s->revision == REV_11MPCORE || s->revision == REV_NVIC)) { + (s->revision == REV_11MPCORE)) { error_setg(errp, "this GIC revision does not implement " "the security extensions"); return; @@ -255,7 +249,6 @@ static Property arm_gic_common_properties[] = { DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32), /* Revision can be 1 or 2 for GIC architecture specification * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC. - * (Internally, 0xffffffff also indicates "not a GIC but an NVIC".) */ DEFINE_PROP_UINT32("revision", GICState, revision, 1), /* True if the GIC should implement the security extensions */ diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 16b9b0f7eb..c6493d6c07 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = { } }; +static int icc_sre_el1_reg_pre_load(void *opaque) +{ + GICv3CPUState *cs = opaque; + + /* + * If the sre_el1 subsection is not transferred this + * means SRE_EL1 is 0x7 (which might not be the same as + * our reset value). + */ + cs->icc_sre_el1 = 0x7; + return 0; +} + +static bool icc_sre_el1_reg_needed(void *opaque) +{ + GICv3CPUState *cs = opaque; + + return cs->icc_sre_el1 != 7; +} + +const VMStateDescription vmstate_gicv3_cpu_sre_el1 = { + .name = "arm_gicv3_cpu/sre_el1", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = icc_sre_el1_reg_pre_load, + .needed = icc_sre_el1_reg_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(icc_sre_el1, GICv3CPUState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_gicv3_cpu = { .name = "arm_gicv3_cpu", .version_id = 1, @@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = { .subsections = (const VMStateDescription * []) { &vmstate_gicv3_cpu_virt, NULL + }, + .subsections = (const VMStateDescription * []) { + &vmstate_gicv3_cpu_sre_el1, + NULL } }; @@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) s->cpu[i].cpu = cpu; s->cpu[i].gic = s; + /* Store GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); /* Pre-construct the GICR_TYPER: * For our implementation: diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index f775aba507..0b208560bd 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -19,6 +19,14 @@ #include "gicv3_internal.h" #include "cpu.h" +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + + env->gicv3state = (void *)s; +}; + static GICv3CPUState *icc_cs_from_env(CPUARMState *env) { /* Given the CPU, find the right GICv3CPUState struct. diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index d69dc47370..81f0403117 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -23,8 +23,10 @@ #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" #include "hw/sysbus.h" +#include "qemu/error-report.h" #include "sysemu/kvm.h" #include "kvm_arm.h" +#include "gicv3_internal.h" #include "vgic_common.h" #include "migration/migration.h" @@ -44,6 +46,32 @@ #define KVM_ARM_GICV3_GET_CLASS(obj) \ OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3) +#define KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \ + (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ICC_PMR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0) +#define ICC_BPR0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3) +#define ICC_AP0R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n) +#define ICC_AP1R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n) +#define ICC_BPR1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3) +#define ICC_CTLR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5) +#define ICC_IGRPEN0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6) +#define ICC_IGRPEN1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7) + typedef struct KVMARMGICv3Class { ARMGICv3CommonClass parent_class; DeviceRealize parent_realize; @@ -57,16 +85,549 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) kvm_arm_gic_set_irq(s->num_irq, irq, level); } +#define KVM_VGIC_ATTR(reg, typer) \ + ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg)) + +static inline void kvm_gicd_access(GICv3State *s, int offset, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + KVM_VGIC_ATTR(offset, 0), + val, write); +} + +static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu, + uint64_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) | + (VGIC_LEVEL_INFO_LINE_LEVEL << + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT), + val, write); +} + +/* Loop through each distributor IRQ related register; since bits + * corresponding to SPIs and PPIs are RAZ/WI when affinity routing + * is enabled, we skip those. + */ +#define for_each_dist_irq_reg(_irq, _max, _field_width) \ + for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width)) + +static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + kvm_gicd_access(s, offset, ®, false); + *field = reg; + offset += 4; + field++; + } +} + +static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + reg = *field; + kvm_gicd_access(s, offset, ®, true); + offset += 4; + field++; + } +} + +static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + kvm_gicd_access(s, offset, ®, false); + reg = half_unshuffle32(reg >> 1); + if (irq % 32 != 0) { + reg = (reg << 16); + } + *gic_bmp_ptr32(bmp, irq) |= reg; + offset += 4; + } +} + +static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + reg = *gic_bmp_ptr32(bmp, irq); + if (irq % 32 != 0) { + reg = (reg & 0xffff0000) >> 16; + } else { + reg = reg & 0xffff; + } + reg = half_shuffle32(reg) << 1; + kvm_gicd_access(s, offset, ®, true); + offset += 4; + } +} + +static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gic_line_level_access(s, irq, 0, ®, false); + *gic_bmp_ptr32(bmp, irq) = reg; + } +} + +static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gic_line_level_access(s, irq, 0, ®, true); + } +} + +/* Read a bitmap register group from the kernel VGIC. */ +static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gicd_access(s, offset, ®, false); + *gic_bmp_ptr32(bmp, irq) = reg; + offset += 4; + } +} + +static void kvm_dist_putbmp(GICv3State *s, uint32_t offset, + uint32_t clroffset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + /* If this bitmap is a set/clear register pair, first write to the + * clear-reg to clear all bits before using the set-reg to write + * the 1 bits. + */ + if (clroffset != 0) { + reg = 0; + kvm_gicd_access(s, clroffset, ®, true); + } + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gicd_access(s, offset, ®, true); + offset += 4; + } +} + +static void kvm_arm_gicv3_check(GICv3State *s) +{ + uint32_t reg; + uint32_t num_irq; + + /* Sanity checking s->num_irq */ + kvm_gicd_access(s, GICD_TYPER, ®, false); + num_irq = ((reg & 0x1f) + 1) * 32; + + if (num_irq < s->num_irq) { + error_report("Model requests %u IRQs, but kernel supports max %u", + s->num_irq, num_irq); + abort(); + } +} + static void kvm_arm_gicv3_put(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + reg = s->gicd_ctlr; + kvm_gicd_access(s, GICD_CTLR, ®, true); + + if (redist_typer & GICR_TYPER_PLPIS) { + /* Set base addresses before LPIs are enabled by GICR_CTLR write */ + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg64 = c->gicr_propbaser; + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, true); + + reg64 = c->gicr_pendbaser; + if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) { + /* Setting PTZ is advised if LPIs are disabled, to reduce + * GIC initialization time. + */ + reg64 |= GICR_PENDBASER_PTZ; + } + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, true); + } + } + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg = c->gicr_ctlr; + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, true); + + reg = c->gicr_statusr[GICV3_NS]; + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, true); + + reg = c->gicr_waker; + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, true); + + reg = c->gicr_igroupr0; + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICENABLER0, ncpu, ®, true); + reg = c->gicr_ienabler0; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, true); + + /* Restore config before pending so we treat level/edge correctly */ + reg = half_shuffle32(c->edge_trigger >> 16) << 1; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, true); + + reg = c->level; + kvm_gic_line_level_access(s, 0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICPENDR0, ncpu, ®, true); + reg = c->gicr_ipendr0; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, ®, true); + reg = c->gicr_iactiver0; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, true); + + for (i = 0; i < GIC_INTERNAL; i += 4) { + reg = c->gicr_ipriorityr[i] | + (c->gicr_ipriorityr[i + 1] << 8) | + (c->gicr_ipriorityr[i + 2] << 16) | + (c->gicr_ipriorityr[i + 3] << 24); + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, true); + } + } + + /* Distributor state (shared between all CPUs */ + reg = s->gicd_statusr[GICV3_NS]; + kvm_gicd_access(s, GICD_STATUSR, ®, true); + + /* s->enable bitmap -> GICD_ISENABLERn */ + kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled); + + /* s->group bitmap -> GICD_IGROUPRn */ + kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group); + + /* Restore targets before pending to ensure the pending state is set on + * the appropriate CPU interfaces in the kernel + */ + + /* s->gicd_irouter[irq] -> GICD_IROUTERn + * We can't use kvm_dist_put() here because the registers are 64-bit + */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + reg = (uint32_t)s->gicd_irouter[i]; + kvm_gicd_access(s, offset, ®, true); + + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + reg = (uint32_t)(s->gicd_irouter[i] >> 32); + kvm_gicd_access(s, offset, ®, true); + } + + /* s->trigger bitmap -> GICD_ICFGRn + * (restore configuration registers before pending IRQs so we treat + * level/edge correctly) + */ + kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* s->level bitmap -> line_level */ + kvm_gic_put_line_level_bmp(s, s->level); + + /* s->pending bitmap -> GICD_ISPENDRn */ + kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending); + + /* s->active bitmap -> GICD_ISACTIVERn */ + kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active); + + /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */ + kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* CPU Interface state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], true); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], true); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true); + + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G0][3]; + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G0][2]; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G0][1]; + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G0][0]; + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, true); + } + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G1NS][3]; + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G1NS][2]; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G1NS][1]; + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G1NS][0]; + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, true); + } + } } static void kvm_arm_gicv3_get(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot get kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + kvm_gicd_access(s, GICD_CTLR, ®, false); + s->gicd_ctlr = reg; + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, false); + c->gicr_ctlr = reg; + + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, false); + c->gicr_statusr[GICV3_NS] = reg; + + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, false); + c->gicr_waker = reg; + + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, false); + c->gicr_igroupr0 = reg; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, false); + c->gicr_ienabler0 = reg; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, false); + c->edge_trigger = half_unshuffle32(reg >> 1) << 16; + kvm_gic_line_level_access(s, 0, ncpu, ®, false); + c->level = reg; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, false); + c->gicr_ipendr0 = reg; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, false); + c->gicr_iactiver0 = reg; + + for (i = 0; i < GIC_INTERNAL; i += 4) { + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, false); + c->gicr_ipriorityr[i] = extract32(reg, 0, 8); + c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8); + c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8); + c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8); + } + } + + if (redist_typer & GICR_TYPER_PLPIS) { + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, false); + c->gicr_propbaser = ((uint64_t)regh << 32) | regl; + + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, false); + c->gicr_pendbaser = ((uint64_t)regh << 32) | regl; + } + } + + /* Distributor state (shared between all CPUs */ + + kvm_gicd_access(s, GICD_STATUSR, ®, false); + s->gicd_statusr[GICV3_NS] = reg; + + /* GICD_IGROUPRn -> s->group bitmap */ + kvm_dist_getbmp(s, GICD_IGROUPR, s->group); + + /* GICD_ISENABLERn -> s->enabled bitmap */ + kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled); + + /* Line level of irq */ + kvm_gic_get_line_level_bmp(s, s->level); + /* GICD_ISPENDRn -> s->pending bitmap */ + kvm_dist_getbmp(s, GICD_ISPENDR, s->pending); + + /* GICD_ISACTIVERn -> s->active bitmap */ + kvm_dist_getbmp(s, GICD_ISACTIVER, s->active); + + /* GICD_ICFGRn -> s->trigger bitmap */ + kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */ + kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* GICD_IROUTERn -> s->gicd_irouter[irq] */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + kvm_gicd_access(s, offset, ®l, false); + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + kvm_gicd_access(s, offset, ®h, false); + s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl; + } + + /***************************************************************** + * CPU Interface(s) State + */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], false); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], false); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false); + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G0][3] = reg64; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G0][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G0][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G0][0] = reg64; + } + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][3] = reg64; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][0] = reg64; + } + } +} + +static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu; + GICv3State *s; + GICv3CPUState *c; + + c = (GICv3CPUState *)env->gicv3state; + s = c->gic; + cpu = ARM_CPU(c->cpu); + + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), + &c->icc_ctlr_el1[GICV3_NS], false); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + c->icc_pmr_el1 = 0; + c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR; + + c->icc_sre_el1 = 0x7; + memset(c->icc_apr, 0, sizeof(c->icc_apr)); + memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen)); } static void kvm_arm_gicv3_reset(DeviceState *dev) @@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev) DPRINTF("Reset\n"); kgc->parent_reset(dev); + + if (s->migration_blocker) { + DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + return; + } + kvm_arm_gicv3_put(s); } +/* + * CPU interface registers of GIC needs to be reset on CPU reset. + * For the calling arm_gicv3_icc_reset() on CPU reset, we register + * below ARMCPRegInfo. As we reset the whole cpu interface under single + * register reset, we define only one register of CPU interface instead + * of defining all the registers. + */ +static const ARMCPRegInfo gicv3_cpuif_reginfo[] = { + { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4, + /* + * If ARM_CP_NOP is used, resetfn is not called, + * So ARM_CP_NO_RAW is appropriate type. + */ + .type = ARM_CP_NO_RAW, + .access = PL1_RW, + .readfn = arm_cp_read_zero, + .writefn = arm_cp_write_ignore, + /* + * We hang the whole cpu interface reset routine off here + * rather than parcelling it out into one little function + * per register + */ + .resetfn = arm_gicv3_icc_reset, + }, + REGINFO_SENTINEL +}; + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) { GICv3State *s = KVM_ARM_GICV3(dev); @@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); - /* Block migration of a KVM GICv3 device: the API for saving and restoring - * the state in the kernel is not yet finalised in the kernel or - * implemented in QEMU. - */ - error_setg(&s->migration_blocker, "vGICv3 migration is not implemented"); - migrate_add_blocker(s->migration_blocker, &local_err); - if (local_err) { - error_propagate(errp, local_err); - error_free(s->migration_blocker); - return; + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); + + define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); } /* Try to create the device via the device control API */ @@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) kvm_irqchip_commit_routes(kvm_state); } + + if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_CTLR)) { + error_setg(&s->migration_blocker, "This operating system kernel does " + "not support vGICv3 migration"); + migrate_add_blocker(s->migration_blocker, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_free(s->migration_blocker); + return; + } + } } static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index fe5c303de9..32ffa0bf35 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -17,213 +17,425 @@ #include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/arm/arm.h" -#include "exec/address-spaces.h" -#include "gic_internal.h" +#include "hw/arm/armv7m_nvic.h" +#include "target/arm/cpu.h" #include "qemu/log.h" +#include "trace.h" -typedef struct { - GICState gic; - ARMCPU *cpu; - struct { - uint32_t control; - uint32_t reload; - int64_t tick; - QEMUTimer *timer; - } systick; - MemoryRegion sysregmem; - MemoryRegion gic_iomem_alias; - MemoryRegion container; - uint32_t num_irq; - qemu_irq sysresetreq; -} nvic_state; - -#define TYPE_NVIC "armv7m_nvic" -/** - * NVICClass: - * @parent_reset: the parent class' reset handler. +/* IRQ number counting: * - * A model of the v7M NVIC and System Controller + * the num-irq property counts the number of external IRQ lines + * + * NVICState::num_irq counts the total number of exceptions + * (external IRQs, the 15 internal exceptions including reset, + * and one for the unused exception number 0). + * + * NVIC_MAX_IRQ is the highest permitted number of external IRQ lines. + * + * NVIC_MAX_VECTORS is the highest permitted number of exceptions. + * + * Iterating through all exceptions should typically be done with + * for (i = 1; i < s->num_irq; i++) to avoid the unused slot 0. + * + * The external qemu_irq lines are the NVIC's external IRQ lines, + * so line 0 is exception 16. + * + * In the terminology of the architecture manual, "interrupts" are + * a subcategory of exception referring to the external interrupts + * (which are exception numbers NVIC_FIRST_IRQ and upward). + * For historical reasons QEMU tends to use "interrupt" and + * "exception" more or less interchangeably. + */ +#define NVIC_FIRST_IRQ 16 +#define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ) + +/* Effective running priority of the CPU when no exception is active + * (higher than the highest possible priority value) */ -typedef struct NVICClass { - /*< private >*/ - ARMGICClass parent_class; - /*< public >*/ - DeviceRealize parent_realize; - void (*parent_reset)(DeviceState *dev); -} NVICClass; - -#define NVIC_CLASS(klass) \ - OBJECT_CLASS_CHECK(NVICClass, (klass), TYPE_NVIC) -#define NVIC_GET_CLASS(obj) \ - OBJECT_GET_CLASS(NVICClass, (obj), TYPE_NVIC) -#define NVIC(obj) \ - OBJECT_CHECK(nvic_state, (obj), TYPE_NVIC) +#define NVIC_NOEXC_PRIO 0x100 static const uint8_t nvic_id[] = { 0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1 }; -/* qemu timers run at 1GHz. We want something closer to 1MHz. */ -#define SYSTICK_SCALE 1000ULL +static int nvic_pending_prio(NVICState *s) +{ + /* return the priority of the current pending interrupt, + * or NVIC_NOEXC_PRIO if no interrupt is pending + */ + return s->vectpending ? s->vectors[s->vectpending].prio : NVIC_NOEXC_PRIO; +} -#define SYSTICK_ENABLE (1 << 0) -#define SYSTICK_TICKINT (1 << 1) -#define SYSTICK_CLKSOURCE (1 << 2) -#define SYSTICK_COUNTFLAG (1 << 16) +/* Return the value of the ISCR RETTOBASE bit: + * 1 if there is exactly one active exception + * 0 if there is more than one active exception + * UNKNOWN if there are no active exceptions (we choose 1, + * which matches the choice Cortex-M3 is documented as making). + * + * NB: some versions of the documentation talk about this + * counting "active exceptions other than the one shown by IPSR"; + * this is only different in the obscure corner case where guest + * code has manually deactivated an exception and is about + * to fail an exception-return integrity check. The definition + * above is the one from the v8M ARM ARM and is also in line + * with the behaviour documented for the Cortex-M3. + */ +static bool nvic_rettobase(NVICState *s) +{ + int irq, nhand = 0; -int system_clock_scale; + for (irq = ARMV7M_EXCP_RESET; irq < s->num_irq; irq++) { + if (s->vectors[irq].active) { + nhand++; + if (nhand == 2) { + return 0; + } + } + } -/* Conversion factor from qemu timer to SysTick frequencies. */ -static inline int64_t systick_scale(nvic_state *s) -{ - if (s->systick.control & SYSTICK_CLKSOURCE) - return system_clock_scale; - else - return 1000; + return 1; } -static void systick_reload(nvic_state *s, int reset) +/* Return the value of the ISCR ISRPENDING bit: + * 1 if an external interrupt is pending + * 0 if no external interrupt is pending + */ +static bool nvic_isrpending(NVICState *s) { - /* The Cortex-M3 Devices Generic User Guide says that "When the - * ENABLE bit is set to 1, the counter loads the RELOAD value from the - * SYST RVR register and then counts down". So, we need to check the - * ENABLE bit before reloading the value. + int irq; + + /* We can shortcut if the highest priority pending interrupt + * happens to be external or if there is nothing pending. */ - if ((s->systick.control & SYSTICK_ENABLE) == 0) { - return; + if (s->vectpending > NVIC_FIRST_IRQ) { + return true; + } + if (s->vectpending == 0) { + return false; + } + + for (irq = NVIC_FIRST_IRQ; irq < s->num_irq; irq++) { + if (s->vectors[irq].pending) { + return true; + } } + return false; +} - if (reset) - s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->systick.tick += (s->systick.reload + 1) * systick_scale(s); - timer_mod(s->systick.timer, s->systick.tick); +/* Return a mask word which clears the subpriority bits from + * a priority value for an M-profile exception, leaving only + * the group priority. + */ +static inline uint32_t nvic_gprio_mask(NVICState *s) +{ + return ~0U << (s->prigroup + 1); } -static void systick_timer_tick(void * opaque) +/* Recompute vectpending and exception_prio */ +static void nvic_recompute_state(NVICState *s) { - nvic_state *s = (nvic_state *)opaque; - s->systick.control |= SYSTICK_COUNTFLAG; - if (s->systick.control & SYSTICK_TICKINT) { - /* Trigger the interrupt. */ - armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); + int i; + int pend_prio = NVIC_NOEXC_PRIO; + int active_prio = NVIC_NOEXC_PRIO; + int pend_irq = 0; + + for (i = 1; i < s->num_irq; i++) { + VecInfo *vec = &s->vectors[i]; + + if (vec->enabled && vec->pending && vec->prio < pend_prio) { + pend_prio = vec->prio; + pend_irq = i; + } + if (vec->active && vec->prio < active_prio) { + active_prio = vec->prio; + } } - if (s->systick.reload == 0) { - s->systick.control &= ~SYSTICK_ENABLE; + + s->vectpending = pend_irq; + s->exception_prio = active_prio & nvic_gprio_mask(s); + + trace_nvic_recompute_state(s->vectpending, s->exception_prio); +} + +/* Return the current execution priority of the CPU + * (equivalent to the pseudocode ExecutionPriority function). + * This is a value between -2 (NMI priority) and NVIC_NOEXC_PRIO. + */ +static inline int nvic_exec_prio(NVICState *s) +{ + CPUARMState *env = &s->cpu->env; + int running; + + if (env->daif & PSTATE_F) { /* FAULTMASK */ + running = -1; + } else if (env->daif & PSTATE_I) { /* PRIMASK */ + running = 0; + } else if (env->v7m.basepri > 0) { + running = env->v7m.basepri & nvic_gprio_mask(s); } else { - systick_reload(s, 0); + running = NVIC_NOEXC_PRIO; /* lower than any possible priority */ } + /* consider priority of active handler */ + return MIN(running, s->exception_prio); } -static void systick_reset(nvic_state *s) +bool armv7m_nvic_can_take_pending_exception(void *opaque) { - s->systick.control = 0; - s->systick.reload = 0; - s->systick.tick = 0; - timer_del(s->systick.timer); + NVICState *s = opaque; + + return nvic_exec_prio(s) > nvic_pending_prio(s); +} + +/* caller must call nvic_irq_update() after this */ +static void set_prio(NVICState *s, unsigned irq, uint8_t prio) +{ + assert(irq > ARMV7M_EXCP_NMI); /* only use for configurable prios */ + assert(irq < s->num_irq); + + s->vectors[irq].prio = prio; + + trace_nvic_set_prio(irq, prio); +} + +/* Recompute state and assert irq line accordingly. + * Must be called after changes to: + * vec->active, vec->enabled, vec->pending or vec->prio for any vector + * prigroup + */ +static void nvic_irq_update(NVICState *s) +{ + int lvl; + int pend_prio; + + nvic_recompute_state(s); + pend_prio = nvic_pending_prio(s); + + /* Raise NVIC output if this IRQ would be taken, except that we + * ignore the effects of the BASEPRI, FAULTMASK and PRIMASK (which + * will be checked for in arm_v7m_cpu_exec_interrupt()); changes + * to those CPU registers don't cause us to recalculate the NVIC + * pending info. + */ + lvl = (pend_prio < s->exception_prio); + trace_nvic_irq_update(s->vectpending, pend_prio, s->exception_prio, lvl); + qemu_set_irq(s->excpout, lvl); +} + +static void armv7m_nvic_clear_pending(void *opaque, int irq) +{ + NVICState *s = (NVICState *)opaque; + VecInfo *vec; + + assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); + + vec = &s->vectors[irq]; + trace_nvic_clear_pending(irq, vec->enabled, vec->prio); + if (vec->pending) { + vec->pending = 0; + nvic_irq_update(s); + } } -/* The external routines use the hardware vector numbering, ie. the first - IRQ is #16. The internal GIC routines use #32 as the first IRQ. */ void armv7m_nvic_set_pending(void *opaque, int irq) { - nvic_state *s = (nvic_state *)opaque; - if (irq >= 16) - irq += 16; - gic_set_pending_private(&s->gic, 0, irq); + NVICState *s = (NVICState *)opaque; + VecInfo *vec; + + assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); + + vec = &s->vectors[irq]; + trace_nvic_set_pending(irq, vec->enabled, vec->prio); + + + if (irq >= ARMV7M_EXCP_HARD && irq < ARMV7M_EXCP_PENDSV) { + /* If a synchronous exception is pending then it may be + * escalated to HardFault if: + * * it is equal or lower priority to current execution + * * it is disabled + * (ie we need to take it immediately but we can't do so). + * Asynchronous exceptions (and interrupts) simply remain pending. + * + * For QEMU, we don't have any imprecise (asynchronous) faults, + * so we can assume that PREFETCH_ABORT and DATA_ABORT are always + * synchronous. + * Debug exceptions are awkward because only Debug exceptions + * resulting from the BKPT instruction should be escalated, + * but we don't currently implement any Debug exceptions other + * than those that result from BKPT, so we treat all debug exceptions + * as needing escalation. + * + * This all means we can identify whether to escalate based only on + * the exception number and don't (yet) need the caller to explicitly + * tell us whether this exception is synchronous or not. + */ + int running = nvic_exec_prio(s); + bool escalate = false; + + if (vec->prio >= running) { + trace_nvic_escalate_prio(irq, vec->prio, running); + escalate = true; + } else if (!vec->enabled) { + trace_nvic_escalate_disabled(irq); + escalate = true; + } + + if (escalate) { + if (running < 0) { + /* We want to escalate to HardFault but we can't take a + * synchronous HardFault at this point either. This is a + * Lockup condition due to a guest bug. We don't model + * Lockup, so report via cpu_abort() instead. + */ + cpu_abort(&s->cpu->parent_obj, + "Lockup: can't escalate %d to HardFault " + "(current priority %d)\n", irq, running); + } + + /* We can do the escalation, so we take HardFault instead */ + irq = ARMV7M_EXCP_HARD; + vec = &s->vectors[irq]; + s->cpu->env.v7m.hfsr |= R_V7M_HFSR_FORCED_MASK; + } + } + + if (!vec->pending) { + vec->pending = 1; + nvic_irq_update(s); + } } /* Make pending IRQ active. */ -int armv7m_nvic_acknowledge_irq(void *opaque) +void armv7m_nvic_acknowledge_irq(void *opaque) { - nvic_state *s = (nvic_state *)opaque; - uint32_t irq; - - irq = gic_acknowledge_irq(&s->gic, 0, MEMTXATTRS_UNSPECIFIED); - if (irq == 1023) - hw_error("Interrupt but no vector\n"); - if (irq >= 32) - irq -= 16; - return irq; + NVICState *s = (NVICState *)opaque; + CPUARMState *env = &s->cpu->env; + const int pending = s->vectpending; + const int running = nvic_exec_prio(s); + int pendgroupprio; + VecInfo *vec; + + assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq); + + vec = &s->vectors[pending]; + + assert(vec->enabled); + assert(vec->pending); + + pendgroupprio = vec->prio & nvic_gprio_mask(s); + assert(pendgroupprio < running); + + trace_nvic_acknowledge_irq(pending, vec->prio); + + vec->active = 1; + vec->pending = 0; + + env->v7m.exception = s->vectpending; + + nvic_irq_update(s); } -void armv7m_nvic_complete_irq(void *opaque, int irq) +int armv7m_nvic_complete_irq(void *opaque, int irq) { - nvic_state *s = (nvic_state *)opaque; - if (irq >= 16) - irq += 16; - gic_complete_irq(&s->gic, 0, irq, MEMTXATTRS_UNSPECIFIED); + NVICState *s = (NVICState *)opaque; + VecInfo *vec; + int ret; + + assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); + + vec = &s->vectors[irq]; + + trace_nvic_complete_irq(irq); + + if (!vec->active) { + /* Tell the caller this was an illegal exception return */ + return -1; + } + + ret = nvic_rettobase(s); + + vec->active = 0; + if (vec->level) { + /* Re-pend the exception if it's still held high; only + * happens for extenal IRQs + */ + assert(irq >= NVIC_FIRST_IRQ); + vec->pending = 1; + } + + nvic_irq_update(s); + + return ret; +} + +/* callback when external interrupt line is changed */ +static void set_irq_level(void *opaque, int n, int level) +{ + NVICState *s = opaque; + VecInfo *vec; + + n += NVIC_FIRST_IRQ; + + assert(n >= NVIC_FIRST_IRQ && n < s->num_irq); + + trace_nvic_set_irq_level(n, level); + + /* The pending status of an external interrupt is + * latched on rising edge and exception handler return. + * + * Pulsing the IRQ will always run the handler + * once, and the handler will re-run until the + * level is low when the handler completes. + */ + vec = &s->vectors[n]; + if (level != vec->level) { + vec->level = level; + if (level) { + armv7m_nvic_set_pending(s, n); + } + } } -static uint32_t nvic_readl(nvic_state *s, uint32_t offset) +static uint32_t nvic_readl(NVICState *s, uint32_t offset) { ARMCPU *cpu = s->cpu; uint32_t val; - int irq; switch (offset) { case 4: /* Interrupt Control Type. */ - return (s->num_irq / 32) - 1; - case 0x10: /* SysTick Control and Status. */ - val = s->systick.control; - s->systick.control &= ~SYSTICK_COUNTFLAG; - return val; - case 0x14: /* SysTick Reload Value. */ - return s->systick.reload; - case 0x18: /* SysTick Current Value. */ - { - int64_t t; - if ((s->systick.control & SYSTICK_ENABLE) == 0) - return 0; - t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (t >= s->systick.tick) - return 0; - val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1; - /* The interrupt in triggered when the timer reaches zero. - However the counter is not reloaded until the next clock - tick. This is a hack to return zero during the first tick. */ - if (val > s->systick.reload) - val = 0; - return val; - } - case 0x1c: /* SysTick Calibration Value. */ - return 10000; + return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1; case 0xd00: /* CPUID Base. */ return cpu->midr; case 0xd04: /* Interrupt Control State. */ /* VECTACTIVE */ val = cpu->env.v7m.exception; - if (val == 1023) { - val = 0; - } else if (val >= 32) { - val -= 16; - } /* VECTPENDING */ - if (s->gic.current_pending[0] != 1023) - val |= (s->gic.current_pending[0] << 12); - /* ISRPENDING and RETTOBASE */ - for (irq = 32; irq < s->num_irq; irq++) { - if (s->gic.irq_state[irq].pending) { - val |= (1 << 22); - break; - } - if (irq != cpu->env.v7m.exception && s->gic.irq_state[irq].active) { - val |= (1 << 11); - } + val |= (s->vectpending & 0xff) << 12; + /* ISRPENDING - set if any external IRQ is pending */ + if (nvic_isrpending(s)) { + val |= (1 << 22); + } + /* RETTOBASE - set if only one handler is active */ + if (nvic_rettobase(s)) { + val |= (1 << 11); } /* PENDSTSET */ - if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending) + if (s->vectors[ARMV7M_EXCP_SYSTICK].pending) { val |= (1 << 26); + } /* PENDSVSET */ - if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending) + if (s->vectors[ARMV7M_EXCP_PENDSV].pending) { val |= (1 << 28); + } /* NMIPENDSET */ - if (s->gic.irq_state[ARMV7M_EXCP_NMI].pending) + if (s->vectors[ARMV7M_EXCP_NMI].pending) { val |= (1 << 31); + } + /* ISRPREEMPT not implemented */ return val; case 0xd08: /* Vector Table Offset. */ return cpu->env.v7m.vecbase; case 0xd0c: /* Application Interrupt/Reset Control. */ - return 0xfa050000; + return 0xfa050000 | (s->prigroup << 8); case 0xd10: /* System Control. */ /* TODO: Implement SLEEPONEXIT. */ return 0; @@ -231,20 +443,48 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset) return cpu->env.v7m.ccr; case 0xd24: /* System Handler Status. */ val = 0; - if (s->gic.irq_state[ARMV7M_EXCP_MEM].active) val |= (1 << 0); - if (s->gic.irq_state[ARMV7M_EXCP_BUS].active) val |= (1 << 1); - if (s->gic.irq_state[ARMV7M_EXCP_USAGE].active) val |= (1 << 3); - if (s->gic.irq_state[ARMV7M_EXCP_SVC].active) val |= (1 << 7); - if (s->gic.irq_state[ARMV7M_EXCP_DEBUG].active) val |= (1 << 8); - if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].active) val |= (1 << 10); - if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].active) val |= (1 << 11); - if (s->gic.irq_state[ARMV7M_EXCP_USAGE].pending) val |= (1 << 12); - if (s->gic.irq_state[ARMV7M_EXCP_MEM].pending) val |= (1 << 13); - if (s->gic.irq_state[ARMV7M_EXCP_BUS].pending) val |= (1 << 14); - if (s->gic.irq_state[ARMV7M_EXCP_SVC].pending) val |= (1 << 15); - if (s->gic.irq_state[ARMV7M_EXCP_MEM].enabled) val |= (1 << 16); - if (s->gic.irq_state[ARMV7M_EXCP_BUS].enabled) val |= (1 << 17); - if (s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled) val |= (1 << 18); + if (s->vectors[ARMV7M_EXCP_MEM].active) { + val |= (1 << 0); + } + if (s->vectors[ARMV7M_EXCP_BUS].active) { + val |= (1 << 1); + } + if (s->vectors[ARMV7M_EXCP_USAGE].active) { + val |= (1 << 3); + } + if (s->vectors[ARMV7M_EXCP_SVC].active) { + val |= (1 << 7); + } + if (s->vectors[ARMV7M_EXCP_DEBUG].active) { + val |= (1 << 8); + } + if (s->vectors[ARMV7M_EXCP_PENDSV].active) { + val |= (1 << 10); + } + if (s->vectors[ARMV7M_EXCP_SYSTICK].active) { + val |= (1 << 11); + } + if (s->vectors[ARMV7M_EXCP_USAGE].pending) { + val |= (1 << 12); + } + if (s->vectors[ARMV7M_EXCP_MEM].pending) { + val |= (1 << 13); + } + if (s->vectors[ARMV7M_EXCP_BUS].pending) { + val |= (1 << 14); + } + if (s->vectors[ARMV7M_EXCP_SVC].pending) { + val |= (1 << 15); + } + if (s->vectors[ARMV7M_EXCP_MEM].enabled) { + val |= (1 << 16); + } + if (s->vectors[ARMV7M_EXCP_BUS].enabled) { + val |= (1 << 17); + } + if (s->vectors[ARMV7M_EXCP_USAGE].enabled) { + val |= (1 << 18); + } return val; case 0xd28: /* Configurable Fault Status. */ return cpu->env.v7m.cfsr; @@ -294,43 +534,11 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset) } } -static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) +static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value) { ARMCPU *cpu = s->cpu; - uint32_t oldval; + switch (offset) { - case 0x10: /* SysTick Control and Status. */ - oldval = s->systick.control; - s->systick.control &= 0xfffffff8; - s->systick.control |= value & 7; - if ((oldval ^ value) & SYSTICK_ENABLE) { - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (value & SYSTICK_ENABLE) { - if (s->systick.tick) { - s->systick.tick += now; - timer_mod(s->systick.timer, s->systick.tick); - } else { - systick_reload(s, 1); - } - } else { - timer_del(s->systick.timer); - s->systick.tick -= now; - if (s->systick.tick < 0) - s->systick.tick = 0; - } - } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { - /* This is a hack. Force the timer to be reloaded - when the reference clock is changed. */ - systick_reload(s, 1); - } - break; - case 0x14: /* SysTick Reload Value. */ - s->systick.reload = value; - break; - case 0x18: /* SysTick Current Value. Writes reload the timer. */ - systick_reload(s, 1); - s->systick.control &= ~SYSTICK_COUNTFLAG; - break; case 0xd04: /* Interrupt Control State. */ if (value & (1 << 31)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI); @@ -338,14 +546,12 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) if (value & (1 << 28)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_PENDSV); } else if (value & (1 << 27)) { - s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending = 0; - gic_update(&s->gic); + armv7m_nvic_clear_pending(s, ARMV7M_EXCP_PENDSV); } if (value & (1 << 26)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); } else if (value & (1 << 25)) { - s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending = 0; - gic_update(&s->gic); + armv7m_nvic_clear_pending(s, ARMV7M_EXCP_SYSTICK); } break; case 0xd08: /* Vector Table Offset. */ @@ -357,14 +563,17 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) qemu_irq_pulse(s->sysresetreq); } if (value & 2) { - qemu_log_mask(LOG_UNIMP, "VECTCLRACTIVE unimplemented\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "Setting VECTCLRACTIVE when not in DEBUG mode " + "is UNPREDICTABLE\n"); } if (value & 1) { - qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n"); - } - if (value & 0x700) { - qemu_log_mask(LOG_UNIMP, "PRIGROUP unimplemented\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "Setting VECTRESET when not in DEBUG mode " + "is UNPREDICTABLE\n"); } + s->prigroup = extract32(value, 8, 3); + nvic_irq_update(s); } break; case 0xd10: /* System Control. */ @@ -383,11 +592,21 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) cpu->env.v7m.ccr = value; break; case 0xd24: /* System Handler Control. */ - /* TODO: Real hardware allows you to set/clear the active bits - under some circumstances. We don't implement this. */ - s->gic.irq_state[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0; - s->gic.irq_state[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0; - s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0; + s->vectors[ARMV7M_EXCP_MEM].active = (value & (1 << 0)) != 0; + s->vectors[ARMV7M_EXCP_BUS].active = (value & (1 << 1)) != 0; + s->vectors[ARMV7M_EXCP_USAGE].active = (value & (1 << 3)) != 0; + s->vectors[ARMV7M_EXCP_SVC].active = (value & (1 << 7)) != 0; + s->vectors[ARMV7M_EXCP_DEBUG].active = (value & (1 << 8)) != 0; + s->vectors[ARMV7M_EXCP_PENDSV].active = (value & (1 << 10)) != 0; + s->vectors[ARMV7M_EXCP_SYSTICK].active = (value & (1 << 11)) != 0; + s->vectors[ARMV7M_EXCP_USAGE].pending = (value & (1 << 12)) != 0; + s->vectors[ARMV7M_EXCP_MEM].pending = (value & (1 << 13)) != 0; + s->vectors[ARMV7M_EXCP_BUS].pending = (value & (1 << 14)) != 0; + s->vectors[ARMV7M_EXCP_SVC].pending = (value & (1 << 15)) != 0; + s->vectors[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0; + s->vectors[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0; + s->vectors[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0; + nvic_irq_update(s); break; case 0xd28: /* Configurable Fault Status. */ cpu->env.v7m.cfsr &= ~value; /* W1C */ @@ -409,13 +628,16 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) "NVIC: Aux fault status registers unimplemented\n"); break; case 0xf00: /* Software Triggered Interrupt Register */ + { /* user mode can only write to STIR if CCR.USERSETMPEND permits it */ - if ((value & 0x1ff) < s->num_irq && + int excnum = (value & 0x1ff) + NVIC_FIRST_IRQ; + if (excnum < s->num_irq && (arm_current_el(&cpu->env) || (cpu->env.v7m.ccr & R_V7M_CCR_USERSETMPEND_MASK))) { - gic_set_pending_private(&s->gic, 0, value & 0x1ff); + armv7m_nvic_set_pending(s, excnum); } break; + } default: qemu_log_mask(LOG_GUEST_ERROR, "NVIC: Bad write offset 0x%x\n", offset); @@ -425,46 +647,142 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) static uint64_t nvic_sysreg_read(void *opaque, hwaddr addr, unsigned size) { - nvic_state *s = (nvic_state *)opaque; + NVICState *s = (NVICState *)opaque; uint32_t offset = addr; - int i; + unsigned i, startvec, end; uint32_t val; switch (offset) { + /* reads of set and clear both return the status */ + case 0x100 ... 0x13f: /* NVIC Set enable */ + offset += 0x80; + /* fall through */ + case 0x180 ... 0x1bf: /* NVIC Clear enable */ + val = 0; + startvec = offset - 0x180 + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (s->vectors[startvec + i].enabled) { + val |= (1 << i); + } + } + break; + case 0x200 ... 0x23f: /* NVIC Set pend */ + offset += 0x80; + /* fall through */ + case 0x280 ... 0x2bf: /* NVIC Clear pend */ + val = 0; + startvec = offset - 0x280 + NVIC_FIRST_IRQ; /* vector # */ + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (s->vectors[startvec + i].pending) { + val |= (1 << i); + } + } + break; + case 0x300 ... 0x33f: /* NVIC Active */ + val = 0; + startvec = offset - 0x300 + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (s->vectors[startvec + i].active) { + val |= (1 << i); + } + } + break; + case 0x400 ... 0x5ef: /* NVIC Priority */ + val = 0; + startvec = offset - 0x400 + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0; i < size && startvec + i < s->num_irq; i++) { + val |= s->vectors[startvec + i].prio << (8 * i); + } + break; case 0xd18 ... 0xd23: /* System Handler Priority. */ val = 0; for (i = 0; i < size; i++) { - val |= s->gic.priority1[(offset - 0xd14) + i][0] << (i * 8); + val |= s->vectors[(offset - 0xd14) + i].prio << (i * 8); } - return val; + break; case 0xfe0 ... 0xfff: /* ID. */ if (offset & 3) { - return 0; + val = 0; + } else { + val = nvic_id[(offset - 0xfe0) >> 2]; + } + break; + default: + if (size == 4) { + val = nvic_readl(s, offset); + } else { + qemu_log_mask(LOG_GUEST_ERROR, + "NVIC: Bad read of size %d at offset 0x%x\n", + size, offset); + val = 0; } - return nvic_id[(offset - 0xfe0) >> 2]; - } - if (size == 4) { - return nvic_readl(s, offset); } - qemu_log_mask(LOG_GUEST_ERROR, - "NVIC: Bad read of size %d at offset 0x%x\n", size, offset); - return 0; + + trace_nvic_sysreg_read(addr, val, size); + return val; } static void nvic_sysreg_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { - nvic_state *s = (nvic_state *)opaque; + NVICState *s = (NVICState *)opaque; uint32_t offset = addr; - int i; + unsigned i, startvec, end; + unsigned setval = 0; + + trace_nvic_sysreg_write(addr, value, size); switch (offset) { + case 0x100 ... 0x13f: /* NVIC Set enable */ + offset += 0x80; + setval = 1; + /* fall through */ + case 0x180 ... 0x1bf: /* NVIC Clear enable */ + startvec = 8 * (offset - 0x180) + NVIC_FIRST_IRQ; + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (value & (1 << i)) { + s->vectors[startvec + i].enabled = setval; + } + } + nvic_irq_update(s); + return; + case 0x200 ... 0x23f: /* NVIC Set pend */ + /* the special logic in armv7m_nvic_set_pending() + * is not needed since IRQs are never escalated + */ + offset += 0x80; + setval = 1; + /* fall through */ + case 0x280 ... 0x2bf: /* NVIC Clear pend */ + startvec = 8 * (offset - 0x280) + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (value & (1 << i)) { + s->vectors[startvec + i].pending = setval; + } + } + nvic_irq_update(s); + return; + case 0x300 ... 0x33f: /* NVIC Active */ + return; /* R/O */ + case 0x400 ... 0x5ef: /* NVIC Priority */ + startvec = 8 * (offset - 0x400) + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0; i < size && startvec + i < s->num_irq; i++) { + set_prio(s, startvec + i, (value >> (i * 8)) & 0xff); + } + nvic_irq_update(s); + return; case 0xd18 ... 0xd23: /* System Handler Priority. */ for (i = 0; i < size; i++) { - s->gic.priority1[(offset - 0xd14) + i][0] = - (value >> (i * 8)) & 0xff; + unsigned hdlidx = (offset - 0xd14) + i; + set_prio(s, hdlidx, (value >> (i * 8)) & 0xff); } - gic_update(&s->gic); + nvic_irq_update(s); return; } if (size == 4) { @@ -481,61 +799,143 @@ static const MemoryRegionOps nvic_sysreg_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static const VMStateDescription vmstate_nvic = { - .name = "armv7m_nvic", +static int nvic_post_load(void *opaque, int version_id) +{ + NVICState *s = opaque; + unsigned i; + + /* Check for out of range priority settings */ + if (s->vectors[ARMV7M_EXCP_RESET].prio != -3 || + s->vectors[ARMV7M_EXCP_NMI].prio != -2 || + s->vectors[ARMV7M_EXCP_HARD].prio != -1) { + return 1; + } + for (i = ARMV7M_EXCP_MEM; i < s->num_irq; i++) { + if (s->vectors[i].prio & ~0xff) { + return 1; + } + } + + nvic_recompute_state(s); + + return 0; +} + +static const VMStateDescription vmstate_VecInfo = { + .name = "armv7m_nvic_info", .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32(systick.control, nvic_state), - VMSTATE_UINT32(systick.reload, nvic_state), - VMSTATE_INT64(systick.tick, nvic_state), - VMSTATE_TIMER_PTR(systick.timer, nvic_state), + VMSTATE_INT16(prio, VecInfo), + VMSTATE_UINT8(enabled, VecInfo), + VMSTATE_UINT8(pending, VecInfo), + VMSTATE_UINT8(active, VecInfo), + VMSTATE_UINT8(level, VecInfo), VMSTATE_END_OF_LIST() } }; +static const VMStateDescription vmstate_nvic = { + .name = "armv7m_nvic", + .version_id = 4, + .minimum_version_id = 4, + .post_load = &nvic_post_load, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1, + vmstate_VecInfo, VecInfo), + VMSTATE_UINT32(prigroup, NVICState), + VMSTATE_END_OF_LIST() + } +}; + +static Property props_nvic[] = { + /* Number of external IRQ lines (so excluding the 16 internal exceptions) */ + DEFINE_PROP_UINT32("num-irq", NVICState, num_irq, 64), + DEFINE_PROP_END_OF_LIST() +}; + static void armv7m_nvic_reset(DeviceState *dev) { - nvic_state *s = NVIC(dev); - NVICClass *nc = NVIC_GET_CLASS(s); - nc->parent_reset(dev); - /* Common GIC reset resets to disabled; the NVIC doesn't have - * per-CPU interfaces so mark our non-existent CPU interface - * as enabled by default, and with a priority mask which allows - * all interrupts through. + NVICState *s = NVIC(dev); + + s->vectors[ARMV7M_EXCP_NMI].enabled = 1; + s->vectors[ARMV7M_EXCP_HARD].enabled = 1; + /* MEM, BUS, and USAGE are enabled through + * the System Handler Control register + */ + s->vectors[ARMV7M_EXCP_SVC].enabled = 1; + s->vectors[ARMV7M_EXCP_DEBUG].enabled = 1; + s->vectors[ARMV7M_EXCP_PENDSV].enabled = 1; + s->vectors[ARMV7M_EXCP_SYSTICK].enabled = 1; + + s->vectors[ARMV7M_EXCP_RESET].prio = -3; + s->vectors[ARMV7M_EXCP_NMI].prio = -2; + s->vectors[ARMV7M_EXCP_HARD].prio = -1; + + /* Strictly speaking the reset handler should be enabled. + * However, we don't simulate soft resets through the NVIC, + * and the reset vector should never be pended. + * So we leave it disabled to catch logic errors. */ - s->gic.cpu_ctlr[0] = GICC_CTLR_EN_GRP0; - s->gic.priority_mask[0] = 0x100; - /* The NVIC as a whole is always enabled. */ - s->gic.ctlr = 1; - systick_reset(s); + + s->exception_prio = NVIC_NOEXC_PRIO; + s->vectpending = 0; +} + +static void nvic_systick_trigger(void *opaque, int n, int level) +{ + NVICState *s = opaque; + + if (level) { + /* SysTick just asked us to pend its exception. + * (This is different from an external interrupt line's + * behaviour.) + */ + armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); + } } static void armv7m_nvic_realize(DeviceState *dev, Error **errp) { - nvic_state *s = NVIC(dev); - NVICClass *nc = NVIC_GET_CLASS(s); - Error *local_err = NULL; + NVICState *s = NVIC(dev); + SysBusDevice *systick_sbd; + Error *err = NULL; s->cpu = ARM_CPU(qemu_get_cpu(0)); assert(s->cpu); - /* The NVIC always has only one CPU */ - s->gic.num_cpu = 1; - /* Tell the common code we're an NVIC */ - s->gic.revision = 0xffffffff; - s->num_irq = s->gic.num_irq; - nc->parent_realize(dev, &local_err); - if (local_err) { - error_propagate(errp, local_err); + + if (s->num_irq > NVIC_MAX_IRQ) { + error_setg(errp, "num-irq %d exceeds NVIC maximum", s->num_irq); + return; + } + + qdev_init_gpio_in(dev, set_irq_level, s->num_irq); + + /* include space for internal exception vectors */ + s->num_irq += NVIC_FIRST_IRQ; + + object_property_set_bool(OBJECT(&s->systick), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); return; } - gic_init_irqs_and_distributor(&s->gic); - /* The NVIC and system controller register area looks like this: - * 0..0xff : system control registers, including systick - * 0x100..0xcff : GIC-like registers - * 0xd00..0xfff : system control registers - * We use overlaying to put the GIC like registers - * over the top of the system control register region. + systick_sbd = SYS_BUS_DEVICE(&s->systick); + sysbus_connect_irq(systick_sbd, 0, + qdev_get_gpio_in_named(dev, "systick-trigger", 0)); + + /* The NVIC and System Control Space (SCS) starts at 0xe000e000 + * and looks like this: + * 0x004 - ICTR + * 0x010 - 0xff - systick + * 0x100..0x7ec - NVIC + * 0x7f0..0xcff - Reserved + * 0xd00..0xd3c - SCS registers + * 0xd40..0xeff - Reserved or Not implemented + * 0xf00 - STIR + * + * At the moment there is only one thing in the container region, + * but we leave it in place to allow us to pull systick out into + * its own device object later. */ memory_region_init(&s->container, OBJECT(s), "nvic", 0x1000); /* The system register region goes at the bottom of the priority @@ -544,19 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s, "nvic_sysregs", 0x1000); memory_region_add_subregion(&s->container, 0, &s->sysregmem); - /* Alias the GIC region so we can get only the section of it - * we need, and layer it on top of the system register region. - */ - memory_region_init_alias(&s->gic_iomem_alias, OBJECT(s), - "nvic-gic", &s->gic.iomem, - 0x100, 0xc00); - memory_region_add_subregion_overlap(&s->container, 0x100, - &s->gic_iomem_alias, 1); - /* Map the whole thing into system memory at the location required - * by the v7M architecture. - */ - memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container); - s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); + memory_region_add_subregion_overlap(&s->container, 0x10, + sysbus_mmio_get_region(systick_sbd, 0), + 1); + + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); } static void armv7m_nvic_instance_init(Object *obj) @@ -567,36 +959,35 @@ static void armv7m_nvic_instance_init(Object *obj) * any user-specified property setting, so just modify the * value in the GICState struct. */ - GICState *s = ARM_GIC_COMMON(obj); DeviceState *dev = DEVICE(obj); - nvic_state *nvic = NVIC(obj); - /* The ARM v7m may have anything from 0 to 496 external interrupt - * IRQ lines. We default to 64. Other boards may differ and should - * set the num-irq property appropriately. - */ - s->num_irq = 64; + NVICState *nvic = NVIC(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK); + qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default()); + + sysbus_init_irq(sbd, &nvic->excpout); qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); + qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1); } static void armv7m_nvic_class_init(ObjectClass *klass, void *data) { - NVICClass *nc = NVIC_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - nc->parent_reset = dc->reset; - nc->parent_realize = dc->realize; dc->vmsd = &vmstate_nvic; + dc->props = props_nvic; dc->reset = armv7m_nvic_reset; dc->realize = armv7m_nvic_realize; } static const TypeInfo armv7m_nvic_info = { .name = TYPE_NVIC, - .parent = TYPE_ARM_GIC_COMMON, + .parent = TYPE_SYS_BUS_DEVICE, .instance_init = armv7m_nvic_instance_init, - .instance_size = sizeof(nvic_state), + .instance_size = sizeof(NVICState), .class_init = armv7m_nvic_class_init, - .class_size = sizeof(NVICClass), + .class_size = sizeof(SysBusDeviceClass), }; static void armv7m_nvic_register_types(void) diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h index 3f311740da..7fe87b13de 100644 --- a/hw/intc/gic_internal.h +++ b/hw/intc/gic_internal.h @@ -25,9 +25,7 @@ #define ALL_CPU_MASK ((unsigned)(((1 << GIC_NCPU) - 1))) -/* The NVIC has 16 internal vectors. However these are not exposed - through the normal GIC interface. */ -#define GIC_BASE_IRQ ((s->revision == REV_NVIC) ? 32 : 0) +#define GIC_BASE_IRQ 0 #define GIC_SET_ENABLED(irq, cm) s->irq_state[irq].enabled |= (cm) #define GIC_CLEAR_ENABLED(irq, cm) s->irq_state[irq].enabled &= ~(cm) @@ -75,7 +73,6 @@ /* The special cases for the revision property: */ #define REV_11MPCORE 0 -#define REV_NVIC 0xffffffff void gic_set_pending_private(GICState *s, int cpu, int irq); uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs); @@ -87,7 +84,7 @@ void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val, static inline bool gic_test_pending(GICState *s, int irq, int cm) { - if (s->revision == REV_NVIC || s->revision == REV_11MPCORE) { + if (s->revision == REV_11MPCORE) { return s->irq_state[irq].pending & cm; } else { /* Edge-triggered interrupts are marked pending on a rising edge, but diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index aeb801d133..05303a55c8 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -138,6 +138,7 @@ #define ICC_CTLR_EL1_EOIMODE (1U << 1) #define ICC_CTLR_EL1_PMHE (1U << 6) #define ICC_CTLR_EL1_PRIBITS_SHIFT 8 +#define ICC_CTLR_EL1_PRIBITS_MASK (7U << ICC_CTLR_EL1_PRIBITS_SHIFT) #define ICC_CTLR_EL1_IDBITS_SHIFT 11 #define ICC_CTLR_EL1_SEIS (1U << 14) #define ICC_CTLR_EL1_A3V (1U << 15) @@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) } } +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 39a538d048..729c1288f1 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -161,3 +161,18 @@ gicv3_redist_write(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size, gicv3_redist_badwrite(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size, bool secure) "GICv3 redistributor %x write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u secure %d: error" gicv3_redist_set_irq(uint32_t cpu, int irq, int level) "GICv3 redistributor %x interrupt %d level changed to %d" gicv3_redist_send_sgi(uint32_t cpu, int irq) "GICv3 redistributor %x pending SGI %d" + +# hw/intc/armv7m_nvic.c +nvic_recompute_state(int vectpending, int exception_prio) "NVIC state recomputed: vectpending %d exception_prio %d" +nvic_set_prio(int irq, uint8_t prio) "NVIC set irq %d priority %d" +nvic_irq_update(int vectpending, int pendprio, int exception_prio, int level) "NVIC vectpending %d pending prio %d exception_prio %d: setting irq line to %d" +nvic_escalate_prio(int irq, int irqprio, int runprio) "NVIC escalating irq %d to HardFault: insufficient priority %d >= %d" +nvic_escalate_disabled(int irq) "NVIC escalating irq %d to HardFault: disabled" +nvic_set_pending(int irq, int en, int prio) "NVIC set pending irq %d (enabled: %d priority %d)" +nvic_clear_pending(int irq, int en, int prio) "NVIC clear pending irq %d (enabled: %d priority %d)" +nvic_set_pending_level(int irq) "NVIC set pending: irq %d higher prio than vectpending: setting irq line to 1" +nvic_acknowledge_irq(int irq, int prio) "NVIC acknowledge IRQ: %d now active (prio %d)" +nvic_complete_irq(int irq) "NVIC complete IRQ %d" +nvic_set_irq_level(int irq, int level) "NVIC external irq %d level set to %d" +nvic_sysreg_read(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" +nvic_sysreg_write(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 898e4ccfb1..c8b489390f 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -26,7 +26,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o obj-$(CONFIG_REALVIEW) += arm_sysctl.o obj-$(CONFIG_NSERIES) += cbus.o obj-$(CONFIG_ECCMEMCTL) += eccmemctl.o -obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o +obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o exynos4210_clk.o obj-$(CONFIG_IMX) += imx_ccm.o obj-$(CONFIG_IMX) += imx31_ccm.o obj-$(CONFIG_IMX) += imx25_ccm.o @@ -42,6 +42,7 @@ obj-$(CONFIG_OMAP) += omap_sdrc.o obj-$(CONFIG_OMAP) += omap_tap.o obj-$(CONFIG_RASPI) += bcm2835_mbox.o obj-$(CONFIG_RASPI) += bcm2835_property.o +obj-$(CONFIG_RASPI) += bcm2835_rng.o obj-$(CONFIG_SLAVIO) += slavio_misc.o obj-$(CONFIG_ZYNQ) += zynq_slcr.o obj-$(CONFIG_ZYNQ) += zynq-xadc.o diff --git a/hw/misc/bcm2835_rng.c b/hw/misc/bcm2835_rng.c new file mode 100644 index 0000000000..4d62143b24 --- /dev/null +++ b/hw/misc/bcm2835_rng.c @@ -0,0 +1,149 @@ +/* + * BCM2835 Random Number Generator emulation + * + * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "crypto/random.h" +#include "hw/misc/bcm2835_rng.h" + +static uint32_t get_random_bytes(void) +{ + uint32_t res; + Error *err = NULL; + + if (qcrypto_random_bytes((uint8_t *)&res, sizeof(res), &err) < 0) { + /* On failure we don't want to return the guest a non-random + * value in case they're really using it for cryptographic + * purposes, so the best we can do is die here. + * This shouldn't happen unless something's broken. + * In theory we could implement this device's full FIFO + * and interrupt semantics and then just stop filling the + * FIFO. That's a lot of work, though, so we assume any + * errors are systematic problems and trust that if we didn't + * fail as the guest inited then we won't fail later on + * mid-run. + */ + error_report_err(err); + exit(1); + } + return res; +} + +static uint64_t bcm2835_rng_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835RngState *s = (BCM2835RngState *)opaque; + uint32_t res = 0; + + assert(size == 4); + + switch (offset) { + case 0x0: /* rng_ctrl */ + res = s->rng_ctrl; + break; + case 0x4: /* rng_status */ + res = s->rng_status | (1 << 24); + break; + case 0x8: /* rng_data */ + res = get_random_bytes(); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "bcm2835_rng_read: Bad offset %x\n", + (int)offset); + res = 0; + break; + } + + return res; +} + +static void bcm2835_rng_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835RngState *s = (BCM2835RngState *)opaque; + + assert(size == 4); + + switch (offset) { + case 0x0: /* rng_ctrl */ + s->rng_ctrl = value; + break; + case 0x4: /* rng_status */ + /* we shouldn't let the guest write to bits [31..20] */ + s->rng_status &= ~0xFFFFF; /* clear 20 lower bits */ + s->rng_status |= value & 0xFFFFF; /* set them to new value */ + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "bcm2835_rng_write: Bad offset %x\n", + (int)offset); + break; + } +} + +static const MemoryRegionOps bcm2835_rng_ops = { + .read = bcm2835_rng_read, + .write = bcm2835_rng_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_rng = { + .name = TYPE_BCM2835_RNG, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(rng_ctrl, BCM2835RngState), + VMSTATE_UINT32(rng_status, BCM2835RngState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_rng_init(Object *obj) +{ + BCM2835RngState *s = BCM2835_RNG(obj); + + memory_region_init_io(&s->iomem, obj, &bcm2835_rng_ops, s, + TYPE_BCM2835_RNG, 0x10); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); +} + +static void bcm2835_rng_reset(DeviceState *dev) +{ + BCM2835RngState *s = BCM2835_RNG(dev); + + s->rng_ctrl = 0; + s->rng_status = 0; +} + +static void bcm2835_rng_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bcm2835_rng_reset; + dc->vmsd = &vmstate_bcm2835_rng; +} + +static TypeInfo bcm2835_rng_info = { + .name = TYPE_BCM2835_RNG, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835RngState), + .class_init = bcm2835_rng_class_init, + .instance_init = bcm2835_rng_init, +}; + +static void bcm2835_rng_register_types(void) +{ + type_register_static(&bcm2835_rng_info); +} + +type_init(bcm2835_rng_register_types) diff --git a/hw/misc/exynos4210_clk.c b/hw/misc/exynos4210_clk.c new file mode 100644 index 0000000000..81862c0ada --- /dev/null +++ b/hw/misc/exynos4210_clk.c @@ -0,0 +1,164 @@ +/* + * Exynos4210 Clock Controller Emulation + * + * Copyright (c) 2017 Krzysztof Kozlowski <krzk@kernel.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "qemu/log.h" + +#define TYPE_EXYNOS4210_CLK "exynos4210.clk" +#define EXYNOS4210_CLK(obj) \ + OBJECT_CHECK(Exynos4210ClkState, (obj), TYPE_EXYNOS4210_CLK) + +#define CLK_PLL_LOCKED BIT(29) + +#define EXYNOS4210_CLK_REGS_MEM_SIZE 0x15104 + +typedef struct Exynos4210Reg { + const char *name; /* for debug only */ + uint32_t offset; + uint32_t reset_value; +} Exynos4210Reg; + +/* Clock controller register base: 0x10030000 */ +static const Exynos4210Reg exynos4210_clk_regs[] = { + {"EPLL_LOCK", 0xc010, 0x00000fff}, + {"VPLL_LOCK", 0xc020, 0x00000fff}, + {"EPLL_CON0", 0xc110, 0x00300301 | CLK_PLL_LOCKED}, + {"EPLL_CON1", 0xc114, 0x00000000}, + {"VPLL_CON0", 0xc120, 0x00240201 | CLK_PLL_LOCKED}, + {"VPLL_CON1", 0xc124, 0x66010464}, + {"APLL_LOCK", 0x14000, 0x00000fff}, + {"MPLL_LOCK", 0x14004, 0x00000fff}, + {"APLL_CON0", 0x14100, 0x00c80601 | CLK_PLL_LOCKED}, + {"APLL_CON1", 0x14104, 0x0000001c}, + {"MPLL_CON0", 0x14108, 0x00c80601 | CLK_PLL_LOCKED}, + {"MPLL_CON1", 0x1410c, 0x0000001c}, +}; + +#define EXYNOS4210_REGS_NUM ARRAY_SIZE(exynos4210_clk_regs) + +typedef struct Exynos4210ClkState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t reg[EXYNOS4210_REGS_NUM]; +} Exynos4210ClkState; + +static uint64_t exynos4210_clk_read(void *opaque, hwaddr offset, + unsigned size) +{ + const Exynos4210ClkState *s = (Exynos4210ClkState *)opaque; + const Exynos4210Reg *regs = exynos4210_clk_regs; + unsigned int i; + + for (i = 0; i < EXYNOS4210_REGS_NUM; i++) { + if (regs->offset == offset) { + return s->reg[i]; + } + regs++; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read offset 0x%04x\n", + __func__, (uint32_t)offset); + return 0; +} + +static void exynos4210_clk_write(void *opaque, hwaddr offset, + uint64_t val, unsigned size) +{ + Exynos4210ClkState *s = (Exynos4210ClkState *)opaque; + const Exynos4210Reg *regs = exynos4210_clk_regs; + unsigned int i; + + for (i = 0; i < EXYNOS4210_REGS_NUM; i++) { + if (regs->offset == offset) { + s->reg[i] = val; + return; + } + regs++; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write offset 0x%04x\n", + __func__, (uint32_t)offset); +} + +static const MemoryRegionOps exynos4210_clk_ops = { + .read = exynos4210_clk_read, + .write = exynos4210_clk_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false + } +}; + +static void exynos4210_clk_reset(DeviceState *dev) +{ + Exynos4210ClkState *s = EXYNOS4210_CLK(dev); + unsigned int i; + + /* Set default values for registers */ + for (i = 0; i < EXYNOS4210_REGS_NUM; i++) { + s->reg[i] = exynos4210_clk_regs[i].reset_value; + } +} + +static void exynos4210_clk_init(Object *obj) +{ + Exynos4210ClkState *s = EXYNOS4210_CLK(obj); + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + + /* memory mapping */ + memory_region_init_io(&s->iomem, obj, &exynos4210_clk_ops, s, + TYPE_EXYNOS4210_CLK, EXYNOS4210_CLK_REGS_MEM_SIZE); + sysbus_init_mmio(dev, &s->iomem); +} + +static const VMStateDescription exynos4210_clk_vmstate = { + .name = TYPE_EXYNOS4210_CLK, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(reg, Exynos4210ClkState, EXYNOS4210_REGS_NUM), + VMSTATE_END_OF_LIST() + } +}; + +static void exynos4210_clk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = exynos4210_clk_reset; + dc->vmsd = &exynos4210_clk_vmstate; +} + +static const TypeInfo exynos4210_clk_info = { + .name = TYPE_EXYNOS4210_CLK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Exynos4210ClkState), + .instance_init = exynos4210_clk_init, + .class_init = exynos4210_clk_class_init, +}; + +static void exynos4210_clk_register(void) +{ + qemu_log_mask(LOG_GUEST_ERROR, "Clock init\n"); + type_register_static(&exynos4210_clk_info); +} + +type_init(exynos4210_clk_register) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index e99d4544a2..d4de8ad9f1 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -508,7 +508,7 @@ static void gem_update_int_status(CadenceGEMState *s) if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) { /* No priority queues, just trigger the interrupt */ - DB_PRINT("asserting int.\n", i); + DB_PRINT("asserting int.\n"); qemu_set_irq(s->irq[0], 1); return; } diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 65ba188555..aa5d2c1f5f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) { sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev); + int ret; if (nvram->blk) { nvram->size = blk_getlength(nvram->blk); + + ret = blk_set_perm(nvram->blk, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } } else { nvram->size = DEFAULT_NVRAM_SIZE; } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 2e2664f22e..7978c7d52a 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -20,6 +20,7 @@ #include "hw/s390x/virtio-ccw.h" #include "hw/s390x/css.h" #include "ipl.h" +#include "qemu/error-report.h" #define KERN_IMAGE_START 0x010000UL #define KERN_PARM_AREA 0x010480UL @@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = { DEFINE_PROP_STRING("initrd", S390IPLState, initrd), DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline), DEFINE_PROP_STRING("firmware", S390IPLState, firmware), + DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw), DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false), DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration, true), @@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) TYPE_VIRTIO_CCW_DEVICE); SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), TYPE_SCSI_DEVICE); + VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st), + TYPE_VIRTIO_NET); + + if (vn) { + ipl->netboot = true; + } if (virtio_ccw_dev) { CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev); @@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) return false; } +static int load_netboot_image(Error **errp) +{ + S390IPLState *ipl = get_ipl_device(); + char *netboot_filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = NULL; + void *ram_ptr = NULL; + int img_size = -1; + + mr = memory_region_find(sysmem, 0, 1).mr; + if (!mr) { + error_setg(errp, "Failed to find memory region at address 0"); + return -1; + } + + ram_ptr = memory_region_get_ram_ptr(mr); + if (!ram_ptr) { + error_setg(errp, "No RAM found"); + goto unref_mr; + } + + netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw); + if (netboot_filename == NULL) { + error_setg(errp, "Could not find network bootloader"); + goto unref_mr; + } + + img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr, + NULL, NULL, 1, EM_S390, 0, 0, NULL, false); + + if (img_size < 0) { + img_size = load_image_size(netboot_filename, ram_ptr, ram_size); + ipl->start_addr = KERN_IMAGE_START; + } + + if (img_size < 0) { + error_setg(errp, "Failed to load network bootloader"); + } + + g_free(netboot_filename); + +unref_mr: + memory_region_unref(mr); + return img_size; +} + +static bool is_virtio_net_device(IplParameterBlock *iplb) +{ + uint8_t cssid; + uint8_t ssid; + uint16_t devno; + uint16_t schid; + SubchDev *sch = NULL; + + if (iplb->pbt != S390_IPL_TYPE_CCW) { + return false; + } + + devno = be16_to_cpu(iplb->ccw.devno); + ssid = iplb->ccw.ssid & 3; + + for (schid = 0; schid < MAX_SCHID; schid++) { + for (cssid = 0; cssid < MAX_CSSID; cssid++) { + sch = css_find_subch(1, cssid, ssid, schid); + + if (sch && sch->devno == devno) { + return sch->id.cu_model == VIRTIO_ID_NET; + } + } + } + return false; +} + void s390_ipl_update_diag308(IplParameterBlock *iplb) { S390IPLState *ipl = get_ipl_device(); ipl->iplb = *iplb; ipl->iplb_valid = true; + ipl->netboot = is_virtio_net_device(iplb); } IplParameterBlock *s390_ipl_get_iplb(void) @@ -287,6 +369,7 @@ void s390_reipl_request(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); + Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) ipl->iplb_valid = s390_gen_initial_iplb(ipl); } } + if (ipl->netboot) { + if (load_netboot_image(&err) < 0) { + error_report_err(err); + vm_stop(RUN_STATE_INTERNAL_ERROR); + } + ipl->iplb.ccw.netboot_start_addr = ipl->start_addr; + } } static void s390_ipl_reset(DeviceState *dev) diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index c89109585a..46930e4c64 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -16,7 +16,8 @@ #include "cpu.h" struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; @@ -100,12 +101,14 @@ struct S390IPLState { IplParameterBlock iplb; bool iplb_valid; bool reipl_requested; + bool netboot; /*< public >*/ char *kernel; char *initrd; char *cmdline; char *firmware; + char *netboot_fw; uint8_t cssid; uint8_t ssid; uint16_t devno; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 4f0d62b2d8..40914fde6f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine) /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, "s390-ccw.img", true); + machine->initrd_filename, "s390-ccw.img", + "s390-netboot.img", true); s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 9cfb09057e..afa4148e6b 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -65,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios) { Object *new = object_new(TYPE_S390_IPL); @@ -78,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename, } qdev_prop_set_string(dev, "cmdline", kernel_cmdline); qdev_prop_set_string(dev, "firmware", firmware); + qdev_prop_set_string(dev, "netboot_fw", netboot_fw); qdev_prop_set_bit(dev, "enforce_bios", enforce_bios); object_property_add_child(qdev_get_machine(), TYPE_S390_IPL, new, NULL); diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index f588b80a6e..f2377a3e0e 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios); void s390_create_virtio_net(BusState *bus, const char *name); void s390_nmi(NMIState *n, int cpu_index, Error **errp); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bbfb5dc289..a53f058621 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque) } } -static void scsi_cd_change_media_cb(void *opaque, bool load) +static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState *s = opaque; @@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, + blk_is_read_only(s->qdev.conf.blk), + dev->type == TYPE_DISK, &err); + if (err) { + error_propagate(errp, err); + return; + } if (s->qdev.conf.discard_granularity == -1) { s->qdev.conf.discard_granularity = @@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); if (!dev->conf.blk) { - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } s->qdev.blocksize = 2048; diff --git a/hw/sd/Makefile.objs b/hw/sd/Makefile.objs index 31c83308f2..c2b7664264 100644 --- a/hw/sd/Makefile.objs +++ b/hw/sd/Makefile.objs @@ -6,3 +6,4 @@ common-obj-$(CONFIG_SDHCI) += sdhci.o obj-$(CONFIG_MILKYMIST) += milkymist-memcard.o obj-$(CONFIG_OMAP) += omap_mmc.o obj-$(CONFIG_PXA2XX) += pxa2xx_mmci.o +obj-$(CONFIG_RASPI) += bcm2835_sdhost.o diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c new file mode 100644 index 0000000000..f7f4e656df --- /dev/null +++ b/hw/sd/bcm2835_sdhost.c @@ -0,0 +1,429 @@ +/* + * Raspberry Pi (BCM2835) SD Host Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "sysemu/blockdev.h" +#include "hw/sd/bcm2835_sdhost.h" + +#define TYPE_BCM2835_SDHOST_BUS "bcm2835-sdhost-bus" +#define BCM2835_SDHOST_BUS(obj) \ + OBJECT_CHECK(SDBus, (obj), TYPE_BCM2835_SDHOST_BUS) + +#define SDCMD 0x00 /* Command to SD card - 16 R/W */ +#define SDARG 0x04 /* Argument to SD card - 32 R/W */ +#define SDTOUT 0x08 /* Start value for timeout counter - 32 R/W */ +#define SDCDIV 0x0c /* Start value for clock divider - 11 R/W */ +#define SDRSP0 0x10 /* SD card rsp (31:0) - 32 R */ +#define SDRSP1 0x14 /* SD card rsp (63:32) - 32 R */ +#define SDRSP2 0x18 /* SD card rsp (95:64) - 32 R */ +#define SDRSP3 0x1c /* SD card rsp (127:96) - 32 R */ +#define SDHSTS 0x20 /* SD host status - 11 R */ +#define SDVDD 0x30 /* SD card power control - 1 R/W */ +#define SDEDM 0x34 /* Emergency Debug Mode - 13 R/W */ +#define SDHCFG 0x38 /* Host configuration - 2 R/W */ +#define SDHBCT 0x3c /* Host byte count (debug) - 32 R/W */ +#define SDDATA 0x40 /* Data to/from SD card - 32 R/W */ +#define SDHBLC 0x50 /* Host block count (SDIO/SDHC) - 9 R/W */ + +#define SDCMD_NEW_FLAG 0x8000 +#define SDCMD_FAIL_FLAG 0x4000 +#define SDCMD_BUSYWAIT 0x800 +#define SDCMD_NO_RESPONSE 0x400 +#define SDCMD_LONG_RESPONSE 0x200 +#define SDCMD_WRITE_CMD 0x80 +#define SDCMD_READ_CMD 0x40 +#define SDCMD_CMD_MASK 0x3f + +#define SDCDIV_MAX_CDIV 0x7ff + +#define SDHSTS_BUSY_IRPT 0x400 +#define SDHSTS_BLOCK_IRPT 0x200 +#define SDHSTS_SDIO_IRPT 0x100 +#define SDHSTS_REW_TIME_OUT 0x80 +#define SDHSTS_CMD_TIME_OUT 0x40 +#define SDHSTS_CRC16_ERROR 0x20 +#define SDHSTS_CRC7_ERROR 0x10 +#define SDHSTS_FIFO_ERROR 0x08 +/* Reserved */ +/* Reserved */ +#define SDHSTS_DATA_FLAG 0x01 + +#define SDHCFG_BUSY_IRPT_EN (1 << 10) +#define SDHCFG_BLOCK_IRPT_EN (1 << 8) +#define SDHCFG_SDIO_IRPT_EN (1 << 5) +#define SDHCFG_DATA_IRPT_EN (1 << 4) +#define SDHCFG_SLOW_CARD (1 << 3) +#define SDHCFG_WIDE_EXT_BUS (1 << 2) +#define SDHCFG_WIDE_INT_BUS (1 << 1) +#define SDHCFG_REL_CMD_LINE (1 << 0) + +#define SDEDM_FORCE_DATA_MODE (1 << 19) +#define SDEDM_CLOCK_PULSE (1 << 20) +#define SDEDM_BYPASS (1 << 21) + +#define SDEDM_WRITE_THRESHOLD_SHIFT 9 +#define SDEDM_READ_THRESHOLD_SHIFT 14 +#define SDEDM_THRESHOLD_MASK 0x1f + +#define SDEDM_FSM_MASK 0xf +#define SDEDM_FSM_IDENTMODE 0x0 +#define SDEDM_FSM_DATAMODE 0x1 +#define SDEDM_FSM_READDATA 0x2 +#define SDEDM_FSM_WRITEDATA 0x3 +#define SDEDM_FSM_READWAIT 0x4 +#define SDEDM_FSM_READCRC 0x5 +#define SDEDM_FSM_WRITECRC 0x6 +#define SDEDM_FSM_WRITEWAIT1 0x7 +#define SDEDM_FSM_POWERDOWN 0x8 +#define SDEDM_FSM_POWERUP 0x9 +#define SDEDM_FSM_WRITESTART1 0xa +#define SDEDM_FSM_WRITESTART2 0xb +#define SDEDM_FSM_GENPULSES 0xc +#define SDEDM_FSM_WRITEWAIT2 0xd +#define SDEDM_FSM_STARTPOWDOWN 0xf + +#define SDDATA_FIFO_WORDS 16 + +static void bcm2835_sdhost_update_irq(BCM2835SDHostState *s) +{ + uint32_t irq = s->status & + (SDHSTS_BUSY_IRPT | SDHSTS_BLOCK_IRPT | SDHSTS_SDIO_IRPT); + qemu_set_irq(s->irq, !!irq); +} + +static void bcm2835_sdhost_send_command(BCM2835SDHostState *s) +{ + SDRequest request; + uint8_t rsp[16]; + int rlen; + + request.cmd = s->cmd & SDCMD_CMD_MASK; + request.arg = s->cmdarg; + + rlen = sdbus_do_command(&s->sdbus, &request, rsp); + if (rlen < 0) { + goto error; + } + if (!(s->cmd & SDCMD_NO_RESPONSE)) { +#define RWORD(n) (((uint32_t)rsp[n] << 24) | (rsp[n + 1] << 16) \ + | (rsp[n + 2] << 8) | rsp[n + 3]) + if (rlen == 0 || (rlen == 4 && (s->cmd & SDCMD_LONG_RESPONSE))) { + goto error; + } + if (rlen != 4 && rlen != 16) { + goto error; + } + if (rlen == 4) { + s->rsp[0] = RWORD(0); + s->rsp[1] = s->rsp[2] = s->rsp[3] = 0; + } else { + s->rsp[0] = RWORD(12); + s->rsp[1] = RWORD(8); + s->rsp[2] = RWORD(4); + s->rsp[3] = RWORD(0); + } +#undef RWORD + } + return; + +error: + s->cmd |= SDCMD_FAIL_FLAG; + s->status |= SDHSTS_CMD_TIME_OUT; +} + +static void bcm2835_sdhost_fifo_push(BCM2835SDHostState *s, uint32_t value) +{ + int n; + + if (s->fifo_len == BCM2835_SDHOST_FIFO_LEN) { + /* FIFO overflow */ + return; + } + n = (s->fifo_pos + s->fifo_len) & (BCM2835_SDHOST_FIFO_LEN - 1); + s->fifo_len++; + s->fifo[n] = value; +} + +static uint32_t bcm2835_sdhost_fifo_pop(BCM2835SDHostState *s) +{ + uint32_t value; + + if (s->fifo_len == 0) { + /* FIFO underflow */ + return 0; + } + value = s->fifo[s->fifo_pos]; + s->fifo_len--; + s->fifo_pos = (s->fifo_pos + 1) & (BCM2835_SDHOST_FIFO_LEN - 1); + return value; +} + +static void bcm2835_sdhost_fifo_run(BCM2835SDHostState *s) +{ + uint32_t value = 0; + int n; + int is_read; + + is_read = (s->cmd & SDCMD_READ_CMD) != 0; + if (s->datacnt != 0 && (!is_read || sdbus_data_ready(&s->sdbus))) { + if (is_read) { + n = 0; + while (s->datacnt && s->fifo_len < BCM2835_SDHOST_FIFO_LEN) { + value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8); + s->datacnt--; + n++; + if (n == 4) { + bcm2835_sdhost_fifo_push(s, value); + n = 0; + value = 0; + } + } + if (n != 0) { + bcm2835_sdhost_fifo_push(s, value); + } + } else { /* write */ + n = 0; + while (s->datacnt > 0 && (s->fifo_len > 0 || n > 0)) { + if (n == 0) { + value = bcm2835_sdhost_fifo_pop(s); + n = 4; + } + n--; + s->datacnt--; + sdbus_write_data(&s->sdbus, value & 0xff); + value >>= 8; + } + } + } + if (s->datacnt == 0) { + s->status |= SDHSTS_DATA_FLAG; + + s->edm &= ~0xf; + s->edm |= SDEDM_FSM_DATAMODE; + + if (s->config & SDHCFG_DATA_IRPT_EN) { + s->status |= SDHSTS_SDIO_IRPT; + } + + if ((s->cmd & SDCMD_BUSYWAIT) && (s->config & SDHCFG_BUSY_IRPT_EN)) { + s->status |= SDHSTS_BUSY_IRPT; + } + + if ((s->cmd & SDCMD_WRITE_CMD) && (s->config & SDHCFG_BLOCK_IRPT_EN)) { + s->status |= SDHSTS_BLOCK_IRPT; + } + + bcm2835_sdhost_update_irq(s); + } + + s->edm &= ~(0x1f << 4); + s->edm |= ((s->fifo_len & 0x1f) << 4); +} + +static uint64_t bcm2835_sdhost_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835SDHostState *s = (BCM2835SDHostState *)opaque; + uint32_t res = 0; + + switch (offset) { + case SDCMD: + res = s->cmd; + break; + case SDHSTS: + res = s->status; + break; + case SDRSP0: + res = s->rsp[0]; + break; + case SDRSP1: + res = s->rsp[1]; + break; + case SDRSP2: + res = s->rsp[2]; + break; + case SDRSP3: + res = s->rsp[3]; + break; + case SDEDM: + res = s->edm; + break; + case SDVDD: + res = s->vdd; + break; + case SDDATA: + res = bcm2835_sdhost_fifo_pop(s); + bcm2835_sdhost_fifo_run(s); + break; + case SDHBCT: + res = s->hbct; + break; + case SDHBLC: + res = s->hblc; + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + res = 0; + break; + } + + return res; +} + +static void bcm2835_sdhost_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835SDHostState *s = (BCM2835SDHostState *)opaque; + + switch (offset) { + case SDCMD: + s->cmd = value; + if (value & SDCMD_NEW_FLAG) { + bcm2835_sdhost_send_command(s); + bcm2835_sdhost_fifo_run(s); + s->cmd &= ~SDCMD_NEW_FLAG; + } + break; + case SDTOUT: + break; + case SDCDIV: + break; + case SDHSTS: + s->status &= ~value; + bcm2835_sdhost_update_irq(s); + break; + case SDARG: + s->cmdarg = value; + break; + case SDEDM: + if ((value & 0xf) == 0xf) { + /* power down */ + value &= ~0xf; + } + s->edm = value; + break; + case SDHCFG: + s->config = value; + bcm2835_sdhost_fifo_run(s); + break; + case SDVDD: + s->vdd = value; + break; + case SDDATA: + bcm2835_sdhost_fifo_push(s, value); + bcm2835_sdhost_fifo_run(s); + break; + case SDHBCT: + s->hbct = value; + break; + case SDHBLC: + s->hblc = value; + s->datacnt = s->hblc * s->hbct; + bcm2835_sdhost_fifo_run(s); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + break; + } +} + +static const MemoryRegionOps bcm2835_sdhost_ops = { + .read = bcm2835_sdhost_read, + .write = bcm2835_sdhost_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_sdhost = { + .name = TYPE_BCM2835_SDHOST, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(cmd, BCM2835SDHostState), + VMSTATE_UINT32(cmdarg, BCM2835SDHostState), + VMSTATE_UINT32(status, BCM2835SDHostState), + VMSTATE_UINT32_ARRAY(rsp, BCM2835SDHostState, 4), + VMSTATE_UINT32(config, BCM2835SDHostState), + VMSTATE_UINT32(edm, BCM2835SDHostState), + VMSTATE_UINT32(vdd, BCM2835SDHostState), + VMSTATE_UINT32(hbct, BCM2835SDHostState), + VMSTATE_UINT32(hblc, BCM2835SDHostState), + VMSTATE_INT32(fifo_pos, BCM2835SDHostState), + VMSTATE_INT32(fifo_len, BCM2835SDHostState), + VMSTATE_UINT32_ARRAY(fifo, BCM2835SDHostState, BCM2835_SDHOST_FIFO_LEN), + VMSTATE_UINT32(datacnt, BCM2835SDHostState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_sdhost_init(Object *obj) +{ + BCM2835SDHostState *s = BCM2835_SDHOST(obj); + + qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), + TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus"); + + memory_region_init_io(&s->iomem, obj, &bcm2835_sdhost_ops, s, + TYPE_BCM2835_SDHOST, 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); + sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq); +} + +static void bcm2835_sdhost_reset(DeviceState *dev) +{ + BCM2835SDHostState *s = BCM2835_SDHOST(dev); + + s->cmd = 0; + s->cmdarg = 0; + s->edm = 0x0000c60f; + s->config = 0; + s->hbct = 0; + s->hblc = 0; + s->datacnt = 0; + s->fifo_pos = 0; + s->fifo_len = 0; +} + +static void bcm2835_sdhost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bcm2835_sdhost_reset; + dc->vmsd = &vmstate_bcm2835_sdhost; +} + +static TypeInfo bcm2835_sdhost_info = { + .name = TYPE_BCM2835_SDHOST, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835SDHostState), + .class_init = bcm2835_sdhost_class_init, + .instance_init = bcm2835_sdhost_init, +}; + +static const TypeInfo bcm2835_sdhost_bus_info = { + .name = TYPE_BCM2835_SDHOST_BUS, + .parent = TYPE_SD_BUS, + .instance_size = sizeof(SDBus), +}; + +static void bcm2835_sdhost_register_types(void) +{ + type_register_static(&bcm2835_sdhost_info); + type_register_static(&bcm2835_sdhost_bus_info); +} + +type_init(bcm2835_sdhost_register_types) diff --git a/hw/sd/core.c b/hw/sd/core.c index 14c2bdf27b..295dc44ab7 100644 --- a/hw/sd/core.c +++ b/hw/sd/core.c @@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly) } } +void sdbus_reparent_card(SDBus *from, SDBus *to) +{ + SDState *card = get_card(from); + SDCardClass *sc; + bool readonly; + + /* We directly reparent the card object rather than implementing this + * as a hotpluggable connection because we don't want to expose SD cards + * to users as being hotpluggable, and we can get away with it in this + * limited use case. This could perhaps be implemented more cleanly in + * future by adding support to the hotplug infrastructure for "device + * can be hotplugged only via code, not by user". + */ + + if (!card) { + return; + } + + sc = SD_CARD_GET_CLASS(card); + readonly = sc->get_readonly(card); + + sdbus_set_inserted(from, false); + qdev_set_parent_bus(DEVICE(card), &to->qbus); + sdbus_set_inserted(to, true); + sdbus_set_readonly(to, readonly); +} + static const TypeInfo sd_bus_info = { .name = TYPE_SD_BUS, .parent = TYPE_BUS, diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 8e88e8311a..ba47bff4db 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd) return sd->wp_switch; } -static void sd_cardchange(void *opaque, bool load) +static void sd_cardchange(void *opaque, bool load, Error **errp) { SDState *sd = opaque; DeviceState *dev = DEVICE(sd); @@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj) static void sd_realize(DeviceState *dev, Error **errp) { SDState *sd = SD_CARD(dev); + int ret; if (sd->blk && blk_is_read_only(sd->blk)) { error_setg(errp, "Cannot use read-only drive as SD card"); @@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp) } if (sd->blk) { + ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } blk_set_dev_ops(sd->blk, &sd_block_ops, sd); } } diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index da32b5f709..6d6a791ee9 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -119,6 +119,7 @@ (SDHC_CAPAB_BASECLKFREQ << 8) | (SDHC_CAPAB_TOUNIT << 7) | \ (SDHC_CAPAB_TOCLKFREQ)) +#define MASK_TRNMOD 0x0037 #define MASKED_WRITE(reg, mask, val) (reg = (reg & (mask)) | (val)) static uint8_t sdhci_slotint(SDHCIState *s) @@ -486,6 +487,11 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) uint32_t boundary_chk = 1 << (((s->blksize & 0xf000) >> 12) + 12); uint32_t boundary_count = boundary_chk - (s->sdmasysad % boundary_chk); + if (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || !s->blkcnt) { + qemu_log_mask(LOG_UNIMP, "infinite transfer is not supported\n"); + return; + } + /* XXX: Some sd/mmc drivers (for example, u-boot-slp) do not account for * possible stop at page boundary if initial address is not page aligned, * allow them to work properly */ @@ -564,7 +570,6 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) } /* single block SDMA transfer */ - static void sdhci_sdma_transfer_single_block(SDHCIState *s) { int n; @@ -583,10 +588,7 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s) sdbus_write_data(&s->sdbus, s->fifo_buffer[n]); } } - - if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) { - s->blkcnt--; - } + s->blkcnt--; sdhci_end_transfer(s); } @@ -797,11 +799,6 @@ static void sdhci_data_transfer(void *opaque) if (s->trnmod & SDHC_TRNS_DMA) { switch (SDHC_DMA_TYPE(s->hostctl)) { case SDHC_CTRL_SDMA: - if ((s->trnmod & SDHC_TRNS_MULTI) && - (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || s->blkcnt == 0)) { - break; - } - if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) { sdhci_sdma_transfer_single_block(s); } else { @@ -1022,7 +1019,11 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) /* Writing to last byte of sdmasysad might trigger transfer */ if (!(mask & 0xFF000000) && TRANSFERRING_DATA(s->prnsts) && s->blkcnt && s->blksize && SDHC_DMA_TYPE(s->hostctl) == SDHC_CTRL_SDMA) { - sdhci_sdma_transfer_multi_blocks(s); + if (s->trnmod & SDHC_TRNS_MULTI) { + sdhci_sdma_transfer_multi_blocks(s); + } else { + sdhci_sdma_transfer_single_block(s); + } } break; case SDHC_BLKSIZE: @@ -1050,7 +1051,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) if (!(s->capareg & SDHC_CAN_DO_DMA)) { value &= ~SDHC_TRNS_DMA; } - MASKED_WRITE(s->trnmod, mask, value); + MASKED_WRITE(s->trnmod, mask, value & MASK_TRNMOD); MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16); /* Writing to the upper byte of CMDREG triggers SD command generation */ diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs index fc9966880f..dd6f27e2a3 100644 --- a/hw/timer/Makefile.objs +++ b/hw/timer/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o +common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o common-obj-$(CONFIG_CADENCE) += cadence_ttc.o common-obj-$(CONFIG_DS1338) += ds1338.o diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c new file mode 100644 index 0000000000..df8d2804b3 --- /dev/null +++ b/hw/timer/armv7m_systick.c @@ -0,0 +1,240 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#include "qemu/osdep.h" +#include "hw/timer/armv7m_systick.h" +#include "qemu-common.h" +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "trace.h" + +/* qemu timers run at 1GHz. We want something closer to 1MHz. */ +#define SYSTICK_SCALE 1000ULL + +#define SYSTICK_ENABLE (1 << 0) +#define SYSTICK_TICKINT (1 << 1) +#define SYSTICK_CLKSOURCE (1 << 2) +#define SYSTICK_COUNTFLAG (1 << 16) + +int system_clock_scale; + +/* Conversion factor from qemu timer to SysTick frequencies. */ +static inline int64_t systick_scale(SysTickState *s) +{ + if (s->control & SYSTICK_CLKSOURCE) { + return system_clock_scale; + } else { + return 1000; + } +} + +static void systick_reload(SysTickState *s, int reset) +{ + /* The Cortex-M3 Devices Generic User Guide says that "When the + * ENABLE bit is set to 1, the counter loads the RELOAD value from the + * SYST RVR register and then counts down". So, we need to check the + * ENABLE bit before reloading the value. + */ + trace_systick_reload(); + + if ((s->control & SYSTICK_ENABLE) == 0) { + return; + } + + if (reset) { + s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + s->tick += (s->reload + 1) * systick_scale(s); + timer_mod(s->timer, s->tick); +} + +static void systick_timer_tick(void *opaque) +{ + SysTickState *s = (SysTickState *)opaque; + + trace_systick_timer_tick(); + + s->control |= SYSTICK_COUNTFLAG; + if (s->control & SYSTICK_TICKINT) { + /* Tell the NVIC to pend the SysTick exception */ + qemu_irq_pulse(s->irq); + } + if (s->reload == 0) { + s->control &= ~SYSTICK_ENABLE; + } else { + systick_reload(s, 0); + } +} + +static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size) +{ + SysTickState *s = opaque; + uint32_t val; + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + val = s->control; + s->control &= ~SYSTICK_COUNTFLAG; + break; + case 0x4: /* SysTick Reload Value. */ + val = s->reload; + break; + case 0x8: /* SysTick Current Value. */ + { + int64_t t; + + if ((s->control & SYSTICK_ENABLE) == 0) { + val = 0; + break; + } + t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (t >= s->tick) { + val = 0; + break; + } + val = ((s->tick - (t + 1)) / systick_scale(s)) + 1; + /* The interrupt in triggered when the timer reaches zero. + However the counter is not reloaded until the next clock + tick. This is a hack to return zero during the first tick. */ + if (val > s->reload) { + val = 0; + } + break; + } + case 0xc: /* SysTick Calibration Value. */ + val = 10000; + break; + default: + val = 0; + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr); + break; + } + + trace_systick_read(addr, val, size); + return val; +} + +static void systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + SysTickState *s = opaque; + + trace_systick_write(addr, value, size); + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + { + uint32_t oldval = s->control; + + s->control &= 0xfffffff8; + s->control |= value & 7; + if ((oldval ^ value) & SYSTICK_ENABLE) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (value & SYSTICK_ENABLE) { + if (s->tick) { + s->tick += now; + timer_mod(s->timer, s->tick); + } else { + systick_reload(s, 1); + } + } else { + timer_del(s->timer); + s->tick -= now; + if (s->tick < 0) { + s->tick = 0; + } + } + } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { + /* This is a hack. Force the timer to be reloaded + when the reference clock is changed. */ + systick_reload(s, 1); + } + break; + } + case 0x4: /* SysTick Reload Value. */ + s->reload = value; + break; + case 0x8: /* SysTick Current Value. Writes reload the timer. */ + systick_reload(s, 1); + s->control &= ~SYSTICK_COUNTFLAG; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps systick_ops = { + .read = systick_read, + .write = systick_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static void systick_reset(DeviceState *dev) +{ + SysTickState *s = SYSTICK(dev); + + s->control = 0; + s->reload = 0; + s->tick = 0; + timer_del(s->timer); +} + +static void systick_instance_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + SysTickState *s = SYSTICK(obj); + + memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); +} + +static const VMStateDescription vmstate_systick = { + .name = "armv7m_systick", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(control, SysTickState), + VMSTATE_UINT32(reload, SysTickState), + VMSTATE_INT64(tick, SysTickState), + VMSTATE_TIMER_PTR(timer, SysTickState), + VMSTATE_END_OF_LIST() + } +}; + +static void systick_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_systick; + dc->reset = systick_reset; +} + +static const TypeInfo armv7m_systick_info = { + .name = TYPE_SYSTICK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = systick_instance_init, + .instance_size = sizeof(SysTickState), + .class_init = systick_class_init, +}; + +static void armv7m_systick_register_types(void) +{ + type_register_static(&armv7m_systick_info); +} + +type_init(armv7m_systick_register_types) diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c index 010ccbf207..4b9b54bf2e 100644 --- a/hw/timer/imx_gpt.c +++ b/hw/timer/imx_gpt.c @@ -296,18 +296,23 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size) return reg_value; } -static void imx_gpt_reset(DeviceState *dev) -{ - IMXGPTState *s = IMX_GPT(dev); +static void imx_gpt_reset_common(IMXGPTState *s, bool is_soft_reset) +{ /* stop timer */ ptimer_stop(s->timer); - /* - * Soft reset doesn't touch some bits; hard reset clears them + /* Soft reset and hard reset differ only in their handling of the CR + * register -- soft reset preserves the values of some bits there. */ - s->cr &= ~(GPT_CR_EN|GPT_CR_ENMOD|GPT_CR_STOPEN|GPT_CR_DOZEN| - GPT_CR_WAITEN|GPT_CR_DBGEN); + if (is_soft_reset) { + /* Clear all CR bits except those that are preserved by soft reset. */ + s->cr &= GPT_CR_EN | GPT_CR_ENMOD | GPT_CR_STOPEN | GPT_CR_DOZEN | + GPT_CR_WAITEN | GPT_CR_DBGEN | + (GPT_CR_CLKSRC_MASK << GPT_CR_CLKSRC_SHIFT); + } else { + s->cr = 0; + } s->sr = 0; s->pr = 0; s->ir = 0; @@ -333,6 +338,18 @@ static void imx_gpt_reset(DeviceState *dev) } } +static void imx_gpt_soft_reset(DeviceState *dev) +{ + IMXGPTState *s = IMX_GPT(dev); + imx_gpt_reset_common(s, true); +} + +static void imx_gpt_reset(DeviceState *dev) +{ + IMXGPTState *s = IMX_GPT(dev); + imx_gpt_reset_common(s, false); +} + static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -348,7 +365,7 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value, s->cr = value & ~0x7c14; if (s->cr & GPT_CR_SWR) { /* force reset */ /* handle the reset */ - imx_gpt_reset(DEVICE(s)); + imx_gpt_soft_reset(DEVICE(s)); } else { /* set our freq, as the source might have changed */ imx_gpt_set_freq(s); diff --git a/hw/timer/trace-events b/hw/timer/trace-events index 3495c41c18..d17cfe6b39 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32 aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32 aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64 + +# hw/timer/armv7m_systick.c +systick_reload(void) "systick reload" +systick_timer_tick(void) "systick reload" +systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" +systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c607f7606d..a71b354fa6 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -603,7 +603,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) blkconf_serial(&s->conf, &dev->serial); blkconf_blocksizes(&s->conf); - blkconf_apply_backend_options(&s->conf); + blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err); + if (err) { + error_propagate(errp, err); + return; + } /* * Hack alert: this pretends to be a block device, but it's really diff --git a/include/block/block.h b/include/block/block.h index bde5ebda18..c7c4a3ac3a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -82,6 +82,7 @@ typedef struct HDGeometry { } HDGeometry; #define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ @@ -187,6 +188,42 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MAX, } BlockOpType; +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * This permission is required to change the node that this BdrvChild + * points to. + */ + BLK_PERM_GRAPH_MOD = 0x10, + + BLK_PERM_ALL = 0x1f, +}; + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new); @@ -210,7 +248,8 @@ BdrvChild *bdrv_open_child(const char *filename, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, @@ -484,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role); + const BdrvChildRole *child_role, + Error **errp); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); diff --git a/include/block/block_int.h b/include/block/block_int.h index 1670941da9..a57c0bfb55 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -320,6 +320,59 @@ struct BlockDriver { void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, Error **errp); + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @role. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + QLIST_ENTRY(BlockDriver) list; }; @@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier { } BdrvAioNotifier; struct BdrvChildRole { + /* If true, bdrv_replace_in_backing_chain() doesn't change the node this + * BdrvChild points to. */ + bool stay_at_node; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); @@ -399,6 +456,12 @@ struct BdrvChildRole { * name), or NULL if the parent can't provide a better name. */ const char* (*get_name)(BdrvChild *child); + /* Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. */ + char* (*get_parent_desc)(BdrvChild *child); + /* * If this pair of functions is implemented, the parent doesn't issue new * requests after returning from .drained_begin() until .drained_end() is @@ -409,16 +472,32 @@ struct BdrvChildRole { */ void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); }; extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; +extern const BdrvChildRole child_backing; struct BdrvChild { BlockDriverState *bs; char *name; const BdrvChildRole *role; void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; }; @@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs, * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. * @errp: Error object. * */ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp); + const char *filter_node_name, Error **errp); /** * commit_active_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, * See @BlockJobCreateFlags * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, - void *opaque, Error **errp, bool auto_complete); + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, Error **errp, + bool auto_complete); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @errp: Error object. * * Start a mirroring operation on @bs. Clusters that are allocated @@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp); + bool unmap, const char *filter_node_name, Error **errp); /* * backup_job_create: @@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque); + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +void bdrv_child_abort_perm_update(BdrvChild *c); +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to + * all children */ +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * (non-raw) image formats: Like above for bs->backing, but for bs->file it + * requires WRITE | RESIZE for read-write images, always requires + * CONSISTENT_READ and doesn't share WRITE. */ +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load); +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); bool blk_dev_has_tray(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 1acb256223..9e906f7d7e 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id); /** * block_job_add_bdrv: * @job: A block job + * @name: The name to assign to the new BdrvChild * @bs: A BlockDriverState that is involved in @job + * @perm, @shared_perm: Permissions to request on the node * * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. */ -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp); + +/** + * block_job_remove_all_bdrv: + * @job: The block job + * + * Remove all BlockDriverStates from the list of nodes that are involved in the + * job. This removes the blockers added with block_job_add_bdrv(). + */ +void block_job_remove_all_bdrv(BlockJob *job); /** * block_job_set_speed: diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 82238229c6..3f86cc5acc 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -119,6 +119,7 @@ struct BlockJobDriver { * generated automatically. * @job_type: The class object for the newly-created job. * @bs: The block + * @perm, @shared_perm: Permissions to request for @bs * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. @@ -134,7 +135,8 @@ struct BlockJobDriver { * called from a wrapper that is specific to the job type. */ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp); /** diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index c175c0e999..a3f79d3379 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -26,6 +26,18 @@ typedef enum { /* armv7m.c */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model); +/** + * armv7m_load_kernel: + * @cpu: CPU + * @kernel_filename: file to load + * @mem_size: mem_size: maximum image size to load + * + * Load the guest image for an ARMv7M system. This must be called by + * any ARMv7M board, either directly or via armv7m_init(). (This is + * necessary to ensure that the CPU resets correctly on system reset, + * as well as for kernel loading.) + */ +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size); /* * struct used as a parameter of the arm_load_kernel machine init diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h new file mode 100644 index 0000000000..a9b3f2ab35 --- /dev/null +++ b/include/hw/arm/armv7m.h @@ -0,0 +1,63 @@ +/* + * ARMv7M CPU object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_H +#define HW_ARM_ARMV7M_H + +#include "hw/sysbus.h" +#include "hw/arm/armv7m_nvic.h" + +#define TYPE_BITBAND "ARM,bitband-memory" +#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + AddressSpace *source_as; + MemoryRegion iomem; + uint32_t base; + MemoryRegion *source_memory; +} BitBandState; + +#define TYPE_ARMV7M "armv7m" +#define ARMV7M(obj) OBJECT_CHECK(ARMv7MState, (obj), TYPE_ARMV7M) + +#define ARMV7M_NUM_BITBANDS 2 + +/* ARMv7M container object. + * + Unnamed GPIO input lines: external IRQ lines for the NVIC + * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ + * + Property "cpu-model": CPU model to instantiate + * + Property "num-irq": number of external IRQ lines + * + Property "memory": MemoryRegion defining the physical address space + * that CPU accesses see. (The NVIC, bitbanding and other CPU-internal + * devices will be automatically layered on top of this view.) + */ +typedef struct ARMv7MState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + NVICState nvic; + BitBandState bitband[ARMV7M_NUM_BITBANDS]; + ARMCPU *cpu; + + /* MemoryRegion we pass to the CPU, with our devices layered on + * top of the ones the board provides in board_memory. + */ + MemoryRegion container; + + /* Properties */ + char *cpu_model; + /* MemoryRegion the board provides to us (with its devices, RAM, etc) */ + MemoryRegion *board_memory; +} ARMv7MState; + +#endif diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/arm/armv7m_nvic.h new file mode 100644 index 0000000000..1d145fb75f --- /dev/null +++ b/include/hw/arm/armv7m_nvic.h @@ -0,0 +1,62 @@ +/* + * ARMv7M NVIC object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_NVIC_H +#define HW_ARM_ARMV7M_NVIC_H + +#include "target/arm/cpu.h" +#include "hw/sysbus.h" +#include "hw/timer/armv7m_systick.h" + +#define TYPE_NVIC "armv7m_nvic" + +#define NVIC(obj) \ + OBJECT_CHECK(NVICState, (obj), TYPE_NVIC) + +/* Highest permitted number of exceptions (architectural limit) */ +#define NVIC_MAX_VECTORS 512 + +typedef struct VecInfo { + /* Exception priorities can range from -3 to 255; only the unmodifiable + * priority values for RESET, NMI and HardFault can be negative. + */ + int16_t prio; + uint8_t enabled; + uint8_t pending; + uint8_t active; + uint8_t level; /* exceptions <=15 never set level */ +} VecInfo; + +typedef struct NVICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + ARMCPU *cpu; + + VecInfo vectors[NVIC_MAX_VECTORS]; + uint32_t prigroup; + + /* vectpending and exception_prio are both cached state that can + * be recalculated from the vectors[] array and the prigroup field. + */ + unsigned int vectpending; /* highest prio pending enabled exception */ + int exception_prio; /* group prio of the highest prio active exception */ + + MemoryRegion sysregmem; + MemoryRegion container; + + uint32_t num_irq; + qemu_irq excpout; + qemu_irq sysresetreq; + + SysTickState systick; +} NVICState; + +#endif diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index e12ae3721a..122b286de7 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -19,8 +19,11 @@ #include "hw/dma/bcm2835_dma.h" #include "hw/intc/bcm2835_ic.h" #include "hw/misc/bcm2835_property.h" +#include "hw/misc/bcm2835_rng.h" #include "hw/misc/bcm2835_mbox.h" #include "hw/sd/sdhci.h" +#include "hw/sd/bcm2835_sdhost.h" +#include "hw/gpio/bcm2835_gpio.h" #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" #define BCM2835_PERIPHERALS(obj) \ @@ -41,8 +44,11 @@ typedef struct BCM2835PeripheralState { BCM2835DMAState dma; BCM2835ICState ic; BCM2835PropertyState property; + BCM2835RngState rng; BCM2835MboxState mboxes; SDHCIState sdhci; + BCM2835SDHostState sdhost; + BCM2835GpioState gpio; } BCM2835PeripheralState; #endif /* BCM2835_PERIPHERALS_H */ diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 133214195b..e2dce1122e 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -31,6 +31,7 @@ #include "hw/adc/stm32f2xx_adc.h" #include "hw/or-irq.h" #include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" #define TYPE_STM32F205_SOC "stm32f205-soc" #define STM32F205_SOC(obj) \ @@ -51,9 +52,10 @@ typedef struct STM32F205State { SysBusDevice parent_obj; /*< public >*/ - char *kernel_filename; char *cpu_model; + ARMv7MState armv7m; + STM32F2XXSyscfgState syscfg; STM32F2XXUsartState usart[STM_NUM_USARTS]; STM32F2XXTimerState timer[STM_NUM_TIMERS]; diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 58ce74e0e5..33b0ff3892 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -93,6 +93,7 @@ typedef struct { FWCfgState *fw_cfg; bool secure; bool highmem; + bool its; bool virt; int32_t gic_version; struct arm_boot_info bootinfo; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index df9d207d81..f3f6e8ef02 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -26,6 +26,7 @@ typedef struct BlockConf { /* geometry, not all devices use this */ uint32_t cyls, heads, secs; OnOffAuto wce; + bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; } BlockConf; @@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_UINT32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ - DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO) + DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ + ON_OFF_AUTO_AUTO), \ + DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ @@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); void blkconf_blocksizes(BlockConf *conf); -void blkconf_apply_backend_options(BlockConf *conf); +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp); /* Hard disk geometry */ diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 25659b93be..a172a6068a 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as) + AddressSpace *as, bool load_rom) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; @@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd, *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } - snprintf(label, sizeof(label), "phdr #%d: %s", i, name); + if (load_rom) { + snprintf(label, sizeof(label), "phdr #%d: %s", i, name); - /* rom_add_elf_program() seize the ownership of 'data' */ - rom_add_elf_program(label, data, file_size, mem_size, addr, as); + /* rom_add_elf_program() seize the ownership of 'data' */ + rom_add_elf_program(label, data, file_size, mem_size, addr, as); + } else { + cpu_physical_memory_write(addr, data, file_size); + g_free(data); + } total_size += mem_size; if (addr < low) diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h new file mode 100644 index 0000000000..9f8e0c720c --- /dev/null +++ b/include/hw/gpio/bcm2835_gpio.h @@ -0,0 +1,39 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_GPIO_H +#define BCM2835_GPIO_H + +#include "hw/sd/sd.h" + +typedef struct BCM2835GpioState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + + /* SDBus selector */ + SDBus sdbus; + SDBus *sdbus_sdhci; + SDBus *sdbus_sdhost; + + uint8_t fsel[54]; + uint32_t lev0, lev1; + uint8_t sd_fsel; + qemu_irq out[54]; +} BCM2835GpioState; + +#define TYPE_BCM2835_GPIO "bcm2835_gpio" +#define BCM2835_GPIO(obj) \ + OBJECT_CHECK(BCM2835GpioState, (obj), TYPE_BCM2835_GPIO) + +#endif diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 4156051d98..bccdfe17c6 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -172,6 +172,7 @@ struct GICv3CPUState { uint8_t gicr_ipriorityr[GIC_INTERNAL]; /* CPU interface */ + uint64_t icc_sre_el1; uint64_t icc_ctlr_el1[2]; uint64_t icc_pmr_el1; uint64_t icc_bpr[3]; diff --git a/include/hw/loader.h b/include/hw/loader.h index 40c4153e58..490c9ff8e6 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ENDIAN -4 const char *load_elf_strerror(int error); -/** load_elf_as: +/** load_elf_ram: * @filename: Path of ELF file * @translate_fn: optional function to translate load addresses * @translate_opaque: opaque data passed to @translate_fn @@ -81,6 +81,7 @@ const char *load_elf_strerror(int error); * words and 3 for within doublewords. * @as: The AddressSpace to load the ELF to. The value of address_space_memory * is used if nothing is supplied here. + * @load_rom : Load ELF binary as ROM * * Load an ELF file's contents to the emulated system's address space. * Clients may optionally specify a callback to perform address @@ -93,6 +94,16 @@ const char *load_elf_strerror(int error); * If @elf_machine is EM_NONE then the machine type will be read from the * ELF header and no checks will be carried out against the machine type. */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom); + +/** load_elf_as: + * Same as load_elf_ram(), but always loads the elf as ROM + */ int load_elf_as(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, diff --git a/include/hw/misc/bcm2835_rng.h b/include/hw/misc/bcm2835_rng.h new file mode 100644 index 0000000000..41a531bce7 --- /dev/null +++ b/include/hw/misc/bcm2835_rng.h @@ -0,0 +1,27 @@ +/* + * BCM2835 Random Number Generator emulation + * + * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_RNG_H +#define BCM2835_RNG_H + +#include "hw/sysbus.h" + +#define TYPE_BCM2835_RNG "bcm2835-rng" +#define BCM2835_RNG(obj) \ + OBJECT_CHECK(BCM2835RngState, (obj), TYPE_BCM2835_RNG) + +typedef struct { + SysBusDevice busdev; + MemoryRegion iomem; + + uint32_t rng_ctrl; + uint32_t rng_status; +} BCM2835RngState; + +#endif diff --git a/include/hw/sd/bcm2835_sdhost.h b/include/hw/sd/bcm2835_sdhost.h new file mode 100644 index 0000000000..7520dd6507 --- /dev/null +++ b/include/hw/sd/bcm2835_sdhost.h @@ -0,0 +1,48 @@ +/* + * Raspberry Pi (BCM2835) SD Host Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_SDHOST_H +#define BCM2835_SDHOST_H + +#include "hw/sysbus.h" +#include "hw/sd/sd.h" + +#define TYPE_BCM2835_SDHOST "bcm2835-sdhost" +#define BCM2835_SDHOST(obj) \ + OBJECT_CHECK(BCM2835SDHostState, (obj), TYPE_BCM2835_SDHOST) + +#define BCM2835_SDHOST_FIFO_LEN 16 + +typedef struct { + SysBusDevice busdev; + SDBus sdbus; + MemoryRegion iomem; + + uint32_t cmd; + uint32_t cmdarg; + uint32_t status; + uint32_t rsp[4]; + uint32_t config; + uint32_t edm; + uint32_t vdd; + uint32_t hbct; + uint32_t hblc; + int32_t fifo_pos; + int32_t fifo_len; + uint32_t fifo[BCM2835_SDHOST_FIFO_LEN]; + uint32_t datacnt; + + qemu_irq irq; +} BCM2835SDHostState; + +#endif diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h index 79909b2478..96caefe373 100644 --- a/include/hw/sd/sd.h +++ b/include/hw/sd/sd.h @@ -140,6 +140,17 @@ uint8_t sdbus_read_data(SDBus *sd); bool sdbus_data_ready(SDBus *sd); bool sdbus_get_inserted(SDBus *sd); bool sdbus_get_readonly(SDBus *sd); +/** + * sdbus_reparent_card: Reparent an SD card from one controller to another + * @from: controller bus to remove card from + * @to: controller bus to move card to + * + * Reparent an SD card, effectively unplugging it from one controller + * and inserting it into another. This is useful for SoCs like the + * bcm2835 which have two SD controllers and connect a single SD card + * to them, selected by the guest reprogramming GPIO line routing. + */ +void sdbus_reparent_card(SDBus *from, SDBus *to); /* Functions to be used by SD devices to report back to qdevified controllers */ void sdbus_set_inserted(SDBus *sd, bool inserted); diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h new file mode 100644 index 0000000000..cca04defd8 --- /dev/null +++ b/include/hw/timer/armv7m_systick.h @@ -0,0 +1,34 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#ifndef HW_TIMER_ARMV7M_SYSTICK_H +#define HW_TIMER_ARMV7M_SYSTICK_H + +#include "hw/sysbus.h" + +#define TYPE_SYSTICK "armv7m_systick" + +#define SYSTICK(obj) OBJECT_CHECK(SysTickState, (obj), TYPE_SYSTICK) + +typedef struct SysTickState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t control; + uint32_t reload; + int64_t tick; + QEMUTimer *timer; + MemoryRegion iomem; + qemu_irq irq; +} SysTickState; + +#endif diff --git a/include/qemu-common.h b/include/qemu-common.h index 1430390eb6..d218821c14 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -19,7 +19,7 @@ #include "qemu/option.h" /* Copyright string for -version arguments, About dialogs, etc */ -#define QEMU_COPYRIGHT "Copyright (c) 2003-2016 " \ +#define QEMU_COPYRIGHT "Copyright (c) 2003-2017 " \ "Fabrice Bellard and the QEMU Project developers" /* main function, renamed */ diff --git a/include/qemu-io.h b/include/qemu-io.h index 4d402b9b01..196fde0f3a 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -36,6 +36,7 @@ typedef struct cmdinfo { const char *args; const char *oneline; helpfunc_t help; + uint64_t perm; } cmdinfo_t; extern bool qemuio_misalign; diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h new file mode 100644 index 0000000000..3133d1ca40 --- /dev/null +++ b/include/qemu/throttle-options.h @@ -0,0 +1,92 @@ +/* + * QEMU throttling command line options + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ +#ifndef THROTTLE_OPTIONS_H +#define THROTTLE_OPTIONS_H + +#define THROTTLE_OPTS \ + { \ + .name = "throttling.iops-total",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total I/O operations per second",\ + },{ \ + .name = "throttling.iops-read",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read operations per second",\ + },{ \ + .name = "throttling.iops-write",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write operations per second",\ + },{ \ + .name = "throttling.bps-total",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total bytes per second",\ + },{ \ + .name = "throttling.bps-read",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read bytes per second",\ + },{ \ + .name = "throttling.bps-write",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write bytes per second",\ + },{ \ + .name = "throttling.iops-total-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations burst",\ + },{ \ + .name = "throttling.iops-read-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations read burst",\ + },{ \ + .name = "throttling.iops-write-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations write burst",\ + },{ \ + .name = "throttling.bps-total-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes burst",\ + },{ \ + .name = "throttling.bps-read-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes read burst",\ + },{ \ + .name = "throttling.bps-write-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes write burst",\ + },{ \ + .name = "throttling.iops-total-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-total-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-read-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-read-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-write-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-write-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-total-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-total-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-read-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-read-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-write-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-write-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-size",\ + .type = QEMU_OPT_NUMBER,\ + .help = "when limiting by iops max size of an I/O in bytes",\ + } + +#endif diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index 47b38fb816..eca9a2ca22 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -73,6 +73,9 @@ */ #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -144,6 +147,11 @@ */ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) +/* + * Crash notification flag. + */ +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 5c10f7e25d..c8b3338375 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -640,7 +640,7 @@ * Control a data application associated with the currently viewed channel, * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ -#define KEY_DATA 0x275 +#define KEY_DATA 0x277 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index e5a2e68b22..634c9c44ed 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -23,6 +23,14 @@ #define LINUX_PCI_REGS_H /* + * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of + * configuration space. PCI-X Mode 2 and PCIe devices have 4096 bytes of + * configuration space. + */ +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 + +/* * Under PCI, each device has 256 bytes of configuration address space, * of which the first 64 bytes are standardized as follows: */ @@ -674,6 +682,7 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ +#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM @@ -965,6 +974,7 @@ #define PCI_EXP_DPC_STATUS 8 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x01 /* Trigger Status */ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x08 /* Interrupt Status */ +#define PCI_EXP_DPC_RP_BUSY 0x10 /* Root Port Busy */ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ @@ -977,4 +987,19 @@ #define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ #define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ +/* L1 PM Substates */ +#define PCI_L1SS_CAP 4 /* capability register */ +#define PCI_L1SS_CAP_PCIPM_L1_2 1 /* PCI PM L1.2 Support */ +#define PCI_L1SS_CAP_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CAP_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CAP_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CAP_L1_PM_SS 16 /* L1 PM Substates Support */ +#define PCI_L1SS_CTL1 8 /* Control Register 1 */ +#define PCI_L1SS_CTL1_PCIPM_L1_2 1 /* PCI PM L1.2 Enable */ +#define PCI_L1SS_CTL1_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000F +#define PCI_L1SS_CTL2 0xC /* Control Register 2 */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index fe74e422d4..6d5c3b2d4f 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -43,4 +43,5 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ + #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f365a51acf..096c17fce0 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -34,7 +34,7 @@ typedef struct BlockDevOps { * changes. Sure would be useful if it did. * Device models with removable media must implement this callback. */ - void (*change_media_cb)(void *opaque, bool load); + void (*change_media_cb)(void *opaque, bool load, Error **errp); /* * Runs when an eject request is issued from the monitor, the tray * is closed, and the medium is locked. @@ -84,7 +84,7 @@ typedef struct BlockBackendPublic { QLIST_ENTRY(BlockBackendPublic) round_robin; } BlockBackendPublic; -BlockBackend *blk_new(void); +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); @@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 2fb7859465..1101d55d2f 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ @@ -179,10 +181,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h new file mode 100644 index 0000000000..13a74afd02 --- /dev/null +++ b/linux-headers/asm-arm/unistd-common.h @@ -0,0 +1,357 @@ +#ifndef _ASM_ARM_UNISTD_COMMON_H +#define _ASM_ARM_UNISTD_COMMON_H 1 + +#define __NR_restart_syscall (__NR_SYSCALL_BASE + 0) +#define __NR_exit (__NR_SYSCALL_BASE + 1) +#define __NR_fork (__NR_SYSCALL_BASE + 2) +#define __NR_read (__NR_SYSCALL_BASE + 3) +#define __NR_write (__NR_SYSCALL_BASE + 4) +#define __NR_open (__NR_SYSCALL_BASE + 5) +#define __NR_close (__NR_SYSCALL_BASE + 6) +#define __NR_creat (__NR_SYSCALL_BASE + 8) +#define __NR_link (__NR_SYSCALL_BASE + 9) +#define __NR_unlink (__NR_SYSCALL_BASE + 10) +#define __NR_execve (__NR_SYSCALL_BASE + 11) +#define __NR_chdir (__NR_SYSCALL_BASE + 12) +#define __NR_mknod (__NR_SYSCALL_BASE + 14) +#define __NR_chmod (__NR_SYSCALL_BASE + 15) +#define __NR_lchown (__NR_SYSCALL_BASE + 16) +#define __NR_lseek (__NR_SYSCALL_BASE + 19) +#define __NR_getpid (__NR_SYSCALL_BASE + 20) +#define __NR_mount (__NR_SYSCALL_BASE + 21) +#define __NR_setuid (__NR_SYSCALL_BASE + 23) +#define __NR_getuid (__NR_SYSCALL_BASE + 24) +#define __NR_ptrace (__NR_SYSCALL_BASE + 26) +#define __NR_pause (__NR_SYSCALL_BASE + 29) +#define __NR_access (__NR_SYSCALL_BASE + 33) +#define __NR_nice (__NR_SYSCALL_BASE + 34) +#define __NR_sync (__NR_SYSCALL_BASE + 36) +#define __NR_kill (__NR_SYSCALL_BASE + 37) +#define __NR_rename (__NR_SYSCALL_BASE + 38) +#define __NR_mkdir (__NR_SYSCALL_BASE + 39) +#define __NR_rmdir (__NR_SYSCALL_BASE + 40) +#define __NR_dup (__NR_SYSCALL_BASE + 41) +#define __NR_pipe (__NR_SYSCALL_BASE + 42) +#define __NR_times (__NR_SYSCALL_BASE + 43) +#define __NR_brk (__NR_SYSCALL_BASE + 45) +#define __NR_setgid (__NR_SYSCALL_BASE + 46) +#define __NR_getgid (__NR_SYSCALL_BASE + 47) +#define __NR_geteuid (__NR_SYSCALL_BASE + 49) +#define __NR_getegid (__NR_SYSCALL_BASE + 50) +#define __NR_acct (__NR_SYSCALL_BASE + 51) +#define __NR_umount2 (__NR_SYSCALL_BASE + 52) +#define __NR_ioctl (__NR_SYSCALL_BASE + 54) +#define __NR_fcntl (__NR_SYSCALL_BASE + 55) +#define __NR_setpgid (__NR_SYSCALL_BASE + 57) +#define __NR_umask (__NR_SYSCALL_BASE + 60) +#define __NR_chroot (__NR_SYSCALL_BASE + 61) +#define __NR_ustat (__NR_SYSCALL_BASE + 62) +#define __NR_dup2 (__NR_SYSCALL_BASE + 63) +#define __NR_getppid (__NR_SYSCALL_BASE + 64) +#define __NR_getpgrp (__NR_SYSCALL_BASE + 65) +#define __NR_setsid (__NR_SYSCALL_BASE + 66) +#define __NR_sigaction (__NR_SYSCALL_BASE + 67) +#define __NR_setreuid (__NR_SYSCALL_BASE + 70) +#define __NR_setregid (__NR_SYSCALL_BASE + 71) +#define __NR_sigsuspend (__NR_SYSCALL_BASE + 72) +#define __NR_sigpending (__NR_SYSCALL_BASE + 73) +#define __NR_sethostname (__NR_SYSCALL_BASE + 74) +#define __NR_setrlimit (__NR_SYSCALL_BASE + 75) +#define __NR_getrusage (__NR_SYSCALL_BASE + 77) +#define __NR_gettimeofday (__NR_SYSCALL_BASE + 78) +#define __NR_settimeofday (__NR_SYSCALL_BASE + 79) +#define __NR_getgroups (__NR_SYSCALL_BASE + 80) +#define __NR_setgroups (__NR_SYSCALL_BASE + 81) +#define __NR_symlink (__NR_SYSCALL_BASE + 83) +#define __NR_readlink (__NR_SYSCALL_BASE + 85) +#define __NR_uselib (__NR_SYSCALL_BASE + 86) +#define __NR_swapon (__NR_SYSCALL_BASE + 87) +#define __NR_reboot (__NR_SYSCALL_BASE + 88) +#define __NR_munmap (__NR_SYSCALL_BASE + 91) +#define __NR_truncate (__NR_SYSCALL_BASE + 92) +#define __NR_ftruncate (__NR_SYSCALL_BASE + 93) +#define __NR_fchmod (__NR_SYSCALL_BASE + 94) +#define __NR_fchown (__NR_SYSCALL_BASE + 95) +#define __NR_getpriority (__NR_SYSCALL_BASE + 96) +#define __NR_setpriority (__NR_SYSCALL_BASE + 97) +#define __NR_statfs (__NR_SYSCALL_BASE + 99) +#define __NR_fstatfs (__NR_SYSCALL_BASE + 100) +#define __NR_syslog (__NR_SYSCALL_BASE + 103) +#define __NR_setitimer (__NR_SYSCALL_BASE + 104) +#define __NR_getitimer (__NR_SYSCALL_BASE + 105) +#define __NR_stat (__NR_SYSCALL_BASE + 106) +#define __NR_lstat (__NR_SYSCALL_BASE + 107) +#define __NR_fstat (__NR_SYSCALL_BASE + 108) +#define __NR_vhangup (__NR_SYSCALL_BASE + 111) +#define __NR_wait4 (__NR_SYSCALL_BASE + 114) +#define __NR_swapoff (__NR_SYSCALL_BASE + 115) +#define __NR_sysinfo (__NR_SYSCALL_BASE + 116) +#define __NR_fsync (__NR_SYSCALL_BASE + 118) +#define __NR_sigreturn (__NR_SYSCALL_BASE + 119) +#define __NR_clone (__NR_SYSCALL_BASE + 120) +#define __NR_setdomainname (__NR_SYSCALL_BASE + 121) +#define __NR_uname (__NR_SYSCALL_BASE + 122) +#define __NR_adjtimex (__NR_SYSCALL_BASE + 124) +#define __NR_mprotect (__NR_SYSCALL_BASE + 125) +#define __NR_sigprocmask (__NR_SYSCALL_BASE + 126) +#define __NR_init_module (__NR_SYSCALL_BASE + 128) +#define __NR_delete_module (__NR_SYSCALL_BASE + 129) +#define __NR_quotactl (__NR_SYSCALL_BASE + 131) +#define __NR_getpgid (__NR_SYSCALL_BASE + 132) +#define __NR_fchdir (__NR_SYSCALL_BASE + 133) +#define __NR_bdflush (__NR_SYSCALL_BASE + 134) +#define __NR_sysfs (__NR_SYSCALL_BASE + 135) +#define __NR_personality (__NR_SYSCALL_BASE + 136) +#define __NR_setfsuid (__NR_SYSCALL_BASE + 138) +#define __NR_setfsgid (__NR_SYSCALL_BASE + 139) +#define __NR__llseek (__NR_SYSCALL_BASE + 140) +#define __NR_getdents (__NR_SYSCALL_BASE + 141) +#define __NR__newselect (__NR_SYSCALL_BASE + 142) +#define __NR_flock (__NR_SYSCALL_BASE + 143) +#define __NR_msync (__NR_SYSCALL_BASE + 144) +#define __NR_readv (__NR_SYSCALL_BASE + 145) +#define __NR_writev (__NR_SYSCALL_BASE + 146) +#define __NR_getsid (__NR_SYSCALL_BASE + 147) +#define __NR_fdatasync (__NR_SYSCALL_BASE + 148) +#define __NR__sysctl (__NR_SYSCALL_BASE + 149) +#define __NR_mlock (__NR_SYSCALL_BASE + 150) +#define __NR_munlock (__NR_SYSCALL_BASE + 151) +#define __NR_mlockall (__NR_SYSCALL_BASE + 152) +#define __NR_munlockall (__NR_SYSCALL_BASE + 153) +#define __NR_sched_setparam (__NR_SYSCALL_BASE + 154) +#define __NR_sched_getparam (__NR_SYSCALL_BASE + 155) +#define __NR_sched_setscheduler (__NR_SYSCALL_BASE + 156) +#define __NR_sched_getscheduler (__NR_SYSCALL_BASE + 157) +#define __NR_sched_yield (__NR_SYSCALL_BASE + 158) +#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE + 159) +#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE + 160) +#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE + 161) +#define __NR_nanosleep (__NR_SYSCALL_BASE + 162) +#define __NR_mremap (__NR_SYSCALL_BASE + 163) +#define __NR_setresuid (__NR_SYSCALL_BASE + 164) +#define __NR_getresuid (__NR_SYSCALL_BASE + 165) +#define __NR_poll (__NR_SYSCALL_BASE + 168) +#define __NR_nfsservctl (__NR_SYSCALL_BASE + 169) +#define __NR_setresgid (__NR_SYSCALL_BASE + 170) +#define __NR_getresgid (__NR_SYSCALL_BASE + 171) +#define __NR_prctl (__NR_SYSCALL_BASE + 172) +#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173) +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE + 177) +#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE + 178) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#define __NR_chown (__NR_SYSCALL_BASE + 182) +#define __NR_getcwd (__NR_SYSCALL_BASE + 183) +#define __NR_capget (__NR_SYSCALL_BASE + 184) +#define __NR_capset (__NR_SYSCALL_BASE + 185) +#define __NR_sigaltstack (__NR_SYSCALL_BASE + 186) +#define __NR_sendfile (__NR_SYSCALL_BASE + 187) +#define __NR_vfork (__NR_SYSCALL_BASE + 190) +#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) +#define __NR_mmap2 (__NR_SYSCALL_BASE + 192) +#define __NR_truncate64 (__NR_SYSCALL_BASE + 193) +#define __NR_ftruncate64 (__NR_SYSCALL_BASE + 194) +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#define __NR_lstat64 (__NR_SYSCALL_BASE + 196) +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#define __NR_lchown32 (__NR_SYSCALL_BASE + 198) +#define __NR_getuid32 (__NR_SYSCALL_BASE + 199) +#define __NR_getgid32 (__NR_SYSCALL_BASE + 200) +#define __NR_geteuid32 (__NR_SYSCALL_BASE + 201) +#define __NR_getegid32 (__NR_SYSCALL_BASE + 202) +#define __NR_setreuid32 (__NR_SYSCALL_BASE + 203) +#define __NR_setregid32 (__NR_SYSCALL_BASE + 204) +#define __NR_getgroups32 (__NR_SYSCALL_BASE + 205) +#define __NR_setgroups32 (__NR_SYSCALL_BASE + 206) +#define __NR_fchown32 (__NR_SYSCALL_BASE + 207) +#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) +#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209) +#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) +#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211) +#define __NR_chown32 (__NR_SYSCALL_BASE + 212) +#define __NR_setuid32 (__NR_SYSCALL_BASE + 213) +#define __NR_setgid32 (__NR_SYSCALL_BASE + 214) +#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) +#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#define __NR_pivot_root (__NR_SYSCALL_BASE + 218) +#define __NR_mincore (__NR_SYSCALL_BASE + 219) +#define __NR_madvise (__NR_SYSCALL_BASE + 220) +#define __NR_fcntl64 (__NR_SYSCALL_BASE + 221) +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#define __NR_readahead (__NR_SYSCALL_BASE + 225) +#define __NR_setxattr (__NR_SYSCALL_BASE + 226) +#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) +#define __NR_fsetxattr (__NR_SYSCALL_BASE + 228) +#define __NR_getxattr (__NR_SYSCALL_BASE + 229) +#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) +#define __NR_fgetxattr (__NR_SYSCALL_BASE + 231) +#define __NR_listxattr (__NR_SYSCALL_BASE + 232) +#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) +#define __NR_flistxattr (__NR_SYSCALL_BASE + 234) +#define __NR_removexattr (__NR_SYSCALL_BASE + 235) +#define __NR_lremovexattr (__NR_SYSCALL_BASE + 236) +#define __NR_fremovexattr (__NR_SYSCALL_BASE + 237) +#define __NR_tkill (__NR_SYSCALL_BASE + 238) +#define __NR_sendfile64 (__NR_SYSCALL_BASE + 239) +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) +#define __NR_io_setup (__NR_SYSCALL_BASE + 243) +#define __NR_io_destroy (__NR_SYSCALL_BASE + 244) +#define __NR_io_getevents (__NR_SYSCALL_BASE + 245) +#define __NR_io_submit (__NR_SYSCALL_BASE + 246) +#define __NR_io_cancel (__NR_SYSCALL_BASE + 247) +#define __NR_exit_group (__NR_SYSCALL_BASE + 248) +#define __NR_lookup_dcookie (__NR_SYSCALL_BASE + 249) +#define __NR_epoll_create (__NR_SYSCALL_BASE + 250) +#define __NR_epoll_ctl (__NR_SYSCALL_BASE + 251) +#define __NR_epoll_wait (__NR_SYSCALL_BASE + 252) +#define __NR_remap_file_pages (__NR_SYSCALL_BASE + 253) +#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) +#define __NR_timer_create (__NR_SYSCALL_BASE + 257) +#define __NR_timer_settime (__NR_SYSCALL_BASE + 258) +#define __NR_timer_gettime (__NR_SYSCALL_BASE + 259) +#define __NR_timer_getoverrun (__NR_SYSCALL_BASE + 260) +#define __NR_timer_delete (__NR_SYSCALL_BASE + 261) +#define __NR_clock_settime (__NR_SYSCALL_BASE + 262) +#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) +#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) +#define __NR_clock_nanosleep (__NR_SYSCALL_BASE + 265) +#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) +#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) +#define __NR_tgkill (__NR_SYSCALL_BASE + 268) +#define __NR_utimes (__NR_SYSCALL_BASE + 269) +#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE + 270) +#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE + 271) +#define __NR_pciconfig_read (__NR_SYSCALL_BASE + 272) +#define __NR_pciconfig_write (__NR_SYSCALL_BASE + 273) +#define __NR_mq_open (__NR_SYSCALL_BASE + 274) +#define __NR_mq_unlink (__NR_SYSCALL_BASE + 275) +#define __NR_mq_timedsend (__NR_SYSCALL_BASE + 276) +#define __NR_mq_timedreceive (__NR_SYSCALL_BASE + 277) +#define __NR_mq_notify (__NR_SYSCALL_BASE + 278) +#define __NR_mq_getsetattr (__NR_SYSCALL_BASE + 279) +#define __NR_waitid (__NR_SYSCALL_BASE + 280) +#define __NR_socket (__NR_SYSCALL_BASE + 281) +#define __NR_bind (__NR_SYSCALL_BASE + 282) +#define __NR_connect (__NR_SYSCALL_BASE + 283) +#define __NR_listen (__NR_SYSCALL_BASE + 284) +#define __NR_accept (__NR_SYSCALL_BASE + 285) +#define __NR_getsockname (__NR_SYSCALL_BASE + 286) +#define __NR_getpeername (__NR_SYSCALL_BASE + 287) +#define __NR_socketpair (__NR_SYSCALL_BASE + 288) +#define __NR_send (__NR_SYSCALL_BASE + 289) +#define __NR_sendto (__NR_SYSCALL_BASE + 290) +#define __NR_recv (__NR_SYSCALL_BASE + 291) +#define __NR_recvfrom (__NR_SYSCALL_BASE + 292) +#define __NR_shutdown (__NR_SYSCALL_BASE + 293) +#define __NR_setsockopt (__NR_SYSCALL_BASE + 294) +#define __NR_getsockopt (__NR_SYSCALL_BASE + 295) +#define __NR_sendmsg (__NR_SYSCALL_BASE + 296) +#define __NR_recvmsg (__NR_SYSCALL_BASE + 297) +#define __NR_semop (__NR_SYSCALL_BASE + 298) +#define __NR_semget (__NR_SYSCALL_BASE + 299) +#define __NR_semctl (__NR_SYSCALL_BASE + 300) +#define __NR_msgsnd (__NR_SYSCALL_BASE + 301) +#define __NR_msgrcv (__NR_SYSCALL_BASE + 302) +#define __NR_msgget (__NR_SYSCALL_BASE + 303) +#define __NR_msgctl (__NR_SYSCALL_BASE + 304) +#define __NR_shmat (__NR_SYSCALL_BASE + 305) +#define __NR_shmdt (__NR_SYSCALL_BASE + 306) +#define __NR_shmget (__NR_SYSCALL_BASE + 307) +#define __NR_shmctl (__NR_SYSCALL_BASE + 308) +#define __NR_add_key (__NR_SYSCALL_BASE + 309) +#define __NR_request_key (__NR_SYSCALL_BASE + 310) +#define __NR_keyctl (__NR_SYSCALL_BASE + 311) +#define __NR_semtimedop (__NR_SYSCALL_BASE + 312) +#define __NR_vserver (__NR_SYSCALL_BASE + 313) +#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) +#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) +#define __NR_inotify_init (__NR_SYSCALL_BASE + 316) +#define __NR_inotify_add_watch (__NR_SYSCALL_BASE + 317) +#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE + 318) +#define __NR_mbind (__NR_SYSCALL_BASE + 319) +#define __NR_get_mempolicy (__NR_SYSCALL_BASE + 320) +#define __NR_set_mempolicy (__NR_SYSCALL_BASE + 321) +#define __NR_openat (__NR_SYSCALL_BASE + 322) +#define __NR_mkdirat (__NR_SYSCALL_BASE + 323) +#define __NR_mknodat (__NR_SYSCALL_BASE + 324) +#define __NR_fchownat (__NR_SYSCALL_BASE + 325) +#define __NR_futimesat (__NR_SYSCALL_BASE + 326) +#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327) +#define __NR_unlinkat (__NR_SYSCALL_BASE + 328) +#define __NR_renameat (__NR_SYSCALL_BASE + 329) +#define __NR_linkat (__NR_SYSCALL_BASE + 330) +#define __NR_symlinkat (__NR_SYSCALL_BASE + 331) +#define __NR_readlinkat (__NR_SYSCALL_BASE + 332) +#define __NR_fchmodat (__NR_SYSCALL_BASE + 333) +#define __NR_faccessat (__NR_SYSCALL_BASE + 334) +#define __NR_pselect6 (__NR_SYSCALL_BASE + 335) +#define __NR_ppoll (__NR_SYSCALL_BASE + 336) +#define __NR_unshare (__NR_SYSCALL_BASE + 337) +#define __NR_set_robust_list (__NR_SYSCALL_BASE + 338) +#define __NR_get_robust_list (__NR_SYSCALL_BASE + 339) +#define __NR_splice (__NR_SYSCALL_BASE + 340) +#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE + 341) +#define __NR_tee (__NR_SYSCALL_BASE + 342) +#define __NR_vmsplice (__NR_SYSCALL_BASE + 343) +#define __NR_move_pages (__NR_SYSCALL_BASE + 344) +#define __NR_getcpu (__NR_SYSCALL_BASE + 345) +#define __NR_epoll_pwait (__NR_SYSCALL_BASE + 346) +#define __NR_kexec_load (__NR_SYSCALL_BASE + 347) +#define __NR_utimensat (__NR_SYSCALL_BASE + 348) +#define __NR_signalfd (__NR_SYSCALL_BASE + 349) +#define __NR_timerfd_create (__NR_SYSCALL_BASE + 350) +#define __NR_eventfd (__NR_SYSCALL_BASE + 351) +#define __NR_fallocate (__NR_SYSCALL_BASE + 352) +#define __NR_timerfd_settime (__NR_SYSCALL_BASE + 353) +#define __NR_timerfd_gettime (__NR_SYSCALL_BASE + 354) +#define __NR_signalfd4 (__NR_SYSCALL_BASE + 355) +#define __NR_eventfd2 (__NR_SYSCALL_BASE + 356) +#define __NR_epoll_create1 (__NR_SYSCALL_BASE + 357) +#define __NR_dup3 (__NR_SYSCALL_BASE + 358) +#define __NR_pipe2 (__NR_SYSCALL_BASE + 359) +#define __NR_inotify_init1 (__NR_SYSCALL_BASE + 360) +#define __NR_preadv (__NR_SYSCALL_BASE + 361) +#define __NR_pwritev (__NR_SYSCALL_BASE + 362) +#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE + 363) +#define __NR_perf_event_open (__NR_SYSCALL_BASE + 364) +#define __NR_recvmmsg (__NR_SYSCALL_BASE + 365) +#define __NR_accept4 (__NR_SYSCALL_BASE + 366) +#define __NR_fanotify_init (__NR_SYSCALL_BASE + 367) +#define __NR_fanotify_mark (__NR_SYSCALL_BASE + 368) +#define __NR_prlimit64 (__NR_SYSCALL_BASE + 369) +#define __NR_name_to_handle_at (__NR_SYSCALL_BASE + 370) +#define __NR_open_by_handle_at (__NR_SYSCALL_BASE + 371) +#define __NR_clock_adjtime (__NR_SYSCALL_BASE + 372) +#define __NR_syncfs (__NR_SYSCALL_BASE + 373) +#define __NR_sendmmsg (__NR_SYSCALL_BASE + 374) +#define __NR_setns (__NR_SYSCALL_BASE + 375) +#define __NR_process_vm_readv (__NR_SYSCALL_BASE + 376) +#define __NR_process_vm_writev (__NR_SYSCALL_BASE + 377) +#define __NR_kcmp (__NR_SYSCALL_BASE + 378) +#define __NR_finit_module (__NR_SYSCALL_BASE + 379) +#define __NR_sched_setattr (__NR_SYSCALL_BASE + 380) +#define __NR_sched_getattr (__NR_SYSCALL_BASE + 381) +#define __NR_renameat2 (__NR_SYSCALL_BASE + 382) +#define __NR_seccomp (__NR_SYSCALL_BASE + 383) +#define __NR_getrandom (__NR_SYSCALL_BASE + 384) +#define __NR_memfd_create (__NR_SYSCALL_BASE + 385) +#define __NR_bpf (__NR_SYSCALL_BASE + 386) +#define __NR_execveat (__NR_SYSCALL_BASE + 387) +#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388) +#define __NR_membarrier (__NR_SYSCALL_BASE + 389) +#define __NR_mlock2 (__NR_SYSCALL_BASE + 390) +#define __NR_copy_file_range (__NR_SYSCALL_BASE + 391) +#define __NR_preadv2 (__NR_SYSCALL_BASE + 392) +#define __NR_pwritev2 (__NR_SYSCALL_BASE + 393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) +#define __NR_pkey_free (__NR_SYSCALL_BASE + 396) + +#endif /* _ASM_ARM_UNISTD_COMMON_H */ diff --git a/linux-headers/asm-arm/unistd-eabi.h b/linux-headers/asm-arm/unistd-eabi.h new file mode 100644 index 0000000000..266f1fcdfb --- /dev/null +++ b/linux-headers/asm-arm/unistd-eabi.h @@ -0,0 +1,5 @@ +#ifndef _ASM_ARM_UNISTD_EABI_H +#define _ASM_ARM_UNISTD_EABI_H 1 + + +#endif /* _ASM_ARM_UNISTD_EABI_H */ diff --git a/linux-headers/asm-arm/unistd-oabi.h b/linux-headers/asm-arm/unistd-oabi.h new file mode 100644 index 0000000000..47d9afb96d --- /dev/null +++ b/linux-headers/asm-arm/unistd-oabi.h @@ -0,0 +1,17 @@ +#ifndef _ASM_ARM_UNISTD_OABI_H +#define _ASM_ARM_UNISTD_OABI_H 1 + +#define __NR_time (__NR_SYSCALL_BASE + 13) +#define __NR_umount (__NR_SYSCALL_BASE + 22) +#define __NR_stime (__NR_SYSCALL_BASE + 25) +#define __NR_alarm (__NR_SYSCALL_BASE + 27) +#define __NR_utime (__NR_SYSCALL_BASE + 30) +#define __NR_getrlimit (__NR_SYSCALL_BASE + 76) +#define __NR_select (__NR_SYSCALL_BASE + 82) +#define __NR_readdir (__NR_SYSCALL_BASE + 89) +#define __NR_mmap (__NR_SYSCALL_BASE + 90) +#define __NR_socketcall (__NR_SYSCALL_BASE + 102) +#define __NR_syscall (__NR_SYSCALL_BASE + 113) +#define __NR_ipc (__NR_SYSCALL_BASE + 117) + +#endif /* _ASM_ARM_UNISTD_OABI_H */ diff --git a/linux-headers/asm-arm/unistd.h b/linux-headers/asm-arm/unistd.h index ceb5450c81..155571b874 100644 --- a/linux-headers/asm-arm/unistd.h +++ b/linux-headers/asm-arm/unistd.h @@ -17,409 +17,14 @@ #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 +#include <asm/unistd-eabi.h> #else #define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE +#include <asm/unistd-oabi.h> #endif -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall (__NR_SYSCALL_BASE+ 0) -#define __NR_exit (__NR_SYSCALL_BASE+ 1) -#define __NR_fork (__NR_SYSCALL_BASE+ 2) -#define __NR_read (__NR_SYSCALL_BASE+ 3) -#define __NR_write (__NR_SYSCALL_BASE+ 4) -#define __NR_open (__NR_SYSCALL_BASE+ 5) -#define __NR_close (__NR_SYSCALL_BASE+ 6) - /* 7 was sys_waitpid */ -#define __NR_creat (__NR_SYSCALL_BASE+ 8) -#define __NR_link (__NR_SYSCALL_BASE+ 9) -#define __NR_unlink (__NR_SYSCALL_BASE+ 10) -#define __NR_execve (__NR_SYSCALL_BASE+ 11) -#define __NR_chdir (__NR_SYSCALL_BASE+ 12) -#define __NR_time (__NR_SYSCALL_BASE+ 13) -#define __NR_mknod (__NR_SYSCALL_BASE+ 14) -#define __NR_chmod (__NR_SYSCALL_BASE+ 15) -#define __NR_lchown (__NR_SYSCALL_BASE+ 16) - /* 17 was sys_break */ - /* 18 was sys_stat */ -#define __NR_lseek (__NR_SYSCALL_BASE+ 19) -#define __NR_getpid (__NR_SYSCALL_BASE+ 20) -#define __NR_mount (__NR_SYSCALL_BASE+ 21) -#define __NR_umount (__NR_SYSCALL_BASE+ 22) -#define __NR_setuid (__NR_SYSCALL_BASE+ 23) -#define __NR_getuid (__NR_SYSCALL_BASE+ 24) -#define __NR_stime (__NR_SYSCALL_BASE+ 25) -#define __NR_ptrace (__NR_SYSCALL_BASE+ 26) -#define __NR_alarm (__NR_SYSCALL_BASE+ 27) - /* 28 was sys_fstat */ -#define __NR_pause (__NR_SYSCALL_BASE+ 29) -#define __NR_utime (__NR_SYSCALL_BASE+ 30) - /* 31 was sys_stty */ - /* 32 was sys_gtty */ -#define __NR_access (__NR_SYSCALL_BASE+ 33) -#define __NR_nice (__NR_SYSCALL_BASE+ 34) - /* 35 was sys_ftime */ -#define __NR_sync (__NR_SYSCALL_BASE+ 36) -#define __NR_kill (__NR_SYSCALL_BASE+ 37) -#define __NR_rename (__NR_SYSCALL_BASE+ 38) -#define __NR_mkdir (__NR_SYSCALL_BASE+ 39) -#define __NR_rmdir (__NR_SYSCALL_BASE+ 40) -#define __NR_dup (__NR_SYSCALL_BASE+ 41) -#define __NR_pipe (__NR_SYSCALL_BASE+ 42) -#define __NR_times (__NR_SYSCALL_BASE+ 43) - /* 44 was sys_prof */ -#define __NR_brk (__NR_SYSCALL_BASE+ 45) -#define __NR_setgid (__NR_SYSCALL_BASE+ 46) -#define __NR_getgid (__NR_SYSCALL_BASE+ 47) - /* 48 was sys_signal */ -#define __NR_geteuid (__NR_SYSCALL_BASE+ 49) -#define __NR_getegid (__NR_SYSCALL_BASE+ 50) -#define __NR_acct (__NR_SYSCALL_BASE+ 51) -#define __NR_umount2 (__NR_SYSCALL_BASE+ 52) - /* 53 was sys_lock */ -#define __NR_ioctl (__NR_SYSCALL_BASE+ 54) -#define __NR_fcntl (__NR_SYSCALL_BASE+ 55) - /* 56 was sys_mpx */ -#define __NR_setpgid (__NR_SYSCALL_BASE+ 57) - /* 58 was sys_ulimit */ - /* 59 was sys_olduname */ -#define __NR_umask (__NR_SYSCALL_BASE+ 60) -#define __NR_chroot (__NR_SYSCALL_BASE+ 61) -#define __NR_ustat (__NR_SYSCALL_BASE+ 62) -#define __NR_dup2 (__NR_SYSCALL_BASE+ 63) -#define __NR_getppid (__NR_SYSCALL_BASE+ 64) -#define __NR_getpgrp (__NR_SYSCALL_BASE+ 65) -#define __NR_setsid (__NR_SYSCALL_BASE+ 66) -#define __NR_sigaction (__NR_SYSCALL_BASE+ 67) - /* 68 was sys_sgetmask */ - /* 69 was sys_ssetmask */ -#define __NR_setreuid (__NR_SYSCALL_BASE+ 70) -#define __NR_setregid (__NR_SYSCALL_BASE+ 71) -#define __NR_sigsuspend (__NR_SYSCALL_BASE+ 72) -#define __NR_sigpending (__NR_SYSCALL_BASE+ 73) -#define __NR_sethostname (__NR_SYSCALL_BASE+ 74) -#define __NR_setrlimit (__NR_SYSCALL_BASE+ 75) -#define __NR_getrlimit (__NR_SYSCALL_BASE+ 76) /* Back compat 2GB limited rlimit */ -#define __NR_getrusage (__NR_SYSCALL_BASE+ 77) -#define __NR_gettimeofday (__NR_SYSCALL_BASE+ 78) -#define __NR_settimeofday (__NR_SYSCALL_BASE+ 79) -#define __NR_getgroups (__NR_SYSCALL_BASE+ 80) -#define __NR_setgroups (__NR_SYSCALL_BASE+ 81) -#define __NR_select (__NR_SYSCALL_BASE+ 82) -#define __NR_symlink (__NR_SYSCALL_BASE+ 83) - /* 84 was sys_lstat */ -#define __NR_readlink (__NR_SYSCALL_BASE+ 85) -#define __NR_uselib (__NR_SYSCALL_BASE+ 86) -#define __NR_swapon (__NR_SYSCALL_BASE+ 87) -#define __NR_reboot (__NR_SYSCALL_BASE+ 88) -#define __NR_readdir (__NR_SYSCALL_BASE+ 89) -#define __NR_mmap (__NR_SYSCALL_BASE+ 90) -#define __NR_munmap (__NR_SYSCALL_BASE+ 91) -#define __NR_truncate (__NR_SYSCALL_BASE+ 92) -#define __NR_ftruncate (__NR_SYSCALL_BASE+ 93) -#define __NR_fchmod (__NR_SYSCALL_BASE+ 94) -#define __NR_fchown (__NR_SYSCALL_BASE+ 95) -#define __NR_getpriority (__NR_SYSCALL_BASE+ 96) -#define __NR_setpriority (__NR_SYSCALL_BASE+ 97) - /* 98 was sys_profil */ -#define __NR_statfs (__NR_SYSCALL_BASE+ 99) -#define __NR_fstatfs (__NR_SYSCALL_BASE+100) - /* 101 was sys_ioperm */ -#define __NR_socketcall (__NR_SYSCALL_BASE+102) -#define __NR_syslog (__NR_SYSCALL_BASE+103) -#define __NR_setitimer (__NR_SYSCALL_BASE+104) -#define __NR_getitimer (__NR_SYSCALL_BASE+105) -#define __NR_stat (__NR_SYSCALL_BASE+106) -#define __NR_lstat (__NR_SYSCALL_BASE+107) -#define __NR_fstat (__NR_SYSCALL_BASE+108) - /* 109 was sys_uname */ - /* 110 was sys_iopl */ -#define __NR_vhangup (__NR_SYSCALL_BASE+111) - /* 112 was sys_idle */ -#define __NR_syscall (__NR_SYSCALL_BASE+113) /* syscall to call a syscall! */ -#define __NR_wait4 (__NR_SYSCALL_BASE+114) -#define __NR_swapoff (__NR_SYSCALL_BASE+115) -#define __NR_sysinfo (__NR_SYSCALL_BASE+116) -#define __NR_ipc (__NR_SYSCALL_BASE+117) -#define __NR_fsync (__NR_SYSCALL_BASE+118) -#define __NR_sigreturn (__NR_SYSCALL_BASE+119) -#define __NR_clone (__NR_SYSCALL_BASE+120) -#define __NR_setdomainname (__NR_SYSCALL_BASE+121) -#define __NR_uname (__NR_SYSCALL_BASE+122) - /* 123 was sys_modify_ldt */ -#define __NR_adjtimex (__NR_SYSCALL_BASE+124) -#define __NR_mprotect (__NR_SYSCALL_BASE+125) -#define __NR_sigprocmask (__NR_SYSCALL_BASE+126) - /* 127 was sys_create_module */ -#define __NR_init_module (__NR_SYSCALL_BASE+128) -#define __NR_delete_module (__NR_SYSCALL_BASE+129) - /* 130 was sys_get_kernel_syms */ -#define __NR_quotactl (__NR_SYSCALL_BASE+131) -#define __NR_getpgid (__NR_SYSCALL_BASE+132) -#define __NR_fchdir (__NR_SYSCALL_BASE+133) -#define __NR_bdflush (__NR_SYSCALL_BASE+134) -#define __NR_sysfs (__NR_SYSCALL_BASE+135) -#define __NR_personality (__NR_SYSCALL_BASE+136) - /* 137 was sys_afs_syscall */ -#define __NR_setfsuid (__NR_SYSCALL_BASE+138) -#define __NR_setfsgid (__NR_SYSCALL_BASE+139) -#define __NR__llseek (__NR_SYSCALL_BASE+140) -#define __NR_getdents (__NR_SYSCALL_BASE+141) -#define __NR__newselect (__NR_SYSCALL_BASE+142) -#define __NR_flock (__NR_SYSCALL_BASE+143) -#define __NR_msync (__NR_SYSCALL_BASE+144) -#define __NR_readv (__NR_SYSCALL_BASE+145) -#define __NR_writev (__NR_SYSCALL_BASE+146) -#define __NR_getsid (__NR_SYSCALL_BASE+147) -#define __NR_fdatasync (__NR_SYSCALL_BASE+148) -#define __NR__sysctl (__NR_SYSCALL_BASE+149) -#define __NR_mlock (__NR_SYSCALL_BASE+150) -#define __NR_munlock (__NR_SYSCALL_BASE+151) -#define __NR_mlockall (__NR_SYSCALL_BASE+152) -#define __NR_munlockall (__NR_SYSCALL_BASE+153) -#define __NR_sched_setparam (__NR_SYSCALL_BASE+154) -#define __NR_sched_getparam (__NR_SYSCALL_BASE+155) -#define __NR_sched_setscheduler (__NR_SYSCALL_BASE+156) -#define __NR_sched_getscheduler (__NR_SYSCALL_BASE+157) -#define __NR_sched_yield (__NR_SYSCALL_BASE+158) -#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE+159) -#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE+160) -#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE+161) -#define __NR_nanosleep (__NR_SYSCALL_BASE+162) -#define __NR_mremap (__NR_SYSCALL_BASE+163) -#define __NR_setresuid (__NR_SYSCALL_BASE+164) -#define __NR_getresuid (__NR_SYSCALL_BASE+165) - /* 166 was sys_vm86 */ - /* 167 was sys_query_module */ -#define __NR_poll (__NR_SYSCALL_BASE+168) -#define __NR_nfsservctl (__NR_SYSCALL_BASE+169) -#define __NR_setresgid (__NR_SYSCALL_BASE+170) -#define __NR_getresgid (__NR_SYSCALL_BASE+171) -#define __NR_prctl (__NR_SYSCALL_BASE+172) -#define __NR_rt_sigreturn (__NR_SYSCALL_BASE+173) -#define __NR_rt_sigaction (__NR_SYSCALL_BASE+174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE+175) -#define __NR_rt_sigpending (__NR_SYSCALL_BASE+176) -#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE+177) -#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE+178) -#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE+179) -#define __NR_pread64 (__NR_SYSCALL_BASE+180) -#define __NR_pwrite64 (__NR_SYSCALL_BASE+181) -#define __NR_chown (__NR_SYSCALL_BASE+182) -#define __NR_getcwd (__NR_SYSCALL_BASE+183) -#define __NR_capget (__NR_SYSCALL_BASE+184) -#define __NR_capset (__NR_SYSCALL_BASE+185) -#define __NR_sigaltstack (__NR_SYSCALL_BASE+186) -#define __NR_sendfile (__NR_SYSCALL_BASE+187) - /* 188 reserved */ - /* 189 reserved */ -#define __NR_vfork (__NR_SYSCALL_BASE+190) -#define __NR_ugetrlimit (__NR_SYSCALL_BASE+191) /* SuS compliant getrlimit */ -#define __NR_mmap2 (__NR_SYSCALL_BASE+192) -#define __NR_truncate64 (__NR_SYSCALL_BASE+193) -#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194) -#define __NR_stat64 (__NR_SYSCALL_BASE+195) -#define __NR_lstat64 (__NR_SYSCALL_BASE+196) -#define __NR_fstat64 (__NR_SYSCALL_BASE+197) -#define __NR_lchown32 (__NR_SYSCALL_BASE+198) -#define __NR_getuid32 (__NR_SYSCALL_BASE+199) -#define __NR_getgid32 (__NR_SYSCALL_BASE+200) -#define __NR_geteuid32 (__NR_SYSCALL_BASE+201) -#define __NR_getegid32 (__NR_SYSCALL_BASE+202) -#define __NR_setreuid32 (__NR_SYSCALL_BASE+203) -#define __NR_setregid32 (__NR_SYSCALL_BASE+204) -#define __NR_getgroups32 (__NR_SYSCALL_BASE+205) -#define __NR_setgroups32 (__NR_SYSCALL_BASE+206) -#define __NR_fchown32 (__NR_SYSCALL_BASE+207) -#define __NR_setresuid32 (__NR_SYSCALL_BASE+208) -#define __NR_getresuid32 (__NR_SYSCALL_BASE+209) -#define __NR_setresgid32 (__NR_SYSCALL_BASE+210) -#define __NR_getresgid32 (__NR_SYSCALL_BASE+211) -#define __NR_chown32 (__NR_SYSCALL_BASE+212) -#define __NR_setuid32 (__NR_SYSCALL_BASE+213) -#define __NR_setgid32 (__NR_SYSCALL_BASE+214) -#define __NR_setfsuid32 (__NR_SYSCALL_BASE+215) -#define __NR_setfsgid32 (__NR_SYSCALL_BASE+216) -#define __NR_getdents64 (__NR_SYSCALL_BASE+217) -#define __NR_pivot_root (__NR_SYSCALL_BASE+218) -#define __NR_mincore (__NR_SYSCALL_BASE+219) -#define __NR_madvise (__NR_SYSCALL_BASE+220) -#define __NR_fcntl64 (__NR_SYSCALL_BASE+221) - /* 222 for tux */ - /* 223 is unused */ -#define __NR_gettid (__NR_SYSCALL_BASE+224) -#define __NR_readahead (__NR_SYSCALL_BASE+225) -#define __NR_setxattr (__NR_SYSCALL_BASE+226) -#define __NR_lsetxattr (__NR_SYSCALL_BASE+227) -#define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -#define __NR_getxattr (__NR_SYSCALL_BASE+229) -#define __NR_lgetxattr (__NR_SYSCALL_BASE+230) -#define __NR_fgetxattr (__NR_SYSCALL_BASE+231) -#define __NR_listxattr (__NR_SYSCALL_BASE+232) -#define __NR_llistxattr (__NR_SYSCALL_BASE+233) -#define __NR_flistxattr (__NR_SYSCALL_BASE+234) -#define __NR_removexattr (__NR_SYSCALL_BASE+235) -#define __NR_lremovexattr (__NR_SYSCALL_BASE+236) -#define __NR_fremovexattr (__NR_SYSCALL_BASE+237) -#define __NR_tkill (__NR_SYSCALL_BASE+238) -#define __NR_sendfile64 (__NR_SYSCALL_BASE+239) -#define __NR_futex (__NR_SYSCALL_BASE+240) -#define __NR_sched_setaffinity (__NR_SYSCALL_BASE+241) -#define __NR_sched_getaffinity (__NR_SYSCALL_BASE+242) -#define __NR_io_setup (__NR_SYSCALL_BASE+243) -#define __NR_io_destroy (__NR_SYSCALL_BASE+244) -#define __NR_io_getevents (__NR_SYSCALL_BASE+245) -#define __NR_io_submit (__NR_SYSCALL_BASE+246) -#define __NR_io_cancel (__NR_SYSCALL_BASE+247) -#define __NR_exit_group (__NR_SYSCALL_BASE+248) -#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249) -#define __NR_epoll_create (__NR_SYSCALL_BASE+250) -#define __NR_epoll_ctl (__NR_SYSCALL_BASE+251) -#define __NR_epoll_wait (__NR_SYSCALL_BASE+252) -#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253) - /* 254 for set_thread_area */ - /* 255 for get_thread_area */ -#define __NR_set_tid_address (__NR_SYSCALL_BASE+256) -#define __NR_timer_create (__NR_SYSCALL_BASE+257) -#define __NR_timer_settime (__NR_SYSCALL_BASE+258) -#define __NR_timer_gettime (__NR_SYSCALL_BASE+259) -#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+260) -#define __NR_timer_delete (__NR_SYSCALL_BASE+261) -#define __NR_clock_settime (__NR_SYSCALL_BASE+262) -#define __NR_clock_gettime (__NR_SYSCALL_BASE+263) -#define __NR_clock_getres (__NR_SYSCALL_BASE+264) -#define __NR_clock_nanosleep (__NR_SYSCALL_BASE+265) -#define __NR_statfs64 (__NR_SYSCALL_BASE+266) -#define __NR_fstatfs64 (__NR_SYSCALL_BASE+267) -#define __NR_tgkill (__NR_SYSCALL_BASE+268) -#define __NR_utimes (__NR_SYSCALL_BASE+269) -#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE+270) -#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE+271) -#define __NR_pciconfig_read (__NR_SYSCALL_BASE+272) -#define __NR_pciconfig_write (__NR_SYSCALL_BASE+273) -#define __NR_mq_open (__NR_SYSCALL_BASE+274) -#define __NR_mq_unlink (__NR_SYSCALL_BASE+275) -#define __NR_mq_timedsend (__NR_SYSCALL_BASE+276) -#define __NR_mq_timedreceive (__NR_SYSCALL_BASE+277) -#define __NR_mq_notify (__NR_SYSCALL_BASE+278) -#define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) -#define __NR_waitid (__NR_SYSCALL_BASE+280) -#define __NR_socket (__NR_SYSCALL_BASE+281) -#define __NR_bind (__NR_SYSCALL_BASE+282) -#define __NR_connect (__NR_SYSCALL_BASE+283) -#define __NR_listen (__NR_SYSCALL_BASE+284) -#define __NR_accept (__NR_SYSCALL_BASE+285) -#define __NR_getsockname (__NR_SYSCALL_BASE+286) -#define __NR_getpeername (__NR_SYSCALL_BASE+287) -#define __NR_socketpair (__NR_SYSCALL_BASE+288) -#define __NR_send (__NR_SYSCALL_BASE+289) -#define __NR_sendto (__NR_SYSCALL_BASE+290) -#define __NR_recv (__NR_SYSCALL_BASE+291) -#define __NR_recvfrom (__NR_SYSCALL_BASE+292) -#define __NR_shutdown (__NR_SYSCALL_BASE+293) -#define __NR_setsockopt (__NR_SYSCALL_BASE+294) -#define __NR_getsockopt (__NR_SYSCALL_BASE+295) -#define __NR_sendmsg (__NR_SYSCALL_BASE+296) -#define __NR_recvmsg (__NR_SYSCALL_BASE+297) -#define __NR_semop (__NR_SYSCALL_BASE+298) -#define __NR_semget (__NR_SYSCALL_BASE+299) -#define __NR_semctl (__NR_SYSCALL_BASE+300) -#define __NR_msgsnd (__NR_SYSCALL_BASE+301) -#define __NR_msgrcv (__NR_SYSCALL_BASE+302) -#define __NR_msgget (__NR_SYSCALL_BASE+303) -#define __NR_msgctl (__NR_SYSCALL_BASE+304) -#define __NR_shmat (__NR_SYSCALL_BASE+305) -#define __NR_shmdt (__NR_SYSCALL_BASE+306) -#define __NR_shmget (__NR_SYSCALL_BASE+307) -#define __NR_shmctl (__NR_SYSCALL_BASE+308) -#define __NR_add_key (__NR_SYSCALL_BASE+309) -#define __NR_request_key (__NR_SYSCALL_BASE+310) -#define __NR_keyctl (__NR_SYSCALL_BASE+311) -#define __NR_semtimedop (__NR_SYSCALL_BASE+312) -#define __NR_vserver (__NR_SYSCALL_BASE+313) -#define __NR_ioprio_set (__NR_SYSCALL_BASE+314) -#define __NR_ioprio_get (__NR_SYSCALL_BASE+315) -#define __NR_inotify_init (__NR_SYSCALL_BASE+316) -#define __NR_inotify_add_watch (__NR_SYSCALL_BASE+317) -#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE+318) -#define __NR_mbind (__NR_SYSCALL_BASE+319) -#define __NR_get_mempolicy (__NR_SYSCALL_BASE+320) -#define __NR_set_mempolicy (__NR_SYSCALL_BASE+321) -#define __NR_openat (__NR_SYSCALL_BASE+322) -#define __NR_mkdirat (__NR_SYSCALL_BASE+323) -#define __NR_mknodat (__NR_SYSCALL_BASE+324) -#define __NR_fchownat (__NR_SYSCALL_BASE+325) -#define __NR_futimesat (__NR_SYSCALL_BASE+326) -#define __NR_fstatat64 (__NR_SYSCALL_BASE+327) -#define __NR_unlinkat (__NR_SYSCALL_BASE+328) -#define __NR_renameat (__NR_SYSCALL_BASE+329) -#define __NR_linkat (__NR_SYSCALL_BASE+330) -#define __NR_symlinkat (__NR_SYSCALL_BASE+331) -#define __NR_readlinkat (__NR_SYSCALL_BASE+332) -#define __NR_fchmodat (__NR_SYSCALL_BASE+333) -#define __NR_faccessat (__NR_SYSCALL_BASE+334) -#define __NR_pselect6 (__NR_SYSCALL_BASE+335) -#define __NR_ppoll (__NR_SYSCALL_BASE+336) -#define __NR_unshare (__NR_SYSCALL_BASE+337) -#define __NR_set_robust_list (__NR_SYSCALL_BASE+338) -#define __NR_get_robust_list (__NR_SYSCALL_BASE+339) -#define __NR_splice (__NR_SYSCALL_BASE+340) -#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341) +#include <asm/unistd-common.h> #define __NR_sync_file_range2 __NR_arm_sync_file_range -#define __NR_tee (__NR_SYSCALL_BASE+342) -#define __NR_vmsplice (__NR_SYSCALL_BASE+343) -#define __NR_move_pages (__NR_SYSCALL_BASE+344) -#define __NR_getcpu (__NR_SYSCALL_BASE+345) -#define __NR_epoll_pwait (__NR_SYSCALL_BASE+346) -#define __NR_kexec_load (__NR_SYSCALL_BASE+347) -#define __NR_utimensat (__NR_SYSCALL_BASE+348) -#define __NR_signalfd (__NR_SYSCALL_BASE+349) -#define __NR_timerfd_create (__NR_SYSCALL_BASE+350) -#define __NR_eventfd (__NR_SYSCALL_BASE+351) -#define __NR_fallocate (__NR_SYSCALL_BASE+352) -#define __NR_timerfd_settime (__NR_SYSCALL_BASE+353) -#define __NR_timerfd_gettime (__NR_SYSCALL_BASE+354) -#define __NR_signalfd4 (__NR_SYSCALL_BASE+355) -#define __NR_eventfd2 (__NR_SYSCALL_BASE+356) -#define __NR_epoll_create1 (__NR_SYSCALL_BASE+357) -#define __NR_dup3 (__NR_SYSCALL_BASE+358) -#define __NR_pipe2 (__NR_SYSCALL_BASE+359) -#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360) -#define __NR_preadv (__NR_SYSCALL_BASE+361) -#define __NR_pwritev (__NR_SYSCALL_BASE+362) -#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) -#define __NR_recvmmsg (__NR_SYSCALL_BASE+365) -#define __NR_accept4 (__NR_SYSCALL_BASE+366) -#define __NR_fanotify_init (__NR_SYSCALL_BASE+367) -#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) -#define __NR_prlimit64 (__NR_SYSCALL_BASE+369) -#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370) -#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371) -#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372) -#define __NR_syncfs (__NR_SYSCALL_BASE+373) -#define __NR_sendmmsg (__NR_SYSCALL_BASE+374) -#define __NR_setns (__NR_SYSCALL_BASE+375) -#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) -#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) -#define __NR_kcmp (__NR_SYSCALL_BASE+378) -#define __NR_finit_module (__NR_SYSCALL_BASE+379) -#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) -#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) -#define __NR_renameat2 (__NR_SYSCALL_BASE+382) -#define __NR_seccomp (__NR_SYSCALL_BASE+383) -#define __NR_getrandom (__NR_SYSCALL_BASE+384) -#define __NR_memfd_create (__NR_SYSCALL_BASE+385) -#define __NR_bpf (__NR_SYSCALL_BASE+386) -#define __NR_execveat (__NR_SYSCALL_BASE+387) -#define __NR_userfaultfd (__NR_SYSCALL_BASE+388) -#define __NR_membarrier (__NR_SYSCALL_BASE+389) -#define __NR_mlock2 (__NR_SYSCALL_BASE+390) -#define __NR_copy_file_range (__NR_SYSCALL_BASE+391) -#define __NR_preadv2 (__NR_SYSCALL_BASE+392) -#define __NR_pwritev2 (__NR_SYSCALL_BASE+393) /* * The following SWIs are ARM private. @@ -431,22 +36,4 @@ #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) -/* - * The following syscalls are obsolete and no longer available for EABI. - */ -#if defined(__ARM_EABI__) -#undef __NR_time -#undef __NR_umount -#undef __NR_stime -#undef __NR_alarm -#undef __NR_utime -#undef __NR_getrlimit -#undef __NR_select -#undef __NR_readdir -#undef __NR_mmap -#undef __NR_socketcall -#undef __NR_syscall -#undef __NR_ipc -#endif - #endif /* __ASM_ARM_UNISTD_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index fd5a2761a5..651ec30040 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index c93cf35ce3..4edbe4bb0e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -573,6 +593,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ @@ -596,6 +620,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ @@ -608,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h index 1e66eba4c6..598043c7b6 100644 --- a/linux-headers/asm-powerpc/unistd.h +++ b/linux-headers/asm-powerpc/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index e41c5c1a28..3a5397988e 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -45,7 +45,18 @@ struct kvm_steal_time { __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; +}; + +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 +struct kvm_clock_pairing { + __s64 sec; + __s64 nsec; + __u64 tsc; + __u32 flags; + __u32 pad[9]; }; #define KVM_STEAL_ALIGNMENT_BITS 5 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index bb0ed71223..4e082a81b4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 immediate_exit; + __u8 padding1[6]; /* out */ __u32 exit_reason; @@ -651,6 +652,9 @@ struct kvm_enable_cap { }; /* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + struct kvm_ppc_pvinfo { /* out */ __u32 flags; @@ -682,7 +686,12 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; #define KVMIO 0xAE @@ -870,6 +879,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_SPAPR_RESIZE_HPT 133 +#define KVM_CAP_PPC_MMU_RADIX 134 +#define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 #ifdef KVM_CAP_IRQ_ROUTING @@ -1186,6 +1199,13 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* Available with KVM_CAP_PPC_RTAS */ #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) +/* Available with KVM_CAP_SPAPR_RESIZE_HPT */ +#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) +#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) +/* Available with KVM_CAP_PPC_RADIX_MMU */ +#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h index e61661edf3..15b24ff6cf 100644 --- a/linux-headers/linux/kvm_para.h +++ b/linux-headers/linux/kvm_para.h @@ -14,6 +14,7 @@ #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG #define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 @@ -23,6 +24,7 @@ #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 #define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 /* * hypercalls use architecture specific diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 19e8453249..2ed5dc3775 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -11,13 +11,18 @@ #include <linux/types.h> -#define UFFD_API ((__u64)0xAA) /* - * After implementing the respective features it will become: - * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ - * UFFD_FEATURE_EVENT_FORK) + * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In + * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ + * means the userland is reading). */ -#define UFFD_API_FEATURES (0) +#define UFFD_API ((__u64)0xAA) +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ + UFFD_FEATURE_EVENT_MADVDONTNEED | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -26,6 +31,9 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE) +#define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY) /* * Valid ioctl command number range with this API is from 0x00 to @@ -72,6 +80,21 @@ struct uffd_msg { } pagefault; struct { + __u32 ufd; + } fork; + + struct { + __u64 from; + __u64 to; + __u64 len; + } remap; + + struct { + __u64 start; + __u64 end; + } madv_dn; + + struct { /* unused reserved fields */ __u64 reserved1; __u64 reserved2; @@ -84,9 +107,9 @@ struct uffd_msg { * Start at 0x12 and not at 0 to be more strict against bugs. */ #define UFFD_EVENT_PAGEFAULT 0x12 -#if 0 /* not available yet */ #define UFFD_EVENT_FORK 0x13 -#endif +#define UFFD_EVENT_REMAP 0x14 +#define UFFD_EVENT_MADVDONTNEED 0x15 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -104,11 +127,37 @@ struct uffdio_api { * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE * are to be considered implicitly always enabled in all kernels as * long as the uffdio_api.api requested matches UFFD_API. + * + * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER + * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on + * hugetlbfs virtual memory ranges. Adding or not adding + * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has + * no real functional effect after UFFDIO_API returns, but + * it's only useful for an initial feature set probe at + * UFFDIO_API time. There are two ways to use it: + * + * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the + * uffdio_api.features before calling UFFDIO_API, an error + * will be returned by UFFDIO_API on a kernel without + * hugetlbfs missing support + * + * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in + * uffdio_api.features and instead it will be set by the + * kernel in the uffdio_api.features if the kernel supports + * it, so userland can later check if the feature flag is + * present in uffdio_api.features after UFFDIO_API + * succeeded. + * + * UFFD_FEATURE_MISSING_SHMEM works the same as + * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem + * (i.e. tmpfs and other shmem based APIs). */ -#if 0 /* not available yet */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) -#endif +#define UFFD_FEATURE_EVENT_REMAP (1<<2) +#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3) +#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) +#define UFFD_FEATURE_MISSING_SHMEM (1<<5) __u64 features; __u64 ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 759b850a3e..531cb2eda9 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -203,6 +203,16 @@ struct vfio_device_info { }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) diff --git a/linux-user/main.c b/linux-user/main.c index 9645122aa6..10a3bb3a12 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -574,6 +574,7 @@ void cpu_loop(CPUARMState *env) switch(trapnr) { case EXCP_UDEF: case EXCP_NOCP: + case EXCP_INVSTATE: { TaskState *ts = cs->opaque; uint32_t opcode; diff --git a/linux-user/signal.c b/linux-user/signal.c index 8209539555..a67db04e1a 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -254,7 +254,7 @@ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset) } #if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \ - !defined(TARGET_X86_64) && !defined(TARGET_NIOS2) + !defined(TARGET_NIOS2) /* Just set the guest's signal mask to the specified value; the * caller is assumed to have called block_signals() already. */ @@ -512,7 +512,7 @@ void signal_init(void) } } -#if !(defined(TARGET_X86_64) || defined(TARGET_UNICORE32)) +#ifndef TARGET_UNICORE32 /* Force a synchronously taken signal. The kernel force_sig() function * also forces the signal to "not blocked, not ignored", but for QEMU * that work is done in process_pending_signals(). @@ -819,9 +819,8 @@ int do_sigaction(int sig, const struct target_sigaction *act, return ret; } -#if defined(TARGET_I386) && TARGET_ABI_BITS == 32 - -/* from the Linux kernel */ +#if defined(TARGET_I386) +/* from the Linux kernel - /arch/x86/include/uapi/asm/sigcontext.h */ struct target_fpreg { uint16_t significand[4]; @@ -835,58 +834,120 @@ struct target_fpxreg { }; struct target_xmmreg { - abi_ulong element[4]; + uint32_t element[4]; }; -struct target_fpstate { +struct target_fpstate_32 { /* Regular FPU environment */ - abi_ulong cw; - abi_ulong sw; - abi_ulong tag; - abi_ulong ipoff; - abi_ulong cssel; - abi_ulong dataoff; - abi_ulong datasel; - struct target_fpreg _st[8]; + uint32_t cw; + uint32_t sw; + uint32_t tag; + uint32_t ipoff; + uint32_t cssel; + uint32_t dataoff; + uint32_t datasel; + struct target_fpreg st[8]; uint16_t status; uint16_t magic; /* 0xffff = regular FPU data only */ /* FXSR FPU environment */ - abi_ulong _fxsr_env[6]; /* FXSR FPU env is ignored */ - abi_ulong mxcsr; - abi_ulong reserved; - struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ - struct target_xmmreg _xmm[8]; - abi_ulong padding[56]; + uint32_t _fxsr_env[6]; /* FXSR FPU env is ignored */ + uint32_t mxcsr; + uint32_t reserved; + struct target_fpxreg fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct target_xmmreg xmm[8]; + uint32_t padding[56]; }; -#define X86_FXSR_MAGIC 0x0000 +struct target_fpstate_64 { + /* FXSAVE format */ + uint16_t cw; + uint16_t sw; + uint16_t twd; + uint16_t fop; + uint64_t rip; + uint64_t rdp; + uint32_t mxcsr; + uint32_t mxcsr_mask; + uint32_t st_space[32]; + uint32_t xmm_space[64]; + uint32_t reserved[24]; +}; -struct target_sigcontext { +#ifndef TARGET_X86_64 +# define target_fpstate target_fpstate_32 +#else +# define target_fpstate target_fpstate_64 +#endif + +struct target_sigcontext_32 { uint16_t gs, __gsh; uint16_t fs, __fsh; uint16_t es, __esh; uint16_t ds, __dsh; - abi_ulong edi; - abi_ulong esi; - abi_ulong ebp; - abi_ulong esp; - abi_ulong ebx; - abi_ulong edx; - abi_ulong ecx; - abi_ulong eax; - abi_ulong trapno; - abi_ulong err; - abi_ulong eip; + uint32_t edi; + uint32_t esi; + uint32_t ebp; + uint32_t esp; + uint32_t ebx; + uint32_t edx; + uint32_t ecx; + uint32_t eax; + uint32_t trapno; + uint32_t err; + uint32_t eip; uint16_t cs, __csh; - abi_ulong eflags; - abi_ulong esp_at_signal; + uint32_t eflags; + uint32_t esp_at_signal; uint16_t ss, __ssh; - abi_ulong fpstate; /* pointer */ - abi_ulong oldmask; - abi_ulong cr2; + uint32_t fpstate; /* pointer */ + uint32_t oldmask; + uint32_t cr2; +}; + +struct target_sigcontext_64 { + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rbx; + uint64_t rdx; + uint64_t rax; + uint64_t rcx; + uint64_t rsp; + uint64_t rip; + + uint64_t eflags; + + uint16_t cs; + uint16_t gs; + uint16_t fs; + uint16_t ss; + + uint64_t err; + uint64_t trapno; + uint64_t oldmask; + uint64_t cr2; + + uint64_t fpstate; /* pointer */ + uint64_t padding[8]; }; +#ifndef TARGET_X86_64 +# define target_sigcontext target_sigcontext_32 +#else +# define target_sigcontext target_sigcontext_64 +#endif + +/* see Linux/include/uapi/asm-generic/ucontext.h */ struct target_ucontext { abi_ulong tuc_flags; abi_ulong tuc_link; @@ -895,8 +956,8 @@ struct target_ucontext { target_sigset_t tuc_sigmask; /* mask last for extensibility */ }; -struct sigframe -{ +#ifndef TARGET_X86_64 +struct sigframe { abi_ulong pretcode; int sig; struct target_sigcontext sc; @@ -905,8 +966,7 @@ struct sigframe char retcode[8]; }; -struct rt_sigframe -{ +struct rt_sigframe { abi_ulong pretcode; int sig; abi_ulong pinfo; @@ -917,6 +977,17 @@ struct rt_sigframe char retcode[8]; }; +#else + +struct rt_sigframe { + abi_ulong pretcode; + struct target_ucontext uc; + struct target_siginfo info; + struct target_fpstate fpstate; +}; + +#endif + /* * Set up a signal frame. */ @@ -927,6 +998,7 @@ static void setup_sigcontext(struct target_sigcontext *sc, abi_ulong fpstate_addr) { CPUState *cs = CPU(x86_env_get_cpu(env)); +#ifndef TARGET_X86_64 uint16_t magic; /* already locked in setup_frame() */ @@ -959,6 +1031,44 @@ static void setup_sigcontext(struct target_sigcontext *sc, /* non-iBCS2 extensions.. */ __put_user(mask, &sc->oldmask); __put_user(env->cr[2], &sc->cr2); +#else + __put_user(env->regs[R_EDI], &sc->rdi); + __put_user(env->regs[R_ESI], &sc->rsi); + __put_user(env->regs[R_EBP], &sc->rbp); + __put_user(env->regs[R_ESP], &sc->rsp); + __put_user(env->regs[R_EBX], &sc->rbx); + __put_user(env->regs[R_EDX], &sc->rdx); + __put_user(env->regs[R_ECX], &sc->rcx); + __put_user(env->regs[R_EAX], &sc->rax); + + __put_user(env->regs[8], &sc->r8); + __put_user(env->regs[9], &sc->r9); + __put_user(env->regs[10], &sc->r10); + __put_user(env->regs[11], &sc->r11); + __put_user(env->regs[12], &sc->r12); + __put_user(env->regs[13], &sc->r13); + __put_user(env->regs[14], &sc->r14); + __put_user(env->regs[15], &sc->r15); + + __put_user(cs->exception_index, &sc->trapno); + __put_user(env->error_code, &sc->err); + __put_user(env->eip, &sc->rip); + + __put_user(env->eflags, &sc->eflags); + __put_user(env->segs[R_CS].selector, &sc->cs); + __put_user((uint16_t)0, &sc->gs); + __put_user((uint16_t)0, &sc->fs); + __put_user(env->segs[R_SS].selector, &sc->ss); + + __put_user(mask, &sc->oldmask); + __put_user(env->cr[2], &sc->cr2); + + /* fpstate_addr must be 16 byte aligned for fxsave */ + assert(!(fpstate_addr & 0xf)); + + cpu_x86_fxsave(env, fpstate_addr); + __put_user(fpstate_addr, &sc->fpstate); +#endif } /* @@ -972,23 +1082,34 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size) /* Default to using normal stack */ esp = env->regs[R_ESP]; +#ifdef TARGET_X86_64 + esp -= 128; /* this is the redzone */ +#endif + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa_flags & TARGET_SA_ONSTACK) { if (sas_ss_flags(esp) == 0) { esp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size; } } else { - +#ifndef TARGET_X86_64 /* This is the legacy signal stack switching. */ if ((env->segs[R_SS].selector & 0xffff) != __USER_DS && !(ka->sa_flags & TARGET_SA_RESTORER) && ka->sa_restorer) { esp = (unsigned long) ka->sa_restorer; } +#endif } + +#ifndef TARGET_X86_64 return (esp - frame_size) & -8ul; +#else + return ((esp - frame_size) & (~15ul)) - 8; +#endif } +#ifndef TARGET_X86_64 /* compare linux/arch/i386/kernel/signal.c:setup_frame() */ static void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUX86State *env) @@ -1029,7 +1150,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, __put_user(val16, (uint16_t *)(frame->retcode+6)); } - /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; @@ -1047,13 +1167,17 @@ static void setup_frame(int sig, struct target_sigaction *ka, give_sigsegv: force_sigsegv(sig); } +#endif -/* compare linux/arch/i386/kernel/signal.c:setup_rt_frame() */ +/* compare linux/arch/x86/kernel/signal.c:setup_rt_frame() */ static void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, target_sigset_t *set, CPUX86State *env) { - abi_ulong frame_addr, addr; + abi_ulong frame_addr; +#ifndef TARGET_X86_64 + abi_ulong addr; +#endif struct rt_sigframe *frame; int i; @@ -1063,12 +1187,17 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; + /* These fields are only in rt_sigframe on 32 bit */ +#ifndef TARGET_X86_64 __put_user(sig, &frame->sig); addr = frame_addr + offsetof(struct rt_sigframe, info); __put_user(addr, &frame->pinfo); addr = frame_addr + offsetof(struct rt_sigframe, uc); __put_user(addr, &frame->puc); - tswap_siginfo(&frame->info, info); +#endif + if (ka->sa_flags & TARGET_SA_SIGINFO) { + tswap_siginfo(&frame->info, info); + } /* Create the ucontext. */ __put_user(0, &frame->uc.tuc_flags); @@ -1087,6 +1216,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ +#ifndef TARGET_X86_64 if (ka->sa_flags & TARGET_SA_RESTORER) { __put_user(ka->sa_restorer, &frame->pretcode); } else { @@ -1099,15 +1229,31 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, val16 = 0x80cd; __put_user(val16, (uint16_t *)(frame->retcode+5)); } +#else + /* XXX: Would be slightly better to return -EFAULT here if test fails + assert(ka->sa_flags & TARGET_SA_RESTORER); */ + __put_user(ka->sa_restorer, &frame->pretcode); +#endif /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; +#ifndef TARGET_X86_64 + env->regs[R_EAX] = sig; + env->regs[R_EDX] = (unsigned long)&frame->info; + env->regs[R_ECX] = (unsigned long)&frame->uc; +#else + env->regs[R_EAX] = 0; + env->regs[R_EDI] = sig; + env->regs[R_ESI] = (unsigned long)&frame->info; + env->regs[R_EDX] = (unsigned long)&frame->uc; +#endif + cpu_x86_load_seg(env, R_DS, __USER_DS); cpu_x86_load_seg(env, R_ES, __USER_DS); - cpu_x86_load_seg(env, R_SS, __USER_DS); cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_SS, __USER_DS); env->eflags &= ~TF_MASK; unlock_user_struct(frame, frame_addr, 1); @@ -1125,6 +1271,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) abi_ulong fpstate_addr; unsigned int tmpflags; +#ifndef TARGET_X86_64 cpu_x86_load_seg(env, R_GS, tswap16(sc->gs)); cpu_x86_load_seg(env, R_FS, tswap16(sc->fs)); cpu_x86_load_seg(env, R_ES, tswap16(sc->es)); @@ -1138,7 +1285,29 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) env->regs[R_EDX] = tswapl(sc->edx); env->regs[R_ECX] = tswapl(sc->ecx); env->regs[R_EAX] = tswapl(sc->eax); + env->eip = tswapl(sc->eip); +#else + env->regs[8] = tswapl(sc->r8); + env->regs[9] = tswapl(sc->r9); + env->regs[10] = tswapl(sc->r10); + env->regs[11] = tswapl(sc->r11); + env->regs[12] = tswapl(sc->r12); + env->regs[13] = tswapl(sc->r13); + env->regs[14] = tswapl(sc->r14); + env->regs[15] = tswapl(sc->r15); + + env->regs[R_EDI] = tswapl(sc->rdi); + env->regs[R_ESI] = tswapl(sc->rsi); + env->regs[R_EBP] = tswapl(sc->rbp); + env->regs[R_EBX] = tswapl(sc->rbx); + env->regs[R_EDX] = tswapl(sc->rdx); + env->regs[R_EAX] = tswapl(sc->rax); + env->regs[R_ECX] = tswapl(sc->rcx); + env->regs[R_ESP] = tswapl(sc->rsp); + + env->eip = tswapl(sc->rip); +#endif cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3); cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3); @@ -1152,7 +1321,11 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) if (!access_ok(VERIFY_READ, fpstate_addr, sizeof(struct target_fpstate))) goto badframe; +#ifndef TARGET_X86_64 cpu_x86_frstor(env, fpstate_addr, 1); +#else + cpu_x86_fxrstor(env, fpstate_addr); +#endif } return err; @@ -1160,6 +1333,8 @@ badframe: return 1; } +/* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */ +#ifndef TARGET_X86_64 long do_sigreturn(CPUX86State *env) { struct sigframe *frame; @@ -1191,6 +1366,7 @@ badframe: force_sig(TARGET_SIGSEGV); return -TARGET_QEMU_ESIGRETURN; } +#endif long do_rt_sigreturn(CPUX86State *env) { @@ -1198,7 +1374,7 @@ long do_rt_sigreturn(CPUX86State *env) struct rt_sigframe *frame; sigset_t set; - frame_addr = env->regs[R_ESP] - 4; + frame_addr = env->regs[R_ESP] - sizeof(abi_ulong); trace_user_do_rt_sigreturn(env, frame_addr); if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; @@ -5500,6 +5676,7 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc, CPUM68KState *env) { target_greg_t *gregs = uc->tuc_mcontext.gregs; + uint32_t sr = cpu_m68k_get_ccr(env); __put_user(TARGET_MCONTEXT_VERSION, &uc->tuc_mcontext.version); __put_user(env->dregs[0], &gregs[0]); @@ -5519,7 +5696,7 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc, __put_user(env->aregs[6], &gregs[14]); __put_user(env->aregs[7], &gregs[15]); __put_user(env->pc, &gregs[16]); - __put_user(env->sr, &gregs[17]); + __put_user(sr, &gregs[17]); return 0; } @@ -5553,7 +5730,7 @@ static inline int target_rt_restore_ucontext(CPUM68KState *env, __get_user(env->aregs[7], &gregs[15]); __get_user(env->pc, &gregs[16]); __get_user(temp, &gregs[17]); - env->sr = (env->sr & 0xff00) | (temp & 0xff); + cpu_m68k_set_ccr(env, temp); return 0; @@ -5674,14 +5851,13 @@ long do_rt_sigreturn(CPUM68KState *env) { struct target_rt_sigframe *frame; abi_ulong frame_addr = env->aregs[7] - 4; - target_sigset_t target_set; sigset_t set; trace_user_do_rt_sigreturn(env, frame_addr); if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; - target_to_host_sigset_internal(&set, &target_set); + target_to_host_sigset(&set, &frame->uc.tuc_sigmask); set_sigmask(&set); /* restore registers */ @@ -6418,7 +6594,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig, #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \ || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) \ || defined(TARGET_PPC64) || defined(TARGET_HPPA) \ - || defined(TARGET_NIOS2) + || defined(TARGET_NIOS2) || defined(TARGET_X86_64) /* These targets do not have traditional signals. */ setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); #else diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f569f827fc..cec8428589 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -57,6 +57,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include <netinet/tcp.h> #include <linux/wireless.h> #include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/errqueue.h> #include "qemu-common.h" #ifdef CONFIG_TIMERFD #include <sys/timerfd.h> @@ -1634,6 +1636,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr, struct sockaddr_ll *target_ll = (struct sockaddr_ll *)target_saddr; target_ll->sll_ifindex = tswap32(target_ll->sll_ifindex); target_ll->sll_hatype = tswap16(target_ll->sll_hatype); + } else if (addr->sa_family == AF_INET6 && + len >= sizeof(struct target_sockaddr_in6)) { + struct target_sockaddr_in6 *target_in6 = + (struct target_sockaddr_in6 *)target_saddr; + target_in6->sin6_scope_id = tswap16(target_in6->sin6_scope_id); } unlock_user(target_saddr, target_addr, len); @@ -1839,6 +1846,78 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, } break; + case SOL_IP: + switch (cmsg->cmsg_type) { + case IP_TTL: + { + uint32_t *v = (uint32_t *)data; + uint32_t *t_int = (uint32_t *)target_data; + + __put_user(*v, t_int); + break; + } + case IP_RECVERR: + { + struct errhdr_t { + struct sock_extended_err ee; + struct sockaddr_in offender; + }; + struct errhdr_t *errh = (struct errhdr_t *)data; + struct errhdr_t *target_errh = + (struct errhdr_t *)target_data; + + __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno); + __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin); + __put_user(errh->ee.ee_type, &target_errh->ee.ee_type); + __put_user(errh->ee.ee_code, &target_errh->ee.ee_code); + __put_user(errh->ee.ee_pad, &target_errh->ee.ee_pad); + __put_user(errh->ee.ee_info, &target_errh->ee.ee_info); + __put_user(errh->ee.ee_data, &target_errh->ee.ee_data); + host_to_target_sockaddr((unsigned long) &target_errh->offender, + (void *) &errh->offender, sizeof(errh->offender)); + break; + } + default: + goto unimplemented; + } + break; + + case SOL_IPV6: + switch (cmsg->cmsg_type) { + case IPV6_HOPLIMIT: + { + uint32_t *v = (uint32_t *)data; + uint32_t *t_int = (uint32_t *)target_data; + + __put_user(*v, t_int); + break; + } + case IPV6_RECVERR: + { + struct errhdr6_t { + struct sock_extended_err ee; + struct sockaddr_in6 offender; + }; + struct errhdr6_t *errh = (struct errhdr6_t *)data; + struct errhdr6_t *target_errh = + (struct errhdr6_t *)target_data; + + __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno); + __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin); + __put_user(errh->ee.ee_type, &target_errh->ee.ee_type); + __put_user(errh->ee.ee_code, &target_errh->ee.ee_code); + __put_user(errh->ee.ee_pad, &target_errh->ee.ee_pad); + __put_user(errh->ee.ee_info, &target_errh->ee.ee_info); + __put_user(errh->ee.ee_data, &target_errh->ee.ee_data); + host_to_target_sockaddr((unsigned long) &target_errh->offender, + (void *) &errh->offender, sizeof(errh->offender)); + break; + } + default: + goto unimplemented; + } + break; + default: unimplemented: gemu_log("Unsupported ancillary data: %d/%d\n", @@ -2768,6 +2847,7 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case IP_PKTINFO: case IP_MTU_DISCOVER: case IP_RECVERR: + case IP_RECVTTL: case IP_RECVTOS: #ifdef IP_FREEBIND case IP_FREEBIND: @@ -2817,6 +2897,11 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case IPV6_MTU: case IPV6_V6ONLY: case IPV6_RECVPKTINFO: + case IPV6_UNICAST_HOPS: + case IPV6_RECVERR: + case IPV6_RECVHOPLIMIT: + case IPV6_2292HOPLIMIT: + case IPV6_CHECKSUM: val = 0; if (optlen < sizeof(uint32_t)) { return -TARGET_EINVAL; @@ -2827,6 +2912,50 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, ret = get_errno(setsockopt(sockfd, level, optname, &val, sizeof(val))); break; + case IPV6_PKTINFO: + { + struct in6_pktinfo pki; + + if (optlen < sizeof(pki)) { + return -TARGET_EINVAL; + } + + if (copy_from_user(&pki, optval_addr, sizeof(pki))) { + return -TARGET_EFAULT; + } + + pki.ipi6_ifindex = tswap32(pki.ipi6_ifindex); + + ret = get_errno(setsockopt(sockfd, level, optname, + &pki, sizeof(pki))); + break; + } + default: + goto unimplemented; + } + break; + case SOL_ICMPV6: + switch (optname) { + case ICMPV6_FILTER: + { + struct icmp6_filter icmp6f; + + if (optlen > sizeof(icmp6f)) { + optlen = sizeof(icmp6f); + } + + if (copy_from_user(&icmp6f, optval_addr, optlen)) { + return -TARGET_EFAULT; + } + + for (val = 0; val < 8; val++) { + icmp6f.data[val] = tswap32(icmp6f.data[val]); + } + + ret = get_errno(setsockopt(sockfd, level, optname, + &icmp6f, optlen)); + break; + } default: goto unimplemented; } @@ -2834,7 +2963,8 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case SOL_RAW: switch (optname) { case ICMP_FILTER: - /* struct icmp_filter takes an u32 value */ + case IPV6_CHECKSUM: + /* those take an u32 value */ if (optlen < sizeof(uint32_t)) { return -TARGET_EINVAL; } @@ -7680,7 +7810,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_fork case TARGET_NR_fork: - ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, 0, 0, 0)); + ret = get_errno(do_fork(cpu_env, TARGET_SIGCHLD, 0, 0, 0, 0)); break; #endif #ifdef TARGET_NR_waitpid @@ -10490,7 +10620,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_vfork case TARGET_NR_vfork: - ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, + ret = get_errno(do_fork(cpu_env, + CLONE_VFORK | CLONE_VM | TARGET_SIGCHLD, 0, 0, 0, 0)); break; #endif @@ -11063,11 +11194,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_mincore: { void *a; + ret = -TARGET_ENOMEM; + a = lock_user(VERIFY_READ, arg1, arg2, 0); + if (!a) { + goto fail; + } ret = -TARGET_EFAULT; - if (!(a = lock_user(VERIFY_READ, arg1,arg2, 0))) - goto efault; - if (!(p = lock_user_string(arg3))) + p = lock_user_string(arg3); + if (!p) { goto mincore_fail; + } ret = get_errno(mincore(a, arg2, p)); unlock_user(p, arg3, ret); mincore_fail: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 72ca5b11d6..40c5027e93 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -164,6 +164,14 @@ struct target_sockaddr_in { sizeof(struct target_in_addr)]; }; +struct target_sockaddr_in6 { + uint16_t sin6_family; + uint16_t sin6_port; /* big endian */ + uint32_t sin6_flowinfo; /* big endian */ + struct in6_addr sin6_addr; /* IPv6 address, big endian */ + uint32_t sin6_scope_id; +}; + struct target_sock_filter { abi_ushort code; uint8_t jt; diff --git a/migration/block.c b/migration/block.c index ebc10e628d..1941bc2402 100644 --- a/migration/block.c +++ b/migration/block.c @@ -379,7 +379,7 @@ static void unset_dirty_tracking(void) } } -static void init_blk_migration(QEMUFile *f) +static int init_blk_migration(QEMUFile *f) { BlockDriverState *bs; BlkMigDevState *bmds; @@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f) BlkMigDevState *bmds; BlockDriverState *bs; } *bmds_bs; + Error *local_err = NULL; + int ret; block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f) sectors = bdrv_nb_sectors(bs); if (sectors <= 0) { + ret = sectors; goto out; } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(); + bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; @@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs = bmds_bs[i].bs; if (bmds) { - blk_insert_bs(bmds->blk, bs); + ret = blk_insert_bs(bmds->blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto out; + } alloc_aio_bitmap(bmds); error_setg(&bmds->blocker, "block device is in use by migration"); @@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f) } } + ret = 0; out: g_free(bmds_bs); + return ret; } /* Called with no lock taken. */ @@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque) block_mig_state.submitted, block_mig_state.transferred); qemu_mutex_lock_iothread(); - init_blk_migration(f); + ret = init_blk_migration(f); + if (ret < 0) { + qemu_mutex_unlock_iothread(); + return ret; + } /* start track dirty blocks */ ret = set_dirty_tracking(); diff --git a/migration/colo.c b/migration/colo.c index 712308ed5e..c19eb3f073 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -19,6 +19,8 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "migration/failover.h" +#include "replication.h" +#include "qmp-commands.h" static bool vmstate_loading; @@ -147,6 +149,53 @@ void colo_do_failover(MigrationState *s) } } +void qmp_xen_set_replication(bool enable, bool primary, + bool has_failover, bool failover, + Error **errp) +{ + ReplicationMode mode = primary ? + REPLICATION_MODE_PRIMARY : + REPLICATION_MODE_SECONDARY; + + if (has_failover && enable) { + error_setg(errp, "Parameter 'failover' is only for" + " stopping replication"); + return; + } + + if (enable) { + replication_start_all(mode, errp); + } else { + if (!has_failover) { + failover = NULL; + } + replication_stop_all(failover, failover ? NULL : errp); + } +} + +ReplicationStatus *qmp_query_xen_replication_status(Error **errp) +{ + Error *err = NULL; + ReplicationStatus *s = g_new0(ReplicationStatus, 1); + + replication_get_error_all(&err); + if (err) { + s->error = true; + s->has_desc = true; + s->desc = g_strdup(error_get_pretty(err)); + } else { + s->error = false; + } + + error_free(err); + return s; +} + +void qmp_xen_colo_do_checkpoint(Error **errp) +{ + replication_do_checkpoint_all(errp); +} + static void colo_send_message(QEMUFile *f, COLOMessage msg, Error **errp) { diff --git a/nbd/server.c b/nbd/server.c index ac92fa0727..924a1fe2db 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, { BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + uint64_t perm; + int ret; - blk = blk_new(); - blk_insert_bs(blk, bs); + /* Don't allow resize while the NBD server is running, otherwise we don't + * care what happens with the node. */ + perm = BLK_PERM_CONSISTENT_READ; + if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { + perm |= BLK_PERM_WRITE; + } + blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } blk_set_enable_write_cache(blk, !writethrough); exp->refcount = 1; diff --git a/net/vhost-user.c b/net/vhost-user.c index 77b8110f8c..e7e63408a1 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, qemu_chr_fe_disconnect(&s->chr); - return FALSE; + return TRUE; +} + +static void net_vhost_user_event(void *opaque, int event); + +static void chr_closed_bh(void *opaque) +{ + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_DRIVER_NIC, + MAX_QUEUE_NUM); + assert(queues < MAX_QUEUE_NUM); + + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, + opaque, NULL, true); + + if (err) { + error_report_err(err); + } } static void net_vhost_user_event(void *opaque, int event) @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event) trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, - net_vhost_user_watch, s); if (vhost_user_start(queues, ncs, &s->chr) < 0) { qemu_chr_fe_disconnect(&s->chr); return; } + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, + net_vhost_user_watch, s); qmp_set_link(name, true, &err); s->started = true; break; case CHR_EVENT_CLOSED: - qmp_set_link(name, false, &err); - vhost_user_stop(queues, ncs); - g_source_remove(s->watch); - s->watch = 0; + /* a close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear to idle. + * FIXME: better handle failure in vhost code, remove bh + */ + if (s->watch) { + AioContext *ctx = qemu_get_current_aio_context(); + + g_source_remove(s->watch); + s->watch = 0; + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, + NULL, NULL, false); + + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); + } break; } diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex cf05bf0be2..2a4adfa654 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 611102e3ef..b21c877b53 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -724,11 +724,17 @@ static void zipl_load_vscsi(void) void zipl_load(void) { - if (virtio_get_device()->is_cdrom) { + VDev *vdev = virtio_get_device(); + + if (vdev->is_cdrom) { ipl_iso_el_torito(); panic("\n! Cannot IPL this ISO image !\n"); } + if (virtio_get_device_type() == VIRTIO_ID_NET) { + jump_to_IPL_code(vdev->netboot_start_addr); + } + ipl_scsi(); switch (virtio_get_device_type()) { diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h index 86abc56a90..890aed9ece 100644 --- a/pc-bios/s390-ccw/iplb.h +++ b/pc-bios/s390-ccw/iplb.h @@ -13,7 +13,8 @@ #define IPLB_H struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 345b848752..0946766d86 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no) if (!virtio_is_supported(blk_schid)) { continue; } + /* Skip net devices since no IPLB is created and therefore no + * no network bootloader has been loaded + */ + if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) { + continue; + } if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) { return true; } @@ -67,6 +73,7 @@ static void virtio_setup(void) int ssid; bool found = false; uint16_t dev_no; + VDev *vdev = virtio_get_device(); /* * We unconditionally enable mss support. In every sane configuration, @@ -85,9 +92,6 @@ static void virtio_setup(void) found = find_dev(&schib, dev_no); break; case S390_IPL_TYPE_QEMU_SCSI: - { - VDev *vdev = virtio_get_device(); - vdev->scsi_device_selected = true; vdev->selected_scsi_device.channel = iplb.scsi.channel; vdev->selected_scsi_device.target = iplb.scsi.target; @@ -95,7 +99,6 @@ static void virtio_setup(void) blk_schid.ssid = iplb.scsi.ssid & 0x3; found = find_dev(&schib, iplb.scsi.devno); break; - } default: panic("List-directed IPL not supported yet!\n"); } @@ -111,9 +114,14 @@ static void virtio_setup(void) IPL_assert(found, "No virtio device found"); - virtio_setup_device(blk_schid); + if (virtio_get_device_type() == VIRTIO_ID_NET) { + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = iplb.ccw.netboot_start_addr; + } else { + virtio_setup_device(blk_schid); - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } } int main(void) diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index b333734955..6ee93d56db 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid) switch (vdev.senseid.cu_model) { case VIRTIO_ID_BLOCK: case VIRTIO_ID_SCSI: + case VIRTIO_ID_NET: return true; } } diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index eb35ea5faf..3388a423e5 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -276,6 +276,7 @@ struct VDev { uint8_t scsi_dev_heads; bool scsi_device_selected; ScsiDevice selected_scsi_device; + uint64_t netboot_start_addr; }; typedef struct VDev VDev; diff --git a/qapi-schema.json b/qapi-schema.json index 150ee98e9e..d6186d4c9e 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -5990,6 +5990,79 @@ { 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} } ## +# @xen-set-replication: +# +# Enable or disable replication. +# +# @enable: true to enable, false to disable. +# +# @primary: true for primary or false for secondary. +# +# @failover: #optional true to do failover, false to stop. but cannot be +# specified if 'enable' is true. default value is false. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-set-replication", +# "arguments": {"enable": true, "primary": false} } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-set-replication', + 'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' } } + +## +# @ReplicationStatus: +# +# The result format for 'query-xen-replication-status'. +# +# @error: true if an error happened, false if replication is normal. +# +# @desc: #optional the human readable error description string, when +# @error is 'true'. +# +# Since: 2.9 +## +{ 'struct': 'ReplicationStatus', + 'data': { 'error': 'bool', '*desc': 'str' } } + +## +# @query-xen-replication-status: +# +# Query replication status while the vm is running. +# +# Returns: A @ReplicationResult object showing the status. +# +# Example: +# +# -> { "execute": "query-xen-replication-status" } +# <- { "return": { "error": false } } +# +# Since: 2.9 +## +{ 'command': 'query-xen-replication-status', + 'returns': 'ReplicationStatus' } + +## +# @xen-colo-do-checkpoint: +# +# Xen uses this command to notify replication to trigger a checkpoint. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-colo-do-checkpoint" } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-colo-do-checkpoint' } + +## # @GICCapability: # # The struct describes capability for a specific GIC (Generic diff --git a/qapi/block-core.json b/qapi/block-core.json index 5f82d35fab..5cc992fb8f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1304,6 +1304,11 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the commit job inserts into the graph +# above @top. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: Nothing on success # If commit or stream is already active on this device, DeviceInUse # If @device does not exist, DeviceNotFound @@ -1323,7 +1328,8 @@ ## { 'command': 'block-commit', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int' } } + '*backing-file': 'str', '*speed': 'int', + '*filter-node-name': 'str' } } ## # @drive-backup: @@ -1671,6 +1677,11 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the mirror job inserts into the graph +# above @device. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: nothing on success. # # Since: 2.6 @@ -1690,7 +1701,8 @@ 'sync': 'MirrorSyncMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } + '*on-target-error': 'BlockdevOnError', + '*filter-node-name': 'str' } } ## # @block_set_io_throttle: @@ -2625,29 +2637,29 @@ ## # @BlockdevOptionsIscsi: # -# @transport The iscsi transport type +# @transport: The iscsi transport type # -# @portal The address of the iscsi portal +# @portal: The address of the iscsi portal # -# @target The target iqn name +# @target: The target iqn name # -# @lun #optional LUN to connect to. Defaults to 0. +# @lun: #optional LUN to connect to. Defaults to 0. # -# @user #optional User name to log in with. If omitted, no CHAP +# @user: #optional User name to log in with. If omitted, no CHAP # authentication is performed. # -# @password-secret #optional The ID of a QCryptoSecret object providing +# @password-secret: #optional The ID of a QCryptoSecret object providing # the password for the login. This option is required if # @user is specified. # -# @initiator-name #optional The iqn name we want to identify to the target +# @initiator-name: #optional The iqn name we want to identify to the target # as. If this option is not specified, an initiator name is # generated automatically. # -# @header-digest #optional The desired header digest. Defaults to +# @header-digest: #optional The desired header digest. Defaults to # none-crc32c. # -# @timeout #optional Timeout in seconds after which a request will +# @timeout: #optional Timeout in seconds after which a request will # timeout. 0 means no timeout and is the default. # # Driver specific block device options for iscsi diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index f054599a91..9c9702cc62 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -40,9 +40,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, diff --git a/qemu-img.c b/qemu-img.c index df3aefd35a..98b836b030 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void) " kinds of errors, with a higher risk of choosing the wrong fix or\n" " hiding corruption that has already occurred.\n" "\n" + "Parameters to convert subcommand:\n" + " '-m' specifies how many coroutines work in parallel during the convert\n" + " process (defaults to 8)\n" + " '-W' allow to write to the target out of order rather than sequential\n" + "\n" "Parameters to snapshot subcommand:\n" " 'snapshot' is the name of the snapshot to create, apply or delete\n" " '-a' applies a snapshot (revert disk to saved state)\n" @@ -809,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + /* FIXME In error cases, the job simply goes away and we access a dangling + * pointer below. */ aio_context_acquire(aio_context); do { aio_poll(aio_context, true); @@ -830,6 +837,7 @@ static int img_commit(int argc, char **argv) const char *filename, *fmt, *cache, *base; BlockBackend *blk; BlockDriverState *bs, *base_bs; + BlockJob *job; bool progress = false, quiet = false, drop = false; bool writethrough; Error *local_err = NULL; @@ -950,8 +958,8 @@ static int img_commit(int argc, char **argv) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, - BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi, - &local_err, false); + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, &local_err, false); aio_context_release(aio_context); if (local_err) { goto done; @@ -965,7 +973,8 @@ static int img_commit(int argc, char **argv) bdrv_ref(bs); } - run_block_job(bs->job, &local_err); + job = block_job_get("commit"); + run_block_job(job, &local_err); if (local_err) { goto unref_backing; } @@ -1462,48 +1471,61 @@ enum ImgConvertBlockStatus { BLK_BACKING_FILE, }; +#define MAX_COROUTINES 16 + typedef struct ImgConvertState { BlockBackend **src; int64_t *src_sectors; - int src_cur, src_num; - int64_t src_cur_offset; + int src_num; int64_t total_sectors; int64_t allocated_sectors; + int64_t allocated_done; + int64_t sector_num; + int64_t wr_offs; enum ImgConvertBlockStatus status; int64_t sector_next_status; BlockBackend *target; bool has_zero_init; bool compressed; bool target_has_backing; + bool wr_in_order; int min_sparse; size_t cluster_sectors; size_t buf_sectors; + int num_coroutines; + int running_coroutines; + Coroutine *co[MAX_COROUTINES]; + int64_t wait_sector_num[MAX_COROUTINES]; + CoMutex lock; + int ret; } ImgConvertState; -static void convert_select_part(ImgConvertState *s, int64_t sector_num) +static void convert_select_part(ImgConvertState *s, int64_t sector_num, + int *src_cur, int64_t *src_cur_offset) { - assert(sector_num >= s->src_cur_offset); - while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) { - s->src_cur_offset += s->src_sectors[s->src_cur]; - s->src_cur++; - assert(s->src_cur < s->src_num); + *src_cur = 0; + *src_cur_offset = 0; + while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) { + *src_cur_offset += s->src_sectors[*src_cur]; + (*src_cur)++; + assert(*src_cur < s->src_num); } } static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) { - int64_t ret; - int n; + int64_t ret, src_cur_offset; + int n, src_cur; - convert_select_part(s, sector_num); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); assert(s->total_sectors > sector_num); n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); if (s->sector_next_status <= sector_num) { BlockDriverState *file; - ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]), - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status(blk_bs(s->src[src_cur]), + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1519,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) /* Check block status of the backing file chain to avoid * needlessly reading zeroes and limiting the iteration to the * buffer size */ - ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL, - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1558,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) return n; } -static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, - uint8_t *buf) +static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf) { - int n; - int ret; + int n, ret; + QEMUIOVector qiov; + struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { BlockBackend *blk; - int64_t bs_sectors; + int src_cur; + int64_t bs_sectors, src_cur_offset; /* In the case of compression with multiple source files, we can get a * nb_sectors that spreads into the next part. So we must be able to * read across multiple BDSes for one convert_read() call. */ - convert_select_part(s, sector_num); - blk = s->src[s->src_cur]; - bs_sectors = s->src_sectors[s->src_cur]; - - n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset)); - ret = blk_pread(blk, - (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); + blk = s->src[src_cur]; + bs_sectors = s->src_sectors[src_cur]; + + n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_preadv( + blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1592,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, - const uint8_t *buf) + +static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf, + enum ImgConvertBlockStatus status) { int ret; + QEMUIOVector qiov; + struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; - - switch (s->status) { + switch (status) { case BLK_BACKING_FILE: /* If we have a backing file, leave clusters unallocated that are * unallocated in the source image, so that the backing file is @@ -1621,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, break; } - ret = blk_pwrite_compressed(s->target, - sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, + BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { return ret; } @@ -1636,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (!s->min_sparse || is_allocated_sectors_min(buf, n, &n, s->min_sparse)) { - ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS, 0); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1649,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (s->has_zero_init) { break; } - ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, 0); + ret = blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } @@ -1665,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_do_copy(ImgConvertState *s) +static void coroutine_fn convert_co_do_copy(void *opaque) { + ImgConvertState *s = opaque; uint8_t *buf = NULL; - int64_t sector_num, allocated_done; - int ret; - int n; + int ret, i; + int index = -1; + + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] == qemu_coroutine_self()) { + index = i; + break; + } + } + assert(index >= 0); + + s->running_coroutines++; + buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); + + while (1) { + int n; + int64_t sector_num; + enum ImgConvertBlockStatus status; + + qemu_co_mutex_lock(&s->lock); + if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { + qemu_co_mutex_unlock(&s->lock); + goto out; + } + n = convert_iteration_sectors(s, s->sector_num); + if (n < 0) { + qemu_co_mutex_unlock(&s->lock); + s->ret = n; + goto out; + } + /* save current sector and allocation status to local variables */ + sector_num = s->sector_num; + status = s->status; + if (!s->min_sparse && s->status == BLK_ZERO) { + n = MIN(n, s->buf_sectors); + } + /* increment global sector counter so that other coroutines can + * already continue reading beyond this request */ + s->sector_num += n; + qemu_co_mutex_unlock(&s->lock); + + if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) { + s->allocated_done += n; + qemu_progress_print(100.0 * s->allocated_done / + s->allocated_sectors, 0); + } + + if (status == BLK_DATA) { + ret = convert_co_read(s, sector_num, n, buf); + if (ret < 0) { + error_report("error while reading sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + } else if (!s->min_sparse && status == BLK_ZERO) { + status = BLK_DATA; + memset(buf, 0x00, n * BDRV_SECTOR_SIZE); + } + + if (s->wr_in_order) { + /* keep writes in order */ + while (s->wr_offs != sector_num) { + if (s->ret != -EINPROGRESS) { + goto out; + } + s->wait_sector_num[index] = sector_num; + qemu_coroutine_yield(); + } + s->wait_sector_num[index] = -1; + } + + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + + if (s->wr_in_order) { + /* reenter the coroutine that might have waited + * for this write to complete */ + s->wr_offs = sector_num + n; + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) { + /* + * A -> B -> A cannot occur because A has + * s->wait_sector_num[i] == -1 during A -> B. Therefore + * B will never enter A during this time window. + */ + qemu_coroutine_enter(s->co[i]); + break; + } + } + } + } + +out: + qemu_vfree(buf); + s->co[index] = NULL; + s->running_coroutines--; + if (!s->running_coroutines && s->ret == -EINPROGRESS) { + /* the convert job finished successfully */ + s->ret = 0; + } +} + +static int convert_do_copy(ImgConvertState *s) +{ + int ret, i, n; + int64_t sector_num = 0; /* Check whether we have zero initialisation or can get it efficiently */ s->has_zero_init = s->min_sparse && !s->target_has_backing @@ -1691,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s) if (s->compressed) { if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) { error_report("invalid cluster size"); - ret = -EINVAL; - goto fail; + return -EINVAL; } s->buf_sectors = s->cluster_sectors; } - buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); - /* Calculate allocated sectors for progress */ - s->allocated_sectors = 0; - sector_num = 0; while (sector_num < s->total_sectors) { n = convert_iteration_sectors(s, sector_num); if (n < 0) { - ret = n; - goto fail; + return n; } if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) { @@ -1715,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s) } /* Do the copy */ - s->src_cur = 0; - s->src_cur_offset = 0; s->sector_next_status = 0; + s->ret = -EINPROGRESS; - sector_num = 0; - allocated_done = 0; - - while (sector_num < s->total_sectors) { - n = convert_iteration_sectors(s, sector_num); - if (n < 0) { - ret = n; - goto fail; - } - if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) - { - allocated_done += n; - qemu_progress_print(100.0 * allocated_done / s->allocated_sectors, - 0); - } - - if (s->status == BLK_DATA) { - ret = convert_read(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while reading sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } - } else if (!s->min_sparse && s->status == BLK_ZERO) { - n = MIN(n, s->buf_sectors); - memset(buf, 0, n * BDRV_SECTOR_SIZE); - s->status = BLK_DATA; - } - - ret = convert_write(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } + qemu_co_mutex_init(&s->lock); + for (i = 0; i < s->num_coroutines; i++) { + s->co[i] = qemu_coroutine_create(convert_co_do_copy, s); + s->wait_sector_num[i] = -1; + qemu_coroutine_enter(s->co[i]); + } - sector_num += n; + while (s->ret == -EINPROGRESS) { + main_loop_wait(false); } - if (s->compressed) { + if (s->compressed && !s->ret) { /* signal EOF to align */ ret = blk_pwrite_compressed(s->target, 0, NULL, 0); if (ret < 0) { - goto fail; + return ret; } } - ret = 0; -fail: - qemu_vfree(buf); - return ret; + return s->ret; } static int img_convert(int argc, char **argv) @@ -1797,6 +1909,8 @@ static int img_convert(int argc, char **argv) QemuOpts *sn_opts = NULL; ImgConvertState state; bool image_opts = false; + bool wr_in_order = true; + long num_coroutines = 8; fmt = NULL; out_fmt = "raw"; @@ -1812,7 +1926,7 @@ static int img_convert(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn", + c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W", long_options, NULL); if (c == -1) { break; @@ -1904,6 +2018,18 @@ static int img_convert(int argc, char **argv) case 'n': skip_create = 1; break; + case 'm': + if (qemu_strtol(optarg, NULL, 0, &num_coroutines) || + num_coroutines < 1 || num_coroutines > MAX_COROUTINES) { + error_report("Invalid number of coroutines. Allowed number of" + " coroutines is between 1 and %d", MAX_COROUTINES); + ret = -1; + goto fail_getopt; + } + break; + case 'W': + wr_in_order = false; + break; case OPTION_OBJECT: opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); @@ -1923,6 +2049,12 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (!wr_in_order && compress) { + error_report("Out of order write and compress are mutually exclusive"); + ret = -1; + goto fail_getopt; + } + /* Initialize before goto out */ if (quiet) { progress = 0; @@ -2163,6 +2295,8 @@ static int img_convert(int argc, char **argv) .min_sparse = min_sparse, .cluster_sectors = cluster_sectors, .buf_sectors = bufsectors, + .wr_in_order = wr_in_order, + .num_coroutines = num_coroutines, }; ret = convert_do_copy(&state); @@ -3289,7 +3423,7 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); if (!blk) { ret = -1; goto out; diff --git a/qemu-img.texi b/qemu-img.texi index 174aae38b7..c81db3e81c 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -137,6 +137,12 @@ Parameters to convert subcommand: @item -n Skip the creation of the target volume +@item -m +Number of parallel coroutines for the convert process +@item -W +Allow out-of-order writes to the destination. This option improves performance, +but is only recommended for preallocated devices like host devices or other +raw block devices. @end table Parameters to dd subcommand: @@ -296,7 +302,7 @@ Error on reading data @end table -@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated) to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot be supplied through qemu-img. +Out of order writes can be enabled with @code{-W} to improve performance. +This is only recommended for preallocated devices like host devices or other +raw block devices. Out of order write does not work in combination with +creating compressed images. + +@var{num_coroutines} specifies how many coroutines work in parallel during +the convert process (defaults to 8). + @item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} Dd copies from @var{input} file to @var{output} file converting it from diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 7ac1576d4c..2c48f9ce1a 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, } return 0; } + + /* Request additional permissions if necessary for this command. The caller + * is responsible for restoring the original permissions afterwards if this + * is what it wants. */ + if (ct->perm && blk_is_available(blk)) { + uint64_t orig_perm, orig_shared_perm; + blk_get_perm(blk, &orig_perm, &orig_shared_perm); + + if (ct->perm & ~orig_perm) { + uint64_t new_perm; + Error *local_err = NULL; + int ret; + + new_perm = orig_perm | ct->perm; + + ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err); + if (ret < 0) { + error_report_err(local_err); + return 0; + } + } + } + optind = 0; return ct->cfunc(blk, argc, argv); } @@ -918,6 +941,7 @@ static const cmdinfo_t write_cmd = { .name = "write", .altname = "w", .cfunc = write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-bcCfquz] [-P pattern] off len", @@ -1093,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t writev_cmd = { .name = "writev", .cfunc = writev_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfq] [-P pattern] off len [len..]", @@ -1392,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_write_cmd = { .name = "aio_write", .cfunc = aio_write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfiquz] [-P pattern] off len [len..]", @@ -1556,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = { .name = "truncate", .altname = "t", .cfunc = truncate_f, + .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE, .argmin = 1, .argmax = 1, .args = "off", @@ -1653,6 +1680,7 @@ static const cmdinfo_t discard_cmd = { .name = "discard", .altname = "d", .cfunc = discard_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cq] off len", diff --git a/qemu-options.hx b/qemu-options.hx index bf458f83c3..c85f77d1d8 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -744,7 +744,12 @@ ETEXI DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" - " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n", + " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n" + " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n" + " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n" + " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n" + " [[,throttling.iops-total-max=im]|[[,throttling.iops-read-max=irm][,throttling.iops-write-max=iwm]]]\n" + " [[,throttling.iops-size=is]]\n", QEMU_ARCH_ALL) STEXI @@ -2146,7 +2151,7 @@ Example: @example qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -numa node,memdev=mem \ - -chardev socket,path=/path/to/socket \ + -chardev socket,id=chr0,path=/path/to/socket \ -netdev type=vhost-user,id=net0,chardev=chr0 \ -device virtio-net-pci,netdev=net0 @end example diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 72cf1fbf0a..6a370a8669 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -75,7 +75,13 @@ for arch in $ARCHLIST; do continue fi - make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install + if [ "$arch" = x86 ]; then + arch_var=SRCARCH + else + arch_var=ARCH + fi + + make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" @@ -92,6 +98,11 @@ for arch in $ARCHLIST; do cp_portable "$tmpdir/include/asm/kvm_virtio.h" "$output/include/standard-headers/asm-s390/" cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" fi + if [ $arch = arm ]; then + cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi if [ $arch = x86 ]; then cp_portable "$tmpdir/include/asm/hyperv.h" "$output/include/standard-headers/asm-x86/" cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index b08d1601d1..691ac00c0b 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -28,6 +28,9 @@ #define CPUArchState struct CPUAlphaState +/* Alpha processors have a weak memory model */ +#define TCG_GUEST_DEFAULT_MO (0) + #include "exec/cpu-defs.h" #include "fpu/softfloat.h" diff --git a/target/arm/cpu.c b/target/arm/cpu.c index f7157dc0e5..04b062cb7e 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -338,13 +338,6 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) CPUARMState *env = &cpu->env; bool ret = false; - - if (interrupt_request & CPU_INTERRUPT_FIQ - && !(env->daif & PSTATE_F)) { - cs->exception_index = EXCP_FIQ; - cc->do_interrupt(cs); - ret = true; - } /* ARMv7-M interrupt return works by loading a magic value * into the PC. On real hardware the load causes the * return to occur. The qemu implementation performs the @@ -354,9 +347,16 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) * the stack if an interrupt occurred at the wrong time. * We avoid this by disabling interrupts when * pc contains a magic address. + * + * ARMv7-M interrupt masking works differently than -A or -R. + * There is no FIQ/IRQ distinction. Instead of I and F bits + * masking FIQ and IRQ interrupts, an exception is taken only + * if it is higher priority than the current execution priority + * (which depends on state like BASEPRI, FAULTMASK and the + * currently active exception). */ if (interrupt_request & CPU_INTERRUPT_HARD - && !(env->daif & PSTATE_I) + && (armv7m_nvic_can_take_pending_exception(env->nvic)) && (env->regs[15] < 0xfffffff0)) { cs->exception_index = EXCP_IRQ; cc->do_interrupt(cs); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 38a8e00908..25ceaabb5d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -57,6 +57,7 @@ #define EXCP_VFIQ 15 #define EXCP_SEMIHOST 16 /* semihosting call */ #define EXCP_NOCP 17 /* v7M NOCP UsageFault */ +#define EXCP_INVSTATE 18 /* v7M INVSTATE UsageFault */ #define ARMV7M_EXCP_RESET 1 #define ARMV7M_EXCP_NMI 2 @@ -520,6 +521,8 @@ typedef struct CPUARMState { void *nvic; const struct arm_boot_info *boot_info; + /* Store GICv3CPUState to access from this struct */ + void *gicv3state; } CPUARMState; /** @@ -1356,9 +1359,27 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, uint32_t cur_el, bool secure); /* Interface between CPU and Interrupt controller. */ +#ifndef CONFIG_USER_ONLY +bool armv7m_nvic_can_take_pending_exception(void *opaque); +#else +static inline bool armv7m_nvic_can_take_pending_exception(void *opaque) +{ + return true; +} +#endif void armv7m_nvic_set_pending(void *opaque, int irq); -int armv7m_nvic_acknowledge_irq(void *opaque); -void armv7m_nvic_complete_irq(void *opaque, int irq); +void armv7m_nvic_acknowledge_irq(void *opaque); +/** + * armv7m_nvic_complete_irq: complete specified interrupt or exception + * @opaque: the NVIC + * @irq: the exception number to complete + * + * Returns: -1 if the irq was not active + * 1 if completing this irq brought us back to base (no active irqs) + * 0 if there is still an irq active after this one was completed + * (Ignoring -1, this is the same as the RETTOBASE value before completion.) + */ +int armv7m_nvic_complete_irq(void *opaque, int irq); /* Interface for defining coprocessor registers. * Registers are defined in tables of arm_cp_reginfo structs diff --git a/target/arm/helper.c b/target/arm/helper.c index bcedb4a808..3f4211b572 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6002,22 +6002,165 @@ static void switch_v7m_sp(CPUARMState *env, bool new_spsel) } } -static void do_v7m_exception_exit(CPUARMState *env) +static uint32_t arm_v7m_load_vector(ARMCPU *cpu) { + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + MemTxResult result; + hwaddr vec = env->v7m.vecbase + env->v7m.exception * 4; + uint32_t addr; + + addr = address_space_ldl(cs->as, vec, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + /* Architecturally this should cause a HardFault setting HSFR.VECTTBL, + * which would then be immediately followed by our failing to load + * the entry vector for that HardFault, which is a Lockup case. + * Since we don't model Lockup, we just report this guest error + * via cpu_abort(). + */ + cpu_abort(cs, "Failed to read from exception vector table " + "entry %08x\n", (unsigned)vec); + } + return addr; +} + +static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr) +{ + /* Do the "take the exception" parts of exception entry, + * but not the pushing of state to the stack. This is + * similar to the pseudocode ExceptionTaken() function. + */ + CPUARMState *env = &cpu->env; + uint32_t addr; + + armv7m_nvic_acknowledge_irq(env->nvic); + switch_v7m_sp(env, 0); + /* Clear IT bits */ + env->condexec_bits = 0; + env->regs[14] = lr; + addr = arm_v7m_load_vector(cpu); + env->regs[15] = addr & 0xfffffffe; + env->thumb = addr & 1; +} + +static void v7m_push_stack(ARMCPU *cpu) +{ + /* Do the "set up stack frame" part of exception entry, + * similar to pseudocode PushStack(). + */ + CPUARMState *env = &cpu->env; + uint32_t xpsr = xpsr_read(env); + + /* Align stack pointer if the guest wants that */ + if ((env->regs[13] & 4) && (env->v7m.ccr & R_V7M_CCR_STKALIGN_MASK)) { + env->regs[13] -= 4; + xpsr |= 0x200; + } + /* Switch to the handler mode. */ + v7m_push(env, xpsr); + v7m_push(env, env->regs[15]); + v7m_push(env, env->regs[14]); + v7m_push(env, env->regs[12]); + v7m_push(env, env->regs[3]); + v7m_push(env, env->regs[2]); + v7m_push(env, env->regs[1]); + v7m_push(env, env->regs[0]); +} + +static void do_v7m_exception_exit(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; uint32_t type; uint32_t xpsr; - + bool ufault = false; + bool return_to_sp_process = false; + bool return_to_handler = false; + bool rettobase = false; + + /* We can only get here from an EXCP_EXCEPTION_EXIT, and + * arm_v7m_do_unassigned_access() enforces the architectural rule + * that jumps to magic addresses don't have magic behaviour unless + * we're in Handler mode (compare pseudocode BXWritePC()). + */ + assert(env->v7m.exception != 0); + + /* In the spec pseudocode ExceptionReturn() is called directly + * from BXWritePC() and gets the full target PC value including + * bit zero. In QEMU's implementation we treat it as a normal + * jump-to-register (which is then caught later on), and so split + * the target value up between env->regs[15] and env->thumb in + * gen_bx(). Reconstitute it. + */ type = env->regs[15]; + if (env->thumb) { + type |= 1; + } + + qemu_log_mask(CPU_LOG_INT, "Exception return: magic PC %" PRIx32 + " previous exception %d\n", + type, env->v7m.exception); + + if (extract32(type, 5, 23) != extract32(-1, 5, 23)) { + qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero high bits in exception " + "exit PC value 0x%" PRIx32 " are UNPREDICTABLE\n", type); + } + if (env->v7m.exception != ARMV7M_EXCP_NMI) { /* Auto-clear FAULTMASK on return from other than NMI */ env->daif &= ~PSTATE_F; } - if (env->v7m.exception != 0) { - armv7m_nvic_complete_irq(env->nvic, env->v7m.exception); + + switch (armv7m_nvic_complete_irq(env->nvic, env->v7m.exception)) { + case -1: + /* attempt to exit an exception that isn't active */ + ufault = true; + break; + case 0: + /* still an irq active now */ + break; + case 1: + /* we returned to base exception level, no nesting. + * (In the pseudocode this is written using "NestedActivation != 1" + * where we have 'rettobase == false'.) + */ + rettobase = true; + break; + default: + g_assert_not_reached(); + } + + switch (type & 0xf) { + case 1: /* Return to Handler */ + return_to_handler = true; + break; + case 13: /* Return to Thread using Process stack */ + return_to_sp_process = true; + /* fall through */ + case 9: /* Return to Thread using Main stack */ + if (!rettobase && + !(env->v7m.ccr & R_V7M_CCR_NONBASETHRDENA_MASK)) { + ufault = true; + } + break; + default: + ufault = true; + } + + if (ufault) { + /* Bad exception return: instead of popping the exception + * stack, directly take a usage fault on the current stack. + */ + env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK; + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + v7m_exception_taken(cpu, type | 0xf0000000); + qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing " + "stackframe: failed exception return integrity check\n"); + return; } /* Switch to the target stack. */ - switch_v7m_sp(env, (type & 4) != 0); + switch_v7m_sp(env, return_to_sp_process); /* Pop registers. */ env->regs[0] = v7m_pop(env); env->regs[1] = v7m_pop(env); @@ -6041,11 +6184,24 @@ static void do_v7m_exception_exit(CPUARMState *env) /* Undo stack alignment. */ if (xpsr & 0x200) env->regs[13] |= 4; - /* ??? The exception return type specifies Thread/Handler mode. However - this is also implied by the xPSR value. Not sure what to do - if there is a mismatch. */ - /* ??? Likewise for mismatches between the CONTROL register and the stack - pointer. */ + + /* The restored xPSR exception field will be zero if we're + * resuming in Thread mode. If that doesn't match what the + * exception return type specified then this is a UsageFault. + */ + if (return_to_handler == (env->v7m.exception == 0)) { + /* Take an INVPC UsageFault by pushing the stack again. */ + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK; + v7m_push_stack(cpu); + v7m_exception_taken(cpu, type | 0xf0000000); + qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: " + "failed exception return integrity check\n"); + return; + } + + /* Otherwise, we have a successful exception exit. */ + qemu_log_mask(CPU_LOG_INT, "...successful exception return\n"); } static void arm_log_exception(int idx) @@ -6063,37 +6219,11 @@ static void arm_log_exception(int idx) } } -static uint32_t arm_v7m_load_vector(ARMCPU *cpu) - -{ - CPUState *cs = CPU(cpu); - CPUARMState *env = &cpu->env; - MemTxResult result; - hwaddr vec = env->v7m.vecbase + env->v7m.exception * 4; - uint32_t addr; - - addr = address_space_ldl(cs->as, vec, - MEMTXATTRS_UNSPECIFIED, &result); - if (result != MEMTX_OK) { - /* Architecturally this should cause a HardFault setting HSFR.VECTTBL, - * which would then be immediately followed by our failing to load - * the entry vector for that HardFault, which is a Lockup case. - * Since we don't model Lockup, we just report this guest error - * via cpu_abort(). - */ - cpu_abort(cs, "Failed to read from exception vector table " - "entry %08x\n", (unsigned)vec); - } - return addr; -} - void arm_v7m_cpu_do_interrupt(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - uint32_t xpsr = xpsr_read(env); uint32_t lr; - uint32_t addr; arm_log_exception(cs->exception_index); @@ -6106,28 +6236,30 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) /* For exceptions we just mark as pending on the NVIC, and let that handle it. */ - /* TODO: Need to escalate if the current priority is higher than the - one we're raising. */ switch (cs->exception_index) { case EXCP_UDEF: armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); env->v7m.cfsr |= R_V7M_CFSR_UNDEFINSTR_MASK; - return; + break; case EXCP_NOCP: armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); env->v7m.cfsr |= R_V7M_CFSR_NOCP_MASK; - return; + break; + case EXCP_INVSTATE: + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + env->v7m.cfsr |= R_V7M_CFSR_INVSTATE_MASK; + break; case EXCP_SWI: /* The PC already points to the next instruction. */ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC); - return; + break; case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: /* TODO: if we implemented the MPU registers, this is where we * should set the MMFAR, etc from exception.fsr and exception.vaddress. */ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM); - return; + break; case EXCP_BKPT: if (semihosting_enabled()) { int nr; @@ -6142,39 +6274,20 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) } } armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG); - return; + break; case EXCP_IRQ: - env->v7m.exception = armv7m_nvic_acknowledge_irq(env->nvic); break; case EXCP_EXCEPTION_EXIT: - do_v7m_exception_exit(env); + do_v7m_exception_exit(cpu); return; default: cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); return; /* Never happens. Keep compiler happy. */ } - /* Align stack pointer if the guest wants that */ - if ((env->regs[13] & 4) && (env->v7m.ccr & R_V7M_CCR_STKALIGN_MASK)) { - env->regs[13] -= 4; - xpsr |= 0x200; - } - /* Switch to the handler mode. */ - v7m_push(env, xpsr); - v7m_push(env, env->regs[15]); - v7m_push(env, env->regs[14]); - v7m_push(env, env->regs[12]); - v7m_push(env, env->regs[3]); - v7m_push(env, env->regs[2]); - v7m_push(env, env->regs[1]); - v7m_push(env, env->regs[0]); - switch_v7m_sp(env, 0); - /* Clear IT bits */ - env->condexec_bits = 0; - env->regs[14] = lr; - addr = arm_v7m_load_vector(cpu); - env->regs[15] = addr & 0xfffffffe; - env->thumb = addr & 1; + v7m_push_stack(cpu); + v7m_exception_taken(cpu, lr); + qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception); } /* Function used to synchronize QEMU's AArch64 register set with AArch32 diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e15eae6d41..24de30d92c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10933,6 +10933,10 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } + if (!fp_access_check(s)) { + return; + } + /* Note that we convert the Vx register indexes into the * index within the vfp.regs[] array, so we can share the * helper with the AArch32 instructions. @@ -10997,6 +11001,10 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } + if (!fp_access_check(s)) { + return; + } + tcg_rd_regno = tcg_const_i32(rd << 1); tcg_rn_regno = tcg_const_i32(rn << 1); tcg_rm_regno = tcg_const_i32(rm << 1); @@ -11060,6 +11068,10 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) return; } + if (!fp_access_check(s)) { + return; + } + tcg_rd_regno = tcg_const_i32(rd << 1); tcg_rn_regno = tcg_const_i32(rn << 1); diff --git a/target/arm/translate.c b/target/arm/translate.c index abc1f77ee4..b859f10755 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7990,9 +7990,13 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) TCGv_i32 addr; TCGv_i64 tmp64; - /* M variants do not implement ARM mode. */ + /* M variants do not implement ARM mode; this must raise the INVSTATE + * UsageFault exception. + */ if (arm_dc_feature(s, ARM_FEATURE_M)) { - goto illegal_op; + gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(), + default_exception_el(s)); + return; } cond = insn >> 28; if (cond == 0xf){ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8df124f332..573f2aa988 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1417,6 +1417,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper); void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector); void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); +void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); +void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); /* you can call this signal handler from your SIGBUS and SIGSEGV signal handlers to inform the virtual CPU of exceptions. non zero diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c index 66474ad98e..69ea33a5c2 100644 --- a/target/i386/fpu_helper.c +++ b/target/i386/fpu_helper.c @@ -1377,6 +1377,18 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) } } +#if defined(CONFIG_USER_ONLY) +void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +{ + helper_fxsave(env, ptr); +} + +void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +{ + helper_fxrstor(env, ptr); +} +#endif + void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { uintptr_t ra = GETPC(); diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 5b66d3325d..2a894eec65 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -671,7 +671,7 @@ static S390CPUModel *get_max_cpu_model(Error **errp) if (kvm_enabled()) { kvm_s390_get_host_cpu_model(&max_model, errp); } else { - /* TCG enulates a z900 */ + /* TCG emulates a z900 */ max_model.def = &s390_cpu_defs[0]; bitmap_copy(max_model.features, max_model.def->default_feat, S390_FEAT_MAX); diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 6d227a5a6a..290de6dae6 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) } } -static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, +static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, TCGArg b, bool b_const, TCGLabel *l) { intptr_t offset; @@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, } } -static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, +static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, tcg_target_long bl, tcg_target_long bh, bool const_bl, bool const_bh, bool sub) diff --git a/tests/Makefile.include b/tests/Makefile.include index e60bb6ce58..3310c170a3 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -308,8 +308,7 @@ check-qtest-sparc-y = tests/prom-env-test$(EXESUF) check-qtest-sparc64-y = tests/endianness-test$(EXESUF) #check-qtest-sparc64-y += tests/m48t59-test$(EXESUF) #gcov-files-sparc64-y += hw/timer/m48t59.c -#Disabled for now, triggers a TCG bug on 32-bit hosts -#check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) +check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) check-qtest-arm-y = tests/tmp105-test$(EXESUF) check-qtest-arm-y += tests/ds1338-test$(EXESUF) diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker new file mode 100644 index 0000000000..bbb21ed088 --- /dev/null +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -0,0 +1,22 @@ +# +# Docker s390 cross-compiler target +# +# This docker target is based on stretch (testing) as the stable build +# doesn't have the cross compiler available. +# +FROM debian:testing-slim + +# Duplicate deb line as deb-src +RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list + +# Add the s390x architecture +RUN dpkg --add-architecture s390x + +# Grab the updated list of packages +RUN apt update +RUN apt dist-upgrade -yy +RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install +RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu- diff --git a/tests/ide-test.c b/tests/ide-test.c index fb541f88b5..b57c2b1676 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -544,6 +544,7 @@ static void make_dirty(uint8_t device) guest_buf = guest_alloc(guest_malloc, len); buf = g_malloc(len); + memset(buf, rand() % 255 + 1, len); g_assert(guest_buf); g_assert(buf); diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index bd33bc353d..eac207b30e 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -76,7 +76,7 @@ static void add_tests(const char *machines[]) int main(int argc, char *argv[]) { const char *sparc_machines[] = { "SPARCbook", "Voyager", "SS-20", NULL }; - const char *sparc64_machines[] = { "sun4u", "sun4v", NULL }; + const char *sparc64_machines[] = { "sun4u", NULL }; const char *ppc_machines[] = { "mac99", "g3beige", NULL }; const char *ppc64_machines[] = { "mac99", "g3beige", "pseries", NULL }; const char *arch = qtest_get_arch(); diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 4673b67f37..34e66db691 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1024' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1k' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte @@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a size -You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index e206ad6c29..c6f4eef215 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -179,7 +179,7 @@ q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: Can't use a read-only drive +(qemu) QEMU_PROG: Block node is read-only QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on @@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 1d3fd04b65..aafcd249f6 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") self.vm.launch() def tearDown(self): @@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase): qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt) + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 08e4bb7218..182acb42cf 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === Invalid command - snapshot node used as backing hd === -{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}} +{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} === Invalid command - snapshot node has a backing image === diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 index 3ba79f027a..6d8f0a1a84 100755 --- a/tests/qemu-iotests/141 +++ b/tests/qemu-iotests/141 @@ -67,7 +67,7 @@ test_blockjob() _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'x-blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ - 'error' + 'error' | _filter_generated_node_ids _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'block-job-cancel', diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 195ca1a604..82e763b68d 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} @@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 6b7edaf28f..54b53293d7 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -28,6 +28,7 @@ Testing: opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 @@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 @@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 @@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 @@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 @@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -676,6 +707,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 @@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 @@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 @@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 @@ -964,6 +1009,7 @@ Testing: -device floppy opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -device floppy,drive-type=120 @@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -device floppy,drive-type=144 @@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -device floppy,drive-type=288 @@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288 @@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512 @@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index f6dfd08746..4ccbda14af 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -101,9 +101,9 @@ static BlockJob *test_block_job_start(unsigned int iterations, g_assert_nonnull(bs); snprintf(job_id, sizeof(job_id), "job%u", counter++); - s = block_job_create(job_id, &test_block_job_driver, bs, 0, - BLOCK_JOB_DEFAULT, test_block_job_cb, - data, &error_abort); + s = block_job_create(job_id, &test_block_job_driver, bs, + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, + test_block_job_cb, data, &error_abort); s->iterations = iterations; s->use_timer = use_timer; s->rc = rc; diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 068c9e419b..740e740398 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, BlockJob *job; Error *errp = NULL; - job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, - BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); + job = block_job_create(id, &test_block_job_driver, blk_bs(blk), + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb, + NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); @@ -53,13 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, * BlockDriverState inserted. */ static BlockBackend *create_blk(const char *name) { - BlockBackend *blk = blk_new(); + /* No I/O is performed on this device */ + BlockBackend *blk = blk_new(0, BLK_PERM_ALL); BlockDriverState *bs; bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); g_assert_nonnull(bs); - blk_insert_bs(blk, bs); + blk_insert_bs(blk, bs, &error_abort); bdrv_unref(bs); if (name) { diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 363b59a38f..bd7c501b2e 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -593,9 +593,10 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - blk1 = blk_new(); - blk2 = blk_new(); - blk3 = blk_new(); + /* No actual I/O is performed on these devices */ + blk1 = blk_new(0, BLK_PERM_ALL); + blk2 = blk_new(0, BLK_PERM_ALL); + blk3 = blk_new(0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); diff --git a/util/qemu-option.c b/util/qemu-option.c index 419f2528b8..5ce1b5c246 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -179,7 +179,7 @@ void parse_option_size(const char *name, const char *value, err = qemu_strtosz(value, NULL, &size); if (err == -ERANGE) { - error_setg(errp, "Value '%s' is too large for parameter '%s'", + error_setg(errp, "Value '%s' is out of range for parameter '%s'", value, name); return; } |