diff options
Diffstat (limited to 'block/rbd.c')
-rw-r--r-- | block/rbd.c | 447 |
1 files changed, 168 insertions, 279 deletions
diff --git a/block/rbd.c b/block/rbd.c index ee13f3d9d3..6471f4fd2b 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -13,14 +13,14 @@ #include "qemu/osdep.h" +#include <rbd/librbd.h> #include "qapi/error.h" #include "qemu/error-report.h" #include "block/block_int.h" #include "crypto/secret.h" #include "qemu/cutils.h" #include "qapi/qmp/qstring.h" - -#include <rbd/librbd.h> +#include "qapi/qmp/qjson.h" /* * When specifying the image filename use: @@ -56,11 +56,6 @@ #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) -#define RBD_MAX_CONF_NAME_SIZE 128 -#define RBD_MAX_CONF_VAL_SIZE 512 -#define RBD_MAX_CONF_SIZE 1024 -#define RBD_MAX_POOL_NAME_SIZE 128 -#define RBD_MAX_SNAP_NAME_SIZE 128 #define RBD_MAX_SNAPS 100 /* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */ @@ -99,43 +94,28 @@ typedef struct BDRVRBDState { rados_t cluster; rados_ioctx_t io_ctx; rbd_image_t image; - char name[RBD_MAX_IMAGE_NAME_SIZE]; + char *image_name; char *snap; } BDRVRBDState; -static char *qemu_rbd_next_tok(int max_len, - char *src, char delim, - const char *name, - char **p, Error **errp) +static char *qemu_rbd_next_tok(char *src, char delim, char **p) { - int l; char *end; *p = NULL; - if (delim != '\0') { - for (end = src; *end; ++end) { - if (*end == delim) { - break; - } - if (*end == '\\' && end[1] != '\0') { - end++; - } - } + for (end = src; *end; ++end) { if (*end == delim) { - *p = end + 1; - *end = '\0'; + break; + } + if (*end == '\\' && end[1] != '\0') { + end++; } } - l = strlen(src); - if (l >= max_len) { - error_setg(errp, "%s too long", name); - return NULL; - } else if (l == 0) { - error_setg(errp, "%s too short", name); - return NULL; + if (*end == delim) { + *p = end + 1; + *end = '\0'; } - return src; } @@ -156,26 +136,19 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, Error **errp) { const char *start; - char *p, *buf, *keypairs; + char *p, *buf; + QList *keypairs = NULL; char *found_str; - size_t max_keypair_size; - Error *local_err = NULL; if (!strstart(filename, "rbd:", &start)) { error_setg(errp, "File name must start with 'rbd:'"); return; } - max_keypair_size = strlen(start) + 1; buf = g_strdup(start); - keypairs = g_malloc0(max_keypair_size); p = buf; - found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p, - '/', "pool name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, '/', &p); if (!p) { error_setg(errp, "Pool name is required"); goto done; @@ -184,27 +157,15 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, qdict_put(options, "pool", qstring_from_str(found_str)); if (strchr(p, '@')) { - found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, - '@', "object name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, '@', &p); qemu_rbd_unescape(found_str); qdict_put(options, "image", qstring_from_str(found_str)); - found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p, - ':', "snap name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, ':', &p); qemu_rbd_unescape(found_str); qdict_put(options, "snapshot", qstring_from_str(found_str)); } else { - found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, - ':', "object name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, ':', &p); qemu_rbd_unescape(found_str); qdict_put(options, "image", qstring_from_str(found_str)); } @@ -212,24 +173,11 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, goto done; } - found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, - '\0', "configuration", &p, &local_err); - if (local_err) { - goto done; - } - - p = found_str; - /* The following are essentially all key/value pairs, and we treat * 'id' and 'conf' a bit special. Key/value pairs may be in any order. */ while (p) { char *name, *value; - name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, - '=', "conf option name", &p, &local_err); - if (local_err) { - break; - } - + name = qemu_rbd_next_tok(p, '=', &p); if (!p) { error_setg(errp, "conf option %s has no value", name); break; @@ -237,11 +185,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, qemu_rbd_unescape(name); - value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, - ':', "conf option value", &p, &local_err); - if (local_err) { - break; - } + value = qemu_rbd_next_tok(p, ':', &p); qemu_rbd_unescape(value); if (!strcmp(name, "conf")) { @@ -249,36 +193,30 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, } else if (!strcmp(name, "id")) { qdict_put(options, "user" , qstring_from_str(value)); } else { - /* FIXME: This is pretty ugly, and not the right way to do this. - * These should be contained in a structure, and then - * passed explicitly as individual key/value pairs to - * rados. Consider this legacy code that needs to be - * updated. */ - char *tmp = g_malloc0(max_keypair_size); - /* only use a delimiter if it is not the first keypair found */ - /* These are sets of unknown key/value pairs we'll pass along - * to ceph */ - if (keypairs[0]) { - snprintf(tmp, max_keypair_size, ":%s=%s", name, value); - pstrcat(keypairs, max_keypair_size, tmp); - } else { - snprintf(keypairs, max_keypair_size, "%s=%s", name, value); + /* + * We pass these internally to qemu_rbd_set_keypairs(), so + * we can get away with the simpler list of [ "key1", + * "value1", "key2", "value2" ] rather than a raw dict + * { "key1": "value1", "key2": "value2" } where we can't + * guarantee order, or even a more correct but complex + * [ { "key1": "value1" }, { "key2": "value2" } ] + */ + if (!keypairs) { + keypairs = qlist_new(); } - g_free(tmp); + qlist_append(keypairs, qstring_from_str(name)); + qlist_append(keypairs, qstring_from_str(value)); } } - if (keypairs[0]) { - qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs)); + if (keypairs) { + qdict_put(options, "=keyvalue-pairs", + qobject_to_json(QOBJECT(keypairs))); } - done: - if (local_err) { - error_propagate(errp, local_err); - } g_free(buf); - g_free(keypairs); + QDECREF(keypairs); return; } @@ -302,50 +240,41 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid, return 0; } -static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs, +static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json, Error **errp) { - char *p, *buf; - char *name; - char *value; - Error *local_err = NULL; + QList *keypairs; + QString *name; + QString *value; + const char *key; + size_t remaining; int ret = 0; - buf = g_strdup(keypairs); - p = buf; - - while (p) { - name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, - '=', "conf option name", &p, &local_err); - if (local_err) { - break; - } - - if (!p) { - error_setg(errp, "conf option %s has no value", name); - ret = -EINVAL; - break; - } + if (!keypairs_json) { + return ret; + } + keypairs = qobject_to_qlist(qobject_from_json(keypairs_json, + &error_abort)); + remaining = qlist_size(keypairs) / 2; + assert(remaining); - value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, - ':', "conf option value", &p, &local_err); - if (local_err) { - break; - } + while (remaining--) { + name = qobject_to_qstring(qlist_pop(keypairs)); + value = qobject_to_qstring(qlist_pop(keypairs)); + assert(name && value); + key = qstring_get_str(name); - ret = rados_conf_set(cluster, name, value); + ret = rados_conf_set(cluster, key, qstring_get_str(value)); + QDECREF(name); + QDECREF(value); if (ret < 0) { - error_setg_errno(errp, -ret, "invalid conf option %s", name); + error_setg_errno(errp, -ret, "invalid conf option %s", key); ret = -EINVAL; break; } } - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - } - g_free(buf); + QDECREF(keypairs); return ret; } @@ -365,14 +294,14 @@ static QemuOptsList runtime_opts = { .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = "pool", .type = QEMU_OPT_STRING, - .help = "Specification of the rbd image", + .help = "Rados pool name", }, { - .name = "password-secret", + .name = "image", .type = QEMU_OPT_STRING, - .help = "ID of secret providing the password", + .help = "Image name in the pool", }, { .name = "conf", @@ -380,16 +309,6 @@ static QemuOptsList runtime_opts = { .help = "Rados config file location", }, { - .name = "pool", - .type = QEMU_OPT_STRING, - .help = "Rados pool name", - }, - { - .name = "image", - .type = QEMU_OPT_STRING, - .help = "Image name in the pool", - }, - { .name = "snapshot", .type = QEMU_OPT_STRING, .help = "Ceph snapshot name", @@ -400,23 +319,26 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "Rados id name", }, + /* + * server.* extracted manually, see qemu_rbd_mon_host() + */ { - .name = "keyvalue-pairs", - .type = QEMU_OPT_STRING, - .help = "Legacy rados key/value option parameters", - }, - { - .name = "host", - .type = QEMU_OPT_STRING, - }, - { - .name = "port", + .name = "password-secret", .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", }, + + /* + * Keys for qemu_rbd_parse_filename(), not in the QAPI schema + */ { - .name = "auth", + /* + * HACK: name starts with '=' so that qemu_opts_parse() + * can't set it + */ + .name = "=keyvalue-pairs", .type = QEMU_OPT_STRING, - .help = "Supported authentication method, either cephx or none", + .help = "Legacy rados key/value option parameters", }, { /* end of list */ } }, @@ -428,12 +350,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) int64_t bytes = 0; int64_t objsize; int obj_order = 0; - const char *pool, *name, *conf, *clientname, *keypairs; + const char *pool, *image_name, *conf, *user, *keypairs; const char *secretid; rados_t cluster; rados_ioctx_t io_ctx; QDict *options = NULL; - QemuOpts *rbd_opts = NULL; int ret = 0; secretid = qemu_opt_get(opts, "password-secret"); @@ -464,21 +385,19 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(rbd_opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto exit; - } - - pool = qemu_opt_get(rbd_opts, "pool"); - conf = qemu_opt_get(rbd_opts, "conf"); - clientname = qemu_opt_get(rbd_opts, "user"); - name = qemu_opt_get(rbd_opts, "image"); - keypairs = qemu_opt_get(rbd_opts, "keyvalue-pairs"); + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from -blockdev + * or blockdev_add, its members are typed according to the QAPI + * schema, but when they come from -drive, they're all QString. + */ + pool = qdict_get_try_str(options, "pool"); + conf = qdict_get_try_str(options, "conf"); + user = qdict_get_try_str(options, "user"); + image_name = qdict_get_try_str(options, "image"); + keypairs = qdict_get_try_str(options, "=keyvalue-pairs"); - ret = rados_create(&cluster, clientname); + ret = rados_create(&cluster, user); if (ret < 0) { error_setg_errno(errp, -ret, "error initializing"); goto exit; @@ -515,7 +434,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) goto shutdown; } - ret = rbd_create(io_ctx, name, bytes, &obj_order); + ret = rbd_create(io_ctx, image_name, bytes, &obj_order); if (ret < 0) { error_setg_errno(errp, -ret, "error rbd create"); } @@ -527,7 +446,6 @@ shutdown: exit: QDECREF(options); - qemu_opts_del(rbd_opts); return ret; } @@ -578,91 +496,43 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) qemu_aio_unref(acb); } -#define RBD_MON_HOST 0 -#define RBD_AUTH_SUPPORTED 1 - -static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type, - Error **errp) +static char *qemu_rbd_mon_host(QDict *options, Error **errp) { - int num_entries; - QemuOpts *opts = NULL; - QDict *sub_options; - const char *host; - const char *port; - char *str; - char *rados_str = NULL; - Error *local_err = NULL; + const char **vals = g_new(const char *, qdict_size(options) + 1); + char keybuf[32]; + const char *host, *port; + char *rados_str; int i; - assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED); - - num_entries = qdict_array_entries(options, prefix); - - if (num_entries < 0) { - error_setg(errp, "Parse error on RBD QDict array"); - return NULL; - } - - for (i = 0; i < num_entries; i++) { - char *strbuf = NULL; - const char *value; - char *rados_str_tmp; - - str = g_strdup_printf("%s%d.", prefix, i); - qdict_extract_subqdict(options, &sub_options, str); - g_free(str); - - opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, sub_options, &local_err); - QDECREF(sub_options); - if (local_err) { - error_propagate(errp, local_err); - g_free(rados_str); + for (i = 0;; i++) { + sprintf(keybuf, "server.%d.host", i); + host = qdict_get_try_str(options, keybuf); + qdict_del(options, keybuf); + sprintf(keybuf, "server.%d.port", i); + port = qdict_get_try_str(options, keybuf); + qdict_del(options, keybuf); + if (!host && !port) { + break; + } + if (!host) { + error_setg(errp, "Parameter server.%d.host is missing", i); rados_str = NULL; - goto exit; + goto out; } - if (type == RBD_MON_HOST) { - host = qemu_opt_get(opts, "host"); - port = qemu_opt_get(opts, "port"); - - value = host; - if (port) { - /* check for ipv6 */ - if (strchr(host, ':')) { - strbuf = g_strdup_printf("[%s]:%s", host, port); - } else { - strbuf = g_strdup_printf("%s:%s", host, port); - } - value = strbuf; - } else if (strchr(host, ':')) { - strbuf = g_strdup_printf("[%s]", host); - value = strbuf; - } + if (strchr(host, ':')) { + vals[i] = port ? g_strdup_printf("[%s]:%s", host, port) + : g_strdup_printf("[%s]", host); } else { - value = qemu_opt_get(opts, "auth"); + vals[i] = port ? g_strdup_printf("%s:%s", host, port) + : g_strdup(host); } - - - /* each iteration in the for loop will build upon the string, and if - * rados_str is NULL then it is our first pass */ - if (rados_str) { - /* separate options with ';', as that is what rados_conf_set() - * requires */ - rados_str_tmp = rados_str; - rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value); - g_free(rados_str_tmp); - } else { - rados_str = g_strdup(value); - } - - g_free(strbuf); - qemu_opts_del(opts); - opts = NULL; } + vals[i] = NULL; -exit: - qemu_opts_del(opts); + rados_str = i ? g_strjoinv(";", (char **)vals) : NULL; +out: + g_strfreev((char **)vals); return rados_str; } @@ -670,32 +540,22 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVRBDState *s = bs->opaque; - const char *pool, *snap, *conf, *clientname, *name, *keypairs; + const char *pool, *snap, *conf, *user, *image_name, *keypairs; const char *secretid; QemuOpts *opts; Error *local_err = NULL; char *mon_host = NULL; - char *auth_supported = NULL; int r; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { error_propagate(errp, local_err); - qemu_opts_del(opts); - return -EINVAL; - } - - auth_supported = qemu_rbd_array_opts(options, "auth-supported.", - RBD_AUTH_SUPPORTED, &local_err); - if (local_err) { - error_propagate(errp, local_err); r = -EINVAL; goto failed_opts; } - mon_host = qemu_rbd_array_opts(options, "server.", - RBD_MON_HOST, &local_err); + mon_host = qemu_rbd_mon_host(options, &local_err); if (local_err) { error_propagate(errp, local_err); r = -EINVAL; @@ -707,20 +567,24 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, pool = qemu_opt_get(opts, "pool"); conf = qemu_opt_get(opts, "conf"); snap = qemu_opt_get(opts, "snapshot"); - clientname = qemu_opt_get(opts, "user"); - name = qemu_opt_get(opts, "image"); - keypairs = qemu_opt_get(opts, "keyvalue-pairs"); + user = qemu_opt_get(opts, "user"); + image_name = qemu_opt_get(opts, "image"); + keypairs = qemu_opt_get(opts, "=keyvalue-pairs"); - r = rados_create(&s->cluster, clientname); + if (!pool || !image_name) { + error_setg(errp, "Parameters 'pool' and 'image' are required"); + r = -EINVAL; + goto failed_opts; + } + + r = rados_create(&s->cluster, user); if (r < 0) { error_setg_errno(errp, -r, "error initializing"); goto failed_opts; } s->snap = g_strdup(snap); - if (name) { - pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name); - } + s->image_name = g_strdup(image_name); /* try default location when conf=NULL, but ignore failure */ r = rados_conf_read_file(s->cluster, conf); @@ -741,13 +605,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, } } - if (auth_supported) { - r = rados_conf_set(s->cluster, "auth_supported", auth_supported); - if (r < 0) { - goto failed_shutdown; - } - } - if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) { r = -EIO; goto failed_shutdown; @@ -778,13 +635,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, goto failed_shutdown; } - r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); + /* rbd_open is always r/w */ + r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap); if (r < 0) { - error_setg_errno(errp, -r, "error reading header from %s", s->name); + error_setg_errno(errp, -r, "error reading header from %s", + s->image_name); goto failed_open; } - bs->read_only = (s->snap != NULL); + /* If we are using an rbd snapshot, we must be r/o, otherwise + * leave as-is */ + if (s->snap != NULL) { + r = bdrv_set_read_only(bs, true, &local_err); + if (r < 0) { + error_propagate(errp, local_err); + goto failed_open; + } + } qemu_opts_del(opts); return 0; @@ -794,13 +661,33 @@ failed_open: failed_shutdown: rados_shutdown(s->cluster); g_free(s->snap); + g_free(s->image_name); failed_opts: qemu_opts_del(opts); g_free(mon_host); - g_free(auth_supported); return r; } + +/* Since RBD is currently always opened R/W via the API, + * we just need to check if we are using a snapshot or not, in + * order to determine if we will allow it to be R/W */ +static int qemu_rbd_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRBDState *s = state->bs->opaque; + int ret = 0; + + if (s->snap && state->flags & BDRV_O_RDWR) { + error_setg(errp, + "Cannot change node '%s' to r/w when using RBD snapshot", + bdrv_get_device_or_node_name(state->bs)); + ret = -EINVAL; + } + + return ret; +} + static void qemu_rbd_close(BlockDriverState *bs) { BDRVRBDState *s = bs->opaque; @@ -808,6 +695,7 @@ static void qemu_rbd_close(BlockDriverState *bs) rbd_close(s->image); rados_ioctx_destroy(s->io_ctx); g_free(s->snap); + g_free(s->image_name); rados_shutdown(s->cluster); } @@ -1206,6 +1094,7 @@ static BlockDriver bdrv_rbd = { .bdrv_parse_filename = qemu_rbd_parse_filename, .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, + .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, .bdrv_create = qemu_rbd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, |