diff options
Diffstat (limited to 'block')
-rwxr-xr-x | block/blkreplay.c | 8 | ||||
-rw-r--r-- | block/block-backend.c | 22 | ||||
-rw-r--r-- | block/dirty-bitmap.c | 6 | ||||
-rw-r--r-- | block/gluster.c | 642 | ||||
-rw-r--r-- | block/io.c | 242 | ||||
-rw-r--r-- | block/iscsi.c | 32 | ||||
-rw-r--r-- | block/mirror.c | 157 | ||||
-rw-r--r-- | block/nbd-client.c | 107 | ||||
-rw-r--r-- | block/nbd-client.h | 11 | ||||
-rw-r--r-- | block/nbd.c | 30 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 4 | ||||
-rw-r--r-- | block/qcow2.c | 10 | ||||
-rw-r--r-- | block/raw-posix.c | 24 | ||||
-rw-r--r-- | block/raw-win32.c | 19 | ||||
-rw-r--r-- | block/raw_bsd.c | 59 | ||||
-rw-r--r-- | block/rbd.c | 29 | ||||
-rw-r--r-- | block/sheepdog.c | 17 | ||||
-rw-r--r-- | block/trace-events | 4 |
18 files changed, 909 insertions, 514 deletions
diff --git a/block/blkreplay.c b/block/blkreplay.c index 3368c8c98d..30f9d5ff6c 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -114,11 +114,11 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, return ret; } -static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, + int64_t offset, int count) { uint64_t reqid = request_id++; - int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors); + int ret = bdrv_co_pdiscard(bs->file->bs, offset, count); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); @@ -148,7 +148,7 @@ static BlockDriver bdrv_blkreplay = { .bdrv_co_pwritev = blkreplay_co_pwritev, .bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes, - .bdrv_co_discard = blkreplay_co_discard, + .bdrv_co_pdiscard = blkreplay_co_pdiscard, .bdrv_co_flush = blkreplay_co_flush, }; diff --git a/block/block-backend.c b/block/block-backend.c index f9cea1b304..effa038924 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1065,16 +1065,16 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, return bdrv_aio_flush(blk_bs(blk), cb, opaque); } -BlockAIOCB *blk_aio_discard(BlockBackend *blk, - int64_t sector_num, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, + int64_t offset, int count, + BlockCompletionFunc *cb, void *opaque) { - int ret = blk_check_request(blk, sector_num, nb_sectors); + int ret = blk_check_byte_request(blk, offset, count); if (ret < 0) { return blk_abort_aio_request(blk, cb, opaque, ret); } - return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque); + return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque); } void blk_aio_cancel(BlockAIOCB *acb) @@ -1106,14 +1106,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque); } -int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) +int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count) { - int ret = blk_check_request(blk, sector_num, nb_sectors); + int ret = blk_check_byte_request(blk, offset, count); if (ret < 0) { return ret; } - return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors); + return bdrv_co_pdiscard(blk_bs(blk), offset, count); } int blk_co_flush(BlockBackend *blk) @@ -1504,14 +1504,14 @@ int blk_truncate(BlockBackend *blk, int64_t offset) return bdrv_truncate(blk_bs(blk), offset); } -int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) +int blk_pdiscard(BlockBackend *blk, int64_t offset, int count) { - int ret = blk_check_request(blk, sector_num, nb_sectors); + int ret = blk_check_byte_request(blk, offset, count); if (ret < 0) { return ret; } - return bdrv_discard(blk_bs(blk), sector_num, nb_sectors); + return bdrv_pdiscard(blk_bs(blk), offset, count); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 4902ca557f..f2bfdcfdea 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) + int64_t cur_sector, int64_t nr_sectors) { assert(bdrv_dirty_bitmap_enabled(bitmap)); hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors) + int64_t cur_sector, int64_t nr_sectors) { assert(bdrv_dirty_bitmap_enabled(bitmap)); hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); @@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) + int64_t nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { diff --git a/block/gluster.c b/block/gluster.c index 406c1e6357..01b479fbb9 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -11,7 +11,27 @@ #include <glusterfs/api/glfs.h> #include "block/block_int.h" #include "qapi/error.h" +#include "qapi/qmp/qerror.h" #include "qemu/uri.h" +#include "qemu/error-report.h" + +#define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_VOLUME "volume" +#define GLUSTER_OPT_PATH "path" +#define GLUSTER_OPT_TYPE "type" +#define GLUSTER_OPT_SERVER_PATTERN "server." +#define GLUSTER_OPT_HOST "host" +#define GLUSTER_OPT_PORT "port" +#define GLUSTER_OPT_TO "to" +#define GLUSTER_OPT_IPV4 "ipv4" +#define GLUSTER_OPT_IPV6 "ipv6" +#define GLUSTER_OPT_SOCKET "socket" +#define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEFAULT_PORT 24007 +#define GLUSTER_DEBUG_DEFAULT 4 +#define GLUSTER_DEBUG_MAX 9 + +#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n" typedef struct GlusterAIOCB { int64_t size; @@ -28,27 +48,141 @@ typedef struct BDRVGlusterState { int debug_level; } BDRVGlusterState; -typedef struct GlusterConf { - char *server; - int port; - char *volname; - char *image; - char *transport; - int debug_level; -} GlusterConf; +typedef struct BDRVGlusterReopenState { + struct glfs *glfs; + struct glfs_fd *fd; +} BDRVGlusterReopenState; -static void qemu_gluster_gconf_free(GlusterConf *gconf) -{ - if (gconf) { - g_free(gconf->server); - g_free(gconf->volname); - g_free(gconf->image); - g_free(gconf->transport); - g_free(gconf); + +static QemuOptsList qemu_gluster_create_opts = { + .name = "qemu-gluster-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode (allowed values: off, full)" + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } } -} +}; + +static QemuOptsList runtime_opts = { + .name = "gluster", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = GLUSTER_OPT_FILENAME, + .type = QEMU_OPT_STRING, + .help = "URL to the gluster image", + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_json_opts = { + .name = "gluster_json", + .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head), + .desc = { + { + .name = GLUSTER_OPT_VOLUME, + .type = QEMU_OPT_STRING, + .help = "name of gluster volume where VM image resides", + }, + { + .name = GLUSTER_OPT_PATH, + .type = QEMU_OPT_STRING, + .help = "absolute path to image file in gluster volume", + }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_type_opts = { + .name = "gluster_type", + .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head), + .desc = { + { + .name = GLUSTER_OPT_TYPE, + .type = QEMU_OPT_STRING, + .help = "tcp|unix", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_unix_opts = { + .name = "gluster_unix", + .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head), + .desc = { + { + .name = GLUSTER_OPT_SOCKET, + .type = QEMU_OPT_STRING, + .help = "socket file path)", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_tcp_opts = { + .name = "gluster_tcp", + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head), + .desc = { + { + .name = GLUSTER_OPT_TYPE, + .type = QEMU_OPT_STRING, + .help = "tcp|unix", + }, + { + .name = GLUSTER_OPT_HOST, + .type = QEMU_OPT_STRING, + .help = "host address (hostname/ipv4/ipv6 addresses)", + }, + { + .name = GLUSTER_OPT_PORT, + .type = QEMU_OPT_NUMBER, + .help = "port number on which glusterd is listening (default 24007)", + }, + { + .name = "to", + .type = QEMU_OPT_NUMBER, + .help = "max port number, not supported by gluster", + }, + { + .name = "ipv4", + .type = QEMU_OPT_BOOL, + .help = "ipv4 bool value, not supported by gluster", + }, + { + .name = "ipv6", + .type = QEMU_OPT_BOOL, + .help = "ipv6 bool value, not supported by gluster", + }, + { /* end of list */ } + }, +}; -static int parse_volume_options(GlusterConf *gconf, char *path) +static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path) { char *p, *q; @@ -62,31 +196,29 @@ static int parse_volume_options(GlusterConf *gconf, char *path) if (*p == '\0') { return -EINVAL; } - gconf->volname = g_strndup(q, p - q); + gconf->volume = g_strndup(q, p - q); - /* image */ + /* path */ p += strspn(p, "/"); if (*p == '\0') { return -EINVAL; } - gconf->image = g_strdup(p); + gconf->path = g_strdup(p); return 0; } /* - * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] + * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...] * * 'gluster' is the protocol. * * 'transport' specifies the transport type used to connect to gluster * management daemon (glusterd). Valid transport types are - * tcp, unix and rdma. If a transport type isn't specified, then tcp - * type is assumed. + * tcp or unix. If a transport type isn't specified, then tcp type is assumed. * - * 'server' specifies the server where the volume file specification for - * the given volume resides. This can be either hostname, ipv4 address - * or ipv6 address. ipv6 address needs to be within square brackets [ ]. - * If transport type is 'unix', then 'server' field should not be specified. + * 'host' specifies the host where the volume file specification for + * the given volume resides. This can be either hostname or ipv4 address. + * If transport type is 'unix', then 'host' field should not be specified. * The 'socket' field needs to be populated with the path to unix domain * socket. * @@ -95,23 +227,22 @@ static int parse_volume_options(GlusterConf *gconf, char *path) * default port. If the transport type is unix, then 'port' should not be * specified. * - * 'volname' is the name of the gluster volume which contains the VM image. + * 'volume' is the name of the gluster volume which contains the VM image. * - * 'image' is the path to the actual VM image that resides on gluster volume. + * 'path' is the path to the actual VM image that resides on gluster volume. * * Examples: * * file=gluster://1.2.3.4/testvol/a.img * file=gluster+tcp://1.2.3.4/testvol/a.img * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img - * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img - * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img - * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket - * file=gluster+rdma://1.2.3.4:24007/testvol/a.img */ -static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf, + const char *filename) { + GlusterServer *gsconf; URI *uri; QueryParams *qp = NULL; bool is_unix = false; @@ -122,16 +253,21 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) return -EINVAL; } + gconf->server = g_new0(GlusterServerList, 1); + gconf->server->value = gsconf = g_new0(GlusterServer, 1); + /* transport */ if (!uri->scheme || !strcmp(uri->scheme, "gluster")) { - gconf->transport = g_strdup("tcp"); + gsconf->type = GLUSTER_TRANSPORT_TCP; } else if (!strcmp(uri->scheme, "gluster+tcp")) { - gconf->transport = g_strdup("tcp"); + gsconf->type = GLUSTER_TRANSPORT_TCP; } else if (!strcmp(uri->scheme, "gluster+unix")) { - gconf->transport = g_strdup("unix"); + gsconf->type = GLUSTER_TRANSPORT_UNIX; is_unix = true; } else if (!strcmp(uri->scheme, "gluster+rdma")) { - gconf->transport = g_strdup("rdma"); + gsconf->type = GLUSTER_TRANSPORT_TCP; + error_report("Warning: rdma feature is not supported, falling " + "back to tcp"); } else { ret = -EINVAL; goto out; @@ -157,10 +293,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) ret = -EINVAL; goto out; } - gconf->server = g_strdup(qp->p[0].value); + gsconf->u.q_unix.path = g_strdup(qp->p[0].value); } else { - gconf->server = g_strdup(uri->server ? uri->server : "localhost"); - gconf->port = uri->port; + gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost"); + if (uri->port) { + gsconf->u.tcp.port = g_strdup_printf("%d", uri->port); + } else { + gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT); + } } out: @@ -171,30 +311,34 @@ out: return ret; } -static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, - Error **errp) +static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, + Error **errp) { - struct glfs *glfs = NULL; + struct glfs *glfs; int ret; int old_errno; + GlusterServerList *server; - ret = qemu_gluster_parseuri(gconf, filename); - if (ret < 0) { - error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/" - "volname/image[?socket=...]"); - errno = -ret; - goto out; - } - - glfs = glfs_new(gconf->volname); + glfs = glfs_new(gconf->volume); if (!glfs) { goto out; } - ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, - gconf->port); - if (ret < 0) { - goto out; + for (server = gconf->server; server; server = server->next) { + if (server->value->type == GLUSTER_TRANSPORT_UNIX) { + ret = glfs_set_volfile_server(glfs, + GlusterTransport_lookup[server->value->type], + server->value->u.q_unix.path, 0); + } else { + ret = glfs_set_volfile_server(glfs, + GlusterTransport_lookup[server->value->type], + server->value->u.tcp.host, + atoi(server->value->u.tcp.port)); + } + + if (ret < 0) { + goto out; + } } ret = glfs_set_logging(glfs, "-", gconf->debug_level); @@ -204,15 +348,25 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, ret = glfs_init(glfs); if (ret) { - error_setg_errno(errp, errno, - "Gluster connection failed for server=%s port=%d " - "volume=%s image=%s transport=%s", gconf->server, - gconf->port, gconf->volname, gconf->image, - gconf->transport); + error_setg(errp, "Gluster connection for volume %s, path %s failed" + " to connect", gconf->volume, gconf->path); + for (server = gconf->server; server; server = server->next) { + if (server->value->type == GLUSTER_TRANSPORT_UNIX) { + error_append_hint(errp, "hint: failed on socket %s ", + server->value->u.q_unix.path); + } else { + error_append_hint(errp, "hint: failed on host %s and port %s ", + server->value->u.tcp.host, + server->value->u.tcp.port); + } + } + + error_append_hint(errp, "Please refer to gluster logs for more info\n"); /* glfs_init sometimes doesn't set errno although docs suggest that */ - if (errno == 0) + if (errno == 0) { errno = EINVAL; + } goto out; } @@ -227,6 +381,226 @@ out: return NULL; } +static int qapi_enum_parse(const char *opt) +{ + int i; + + if (!opt) { + return GLUSTER_TRANSPORT__MAX; + } + + for (i = 0; i < GLUSTER_TRANSPORT__MAX; i++) { + if (!strcmp(opt, GlusterTransport_lookup[i])) { + return i; + } + } + + return i; +} + +/* + * Convert the json formatted command line into qapi. +*/ +static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf, + QDict *options, Error **errp) +{ + QemuOpts *opts; + GlusterServer *gsconf; + GlusterServerList *curr = NULL; + QDict *backing_options = NULL; + Error *local_err = NULL; + char *str = NULL; + const char *ptr; + size_t num_servers; + int i; + + /* create opts info from runtime_json_opts list */ + opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + goto out; + } + + num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN); + if (num_servers < 1) { + error_setg(&local_err, QERR_MISSING_PARAMETER, "server"); + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME); + goto out; + } + gconf->volume = g_strdup(ptr); + + ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH); + goto out; + } + gconf->path = g_strdup(ptr); + qemu_opts_del(opts); + + for (i = 0; i < num_servers; i++) { + str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i); + qdict_extract_subqdict(options, &backing_options, str); + + /* create opts info from runtime_type_opts list */ + opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, backing_options, &local_err); + if (local_err) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE); + gsconf = g_new0(GlusterServer, 1); + gsconf->type = qapi_enum_parse(ptr); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + + } + if (gsconf->type == GLUSTER_TRANSPORT__MAX) { + error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, + GLUSTER_OPT_TYPE, "tcp or unix"); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + qemu_opts_del(opts); + + if (gsconf->type == GLUSTER_TRANSPORT_TCP) { + /* create opts info from runtime_tcp_opts list */ + opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, backing_options, &local_err); + if (local_err) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_HOST); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.tcp.host = g_strdup(ptr); + ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_PORT); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.tcp.port = g_strdup(ptr); + + /* defend for unsupported fields in InetSocketAddress, + * i.e. @ipv4, @ipv6 and @to + */ + ptr = qemu_opt_get(opts, GLUSTER_OPT_TO); + if (ptr) { + gsconf->u.tcp.has_to = true; + } + ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4); + if (ptr) { + gsconf->u.tcp.has_ipv4 = true; + } + ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6); + if (ptr) { + gsconf->u.tcp.has_ipv6 = true; + } + if (gsconf->u.tcp.has_to) { + error_setg(&local_err, "Parameter 'to' not supported"); + goto out; + } + if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) { + error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported"); + goto out; + } + qemu_opts_del(opts); + } else { + /* create opts info from runtime_unix_opts list */ + opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, backing_options, &local_err); + if (local_err) { + goto out; + } + + ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET); + if (!ptr) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + GLUSTER_OPT_SOCKET); + error_append_hint(&local_err, GERR_INDEX_HINT, i); + goto out; + } + gsconf->u.q_unix.path = g_strdup(ptr); + qemu_opts_del(opts); + } + + if (gconf->server == NULL) { + gconf->server = g_new0(GlusterServerList, 1); + gconf->server->value = gsconf; + curr = gconf->server; + } else { + curr->next = g_new0(GlusterServerList, 1); + curr->next->value = gsconf; + curr = curr->next; + } + + qdict_del(backing_options, str); + g_free(str); + str = NULL; + } + + return 0; + +out: + error_propagate(errp, local_err); + qemu_opts_del(opts); + if (str) { + qdict_del(backing_options, str); + g_free(str); + } + errno = EINVAL; + return -errno; +} + +static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, + const char *filename, + QDict *options, Error **errp) +{ + int ret; + if (filename) { + ret = qemu_gluster_parse_uri(gconf, filename); + if (ret < 0) { + error_setg(errp, "invalid URI"); + error_append_hint(errp, "Usage: file=gluster[+transport]://" + "[host[:port]]/volume/path[?socket=...]\n"); + errno = -ret; + return NULL; + } + } else { + ret = qemu_gluster_parse_json(gconf, options, errp); + if (ret < 0) { + error_append_hint(errp, "Usage: " + "-drive driver=qcow2,file.driver=gluster," + "file.volume=testvol,file.path=/path/a.qcow2" + "[,file.debug=9],file.server.0.type=tcp," + "file.server.0.host=1.2.3.4," + "file.server.0.port=24007," + "file.server.1.transport=unix," + "file.server.1.socket=/var/run/glusterd.socket ..." + "\n"); + errno = -ret; + return NULL; + } + + } + + return qemu_gluster_glfs_init(gconf, errp); +} + static void qemu_gluster_complete_aio(void *opaque) { GlusterAIOCB *acb = (GlusterAIOCB *)opaque; @@ -255,30 +629,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) qemu_bh_schedule(acb->bh); } -#define GLUSTER_OPT_FILENAME "filename" -#define GLUSTER_OPT_DEBUG "debug" -#define GLUSTER_DEBUG_DEFAULT 4 -#define GLUSTER_DEBUG_MAX 9 - -/* TODO Convert to fine grained options */ -static QemuOptsList runtime_opts = { - .name = "gluster", - .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = GLUSTER_OPT_FILENAME, - .type = QEMU_OPT_STRING, - .help = "URL to the gluster image", - }, - { - .name = GLUSTER_OPT_DEBUG, - .type = QEMU_OPT_NUMBER, - .help = "Gluster log level, valid range is 0-9", - }, - { /* end of list */ } - }, -}; - static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) { assert(open_flags != NULL); @@ -324,7 +674,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, BDRVGlusterState *s = bs->opaque; int open_flags = 0; int ret = 0; - GlusterConf *gconf = g_new0(GlusterConf, 1); + BlockdevOptionsGluster *gconf = NULL; QemuOpts *opts; Error *local_err = NULL; const char *filename; @@ -347,8 +697,10 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, s->debug_level = GLUSTER_DEBUG_MAX; } + gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = s->debug_level; - s->glfs = qemu_gluster_init(gconf, filename, errp); + gconf->has_debug_level = true; + s->glfs = qemu_gluster_init(gconf, filename, options, errp); if (!s->glfs) { ret = -errno; goto out; @@ -373,7 +725,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, qemu_gluster_parse_flags(bdrv_flags, &open_flags); - s->fd = glfs_open(s->glfs, gconf->image, open_flags); + s->fd = glfs_open(s->glfs, gconf->path, open_flags); if (!s->fd) { ret = -errno; } @@ -382,7 +734,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, out: qemu_opts_del(opts); - qemu_gluster_gconf_free(gconf); + qapi_free_BlockdevOptionsGluster(gconf); if (!ret) { return ret; } @@ -395,19 +747,13 @@ out: return ret; } -typedef struct BDRVGlusterReopenState { - struct glfs *glfs; - struct glfs_fd *fd; -} BDRVGlusterReopenState; - - static int qemu_gluster_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { int ret = 0; BDRVGlusterState *s; BDRVGlusterReopenState *reop_s; - GlusterConf *gconf = NULL; + BlockdevOptionsGluster *gconf; int open_flags = 0; assert(state != NULL); @@ -420,10 +766,10 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, qemu_gluster_parse_flags(state->flags, &open_flags); - gconf = g_new0(GlusterConf, 1); - + gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = s->debug_level; - reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp); + gconf->has_debug_level = true; + reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp); if (reop_s->glfs == NULL) { ret = -errno; goto exit; @@ -439,7 +785,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, } #endif - reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags); + reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags); if (reop_s->fd == NULL) { /* reops->glfs will be cleaned up in _abort */ ret = -errno; @@ -448,7 +794,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, exit: /* state->opaque will be freed in either the _abort or _commit */ - qemu_gluster_gconf_free(gconf); + qapi_free_BlockdevOptionsGluster(gconf); return ret; } @@ -501,7 +847,9 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state) #ifdef CONFIG_GLUSTERFS_ZEROFILL static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int size, BdrvRequestFlags flags) + int64_t offset, + int size, + BdrvRequestFlags flags) { int ret; GlusterAIOCB acb; @@ -527,7 +875,7 @@ static inline bool gluster_supports_zerofill(void) } static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, - int64_t size) + int64_t size) { return glfs_zerofill(fd, offset, size); } @@ -539,7 +887,7 @@ static inline bool gluster_supports_zerofill(void) } static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, - int64_t size) + int64_t size) { return 0; } @@ -548,14 +896,15 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, static int qemu_gluster_create(const char *filename, QemuOpts *opts, Error **errp) { + BlockdevOptionsGluster *gconf; struct glfs *glfs; struct glfs_fd *fd; int ret = 0; int prealloc = 0; int64_t total_size = 0; char *tmp = NULL; - GlusterConf *gconf = g_new0(GlusterConf, 1); + gconf = g_new0(BlockdevOptionsGluster, 1); gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, GLUSTER_DEBUG_DEFAULT); if (gconf->debug_level < 0) { @@ -563,8 +912,9 @@ static int qemu_gluster_create(const char *filename, } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) { gconf->debug_level = GLUSTER_DEBUG_MAX; } + gconf->has_debug_level = true; - glfs = qemu_gluster_init(gconf, filename, errp); + glfs = qemu_gluster_init(gconf, filename, NULL, errp); if (!glfs) { ret = -errno; goto out; @@ -576,19 +926,17 @@ static int qemu_gluster_create(const char *filename, tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); if (!tmp || !strcmp(tmp, "off")) { prealloc = 0; - } else if (!strcmp(tmp, "full") && - gluster_supports_zerofill()) { + } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) { prealloc = 1; } else { error_setg(errp, "Invalid preallocation mode: '%s'" - " or GlusterFS doesn't support zerofill API", - tmp); + " or GlusterFS doesn't support zerofill API", tmp); ret = -EINVAL; goto out; } - fd = glfs_creat(glfs, gconf->image, - O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + fd = glfs_creat(glfs, gconf->path, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); if (!fd) { ret = -errno; } else { @@ -606,7 +954,7 @@ static int qemu_gluster_create(const char *filename, } out: g_free(tmp); - qemu_gluster_gconf_free(gconf); + qapi_free_BlockdevOptionsGluster(gconf); if (glfs) { glfs_fini(glfs); } @@ -614,7 +962,8 @@ out: } static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov, int write) { int ret; GlusterAIOCB acb; @@ -629,10 +978,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, - gluster_finish_aiocb, &acb); + gluster_finish_aiocb, &acb); } else { ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, - gluster_finish_aiocb, &acb); + gluster_finish_aiocb, &acb); } if (ret < 0) { @@ -657,13 +1006,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) } static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov) { return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0); } static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, + int nb_sectors, + QEMUIOVector *qiov) { return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); } @@ -724,14 +1077,12 @@ error: } #ifdef CONFIG_GLUSTERFS_DISCARD -static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, + int64_t offset, int size) { int ret; GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; - size_t size = nb_sectors * BDRV_SECTOR_SIZE; - off_t offset = sector_num * BDRV_SECTOR_SIZE; acb.size = 0; acb.ret = 0; @@ -934,34 +1285,11 @@ static int64_t coroutine_fn qemu_gluster_co_get_block_status( } -static QemuOptsList qemu_gluster_create_opts = { - .name = "qemu-gluster-create-opts", - .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Virtual disk size" - }, - { - .name = BLOCK_OPT_PREALLOC, - .type = QEMU_OPT_STRING, - .help = "Preallocation mode (allowed values: off, full)" - }, - { - .name = GLUSTER_OPT_DEBUG, - .type = QEMU_OPT_NUMBER, - .help = "Gluster log level, valid range is 0-9", - }, - { /* end of list */ } - } -}; - static BlockDriver bdrv_gluster = { .format_name = "gluster", .protocol_name = "gluster", .instance_size = sizeof(BDRVGlusterState), - .bdrv_needs_filename = true, + .bdrv_needs_filename = false, .bdrv_file_open = qemu_gluster_open, .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, .bdrv_reopen_commit = qemu_gluster_reopen_commit, @@ -976,7 +1304,7 @@ static BlockDriver bdrv_gluster = { .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_co_discard = qemu_gluster_co_discard, + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, @@ -989,7 +1317,7 @@ static BlockDriver bdrv_gluster_tcp = { .format_name = "gluster", .protocol_name = "gluster+tcp", .instance_size = sizeof(BDRVGlusterState), - .bdrv_needs_filename = true, + .bdrv_needs_filename = false, .bdrv_file_open = qemu_gluster_open, .bdrv_reopen_prepare = qemu_gluster_reopen_prepare, .bdrv_reopen_commit = qemu_gluster_reopen_commit, @@ -1004,7 +1332,7 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_co_discard = qemu_gluster_co_discard, + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, @@ -1032,7 +1360,7 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_co_discard = qemu_gluster_co_discard, + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, @@ -1041,6 +1369,12 @@ static BlockDriver bdrv_gluster_unix = { .create_opts = &qemu_gluster_create_opts, }; +/* rdma is deprecated (actually never supported for volfile fetch). + * Let's maintain it for the protocol compatibility, to make sure things + * won't break immediately. For now, gluster+rdma will fall back to gluster+tcp + * protocol with a warning. + * TODO: remove gluster+rdma interface support + */ static BlockDriver bdrv_gluster_rdma = { .format_name = "gluster", .protocol_name = "gluster+rdma", @@ -1060,7 +1394,7 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_co_discard = qemu_gluster_co_discard, + .bdrv_co_pdiscard = qemu_gluster_co_pdiscard, #endif #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, diff --git a/block/io.c b/block/io.c index cfda7148d8..7323f0fb7b 100644 --- a/block/io.c +++ b/block/io.c @@ -33,14 +33,13 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BdrvRequestFlags flags, - BlockCompletionFunc *cb, - void *opaque, - bool is_write); +static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, + int64_t offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags, + BlockCompletionFunc *cb, + void *opaque, + bool is_write); static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int count, BdrvRequestFlags flags); @@ -971,21 +970,25 @@ err: /* * Forwards an already correctly aligned request to the BlockDriver. This - * handles copy on read and zeroing after EOF; any other features must be - * implemented by the caller. + * handles copy on read, zeroing after EOF, and fragmentation of large + * reads; any other features must be implemented by the caller. */ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { int64_t total_bytes, max_bytes; - int ret; + int ret = 0; + uint64_t bytes_remaining = bytes; + int max_transfer; assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); + max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), + align); /* TODO: We would need a per-BDS .supported_read_flags and * potential fallback support, if we ever implement any read flags @@ -1024,7 +1027,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } } - /* Forward the request to the BlockDriver */ + /* Forward the request to the BlockDriver, possibly fragmenting it */ total_bytes = bdrv_getlength(bs); if (total_bytes < 0) { ret = total_bytes; @@ -1032,30 +1035,39 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); - if (bytes <= max_bytes) { + if (bytes <= max_bytes && bytes <= max_transfer) { ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else if (max_bytes > 0) { - QEMUIOVector local_qiov; + goto out; + } - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes); + while (bytes_remaining) { + int num; - ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0); + if (max_bytes) { + QEMUIOVector local_qiov; - qemu_iovec_destroy(&local_qiov); - } else { - ret = 0; - } + num = MIN(bytes_remaining, MIN(max_bytes, max_transfer)); + assert(num); + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); - /* Reading beyond end of file is supposed to produce zeroes */ - if (ret == 0 && total_bytes < offset + bytes) { - uint64_t zero_offset = MAX(0, total_bytes - offset); - uint64_t zero_bytes = offset + bytes - zero_offset; - qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes); + ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining, + num, &local_qiov, 0); + max_bytes -= num; + qemu_iovec_destroy(&local_qiov); + } else { + num = bytes_remaining; + ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0, + bytes_remaining); + } + if (ret < 0) { + goto out; + } + bytes_remaining -= num; } out: - return ret; + return ret < 0 ? ret : 0; } /* @@ -1256,7 +1268,8 @@ fail: } /* - * Forwards an already correctly aligned write request to the BlockDriver. + * Forwards an already correctly aligned write request to the BlockDriver, + * after possibly fragmenting it. */ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, @@ -1268,6 +1281,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, int64_t start_sector = offset >> BDRV_SECTOR_BITS; int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + uint64_t bytes_remaining = bytes; + int max_transfer; assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); @@ -1275,6 +1290,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); assert(!(flags & ~BDRV_REQ_MASK)); + max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), + align); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); @@ -1297,9 +1314,34 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, } else if (flags & BDRV_REQ_ZERO_WRITE) { bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); - } else { + } else if (bytes <= max_transfer) { bdrv_debug_event(bs, BLKDBG_PWRITEV); ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); + } else { + bdrv_debug_event(bs, BLKDBG_PWRITEV); + while (bytes_remaining) { + int num = MIN(bytes_remaining, max_transfer); + QEMUIOVector local_qiov; + int local_flags = flags; + + assert(num); + if (num < bytes_remaining && (flags & BDRV_REQ_FUA) && + !(bs->supported_write_flags & BDRV_REQ_FUA)) { + /* If FUA is going to be emulated by flush, we only + * need to flush on the last iteration */ + local_flags &= ~BDRV_REQ_FUA; + } + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); + + ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining, + num, &local_qiov, local_flags); + qemu_iovec_destroy(&local_qiov); + if (ret < 0) { + break; + } + bytes_remaining -= num; + } } bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); @@ -1312,6 +1354,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, if (ret >= 0) { bs->total_sectors = MAX(bs->total_sectors, end_sector); + ret = 0; } return ret; @@ -1971,8 +2014,9 @@ BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num, { trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque); - return bdrv_co_aio_rw_vector(child, sector_num, qiov, nb_sectors, 0, - cb, opaque, false); + assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size); + return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov, + 0, cb, opaque, false); } BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num, @@ -1981,8 +2025,9 @@ BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num, { trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque); - return bdrv_co_aio_rw_vector(child, sector_num, qiov, nb_sectors, 0, - cb, opaque, true); + assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size); + return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov, + 0, cb, opaque, true); } void bdrv_aio_cancel(BlockAIOCB *acb) @@ -2018,8 +2063,8 @@ typedef struct BlockRequest { union { /* Used during read, write, trim */ struct { - int64_t sector; - int nb_sectors; + int64_t offset; + int bytes; int flags; QEMUIOVector *qiov; }; @@ -2083,24 +2128,23 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) BlockAIOCBCoroutine *acb = opaque; if (!acb->is_write) { - acb->req.error = bdrv_co_do_readv(acb->child, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov, acb->req.flags); + acb->req.error = bdrv_co_preadv(acb->child, acb->req.offset, + acb->req.qiov->size, acb->req.qiov, acb->req.flags); } else { - acb->req.error = bdrv_co_do_writev(acb->child, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov, acb->req.flags); + acb->req.error = bdrv_co_pwritev(acb->child, acb->req.offset, + acb->req.qiov->size, acb->req.qiov, acb->req.flags); } bdrv_co_complete(acb); } -static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BdrvRequestFlags flags, - BlockCompletionFunc *cb, - void *opaque, - bool is_write) +static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, + int64_t offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags, + BlockCompletionFunc *cb, + void *opaque, + bool is_write) { Coroutine *co; BlockAIOCBCoroutine *acb; @@ -2109,8 +2153,7 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child, acb->child = child; acb->need_bh = true; acb->req.error = -EINPROGRESS; - acb->req.sector = sector_num; - acb->req.nb_sectors = nb_sectors; + acb->req.offset = offset; acb->req.qiov = qiov; acb->req.flags = flags; acb->is_write = is_write; @@ -2150,30 +2193,29 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, return &acb->common; } -static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) +static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque) { BlockAIOCBCoroutine *acb = opaque; BlockDriverState *bs = acb->common.bs; - acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); + acb->req.error = bdrv_co_pdiscard(bs, acb->req.offset, acb->req.bytes); bdrv_co_complete(acb); } -BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) +BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count, + BlockCompletionFunc *cb, void *opaque) { Coroutine *co; BlockAIOCBCoroutine *acb; - trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); + trace_bdrv_aio_pdiscard(bs, offset, count, opaque); acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; - acb->req.sector = sector_num; - acb->req.nb_sectors = nb_sectors; - co = qemu_coroutine_create(bdrv_aio_discard_co_entry, acb); + acb->req.offset = offset; + acb->req.bytes = count; + co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb); qemu_coroutine_enter(co); bdrv_co_maybe_schedule_bh(acb); @@ -2346,28 +2388,29 @@ int bdrv_flush(BlockDriverState *bs) typedef struct DiscardCo { BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; + int64_t offset; + int count; int ret; } DiscardCo; -static void coroutine_fn bdrv_discard_co_entry(void *opaque) +static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) { DiscardCo *rwco = opaque; - rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); + rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count); } -int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, + int count) { BdrvTrackedRequest req; - int max_discard, ret; + int max_pdiscard, ret; + int head, align; if (!bs->drv) { return -ENOMEDIUM; } - ret = bdrv_check_request(bs, sector_num, nb_sectors); + ret = bdrv_check_byte_request(bs, offset, count); if (ret < 0) { return ret; } else if (bs->read_only) { @@ -2380,50 +2423,47 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return 0; } - if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) { + if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) { + return 0; + } + + /* Discard is advisory, so ignore any unaligned head or tail */ + align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); + assert(is_power_of_2(align)); + head = MIN(count, -offset & (align - 1)); + if (head) { + count -= head; + offset += head; + } + count = QEMU_ALIGN_DOWN(count, align); + if (!count) { return 0; } - tracked_request_begin(&req, bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, BDRV_TRACKED_DISCARD); + tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); if (ret < 0) { goto out; } - max_discard = MIN_NON_ZERO(bs->bl.max_pdiscard >> BDRV_SECTOR_BITS, - BDRV_REQUEST_MAX_SECTORS); - while (nb_sectors > 0) { - int ret; - int num = nb_sectors; - int discard_alignment = bs->bl.pdiscard_alignment >> BDRV_SECTOR_BITS; - - /* align request */ - if (discard_alignment && - num >= discard_alignment && - sector_num % discard_alignment) { - if (num > discard_alignment) { - num = discard_alignment; - } - num -= sector_num % discard_alignment; - } + max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), + align); - /* limit request size */ - if (num > max_discard) { - num = max_discard; - } + while (count > 0) { + int ret; + int num = MIN(count, max_pdiscard); - if (bs->drv->bdrv_co_discard) { - ret = bs->drv->bdrv_co_discard(bs, sector_num, num); + if (bs->drv->bdrv_co_pdiscard) { + ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); } else { BlockAIOCB *acb; CoroutineIOCompletion co = { .coroutine = qemu_coroutine_self(), }; - acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, - bdrv_co_io_em_complete, &co); + acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num, + bdrv_co_io_em_complete, &co); if (acb == NULL) { ret = -EIO; goto out; @@ -2436,8 +2476,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, goto out; } - sector_num += num; - nb_sectors -= num; + offset += num; + count -= num; } ret = 0; out: @@ -2448,23 +2488,23 @@ out: return ret; } -int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) +int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count) { Coroutine *co; DiscardCo rwco = { .bs = bs, - .sector_num = sector_num, - .nb_sectors = nb_sectors, + .offset = offset, + .count = count, .ret = NOT_DONE, }; if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ - bdrv_discard_co_entry(&rwco); + bdrv_pdiscard_co_entry(&rwco); } else { AioContext *aio_context = bdrv_get_aio_context(bs); - co = qemu_coroutine_create(bdrv_discard_co_entry, &rwco); + co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco); qemu_coroutine_enter(co); while (rwco.ret == NOT_DONE) { aio_poll(aio_context, true); diff --git a/block/iscsi.c b/block/iscsi.c index 129c3afa68..95ce9e139e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -586,11 +586,8 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, return -EINVAL; } - if (bs->bl.max_transfer && - nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) { - error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len " - "of %" PRIu32 " bytes", nb_sectors, bs->bl.max_transfer); - return -EINVAL; + if (bs->bl.max_transfer) { + assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer); } lba = sector_qemu2lun(sector_num, iscsilun); @@ -754,11 +751,8 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, return -EINVAL; } - if (bs->bl.max_transfer && - nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) { - error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len " - "of %" PRIu32 " bytes", nb_sectors, bs->bl.max_transfer); - return -EINVAL; + if (bs->bl.max_transfer) { + assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer); } /* if cache.direct is off and we have a valid entry in our allocation map @@ -1048,29 +1042,26 @@ iscsi_getlength(BlockDriverState *bs) } static int -coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; struct unmap_list list; - if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { - return -EINVAL; - } + assert(is_byte_request_lun_aligned(offset, count, iscsilun)); if (!iscsilun->lbp.lbpu) { /* UNMAP is not supported by the target */ return 0; } - list.lba = sector_qemu2lun(sector_num, iscsilun); - list.num = sector_qemu2lun(nb_sectors, iscsilun); + list.lba = offset / iscsilun->block_size; + list.num = count / iscsilun->block_size; iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, - iscsi_co_generic_cb, &iTask) == NULL) { + iscsi_co_generic_cb, &iTask) == NULL) { return -ENOMEM; } @@ -1100,7 +1091,8 @@ retry: return iTask.err_code; } - iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors); + iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, + count >> BDRV_SECTOR_BITS); return 0; } @@ -2004,7 +1996,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_refresh_limits = iscsi_refresh_limits, .bdrv_co_get_block_status = iscsi_co_get_block_status, - .bdrv_co_discard = iscsi_co_discard, + .bdrv_co_pdiscard = iscsi_co_pdiscard, .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, .bdrv_co_readv = iscsi_co_readv, .bdrv_co_writev_flags = iscsi_co_writev_flags, diff --git a/block/mirror.c b/block/mirror.c index 9ae11e5276..69a1a7cc96 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -58,9 +58,10 @@ typedef struct MirrorBlockJob { QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; int buf_free_count; + uint64_t last_pause_ns; unsigned long *in_flight_bitmap; int in_flight; - int sectors_in_flight; + int64_t sectors_in_flight; int ret; bool unmap; bool waiting_for_io; @@ -303,8 +304,9 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, s->in_flight++; s->sectors_in_flight += nb_sectors; if (is_discard) { - blk_aio_discard(s->target, sector_num, op->nb_sectors, - mirror_write_complete, op); + blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS, + op->nb_sectors << BDRV_SECTOR_BITS, + mirror_write_complete, op); } else { blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE, op->nb_sectors * BDRV_SECTOR_SIZE, @@ -322,6 +324,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int nb_chunks = 1; int64_t end = s->bdev_length / BDRV_SECTOR_SIZE; int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); sector_num = hbitmap_iter_next(&s->hbi); if (sector_num < 0) { @@ -372,7 +375,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); while (nb_chunks > 0 && sector_num < end) { int ret; - int io_sectors; + int io_sectors, io_sectors_acct; BlockDriverState *file; enum MirrorMethod { MIRROR_METHOD_COPY, @@ -405,16 +408,26 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } } + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + mirror_wait_for_io(s); + } + mirror_clip_sectors(s, sector_num, &io_sectors); switch (mirror_method) { case MIRROR_METHOD_COPY: io_sectors = mirror_do_read(s, sector_num, io_sectors); + io_sectors_acct = io_sectors; break; case MIRROR_METHOD_ZERO: - mirror_do_zero_or_discard(s, sector_num, io_sectors, false); - break; case MIRROR_METHOD_DISCARD: - mirror_do_zero_or_discard(s, sector_num, io_sectors, true); + mirror_do_zero_or_discard(s, sector_num, io_sectors, + mirror_method == MIRROR_METHOD_DISCARD); + if (write_zeroes_ok) { + io_sectors_acct = 0; + } else { + io_sectors_acct = io_sectors; + } break; default: abort(); @@ -423,7 +436,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) sector_num += io_sectors; nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk); if (s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors); + delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct); } } return delay_ns; @@ -511,19 +524,94 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_unref(src); } +static void mirror_throttle(MirrorBlockJob *s) +{ + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + if (now - s->last_pause_ns > SLICE_TIME) { + s->last_pause_ns = now; + block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); + } else { + block_job_pause_point(&s->common); + } +} + +static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) +{ + int64_t sector_num, end; + BlockDriverState *base = s->base; + BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *target_bs = blk_bs(s->target); + int ret, n; + + end = s->bdev_length / BDRV_SECTOR_SIZE; + + if (base == NULL && !bdrv_has_zero_init(target_bs)) { + if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end); + return 0; + } + + for (sector_num = 0; sector_num < end; ) { + int nb_sectors = MIN(end - sector_num, + QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS); + + mirror_throttle(s); + + if (block_job_is_cancelled(&s->common)) { + return 0; + } + + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1); + mirror_wait_for_io(s); + continue; + } + + mirror_do_zero_or_discard(s, sector_num, nb_sectors, false); + sector_num += nb_sectors; + } + + mirror_drain(s); + } + + /* First part, loop on the sectors and initialize the dirty bitmap. */ + for (sector_num = 0; sector_num < end; ) { + /* Just to make sure we are not exceeding int limit. */ + int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, + end - sector_num); + + mirror_throttle(s); + + if (block_job_is_cancelled(&s->common)) { + return 0; + } + + ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n); + if (ret < 0) { + return ret; + } + + assert(n > 0); + if (ret == 1) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); + } + sector_num += n; + } + return 0; +} + static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; BlockDriverState *bs = blk_bs(s->common.blk); BlockDriverState *target_bs = blk_bs(s->target); - int64_t sector_num, end, length; - uint64_t last_pause_ns; + int64_t length; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ int ret = 0; - int n; int target_cluster_size = BDRV_SECTOR_SIZE; if (block_job_is_cancelled(&s->common)) { @@ -565,7 +653,6 @@ static void coroutine_fn mirror_run(void *opaque) s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS; s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov); - end = s->bdev_length / BDRV_SECTOR_SIZE; s->buf = qemu_try_blockalign(bs, s->buf_size); if (s->buf == NULL) { ret = -ENOMEM; @@ -574,47 +661,18 @@ static void coroutine_fn mirror_run(void *opaque) mirror_free_init(s); - last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); if (!s->is_none_mode) { - /* First part, loop on the sectors and initialize the dirty bitmap. */ - BlockDriverState *base = s->base; - bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs); - - for (sector_num = 0; sector_num < end; ) { - /* Just to make sure we are not exceeding int limit. */ - int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, - end - sector_num); - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - - if (now - last_pause_ns > SLICE_TIME) { - last_pause_ns = now; - block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0); - } else { - block_job_pause_point(&s->common); - } - - if (block_job_is_cancelled(&s->common)) { - goto immediate_exit; - } - - ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n); - - if (ret < 0) { - goto immediate_exit; - } - - assert(n > 0); - if (ret == 1 || mark_all_dirty) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); - } - sector_num += n; + ret = mirror_dirty_init(s); + if (ret < 0 || block_job_is_cancelled(&s->common)) { + goto immediate_exit; } } bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); for (;;) { uint64_t delay_ns = 0; - int64_t cnt; + int64_t cnt, delta; bool should_complete; if (s->ret < 0) { @@ -637,9 +695,10 @@ static void coroutine_fn mirror_run(void *opaque) * We do so every SLICE_TIME nanoseconds, or when there is an error, * or when the source is clean, whichever comes first. */ - if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME && + delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns; + if (delta < SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { - if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); mirror_wait_for_io(s); @@ -707,7 +766,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.cancelled = false; break; } - last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } immediate_exit: diff --git a/block/nbd-client.c b/block/nbd-client.c index 4cc408d206..2cf3237ef3 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -116,7 +116,7 @@ static void nbd_restart_write(void *opaque) static int nbd_co_send_request(BlockDriverState *bs, struct nbd_request *request, - QEMUIOVector *qiov, int offset) + QEMUIOVector *qiov) { NbdClientSession *s = nbd_get_client_session(bs); AioContext *aio_context; @@ -149,8 +149,8 @@ static int nbd_co_send_request(BlockDriverState *bs, qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); if (rc >= 0) { - ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, - offset, request->len, 0); + ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, + false); if (ret != request->len) { rc = -EIO; } @@ -167,8 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs, } static void nbd_co_receive_reply(NbdClientSession *s, - struct nbd_request *request, struct nbd_reply *reply, - QEMUIOVector *qiov, int offset) + struct nbd_request *request, + struct nbd_reply *reply, + QEMUIOVector *qiov) { int ret; @@ -181,8 +182,8 @@ static void nbd_co_receive_reply(NbdClientSession *s, reply->error = EIO; } else { if (qiov && reply->error == 0) { - ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, - offset, request->len, 1); + ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, + true); if (ret != request->len) { reply->error = EIO; } @@ -217,36 +218,41 @@ static void nbd_coroutine_end(NbdClientSession *s, } } -static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) +int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) { NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { .type = NBD_CMD_READ }; + struct nbd_request request = { + .type = NBD_CMD_READ, + .from = offset, + .len = bytes, + }; struct nbd_reply reply; ssize_t ret; - request.from = sector_num * 512; - request.len = nb_sectors * 512; + assert(bytes <= NBD_MAX_BUFFER_SIZE); + assert(!flags); nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(bs, &request, NULL, 0); + ret = nbd_co_send_request(bs, &request, NULL); if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(client, &request, &reply, qiov, offset); + nbd_co_receive_reply(client, &request, &reply, qiov); } nbd_coroutine_end(client, &request); return -reply.error; - } -static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset, int flags) +int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) { NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { .type = NBD_CMD_WRITE }; + struct nbd_request request = { + .type = NBD_CMD_WRITE, + .from = offset, + .len = bytes, + }; struct nbd_reply reply; ssize_t ret; @@ -255,55 +261,19 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, request.type |= NBD_CMD_FLAG_FUA; } - request.from = sector_num * 512; - request.len = nb_sectors * 512; + assert(bytes <= NBD_MAX_BUFFER_SIZE); nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(bs, &request, qiov, offset); + ret = nbd_co_send_request(bs, &request, qiov); if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(client, &request, &reply, NULL, 0); + nbd_co_receive_reply(client, &request, &reply, NULL); } nbd_coroutine_end(client, &request); return -reply.error; } -int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - int offset = 0; - int ret; - while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); - if (ret < 0) { - return ret; - } - offset += NBD_MAX_SECTORS * 512; - sector_num += NBD_MAX_SECTORS; - nb_sectors -= NBD_MAX_SECTORS; - } - return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset); -} - -int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, int flags) -{ - int offset = 0; - int ret; - while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset, - flags); - if (ret < 0) { - return ret; - } - offset += NBD_MAX_SECTORS * 512; - sector_num += NBD_MAX_SECTORS; - nb_sectors -= NBD_MAX_SECTORS; - } - return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags); -} - int nbd_client_co_flush(BlockDriverState *bs) { NbdClientSession *client = nbd_get_client_session(bs); @@ -319,36 +289,37 @@ int nbd_client_co_flush(BlockDriverState *bs) request.len = 0; nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(bs, &request, NULL, 0); + ret = nbd_co_send_request(bs, &request, NULL); if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(client, &request, &reply, NULL, 0); + nbd_co_receive_reply(client, &request, &reply, NULL); } nbd_coroutine_end(client, &request); return -reply.error; } -int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { .type = NBD_CMD_TRIM }; + struct nbd_request request = { + .type = NBD_CMD_TRIM, + .from = offset, + .len = count, + }; struct nbd_reply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) { return 0; } - request.from = sector_num * 512; - request.len = nb_sectors * 512; nbd_coroutine_start(client, &request); - ret = nbd_co_send_request(bs, &request, NULL, 0); + ret = nbd_co_send_request(bs, &request, NULL); if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(client, &request, &reply, NULL, 0); + nbd_co_receive_reply(client, &request, &reply, NULL); } nbd_coroutine_end(client, &request); return -reply.error; diff --git a/block/nbd-client.h b/block/nbd-client.h index c618dadc39..fa9817b7d7 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -44,13 +44,12 @@ int nbd_client_init(BlockDriverState *bs, Error **errp); void nbd_client_close(BlockDriverState *bs); -int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors); +int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); -int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, int flags); -int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); +int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags); void nbd_client_detach_aio_context(BlockDriverState *bs); void nbd_client_attach_aio_context(BlockDriverState *bs, diff --git a/block/nbd.c b/block/nbd.c index 08e5b67b2f..8d57220f18 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -349,12 +349,6 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, return ret; } -static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov); -} - static int nbd_co_flush(BlockDriverState *bs) { return nbd_client_co_flush(bs); @@ -366,12 +360,6 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } -static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) -{ - return nbd_client_co_discard(bs, sector_num, nb_sectors); -} - static void nbd_close(BlockDriverState *bs) { nbd_client_close(bs); @@ -450,11 +438,11 @@ static BlockDriver bdrv_nbd = { .instance_size = sizeof(BDRVNBDState), .bdrv_parse_filename = nbd_parse_filename, .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev_flags = nbd_client_co_writev, + .bdrv_co_preadv = nbd_client_co_preadv, + .bdrv_co_pwritev = nbd_client_co_pwritev, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, - .bdrv_co_discard = nbd_co_discard, + .bdrv_co_pdiscard = nbd_client_co_pdiscard, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_detach_aio_context, @@ -468,11 +456,11 @@ static BlockDriver bdrv_nbd_tcp = { .instance_size = sizeof(BDRVNBDState), .bdrv_parse_filename = nbd_parse_filename, .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev_flags = nbd_client_co_writev, + .bdrv_co_preadv = nbd_client_co_preadv, + .bdrv_co_pwritev = nbd_client_co_pwritev, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, - .bdrv_co_discard = nbd_co_discard, + .bdrv_co_pdiscard = nbd_client_co_pdiscard, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_detach_aio_context, @@ -486,11 +474,11 @@ static BlockDriver bdrv_nbd_unix = { .instance_size = sizeof(BDRVNBDState), .bdrv_parse_filename = nbd_parse_filename, .bdrv_file_open = nbd_open, - .bdrv_co_readv = nbd_co_readv, - .bdrv_co_writev_flags = nbd_client_co_writev, + .bdrv_co_preadv = nbd_client_co_preadv, + .bdrv_co_pwritev = nbd_client_co_pwritev, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, - .bdrv_co_discard = nbd_co_discard, + .bdrv_co_pdiscard = nbd_client_co_pdiscard, .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_getlength = nbd_getlength, .bdrv_detach_aio_context = nbd_detach_aio_context, diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 49b6ce6bfd..cbfb3fe064 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -615,9 +615,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret) /* Discard is optional, ignore the return value */ if (ret >= 0) { - bdrv_discard(bs->file->bs, - d->offset >> BDRV_SECTOR_BITS, - d->bytes >> BDRV_SECTOR_BITS); + bdrv_pdiscard(bs->file->bs, d->offset, d->bytes); } g_free(d); diff --git a/block/qcow2.c b/block/qcow2.c index a6bca735e5..d620d0a85b 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2479,15 +2479,15 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, return ret; } -static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, + int64_t offset, int count) { int ret; BDRVQcow2State *s = bs->opaque; qemu_co_mutex_lock(&s->lock); - ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors, QCOW2_DISCARD_REQUEST, false); + ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS, + QCOW2_DISCARD_REQUEST, false); qemu_co_mutex_unlock(&s->lock); return ret; } @@ -3410,7 +3410,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_flush_to_os = qcow2_co_flush_to_os, .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, - .bdrv_co_discard = qcow2_co_discard, + .bdrv_co_pdiscard = qcow2_co_pdiscard, .bdrv_truncate = qcow2_truncate, .bdrv_write_compressed = qcow2_write_compressed, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/raw-posix.c b/block/raw-posix.c index 20f4d7aa8d..6ed7547392 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1214,7 +1214,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd, } static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + int64_t offset, QEMUIOVector *qiov, int count, BlockCompletionFunc *cb, void *opaque, int type) { RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); @@ -1224,8 +1224,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; - acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE; - acb->aio_offset = sector_num * BDRV_SECTOR_SIZE; + acb->aio_nbytes = count; + acb->aio_offset = offset; if (qiov) { acb->aio_iov = qiov->iov; @@ -1233,7 +1233,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, assert(qiov->size == acb->aio_nbytes); } - trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); + trace_paio_submit(acb, opaque, offset, count, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } @@ -1786,13 +1786,13 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, return ret | BDRV_BLOCK_OFFSET_VALID | start; } -static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, +static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, + int64_t offset, int count, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + return paio_submit(bs, s->fd, offset, NULL, count, cb, opaque, QEMU_AIO_DISCARD); } @@ -1864,7 +1864,7 @@ BlockDriver bdrv_file = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, .bdrv_aio_flush = raw_aio_flush, - .bdrv_aio_discard = raw_aio_discard, + .bdrv_aio_pdiscard = raw_aio_pdiscard, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, @@ -2203,8 +2203,8 @@ static int fd_open(BlockDriverState *bs) return -EIO; } -static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, +static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs, + int64_t offset, int count, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; @@ -2212,7 +2212,7 @@ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs, if (fd_open(bs) < 0) { return NULL; } - return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + return paio_submit(bs, s->fd, offset, NULL, count, cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); } @@ -2307,7 +2307,7 @@ static BlockDriver bdrv_host_device = { .bdrv_co_preadv = raw_co_preadv, .bdrv_co_pwritev = raw_co_pwritev, .bdrv_aio_flush = raw_aio_flush, - .bdrv_aio_discard = hdev_aio_discard, + .bdrv_aio_pdiscard = hdev_aio_pdiscard, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, diff --git a/block/raw-win32.c b/block/raw-win32.c index 9b813d99ae..56f45fea9e 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -142,7 +142,7 @@ static int aio_worker(void *arg) } static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + int64_t offset, QEMUIOVector *qiov, int count, BlockCompletionFunc *cb, void *opaque, int type) { RawWin32AIOData *acb = g_new(RawWin32AIOData, 1); @@ -155,11 +155,12 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, if (qiov) { acb->aio_iov = qiov->iov; acb->aio_niov = qiov->niov; + assert(qiov->size == count); } - acb->aio_nbytes = nb_sectors * 512; - acb->aio_offset = sector_num * 512; + acb->aio_nbytes = count; + acb->aio_offset = offset; - trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); + trace_paio_submit(acb, opaque, offset, count, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } @@ -378,9 +379,10 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs, BDRVRawState *s = bs->opaque; if (s->aio) { return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, - nb_sectors, cb, opaque, QEMU_AIO_READ); + nb_sectors, cb, opaque, QEMU_AIO_READ); } else { - return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov, + nb_sectors << BDRV_SECTOR_BITS, cb, opaque, QEMU_AIO_READ); } } @@ -392,9 +394,10 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs, BDRVRawState *s = bs->opaque; if (s->aio) { return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, - nb_sectors, cb, opaque, QEMU_AIO_WRITE); + nb_sectors, cb, opaque, QEMU_AIO_WRITE); } else { - return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov, + nb_sectors << BDRV_SECTOR_BITS, cb, opaque, QEMU_AIO_WRITE); } } diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 5f9dd299a6..588d4080f9 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -50,33 +50,30 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state, return 0; } -static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov); + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } -static int coroutine_fn -raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - QEMUIOVector *qiov, int flags) +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, + int flags) { void *buf = NULL; BlockDriver *drv; QEMUIOVector local_qiov; int ret; - if (bs->probed && sector_num == 0) { - /* As long as these conditions are true, we can't get partial writes to - * the probe buffer and can just directly check the request. */ + if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { + /* Handling partial writes would be a pain - so we just + * require that guests have 512-byte request alignment if + * probing occurred */ QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512); QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512); - - if (nb_sectors == 0) { - /* qemu_iovec_to_buf() would fail, but we want to return success - * instead of -EINVAL in this case. */ - return 0; - } + assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE); buf = qemu_try_blockalign(bs->file->bs, 512); if (!buf) { @@ -105,8 +102,7 @@ raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_co_pwritev(bs->file, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, qiov, flags); + ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); fail: if (qiov == &local_qiov) { @@ -134,10 +130,10 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); } -static int coroutine_fn raw_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, + int64_t offset, int count) { - return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors); + return bdrv_co_pdiscard(bs->file->bs, offset, count); } static int64_t raw_getlength(BlockDriverState *bs) @@ -150,6 +146,16 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) return bdrv_get_info(bs->file->bs, bdi); } +static void raw_refresh_limits(BlockDriverState *bs, Error **errp) +{ + if (bs->probed) { + /* To make it easier to protect the first sector, any probed + * image is restricted to read-modify-write on sub-sector + * operations. */ + bs->bl.request_alignment = BDRV_SECTOR_SIZE; + } +} + static int raw_truncate(BlockDriverState *bs, int64_t offset) { return bdrv_truncate(bs->file->bs, offset); @@ -192,8 +198,10 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { bs->sg = bs->file->bs->sg; - bs->supported_write_flags = BDRV_REQ_FUA; - bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP; + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags; if (bs->probed && !bdrv_is_read_only(bs)) { fprintf(stderr, @@ -238,15 +246,16 @@ BlockDriver bdrv_raw = { .bdrv_open = &raw_open, .bdrv_close = &raw_close, .bdrv_create = &raw_create, - .bdrv_co_readv = &raw_co_readv, - .bdrv_co_writev_flags = &raw_co_writev_flags, + .bdrv_co_preadv = &raw_co_preadv, + .bdrv_co_pwritev = &raw_co_pwritev, .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, - .bdrv_co_discard = &raw_co_discard, + .bdrv_co_pdiscard = &raw_co_pdiscard, .bdrv_co_get_block_status = &raw_co_get_block_status, .bdrv_truncate = &raw_truncate, .bdrv_getlength = &raw_getlength, .has_variable_length = true, .bdrv_get_info = &raw_get_info, + .bdrv_refresh_limits = &raw_refresh_limits, .bdrv_probe_blocksizes = &raw_probe_blocksizes, .bdrv_probe_geometry = &raw_probe_geometry, .bdrv_media_changed = &raw_media_changed, diff --git a/block/rbd.c b/block/rbd.c index 0a5840d08b..0106fea45f 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -649,9 +649,9 @@ static int rbd_aio_flush_wrapper(rbd_image_t image, } static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, - int64_t sector_num, + int64_t off, QEMUIOVector *qiov, - int nb_sectors, + int64_t size, BlockCompletionFunc *cb, void *opaque, RBDAIOCmd cmd) @@ -659,7 +659,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, RBDAIOCB *acb; RADOSCB *rcb = NULL; rbd_completion_t c; - int64_t off, size; char *buf; int r; @@ -668,6 +667,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque); acb->cmd = cmd; acb->qiov = qiov; + assert(!qiov || qiov->size == size); if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { acb->bounce = NULL; } else { @@ -687,9 +687,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, buf = acb->bounce; - off = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; - rcb = g_new(RADOSCB, 1); rcb->acb = acb; rcb->buf = buf; @@ -739,7 +736,8 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { - return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque, + return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov, + nb_sectors << BDRV_SECTOR_BITS, cb, opaque, RBD_AIO_READ); } @@ -750,7 +748,8 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { - return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque, + return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov, + nb_sectors << BDRV_SECTOR_BITS, cb, opaque, RBD_AIO_WRITE); } @@ -931,13 +930,13 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, } #ifdef LIBRBD_SUPPORTS_DISCARD -static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - BlockCompletionFunc *cb, - void *opaque) +static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs, + int64_t offset, + int count, + BlockCompletionFunc *cb, + void *opaque) { - return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque, + return rbd_start_aio(bs, offset, NULL, count, cb, opaque, RBD_AIO_DISCARD); } #endif @@ -1001,7 +1000,7 @@ static BlockDriver bdrv_rbd = { #endif #ifdef LIBRBD_SUPPORTS_DISCARD - .bdrv_aio_discard = qemu_rbd_aio_discard, + .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard, #endif .bdrv_snapshot_create = qemu_rbd_snap_create, diff --git a/block/sheepdog.c b/block/sheepdog.c index e739c56f08..66e1cb2b2d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2800,8 +2800,8 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, } -static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, + int count) { SheepdogAIOCB *acb; BDRVSheepdogState *s = bs->opaque; @@ -2811,7 +2811,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, uint32_t zero = 0; if (!s->discard_supported) { - return 0; + return 0; } memset(&discard_iov, 0, sizeof(discard_iov)); @@ -2820,7 +2820,10 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, iov.iov_len = sizeof(zero); discard_iov.iov = &iov; discard_iov.niov = 1; - acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors); + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((count & (BDRV_SECTOR_SIZE - 1)) == 0); + acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS, + count >> BDRV_SECTOR_BITS); acb->aiocb_type = AIOCB_DISCARD_OBJ; acb->aio_done_func = sd_finish_aiocb; @@ -2954,7 +2957,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_discard = sd_co_discard, + .bdrv_co_pdiscard = sd_co_pdiscard, .bdrv_co_get_block_status = sd_co_get_block_status, .bdrv_snapshot_create = sd_snapshot_create, @@ -2990,7 +2993,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_discard = sd_co_discard, + .bdrv_co_pdiscard = sd_co_pdiscard, .bdrv_co_get_block_status = sd_co_get_block_status, .bdrv_snapshot_create = sd_snapshot_create, @@ -3026,7 +3029,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_co_readv = sd_co_readv, .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_discard = sd_co_discard, + .bdrv_co_pdiscard = sd_co_pdiscard, .bdrv_co_get_block_status = sd_co_get_block_status, .bdrv_snapshot_create = sd_snapshot_create, diff --git a/block/trace-events b/block/trace-events index 354967eacb..978ef4f02a 100644 --- a/block/trace-events +++ b/block/trace-events @@ -9,7 +9,7 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x" # block/io.c -bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" +bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p" bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" @@ -58,7 +58,7 @@ qmp_block_stream(void *bs, void *job) "bs %p job %p" # block/raw-win32.c # block/raw-posix.c paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d" -paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" +paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" # block/qcow2.c qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d" |