32 files changed, 1840 insertions, 381 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index fd88c03ece..858d2b387b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -17,6 +17,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 endif
 
@@ -35,5 +36,6 @@ gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
+archipelago.o-libs := $(ARCHIPELAGO_LIBS)
 qcow.o-libs        := -lz
 linux-aio.o-libs   := -laio
diff --git a/block/archipelago.c b/block/archipelago.c
new file mode 100644
index 0000000000..6629d03a1d
--- /dev/null
+++ b/block/archipelago.c
@@ -0,0 +1,1069 @@
+/*
+ * QEMU Block driver for Archipelago
+ *
+ * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * VM Image on Archipelago volume is specified like this:
+ *
+ * file.driver=archipelago,file.volume=<volumename>
+ * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>]
+ * [,file.segment=<segment_name>]]
+ *
+ * or
+ *
+ * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][:
+ * segment=<segment_name>]]
+ *
+ * 'archipelago' is the protocol.
+ *
+ * 'mport' is the port number on which mapperd is listening. This is optional
+ * and if not specified, QEMU will make Archipelago to use the default port.
+ *
+ * 'vport' is the port number on which vlmcd is listening. This is optional
+ * and if not specified, QEMU will make Archipelago to use the default port.
+ *
+ * 'segment' is the name of the shared memory segment Archipelago stack
+ * is using. This is optional and if not specified, QEMU will make Archipelago
+ * to use the default value, 'archipelago'.
+ *
+ * Examples:
+ *
+ * file.driver=archipelago,file.volume=my_vm_volume
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
+ *  file.vport=1234
+ * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
+ *  file.vport=1234,file.segment=my_segment
+ *
+ * or
+ *
+ * file=archipelago:my_vm_volume
+ * file=archipelago:my_vm_volume/mport=123
+ * file=archipelago:my_vm_volume/mport=123:vport=1234
+ * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/error-report.h"
+#include "qemu/thread.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"
+
+#include <inttypes.h>
+#include <xseg/xseg.h>
+#include <xseg/protocol.h>
+
+#define ARCHIP_FD_READ      0
+#define ARCHIP_FD_WRITE     1
+#define MAX_REQUEST_SIZE    524288
+
+#define ARCHIPELAGO_OPT_VOLUME      "volume"
+#define ARCHIPELAGO_OPT_SEGMENT     "segment"
+#define ARCHIPELAGO_OPT_MPORT       "mport"
+#define ARCHIPELAGO_OPT_VPORT       "vport"
+#define ARCHIPELAGO_DFL_MPORT       1001
+#define ARCHIPELAGO_DFL_VPORT       501
+
+#define archipelagolog(fmt, ...) \
+    do {                         \
+        fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \
+    } while (0)
+
+typedef enum {
+    ARCHIP_OP_READ,
+    ARCHIP_OP_WRITE,
+    ARCHIP_OP_FLUSH,
+    ARCHIP_OP_VOLINFO,
+} ARCHIPCmd;
+
+typedef struct ArchipelagoAIOCB {
+    BlockDriverAIOCB common;
+    QEMUBH *bh;
+    struct BDRVArchipelagoState *s;
+    QEMUIOVector *qiov;
+    ARCHIPCmd cmd;
+    bool cancelled;
+    int status;
+    int64_t size;
+    int64_t ret;
+} ArchipelagoAIOCB;
+
+typedef struct BDRVArchipelagoState {
+    ArchipelagoAIOCB *event_acb;
+    char *volname;
+    char *segment_name;
+    uint64_t size;
+    /* Archipelago specific */
+    struct xseg *xseg;
+    struct xseg_port *port;
+    xport srcport;
+    xport sport;
+    xport mportno;
+    xport vportno;
+    QemuMutex archip_mutex;
+    QemuCond archip_cond;
+    bool is_signaled;
+    /* Request handler specific */
+    QemuThread request_th;
+    QemuCond request_cond;
+    QemuMutex request_mutex;
+    bool th_is_signaled;
+    bool stopping;
+} BDRVArchipelagoState;
+
+typedef struct ArchipelagoSegmentedRequest {
+    size_t count;
+    size_t total;
+    int ref;
+    int failed;
+} ArchipelagoSegmentedRequest;
+
+typedef struct AIORequestData {
+    const char *volname;
+    off_t offset;
+    size_t size;
+    uint64_t bufidx;
+    int ret;
+    int op;
+    ArchipelagoAIOCB *aio_cb;
+    ArchipelagoSegmentedRequest *segreq;
+} AIORequestData;
+
+static void qemu_archipelago_complete_aio(void *opaque);
+
+static void init_local_signal(struct xseg *xseg, xport sport, xport srcport)
+{
+    if (xseg && (sport != srcport)) {
+        xseg_init_local_signal(xseg, srcport);
+        sport = srcport;
+    }
+}
+
+static void archipelago_finish_aiocb(AIORequestData *reqdata)
+{
+    if (reqdata->aio_cb->ret != reqdata->segreq->total) {
+        reqdata->aio_cb->ret = -EIO;
+    } else if (reqdata->aio_cb->ret == reqdata->segreq->total) {
+        reqdata->aio_cb->ret = 0;
+    }
+    reqdata->aio_cb->bh = aio_bh_new(
+                        bdrv_get_aio_context(reqdata->aio_cb->common.bs),
+                        qemu_archipelago_complete_aio, reqdata
+                        );
+    qemu_bh_schedule(reqdata->aio_cb->bh);
+}
+
+static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port,
+                      struct xseg_request *expected_req)
+{
+    struct xseg_request *req;
+    xseg_prepare_wait(xseg, srcport);
+    void *psd = xseg_get_signal_desc(xseg, port);
+    while (1) {
+        req = xseg_receive(xseg, srcport, X_NONBLOCK);
+        if (req) {
+            if (req != expected_req) {
+                archipelagolog("Unknown received request\n");
+                xseg_put_request(xseg, req, srcport);
+            } else if (!(req->state & XS_SERVED)) {
+                return -1;
+            } else {
+                break;
+            }
+        }
+        xseg_wait_signal(xseg, psd, 100000UL);
+    }
+    xseg_cancel_wait(xseg, srcport);
+    return 0;
+}
+
+static void xseg_request_handler(void *state)
+{
+    BDRVArchipelagoState *s = (BDRVArchipelagoState *) state;
+    void *psd = xseg_get_signal_desc(s->xseg, s->port);
+    qemu_mutex_lock(&s->request_mutex);
+
+    while (!s->stopping) {
+        struct xseg_request *req;
+        void *data;
+        xseg_prepare_wait(s->xseg, s->srcport);
+        req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK);
+        if (req) {
+            AIORequestData *reqdata;
+            ArchipelagoSegmentedRequest *segreq;
+            xseg_get_req_data(s->xseg, req, (void **)&reqdata);
+
+            switch (reqdata->op) {
+            case ARCHIP_OP_READ:
+                data = xseg_get_data(s->xseg, req);
+                segreq = reqdata->segreq;
+                segreq->count += req->serviced;
+
+                qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx,
+                                    data,
+                                    req->serviced);
+
+                xseg_put_request(s->xseg, req, s->srcport);
+
+                if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+                    if (!segreq->failed) {
+                        reqdata->aio_cb->ret = segreq->count;
+                        archipelago_finish_aiocb(reqdata);
+                        g_free(segreq);
+                    } else {
+                        g_free(segreq);
+                        g_free(reqdata);
+                    }
+                } else {
+                    g_free(reqdata);
+                }
+                break;
+            case ARCHIP_OP_WRITE:
+            case ARCHIP_OP_FLUSH:
+                segreq = reqdata->segreq;
+                segreq->count += req->serviced;
+                xseg_put_request(s->xseg, req, s->srcport);
+
+                if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+                    if (!segreq->failed) {
+                        reqdata->aio_cb->ret = segreq->count;
+                        archipelago_finish_aiocb(reqdata);
+                        g_free(segreq);
+                    } else {
+                        g_free(segreq);
+                        g_free(reqdata);
+                    }
+                } else {
+                    g_free(reqdata);
+                }
+                break;
+            case ARCHIP_OP_VOLINFO:
+                s->is_signaled = true;
+                qemu_cond_signal(&s->archip_cond);
+                break;
+            }
+        } else {
+            xseg_wait_signal(s->xseg, psd, 100000UL);
+        }
+        xseg_cancel_wait(s->xseg, s->srcport);
+    }
+
+    s->th_is_signaled = true;
+    qemu_cond_signal(&s->request_cond);
+    qemu_mutex_unlock(&s->request_mutex);
+    qemu_thread_exit(NULL);
+}
+
+static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s)
+{
+    if (xseg_initialize()) {
+        archipelagolog("Cannot initialize XSEG\n");
+        goto err_exit;
+    }
+
+    s->xseg = xseg_join("posix", s->segment_name,
+                        "posixfd", NULL);
+    if (!s->xseg) {
+        archipelagolog("Cannot join XSEG shared memory segment\n");
+        goto err_exit;
+    }
+    s->port = xseg_bind_dynport(s->xseg);
+    s->srcport = s->port->portno;
+    init_local_signal(s->xseg, s->sport, s->srcport);
+    return 0;
+
+err_exit:
+    return -1;
+}
+
+static int qemu_archipelago_init(BDRVArchipelagoState *s)
+{
+    int ret;
+
+    ret = qemu_archipelago_xseg_init(s);
+    if (ret < 0) {
+        error_report("Cannot initialize XSEG. Aborting...\n");
+        goto err_exit;
+    }
+
+    qemu_cond_init(&s->archip_cond);
+    qemu_mutex_init(&s->archip_mutex);
+    qemu_cond_init(&s->request_cond);
+    qemu_mutex_init(&s->request_mutex);
+    s->th_is_signaled = false;
+    qemu_thread_create(&s->request_th, "xseg_io_th",
+                       (void *) xseg_request_handler,
+                       (void *) s, QEMU_THREAD_JOINABLE);
+
+err_exit:
+    return ret;
+}
+
+static void qemu_archipelago_complete_aio(void *opaque)
+{
+    AIORequestData *reqdata = (AIORequestData *) opaque;
+    ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
+
+    qemu_bh_delete(aio_cb->bh);
+    aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
+    aio_cb->status = 0;
+
+    if (!aio_cb->cancelled) {
+        qemu_aio_release(aio_cb);
+    }
+    g_free(reqdata);
+}
+
+static void xseg_find_port(char *pstr, const char *needle, xport *aport)
+{
+    const char *a;
+    char *endptr = NULL;
+    unsigned long port;
+    if (strstart(pstr, needle, &a)) {
+        if (strlen(a) > 0) {
+            port = strtoul(a, &endptr, 10);
+            if (strlen(endptr)) {
+                *aport = -2;
+                return;
+            }
+            *aport = (xport) port;
+        }
+    }
+}
+
+static void xseg_find_segment(char *pstr, const char *needle,
+                              char **segment_name)
+{
+    const char *a;
+    if (strstart(pstr, needle, &a)) {
+        if (strlen(a) > 0) {
+            *segment_name = g_strdup(a);
+        }
+    }
+}
+
+static void parse_filename_opts(const char *filename, Error **errp,
+                                char **volume, char **segment_name,
+                                xport *mport, xport *vport)
+{
+    const char *start;
+    char *tokens[4], *ds;
+    int idx;
+    xport lmport = NoPort, lvport = NoPort;
+
+    strstart(filename, "archipelago:", &start);
+
+    ds = g_strdup(start);
+    tokens[0] = strtok(ds, "/");
+    tokens[1] = strtok(NULL, ":");
+    tokens[2] = strtok(NULL, ":");
+    tokens[3] = strtok(NULL, "\0");
+
+    if (!strlen(tokens[0])) {
+        error_setg(errp, "volume name must be specified first");
+        g_free(ds);
+        return;
+    }
+
+    for (idx = 1; idx < 4; idx++) {
+        if (tokens[idx] != NULL) {
+            if (strstart(tokens[idx], "mport=", NULL)) {
+                xseg_find_port(tokens[idx], "mport=", &lmport);
+            }
+            if (strstart(tokens[idx], "vport=", NULL)) {
+                xseg_find_port(tokens[idx], "vport=", &lvport);
+            }
+            if (strstart(tokens[idx], "segment=", NULL)) {
+                xseg_find_segment(tokens[idx], "segment=", segment_name);
+            }
+        }
+    }
+
+    if ((lmport == -2) || (lvport == -2)) {
+        error_setg(errp, "mport and/or vport must be set");
+        g_free(ds);
+        return;
+    }
+    *volume = g_strdup(tokens[0]);
+    *mport = lmport;
+    *vport = lvport;
+    g_free(ds);
+}
+
+static void archipelago_parse_filename(const char *filename, QDict *options,
+                                       Error **errp)
+{
+    const char *start;
+    char *volume = NULL, *segment_name = NULL;
+    xport mport = NoPort, vport = NoPort;
+
+    if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME)
+            || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT)
+            || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT)
+            || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) {
+        error_setg(errp, "volume/mport/vport/segment and a file name may not"
+                         " be specified at the same time");
+        return;
+    }
+
+    if (!strstart(filename, "archipelago:", &start)) {
+        error_setg(errp, "File name must start with 'archipelago:'");
+        return;
+    }
+
+    if (!strlen(start) || strstart(start, "/", NULL)) {
+        error_setg(errp, "volume name must be specified");
+        return;
+    }
+
+    parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport);
+
+    if (volume) {
+        qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume));
+        g_free(volume);
+    }
+    if (segment_name) {
+        qdict_put(options, ARCHIPELAGO_OPT_SEGMENT,
+                  qstring_from_str(segment_name));
+        g_free(segment_name);
+    }
+    if (mport != NoPort) {
+        qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport));
+    }
+    if (vport != NoPort) {
+        qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport));
+    }
+}
+
+static QemuOptsList archipelago_runtime_opts = {
+    .name = "archipelago",
+    .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head),
+    .desc = {
+        {
+            .name = ARCHIPELAGO_OPT_VOLUME,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of the volume image",
+        },
+        {
+            .name = ARCHIPELAGO_OPT_SEGMENT,
+            .type = QEMU_OPT_STRING,
+            .help = "Name of the Archipelago shared memory segment",
+        },
+        {
+            .name = ARCHIPELAGO_OPT_MPORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Archipelago mapperd port number"
+        },
+        {
+            .name = ARCHIPELAGO_OPT_VPORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Archipelago vlmcd port number"
+
+        },
+        { /* end of list */ }
+    },
+};
+
+static int qemu_archipelago_open(BlockDriverState *bs,
+                                 QDict *options,
+                                 int bdrv_flags,
+                                 Error **errp)
+{
+    int ret = 0;
+    const char *volume, *segment_name;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    BDRVArchipelagoState *s = bs->opaque;
+
+    opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto err_exit;
+    }
+
+    s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT,
+                                     ARCHIPELAGO_DFL_MPORT);
+    s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT,
+                                     ARCHIPELAGO_DFL_VPORT);
+
+    segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT);
+    if (segment_name == NULL) {
+        s->segment_name = g_strdup("archipelago");
+    } else {
+        s->segment_name = g_strdup(segment_name);
+    }
+
+    volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME);
+    if (volume == NULL) {
+        error_setg(errp, "archipelago block driver requires the 'volume'"
+                   " option");
+        ret = -EINVAL;
+        goto err_exit;
+    }
+    s->volname = g_strdup(volume);
+
+    /* Initialize XSEG, join shared memory segment */
+    ret = qemu_archipelago_init(s);
+    if (ret < 0) {
+        error_setg(errp, "cannot initialize XSEG and join shared "
+                   "memory segment");
+        goto err_exit;
+    }
+
+    qemu_opts_del(opts);
+    return 0;
+
+err_exit:
+    g_free(s->volname);
+    g_free(s->segment_name);
+    qemu_opts_del(opts);
+    return ret;
+}
+
+static void qemu_archipelago_close(BlockDriverState *bs)
+{
+    int r, targetlen;
+    char *target;
+    struct xseg_request *req;
+    BDRVArchipelagoState *s = bs->opaque;
+
+    s->stopping = true;
+
+    qemu_mutex_lock(&s->request_mutex);
+    while (!s->th_is_signaled) {
+        qemu_cond_wait(&s->request_cond,
+                       &s->request_mutex);
+    }
+    qemu_mutex_unlock(&s->request_mutex);
+    qemu_thread_join(&s->request_th);
+    qemu_cond_destroy(&s->request_cond);
+    qemu_mutex_destroy(&s->request_mutex);
+
+    qemu_cond_destroy(&s->archip_cond);
+    qemu_mutex_destroy(&s->archip_mutex);
+
+    targetlen = strlen(s->volname);
+    req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
+    if (!req) {
+        archipelagolog("Cannot get XSEG request\n");
+        goto err_exit;
+    }
+    r = xseg_prep_request(s->xseg, req, targetlen, 0);
+    if (r < 0) {
+        xseg_put_request(s->xseg, req, s->srcport);
+        archipelagolog("Cannot prepare XSEG close request\n");
+        goto err_exit;
+    }
+
+    target = xseg_get_target(s->xseg, req);
+    memcpy(target, s->volname, targetlen);
+    req->size = req->datalen;
+    req->offset = 0;
+    req->op = X_CLOSE;
+
+    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+    if (p == NoPort) {
+        xseg_put_request(s->xseg, req, s->srcport);
+        archipelagolog("Cannot submit XSEG close request\n");
+        goto err_exit;
+    }
+
+    xseg_signal(s->xseg, p);
+    wait_reply(s->xseg, s->srcport, s->port, req);
+
+    xseg_put_request(s->xseg, req, s->srcport);
+
+err_exit:
+    g_free(s->volname);
+    g_free(s->segment_name);
+    xseg_quit_local_signal(s->xseg, s->srcport);
+    xseg_leave_dynport(s->xseg, s->port);
+    xseg_leave(s->xseg);
+}
+
+static int qemu_archipelago_create_volume(Error **errp, const char *volname,
+                                          char *segment_name,
+                                          uint64_t size, xport mportno,
+                                          xport vportno)
+{
+    int ret, targetlen;
+    struct xseg *xseg = NULL;
+    struct xseg_request *req;
+    struct xseg_request_clone *xclone;
+    struct xseg_port *port;
+    xport srcport = NoPort, sport = NoPort;
+    char *target;
+
+    /* Try default values if none has been set */
+    if (mportno == (xport) -1) {
+        mportno = ARCHIPELAGO_DFL_MPORT;
+    }
+
+    if (vportno == (xport) -1) {
+        vportno = ARCHIPELAGO_DFL_VPORT;
+    }
+
+    if (xseg_initialize()) {
+        error_setg(errp, "Cannot initialize XSEG");
+        return -1;
+    }
+
+    xseg = xseg_join("posix", segment_name,
+                     "posixfd", NULL);
+
+    if (!xseg) {
+        error_setg(errp, "Cannot join XSEG shared memory segment");
+        return -1;
+    }
+
+    port = xseg_bind_dynport(xseg);
+    srcport = port->portno;
+    init_local_signal(xseg, sport, srcport);
+
+    req = xseg_get_request(xseg, srcport, mportno, X_ALLOC);
+    if (!req) {
+        error_setg(errp, "Cannot get XSEG request");
+        return -1;
+    }
+
+    targetlen = strlen(volname);
+    ret = xseg_prep_request(xseg, req, targetlen,
+                            sizeof(struct xseg_request_clone));
+    if (ret < 0) {
+        error_setg(errp, "Cannot prepare XSEG request");
+        goto err_exit;
+    }
+
+    target = xseg_get_target(xseg, req);
+    if (!target) {
+        error_setg(errp, "Cannot get XSEG target.\n");
+        goto err_exit;
+    }
+    memcpy(target, volname, targetlen);
+    xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req);
+    memset(xclone->target, 0 , XSEG_MAX_TARGETLEN);
+    xclone->targetlen = 0;
+    xclone->size = size;
+    req->offset = 0;
+    req->size = req->datalen;
+    req->op = X_CLONE;
+
+    xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
+    if (p == NoPort) {
+        error_setg(errp, "Could not submit XSEG request");
+        goto err_exit;
+    }
+    xseg_signal(xseg, p);
+
+    ret = wait_reply(xseg, srcport, port, req);
+    if (ret < 0) {
+        error_setg(errp, "wait_reply() error.");
+    }
+
+    xseg_put_request(xseg, req, srcport);
+    xseg_quit_local_signal(xseg, srcport);
+    xseg_leave_dynport(xseg, port);
+    xseg_leave(xseg);
+    return ret;
+
+err_exit:
+    xseg_put_request(xseg, req, srcport);
+    xseg_quit_local_signal(xseg, srcport);
+    xseg_leave_dynport(xseg, port);
+    xseg_leave(xseg);
+    return -1;
+}
+
+static int qemu_archipelago_create(const char *filename,
+                                   QemuOpts *options,
+                                   Error **errp)
+{
+    int ret = 0;
+    uint64_t total_size = 0;
+    char *volname = NULL, *segment_name = NULL;
+    const char *start;
+    xport mport = NoPort, vport = NoPort;
+
+    if (!strstart(filename, "archipelago:", &start)) {
+        error_setg(errp, "File name must start with 'archipelago:'");
+        return -1;
+    }
+
+    if (!strlen(start) || strstart(start, "/", NULL)) {
+        error_setg(errp, "volume name must be specified");
+        return -1;
+    }
+
+    parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
+                        &vport);
+    total_size = qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0);
+
+    if (segment_name == NULL) {
+        segment_name = g_strdup("archipelago");
+    }
+
+    /* Create an Archipelago volume */
+    ret = qemu_archipelago_create_volume(errp, volname, segment_name,
+                                         total_size, mport,
+                                         vport);
+
+    g_free(volname);
+    g_free(segment_name);
+    return ret;
+}
+
+static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb;
+    aio_cb->cancelled = true;
+    while (aio_cb->status == -EINPROGRESS) {
+        aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true);
+    }
+    qemu_aio_release(aio_cb);
+}
+
+static const AIOCBInfo archipelago_aiocb_info = {
+    .aiocb_size = sizeof(ArchipelagoAIOCB),
+    .cancel = qemu_archipelago_aio_cancel,
+};
+
+static int archipelago_submit_request(BDRVArchipelagoState *s,
+                                        uint64_t bufidx,
+                                        size_t count,
+                                        off_t offset,
+                                        ArchipelagoAIOCB *aio_cb,
+                                        ArchipelagoSegmentedRequest *segreq,
+                                        int op)
+{
+    int ret, targetlen;
+    char *target;
+    void *data = NULL;
+    struct xseg_request *req;
+    AIORequestData *reqdata = g_malloc(sizeof(AIORequestData));
+
+    targetlen = strlen(s->volname);
+    req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
+    if (!req) {
+        archipelagolog("Cannot get XSEG request\n");
+        goto err_exit2;
+    }
+    ret = xseg_prep_request(s->xseg, req, targetlen, count);
+    if (ret < 0) {
+        archipelagolog("Cannot prepare XSEG request\n");
+        goto err_exit;
+    }
+    target = xseg_get_target(s->xseg, req);
+    if (!target) {
+        archipelagolog("Cannot get XSEG target\n");
+        goto err_exit;
+    }
+    memcpy(target, s->volname, targetlen);
+    req->size = count;
+    req->offset = offset;
+
+    switch (op) {
+    case ARCHIP_OP_READ:
+        req->op = X_READ;
+        break;
+    case ARCHIP_OP_WRITE:
+        req->op = X_WRITE;
+        break;
+    case ARCHIP_OP_FLUSH:
+        req->op = X_FLUSH;
+        break;
+    }
+    reqdata->volname = s->volname;
+    reqdata->offset = offset;
+    reqdata->size = count;
+    reqdata->bufidx = bufidx;
+    reqdata->aio_cb = aio_cb;
+    reqdata->segreq = segreq;
+    reqdata->op = op;
+
+    xseg_set_req_data(s->xseg, req, reqdata);
+    if (op == ARCHIP_OP_WRITE) {
+        data = xseg_get_data(s->xseg, req);
+        if (!data) {
+            archipelagolog("Cannot get XSEG data\n");
+            goto err_exit;
+        }
+        qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count);
+    }
+
+    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+    if (p == NoPort) {
+        archipelagolog("Could not submit XSEG request\n");
+        goto err_exit;
+    }
+    xseg_signal(s->xseg, p);
+    return 0;
+
+err_exit:
+    g_free(reqdata);
+    xseg_put_request(s->xseg, req, s->srcport);
+    return -EIO;
+err_exit2:
+    g_free(reqdata);
+    return -EIO;
+}
+
+static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s,
+                                        size_t count,
+                                        off_t offset,
+                                        ArchipelagoAIOCB *aio_cb,
+                                        int op)
+{
+    int i, ret, segments_nr, last_segment_size;
+    ArchipelagoSegmentedRequest *segreq;
+
+    segreq = g_malloc(sizeof(ArchipelagoSegmentedRequest));
+
+    if (op == ARCHIP_OP_FLUSH) {
+        segments_nr = 1;
+        segreq->ref = segments_nr;
+        segreq->total = count;
+        segreq->count = 0;
+        segreq->failed = 0;
+        ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
+                                           segreq, ARCHIP_OP_FLUSH);
+        if (ret < 0) {
+            goto err_exit;
+        }
+        return 0;
+    }
+
+    segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
+                  ((count % MAX_REQUEST_SIZE) ? 1 : 0);
+    last_segment_size = (int)(count % MAX_REQUEST_SIZE);
+
+    segreq->ref = segments_nr;
+    segreq->total = count;
+    segreq->count = 0;
+    segreq->failed = 0;
+
+    for (i = 0; i < segments_nr - 1; i++) {
+        ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+                                           MAX_REQUEST_SIZE,
+                                           offset + i * MAX_REQUEST_SIZE,
+                                           aio_cb, segreq, op);
+
+        if (ret < 0) {
+            goto err_exit;
+        }
+    }
+
+    if ((segments_nr > 1) && last_segment_size) {
+        ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+                                           last_segment_size,
+                                           offset + i * MAX_REQUEST_SIZE,
+                                           aio_cb, segreq, op);
+    } else if ((segments_nr > 1) && !last_segment_size) {
+        ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
+                                           MAX_REQUEST_SIZE,
+                                           offset + i * MAX_REQUEST_SIZE,
+                                           aio_cb, segreq, op);
+    } else if (segments_nr == 1) {
+            ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
+                                               segreq, op);
+    }
+
+    if (ret < 0) {
+        goto err_exit;
+    }
+
+    return 0;
+
+err_exit:
+    __sync_add_and_fetch(&segreq->failed, 1);
+    if (segments_nr == 1) {
+        if (__sync_add_and_fetch(&segreq->ref, -1) == 0) {
+            g_free(segreq);
+        }
+    } else {
+        if ((__sync_add_and_fetch(&segreq->ref, -segments_nr + i)) == 0) {
+            g_free(segreq);
+        }
+    }
+
+    return ret;
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
+                                                 int64_t sector_num,
+                                                 QEMUIOVector *qiov,
+                                                 int nb_sectors,
+                                                 BlockDriverCompletionFunc *cb,
+                                                 void *opaque,
+                                                 int op)
+{
+    ArchipelagoAIOCB *aio_cb;
+    BDRVArchipelagoState *s = bs->opaque;
+    int64_t size, off;
+    int ret;
+
+    aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque);
+    aio_cb->cmd = op;
+    aio_cb->qiov = qiov;
+
+    aio_cb->ret = 0;
+    aio_cb->s = s;
+    aio_cb->cancelled = false;
+    aio_cb->status = -EINPROGRESS;
+
+    off = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    aio_cb->size = size;
+
+    ret = archipelago_aio_segmented_rw(s, size, off,
+                                       aio_cb, op);
+    if (ret < 0) {
+        goto err_exit;
+    }
+    return &aio_cb->common;
+
+err_exit:
+    error_report("qemu_archipelago_aio_rw(): I/O Error\n");
+    qemu_aio_release(aio_cb);
+    return NULL;
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                                   opaque, ARCHIP_OP_READ);
+}
+
+static BlockDriverAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                                   opaque, ARCHIP_OP_WRITE);
+}
+
+static int64_t archipelago_volume_info(BDRVArchipelagoState *s)
+{
+    uint64_t size;
+    int ret, targetlen;
+    struct xseg_request *req;
+    struct xseg_reply_info *xinfo;
+    AIORequestData *reqdata = g_malloc(sizeof(AIORequestData));
+
+    const char *volname = s->volname;
+    targetlen = strlen(volname);
+    req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
+    if (!req) {
+        archipelagolog("Cannot get XSEG request\n");
+        goto err_exit2;
+    }
+    ret = xseg_prep_request(s->xseg, req, targetlen,
+                            sizeof(struct xseg_reply_info));
+    if (ret < 0) {
+        archipelagolog("Cannot prepare XSEG request\n");
+        goto err_exit;
+    }
+    char *target = xseg_get_target(s->xseg, req);
+    if (!target) {
+        archipelagolog("Cannot get XSEG target\n");
+        goto err_exit;
+    }
+    memcpy(target, volname, targetlen);
+    req->size = req->datalen;
+    req->offset = 0;
+    req->op = X_INFO;
+
+    reqdata->op = ARCHIP_OP_VOLINFO;
+    reqdata->volname = volname;
+    xseg_set_req_data(s->xseg, req, reqdata);
+
+    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
+    if (p == NoPort) {
+        archipelagolog("Cannot submit XSEG request\n");
+        goto err_exit;
+    }
+    xseg_signal(s->xseg, p);
+    qemu_mutex_lock(&s->archip_mutex);
+    while (!s->is_signaled) {
+        qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
+    }
+    s->is_signaled = false;
+    qemu_mutex_unlock(&s->archip_mutex);
+
+    xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req);
+    size = xinfo->size;
+    xseg_put_request(s->xseg, req, s->srcport);
+    g_free(reqdata);
+    s->size = size;
+    return size;
+
+err_exit:
+    xseg_put_request(s->xseg, req, s->srcport);
+err_exit2:
+    g_free(reqdata);
+    return -EIO;
+}
+
+static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
+{
+    int64_t ret;
+    BDRVArchipelagoState *s = bs->opaque;
+
+    ret = archipelago_volume_info(s);
+    return ret;
+}
+
+static QemuOptsList qemu_archipelago_create_opts = {
+    .name = "archipelago-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        { /* end of list */ }
+    }
+};
+
+static BlockDriverAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque,
+                                   ARCHIP_OP_FLUSH);
+}
+
+static BlockDriver bdrv_archipelago = {
+    .format_name         = "archipelago",
+    .protocol_name       = "archipelago",
+    .instance_size       = sizeof(BDRVArchipelagoState),
+    .bdrv_parse_filename = archipelago_parse_filename,
+    .bdrv_file_open      = qemu_archipelago_open,
+    .bdrv_close          = qemu_archipelago_close,
+    .bdrv_create         = qemu_archipelago_create,
+    .bdrv_getlength      = qemu_archipelago_getlength,
+    .bdrv_aio_readv      = qemu_archipelago_aio_readv,
+    .bdrv_aio_writev     = qemu_archipelago_aio_writev,
+    .bdrv_aio_flush      = qemu_archipelago_aio_flush,
+    .bdrv_has_zero_init  = bdrv_has_zero_init_1,
+    .create_opts         = &qemu_archipelago_create_opts,
+};
+
+static void bdrv_archipelago_init(void)
+{
+    bdrv_register(&bdrv_archipelago);
+}
+
+block_init(bdrv_archipelago_init);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index f51407de3f..1586ed9664 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -522,6 +522,25 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
     return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }
 
+static BlockDriverAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugRule *rule = NULL;
+
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        if (rule->options.inject.sector == -1) {
+            break;
+        }
+    }
+
+    if (rule && rule->options.inject.error) {
+        return inject_error(bs, cb, opaque, rule);
+    }
+
+    return bdrv_aio_flush(bs->file, cb, opaque);
+}
+
 
 static void blkdebug_close(BlockDriverState *bs)
 {
@@ -699,6 +718,7 @@ static BlockDriver bdrv_blkdebug = {
 
     .bdrv_aio_readv         = blkdebug_aio_readv,
     .bdrv_aio_writev        = blkdebug_aio_writev,
+    .bdrv_aio_flush         = blkdebug_aio_flush,
 
     .bdrv_debug_event           = blkdebug_debug_event,
     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
diff --git a/block/bochs.c b/block/bochs.c
index eba23df335..6674b27438 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -131,7 +131,11 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
         return -EFBIG;
     }
 
-    s->catalog_bitmap = g_malloc(s->catalog_size * 4);
+    s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
+    if (s->catalog_size && s->catalog_bitmap == NULL) {
+        error_setg(errp, "Could not allocate memory for catalog");
+        return -ENOMEM;
+    }
 
     ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
                      s->catalog_size * 4);
diff --git a/block/cloop.c b/block/cloop.c
index 8457737922..f328be06f8 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -116,7 +116,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
                    "try increasing block size");
         return -EINVAL;
     }
-    s->offsets = g_malloc(offsets_size);
+
+    s->offsets = g_try_malloc(offsets_size);
+    if (s->offsets == NULL) {
+        error_setg(errp, "Could not allocate offsets table");
+        return -ENOMEM;
+    }
 
     ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
     if (ret < 0) {
@@ -158,8 +163,20 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* initialize zlib engine */
-    s->compressed_block = g_malloc(max_compressed_block_size + 1);
-    s->uncompressed_block = g_malloc(s->block_size);
+    s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
+    if (s->compressed_block == NULL) {
+        error_setg(errp, "Could not allocate compressed_block");
+        ret = -ENOMEM;
+        goto fail;
+    }
+
+    s->uncompressed_block = g_try_malloc(s->block_size);
+    if (s->uncompressed_block == NULL) {
+        error_setg(errp, "Could not allocate uncompressed_block");
+        ret = -ENOMEM;
+        goto fail;
+    }
+
     if (inflateInit(&s->zstream) != Z_OK) {
         ret = -EINVAL;
         goto fail;
diff --git a/block/curl.c b/block/curl.c
index 79ff2f1e41..d4b85d20a5 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -640,7 +640,13 @@ static void curl_readv_bh_cb(void *p)
     state->buf_start = start;
     state->buf_len = acb->end + s->readahead_size;
     end = MIN(start + state->buf_len, s->len) - 1;
-    state->orig_buf = g_malloc(state->buf_len);
+    state->orig_buf = g_try_malloc(state->buf_len);
+    if (state->buf_len && state->orig_buf == NULL) {
+        curl_clean_state(state);
+        acb->common.cb(acb->common.opaque, -ENOMEM);
+        qemu_aio_release(acb);
+        return;
+    }
     state->acb[0] = acb;
 
     snprintf(state->range, 127, "%zd-%zd", start, end);
diff --git a/block/dmg.c b/block/dmg.c
index 1e153cd76d..e455886d2b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -284,8 +284,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* initialize zlib engine */
-    s->compressed_chunk = g_malloc(max_compressed_size + 1);
-    s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
+    s->compressed_chunk = qemu_try_blockalign(bs->file,
+                                              max_compressed_size + 1);
+    s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+                                                512 * max_sectors_per_chunk);
+    if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
     if (inflateInit(&s->zstream) != Z_OK) {
         ret = -EINVAL;
         goto fail;
@@ -302,8 +309,8 @@ fail:
     g_free(s->lengths);
     g_free(s->sectors);
     g_free(s->sectorcounts);
-    g_free(s->compressed_chunk);
-    g_free(s->uncompressed_chunk);
+    qemu_vfree(s->compressed_chunk);
+    qemu_vfree(s->uncompressed_chunk);
     return ret;
 }
 
@@ -426,8 +433,8 @@ static void dmg_close(BlockDriverState *bs)
     g_free(s->lengths);
     g_free(s->sectors);
     g_free(s->sectorcounts);
-    g_free(s->compressed_chunk);
-    g_free(s->uncompressed_chunk);
+    qemu_vfree(s->compressed_chunk);
+    qemu_vfree(s->uncompressed_chunk);
 
     inflateEnd(&s->zstream);
 }
diff --git a/block/iscsi.c b/block/iscsi.c
index f3e83e2332..2c9cfc18eb 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -893,7 +893,10 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
     nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
 
     if (iscsilun->zeroblock == NULL) {
-        iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
+        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
+        if (iscsilun->zeroblock == NULL) {
+            return -ENOMEM;
+        }
     }
 
     iscsi_co_init_iscsitask(iscsilun, &iTask);
@@ -1450,7 +1453,7 @@ static void iscsi_close(BlockDriverState *bs)
     memset(iscsilun, 0, sizeof(IscsiLun));
 }
 
-static int iscsi_refresh_limits(BlockDriverState *bs)
+static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     IscsiLun *iscsilun = bs->opaque;
 
@@ -1475,7 +1478,6 @@ static int iscsi_refresh_limits(BlockDriverState *bs)
     }
     bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
                                                  iscsilun);
-    return 0;
 }
 
 /* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
diff --git a/block/mirror.c b/block/mirror.c
index c7a655fc58..5e7a166b39 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -367,7 +367,12 @@ static void coroutine_fn mirror_run(void *opaque)
     }
 
     end = s->common.len >> BDRV_SECTOR_BITS;
-    s->buf = qemu_blockalign(bs, s->buf_size);
+    s->buf = qemu_try_blockalign(bs, s->buf_size);
+    if (s->buf == NULL) {
+        ret = -ENOMEM;
+        goto immediate_exit;
+    }
+
     sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
     mirror_free_init(s);
 
diff --git a/block/nfs.c b/block/nfs.c
index 8439e0d389..fe46c33709 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -172,7 +172,11 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
 
     nfs_co_init_task(client, &task);
 
-    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    if (nb_sectors && buf == NULL) {
+        return -ENOMEM;
+    }
+
     qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
 
     if (nfs_pwrite_async(client->context, client->fh,
diff --git a/block/parallels.c b/block/parallels.c
index 1a5bd350b3..1774ab8e8e 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -30,6 +30,7 @@
 /**************************************************************/
 
 #define HEADER_MAGIC "WithoutFreeSpace"
+#define HEADER_MAGIC2 "WithouFreSpacExt"
 #define HEADER_VERSION 2
 #define HEADER_SIZE 64
 
@@ -41,8 +42,10 @@ struct parallels_header {
     uint32_t cylinders;
     uint32_t tracks;
     uint32_t catalog_entries;
-    uint32_t nb_sectors;
-    char padding[24];
+    uint64_t nb_sectors;
+    uint32_t inuse;
+    uint32_t data_off;
+    char padding[12];
 } QEMU_PACKED;
 
 typedef struct BDRVParallelsState {
@@ -52,6 +55,8 @@ typedef struct BDRVParallelsState {
     unsigned int catalog_size;
 
     unsigned int tracks;
+
+    unsigned int off_multiplier;
 } BDRVParallelsState;
 
 static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -59,11 +64,12 @@ static int parallels_probe(const uint8_t *buf, int buf_size, const char *filenam
     const struct parallels_header *ph = (const void *)buf;
 
     if (buf_size < HEADER_SIZE)
-	return 0;
+        return 0;
 
-    if (!memcmp(ph->magic, HEADER_MAGIC, 16) &&
-	(le32_to_cpu(ph->version) == HEADER_VERSION))
-	return 100;
+    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
+        !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
+        (le32_to_cpu(ph->version) == HEADER_VERSION))
+        return 100;
 
     return 0;
 }
@@ -83,14 +89,19 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
-        (le32_to_cpu(ph.version) != HEADER_VERSION)) {
-        error_setg(errp, "Image not in Parallels format");
-        ret = -EINVAL;
-        goto fail;
-    }
+    bs->total_sectors = le64_to_cpu(ph.nb_sectors);
 
-    bs->total_sectors = le32_to_cpu(ph.nb_sectors);
+    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
+        goto fail_format;
+    }
+    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
+        s->off_multiplier = 1;
+        bs->total_sectors = 0xffffffff & bs->total_sectors;
+    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
+        s->off_multiplier = le32_to_cpu(ph.tracks);
+    } else {
+        goto fail_format;
+    }
 
     s->tracks = le32_to_cpu(ph.tracks);
     if (s->tracks == 0) {
@@ -98,6 +109,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -EINVAL;
         goto fail;
     }
+    if (s->tracks > INT32_MAX/513) {
+        error_setg(errp, "Invalid image: Too big cluster");
+        ret = -EFBIG;
+        goto fail;
+    }
 
     s->catalog_size = le32_to_cpu(ph.catalog_entries);
     if (s->catalog_size > INT_MAX / 4) {
@@ -105,7 +121,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -EFBIG;
         goto fail;
     }
-    s->catalog_bitmap = g_malloc(s->catalog_size * 4);
+    s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
+    if (s->catalog_size && s->catalog_bitmap == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
     if (ret < 0) {
@@ -113,11 +133,14 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     for (i = 0; i < s->catalog_size; i++)
-	le32_to_cpus(&s->catalog_bitmap[i]);
+        le32_to_cpus(&s->catalog_bitmap[i]);
 
     qemu_co_mutex_init(&s->lock);
     return 0;
 
+fail_format:
+    error_setg(errp, "Image not in Parallels format");
+    ret = -EINVAL;
 fail:
     g_free(s->catalog_bitmap);
     return ret;
@@ -133,8 +156,9 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 
     /* not allocated */
     if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
-	return -1;
-    return (uint64_t)(s->catalog_bitmap[index] + offset) * 512;
+        return -1;
+    return
+        ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512;
 }
 
 static int parallels_read(BlockDriverState *bs, int64_t sector_num,
diff --git a/block/qapi.c b/block/qapi.c
index f44f6b4012..79d1e6a9f4 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -28,6 +28,13 @@
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif
 
 BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 {
@@ -165,19 +172,28 @@ void bdrv_query_image_info(BlockDriverState *bs,
                            ImageInfo **p_info,
                            Error **errp)
 {
-    uint64_t total_sectors;
+    int64_t size;
     const char *backing_filename;
     char backing_filename2[1024];
     BlockDriverInfo bdi;
     int ret;
     Error *err = NULL;
-    ImageInfo *info = g_new0(ImageInfo, 1);
-
-    bdrv_get_geometry(bs, &total_sectors);
+    ImageInfo *info;
+#ifdef __linux__
+    int fd, attr;
+#endif
+
+    size = bdrv_getlength(bs);
+    if (size < 0) {
+        error_setg_errno(errp, -size, "Can't get size of device '%s'",
+                         bdrv_get_device_name(bs));
+        return;
+    }
 
+    info = g_new0(ImageInfo, 1);
     info->filename        = g_strdup(bs->filename);
     info->format          = g_strdup(bdrv_get_format_name(bs));
-    info->virtual_size    = total_sectors * 512;
+    info->virtual_size    = size;
     info->actual_size     = bdrv_get_allocated_file_size(bs);
     info->has_actual_size = info->actual_size >= 0;
     if (bdrv_is_encrypted(bs)) {
@@ -195,6 +211,18 @@ void bdrv_query_image_info(BlockDriverState *bs,
     info->format_specific     = bdrv_get_specific_info(bs);
     info->has_format_specific = info->format_specific != NULL;
 
+#ifdef __linux__
+    /* get NOCOW info */
+    fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK);
+    if (fd >= 0) {
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) {
+            info->has_nocow = true;
+            info->nocow = true;
+        }
+        qemu_close(fd);
+    }
+#endif
+
     backing_filename = bs->backing_file;
     if (backing_filename[0] != '\0') {
         info->backing_filename = g_strdup(backing_filename);
@@ -625,4 +653,8 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
         func_fprintf(f, "Format specific information:\n");
         bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
     }
+
+    if (info->has_nocow && info->nocow) {
+        func_fprintf(f, "NOCOW flag: set\n");
+    }
 }
diff --git a/block/qcow.c b/block/qcow.c
index a874056cf3..67332f03c1 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -182,7 +182,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+    s->l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
+    if (s->l1_table == NULL) {
+        error_setg(errp, "Could not allocate memory for L1 table");
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                s->l1_size * sizeof(uint64_t));
@@ -193,8 +198,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     for(i = 0;i < s->l1_size; i++) {
         be64_to_cpus(&s->l1_table[i]);
     }
-    /* alloc L2 cache */
-    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+
+    /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
+    s->l2_cache =
+        qemu_try_blockalign(bs->file,
+                            s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    if (s->l2_cache == NULL) {
+        error_setg(errp, "Could not allocate L2 table cache");
+        ret = -ENOMEM;
+        goto fail;
+    }
     s->cluster_cache = g_malloc(s->cluster_size);
     s->cluster_data = g_malloc(s->cluster_size);
     s->cluster_cache_offset = -1;
@@ -226,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
 
  fail:
     g_free(s->l1_table);
-    g_free(s->l2_cache);
+    qemu_vfree(s->l2_cache);
     g_free(s->cluster_cache);
     g_free(s->cluster_data);
     return ret;
@@ -517,7 +530,10 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
     void *orig_buf;
 
     if (qiov->niov > 1) {
-        buf = orig_buf = qemu_blockalign(bs, qiov->size);
+        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
+        if (buf == NULL) {
+            return -ENOMEM;
+        }
     } else {
         orig_buf = NULL;
         buf = (uint8_t *)qiov->iov->iov_base;
@@ -619,7 +635,10 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
     if (qiov->niov > 1) {
-        buf = orig_buf = qemu_blockalign(bs, qiov->size);
+        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
+        if (buf == NULL) {
+            return -ENOMEM;
+        }
         qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
     } else {
         orig_buf = NULL;
@@ -685,7 +704,7 @@ static void qcow_close(BlockDriverState *bs)
     BDRVQcowState *s = bs->opaque;
 
     g_free(s->l1_table);
-    g_free(s->l2_cache);
+    qemu_vfree(s->l2_cache);
     g_free(s->cluster_cache);
     g_free(s->cluster_data);
 
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 8ecbb5bc00..5353b44828 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -53,10 +53,21 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
     c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
 
     for (i = 0; i < c->size; i++) {
-        c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
+        c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
+        if (c->entries[i].table == NULL) {
+            goto fail;
+        }
     }
 
     return c;
+
+fail:
+    for (i = 0; i < c->size; i++) {
+        qemu_vfree(c->entries[i].table);
+    }
+    g_free(c->entries);
+    g_free(c);
+    return NULL;
 }
 
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4208dc08b5..5b36018b3e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -72,14 +72,20 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 #endif
 
     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
+    new_l1_table = qemu_try_blockalign(bs->file,
+                                       align_offset(new_l1_size2, 512));
+    if (new_l1_table == NULL) {
+        return -ENOMEM;
+    }
+    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+
     memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
 
     /* write new table (align to cluster) */
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
     new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
     if (new_l1_table_offset < 0) {
-        g_free(new_l1_table);
+        qemu_vfree(new_l1_table);
         return new_l1_table_offset;
     }
 
@@ -113,7 +119,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     if (ret < 0) {
         goto fail;
     }
-    g_free(s->l1_table);
+    qemu_vfree(s->l1_table);
     old_l1_table_offset = s->l1_table_offset;
     s->l1_table_offset = new_l1_table_offset;
     s->l1_table = new_l1_table;
@@ -123,7 +129,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         QCOW2_DISCARD_OTHER);
     return 0;
  fail:
-    g_free(new_l1_table);
+    qemu_vfree(new_l1_table);
     qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
                         QCOW2_DISCARD_OTHER);
     return ret;
@@ -372,7 +378,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
     }
 
     iov.iov_len = n * BDRV_SECTOR_SIZE;
-    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
+    if (iov.iov_base == NULL) {
+        return -ENOMEM;
+    }
 
     qemu_iovec_init_external(&qiov, &iov, 1);
 
@@ -702,7 +711,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
     assert(m->nb_clusters > 0);
 
-    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+    old_cluster = g_try_malloc(m->nb_clusters * sizeof(uint64_t));
+    if (old_cluster == NULL) {
+        ret = -ENOMEM;
+        goto err;
+    }
 
     /* copy content of unmodified sectors */
     ret = perform_cow(bs, m, &m->cow_start);
@@ -1106,6 +1119,17 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
         return 0;
     }
 
+    /* !*host_offset would overwrite the image header and is reserved for "no
+     * host offset preferred". If 0 was a valid host offset, it'd trigger the
+     * following overlap check; do that now to avoid having an invalid value in
+     * *host_offset. */
+    if (!alloc_cluster_offset) {
+        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
+                                            nb_clusters * s->cluster_size);
+        assert(ret < 0);
+        goto fail;
+    }
+
     /*
      * Save info needed for meta data update.
      *
@@ -1562,7 +1586,10 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
     if (!is_active_l1) {
         /* inactive L2 tables require a buffer to be stored in when loading
          * them from disk */
-        l2_table = qemu_blockalign(bs, s->cluster_size);
+        l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+        if (l2_table == NULL) {
+            return -ENOMEM;
+        }
     }
 
     for (i = 0; i < l1_size; i++) {
@@ -1740,7 +1767,11 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
 
     nb_clusters = size_to_clusters(s, bs->file->total_sectors *
                                    BDRV_SECTOR_SIZE);
-    expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
+    expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
+    if (expanded_clusters == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
                                      &expanded_clusters, &nb_clusters);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index cc6cf743d6..3b7747048e 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -46,19 +46,25 @@ int qcow2_refcount_init(BlockDriverState *bs)
 
     assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
     refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
-    s->refcount_table = g_malloc(refcount_table_size2);
+    s->refcount_table = g_try_malloc(refcount_table_size2);
+
     if (s->refcount_table_size > 0) {
+        if (s->refcount_table == NULL) {
+            ret = -ENOMEM;
+            goto fail;
+        }
         BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
         ret = bdrv_pread(bs->file, s->refcount_table_offset,
                          s->refcount_table, refcount_table_size2);
-        if (ret != refcount_table_size2)
+        if (ret < 0) {
             goto fail;
+        }
         for(i = 0; i < s->refcount_table_size; i++)
             be64_to_cpus(&s->refcount_table[i]);
     }
     return 0;
  fail:
-    return -ENOMEM;
+    return ret;
 }
 
 void qcow2_refcount_close(BlockDriverState *bs)
@@ -344,8 +350,14 @@ static int alloc_refcount_block(BlockDriverState *bs,
     uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
         s->cluster_size;
     uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
-    uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
-    uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
+    uint64_t *new_table = g_try_malloc0(table_size * sizeof(uint64_t));
+    uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
+
+    assert(table_size > 0 && blocks_clusters > 0);
+    if (new_table == NULL || new_blocks == NULL) {
+        ret = -ENOMEM;
+        goto fail_table;
+    }
 
     /* Fill the new refcount table */
     memcpy(new_table, s->refcount_table,
@@ -369,6 +381,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
     ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
         blocks_clusters * s->cluster_size);
     g_free(new_blocks);
+    new_blocks = NULL;
     if (ret < 0) {
         goto fail_table;
     }
@@ -424,6 +437,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
     return -EAGAIN;
 
 fail_table:
+    g_free(new_blocks);
     g_free(new_table);
 fail_block:
     if (*refcount_block != NULL) {
@@ -847,7 +861,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     int64_t l1_table_offset, int l1_size, int addend)
 {
     BDRVQcowState *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
+    bool l1_allocated = false;
     int64_t old_offset, old_l2_offset;
     int i, j, l1_modified = 0, nb_csectors, refcount;
     int ret;
@@ -862,8 +877,12 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
      * l1_table_offset when it is the current s->l1_table_offset! Be careful
      * when changing this! */
     if (l1_table_offset != s->l1_table_offset) {
-        l1_table = g_malloc0(align_offset(l1_size2, 512));
-        l1_allocated = 1;
+        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
+        if (l1_size2 && l1_table == NULL) {
+            ret = -ENOMEM;
+            goto fail;
+        }
+        l1_allocated = true;
 
         ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
         if (ret < 0) {
@@ -875,7 +894,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     } else {
         assert(l1_size == s->l1_size);
         l1_table = s->l1_table;
-        l1_allocated = 0;
+        l1_allocated = false;
     }
 
     for(i = 0; i < l1_size; i++) {
@@ -1197,7 +1216,11 @@ static int check_refcounts_l1(BlockDriverState *bs,
     if (l1_size2 == 0) {
         l1_table = NULL;
     } else {
-        l1_table = g_malloc(l1_size2);
+        l1_table = g_try_malloc(l1_size2);
+        if (l1_table == NULL) {
+            ret = -ENOMEM;
+            goto fail;
+        }
         if (bdrv_pread(bs->file, l1_table_offset,
                        l1_table, l1_size2) != l1_size2)
             goto fail;
@@ -1501,7 +1524,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         return -EFBIG;
     }
 
-    refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
+    refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t));
+    if (nb_clusters && refcount_table == NULL) {
+        res->check_errors++;
+        return -ENOMEM;
+    }
 
     res->bfi.total_clusters =
         size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
@@ -1753,9 +1780,13 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
             uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
             uint32_t l1_sz  = s->snapshots[i].l1_size;
             uint64_t l1_sz2 = l1_sz * sizeof(uint64_t);
-            uint64_t *l1 = g_malloc(l1_sz2);
+            uint64_t *l1 = g_try_malloc(l1_sz2);
             int ret;
 
+            if (l1_sz2 && l1 == NULL) {
+                return -ENOMEM;
+            }
+
             ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
             if (ret < 0) {
                 g_free(l1);
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0aa9defbe2..f67b47282f 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -381,7 +381,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     sn->l1_table_offset = l1_table_offset;
     sn->l1_size = s->l1_size;
 
-    l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+    l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
+    if (s->l1_size && l1_table == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
     for(i = 0; i < s->l1_size; i++) {
         l1_table[i] = cpu_to_be64(s->l1_table[i]);
     }
@@ -499,7 +504,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
      * Decrease the refcount referenced by the old one only when the L1
      * table is overwritten.
      */
-    sn_l1_table = g_malloc0(cur_l1_bytes);
+    sn_l1_table = g_try_malloc0(cur_l1_bytes);
+    if (cur_l1_bytes && sn_l1_table == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
     if (ret < 0) {
@@ -698,17 +707,21 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
         return -EFBIG;
     }
     new_l1_bytes = sn->l1_size * sizeof(uint64_t);
-    new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
+    new_l1_table = qemu_try_blockalign(bs->file,
+                                       align_offset(new_l1_bytes, 512));
+    if (new_l1_table == NULL) {
+        return -ENOMEM;
+    }
 
     ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
     if (ret < 0) {
         error_setg(errp, "Failed to read l1 table for snapshot");
-        g_free(new_l1_table);
+        qemu_vfree(new_l1_table);
         return ret;
     }
 
     /* Switch the L1 table */
-    g_free(s->l1_table);
+    qemu_vfree(s->l1_table);
 
     s->l1_size = sn->l1_size;
     s->l1_table_offset = sn->l1_table_offset;
diff --git a/block/qcow2.c b/block/qcow2.c
index b0faa6917b..435e0e11d0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -210,20 +210,31 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
 static void report_unsupported_feature(BlockDriverState *bs,
     Error **errp, Qcow2Feature *table, uint64_t mask)
 {
+    char *features = g_strdup("");
+    char *old;
+
     while (table && table->name[0] != '\0') {
         if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
-            if (mask & (1 << table->bit)) {
-                report_unsupported(bs, errp, "%.46s", table->name);
-                mask &= ~(1 << table->bit);
+            if (mask & (1ULL << table->bit)) {
+                old = features;
+                features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
+                                           table->name);
+                g_free(old);
+                mask &= ~(1ULL << table->bit);
             }
         }
         table++;
     }
 
     if (mask) {
-        report_unsupported(bs, errp, "Unknown incompatible feature: %" PRIx64,
-                           mask);
+        old = features;
+        features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
+                                   old, *old ? ", " : "", mask);
+        g_free(old);
     }
+
+    report_unsupported(bs, errp, "%s", features);
+    g_free(features);
 }
 
 /*
@@ -677,8 +688,13 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
 
 
     if (s->l1_size > 0) {
-        s->l1_table = g_malloc0(
+        s->l1_table = qemu_try_blockalign(bs->file,
             align_offset(s->l1_size * sizeof(uint64_t), 512));
+        if (s->l1_table == NULL) {
+            error_setg(errp, "Could not allocate L1 table");
+            ret = -ENOMEM;
+            goto fail;
+        }
         ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                          s->l1_size * sizeof(uint64_t));
         if (ret < 0) {
@@ -693,11 +709,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     /* alloc L2 table/refcount block cache */
     s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
     s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
+    if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
+        error_setg(errp, "Could not allocate metadata caches");
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     s->cluster_cache = g_malloc(s->cluster_size);
     /* one more sector for decompressed data alignment */
-    s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
-                                  + 512);
+    s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+                                                    * s->cluster_size + 512);
+    if (s->cluster_data == NULL) {
+        error_setg(errp, "Could not allocate temporary cluster buffer");
+        ret = -ENOMEM;
+        goto fail;
+    }
+
     s->cluster_cache_offset = -1;
     s->flags = flags;
 
@@ -841,7 +868,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     cleanup_unknown_header_ext(bs);
     qcow2_free_snapshots(bs);
     qcow2_refcount_close(bs);
-    g_free(s->l1_table);
+    qemu_vfree(s->l1_table);
     /* else pre-write overlap checks in cache_destroy may crash */
     s->l1_table = NULL;
     if (s->l2_table_cache) {
@@ -855,13 +882,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     return ret;
 }
 
-static int qcow2_refresh_limits(BlockDriverState *bs)
+static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVQcowState *s = bs->opaque;
 
     bs->bl.write_zeroes_alignment = s->cluster_sectors;
-
-    return 0;
 }
 
 static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -1073,7 +1098,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                  */
                 if (!cluster_data) {
                     cluster_data =
-                        qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+                        qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+                                                      * s->cluster_size);
+                    if (cluster_data == NULL) {
+                        ret = -ENOMEM;
+                        goto fail;
+                    }
                 }
 
                 assert(cur_nr_sectors <=
@@ -1173,8 +1203,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
         if (s->crypt_method) {
             if (!cluster_data) {
-                cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
-                                                 s->cluster_size);
+                cluster_data = qemu_try_blockalign(bs->file,
+                                                   QCOW_MAX_CRYPT_CLUSTERS
+                                                   * s->cluster_size);
+                if (cluster_data == NULL) {
+                    ret = -ENOMEM;
+                    goto fail;
+                }
             }
 
             assert(hd_qiov.size <=
@@ -1261,7 +1296,7 @@ fail:
 static void qcow2_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
-    g_free(s->l1_table);
+    qemu_vfree(s->l1_table);
     /* else pre-write overlap checks in cache_destroy may crash */
     s->l1_table = NULL;
 
@@ -1548,7 +1583,7 @@ static int preallocate(BlockDriverState *bs)
     int ret;
     QCowL2Meta *meta;
 
-    nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+    nb_sectors = bdrv_nb_sectors(bs);
     offset = 0;
 
     while (nb_sectors) {
@@ -1938,7 +1973,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
         /* align end of file to a sector boundary to ease reading with
            sector based I/Os */
         cluster_offset = bdrv_getlength(bs->file);
-        cluster_offset = (cluster_offset + 511) & ~511;
         bdrv_truncate(bs->file, cluster_offset);
         return 0;
     }
diff --git a/block/qed-check.c b/block/qed-check.c
index b473dcd61f..40a882cc93 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -227,8 +227,11 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
     };
     int ret;
 
-    check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
-                                       sizeof(check.used_clusters[0]));
+    check.used_clusters = g_try_malloc0(((check.nclusters + 31) / 32) *
+                                        sizeof(check.used_clusters[0]));
+    if (check.nclusters && check.used_clusters == NULL) {
+        return -ENOMEM;
+    }
 
     check.result->bfi.total_clusters =
         (s->header.image_size + s->header.cluster_size - 1) /
diff --git a/block/qed.c b/block/qed.c
index cd4872b529..ba395af76a 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -528,13 +528,11 @@ out:
     return ret;
 }
 
-static int bdrv_qed_refresh_limits(BlockDriverState *bs)
+static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
 
     bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
-
-    return 0;
 }
 
 /* We have nothing to do for QED reopen, stubs just return
@@ -1242,7 +1240,11 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
         struct iovec *iov = acb->qiov->iov;
 
         if (!iov->iov_base) {
-            iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+            iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
+            if (iov->iov_base == NULL) {
+                qed_aio_complete(acb, -ENOMEM);
+                return;
+            }
             memset(iov->iov_base, 0, iov->iov_len);
         }
     }
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 2bcc73dc37..1194eb00ad 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -221,7 +221,7 @@ static int raw_normalize_devicepath(const char **filename)
 }
 #endif
 
-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     char *buf;
@@ -240,24 +240,24 @@ static void raw_probe_alignment(BlockDriverState *bs)
     s->buf_align = 0;
 
 #ifdef BLKSSZGET
-    if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
+    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
         bs->request_alignment = sector_size;
     }
 #endif
 #ifdef DKIOCGETBLOCKSIZE
-    if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
         bs->request_alignment = sector_size;
     }
 #endif
 #ifdef DIOCGSECTORSIZE
-    if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
         bs->request_alignment = sector_size;
     }
 #endif
 #ifdef CONFIG_XFS
     if (s->is_xfs) {
         struct dioattr da;
-        if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+        if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
             bs->request_alignment = da.d_miniosz;
             /* The kernel returns wrong information for d_mem */
             /* s->buf_align = da.d_mem; */
@@ -270,7 +270,7 @@ static void raw_probe_alignment(BlockDriverState *bs)
         size_t align;
         buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
         for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
-            if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+            if (pread(fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
                 s->buf_align = align;
                 break;
             }
@@ -282,13 +282,18 @@ static void raw_probe_alignment(BlockDriverState *bs)
         size_t align;
         buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
         for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
-            if (pread(s->fd, buf, align, 0) >= 0) {
+            if (pread(fd, buf, align, 0) >= 0) {
                 bs->request_alignment = align;
                 break;
             }
         }
         qemu_vfree(buf);
     }
+
+    if (!s->buf_align || !bs->request_alignment) {
+        error_setg(errp, "Could not find working O_DIRECT alignment. "
+                         "Try cache.direct=off.");
+    }
 }
 
 static void raw_parse_flags(int bdrv_flags, int *open_flags)
@@ -505,6 +510,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     BDRVRawState *s;
     BDRVRawReopenState *raw_s;
     int ret = 0;
+    Error *local_err = NULL;
 
     assert(state != NULL);
     assert(state->bs != NULL);
@@ -577,6 +583,19 @@ static int raw_reopen_prepare(BDRVReopenState *state,
             ret = -1;
         }
     }
+
+    /* Fail already reopen_prepare() if we can't get a working O_DIRECT
+     * alignment with the new fd. */
+    if (raw_s->fd != -1) {
+        raw_probe_alignment(state->bs, raw_s->fd, &local_err);
+        if (local_err) {
+            qemu_close(raw_s->fd);
+            raw_s->fd = -1;
+            error_propagate(errp, local_err);
+            ret = -EINVAL;
+        }
+    }
+
     return ret;
 }
 
@@ -615,14 +634,12 @@ static void raw_reopen_abort(BDRVReopenState *state)
     state->opaque = NULL;
 }
 
-static int raw_refresh_limits(BlockDriverState *bs)
+static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
 
-    raw_probe_alignment(bs);
+    raw_probe_alignment(bs, s->fd, errp);
     bs->bl.opt_mem_alignment = s->buf_align;
-
-    return 0;
 }
 
 static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
@@ -781,7 +798,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
      * Ok, we have to do it the hard way, copy all segments into
      * a single aligned buffer.
      */
-    buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
+    buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
+    if (buf == NULL) {
+        return -ENOMEM;
+    }
+
     if (aiocb->aio_type & QEMU_AIO_WRITE) {
         char *p = buf;
         int i;
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 492f58de69..f82f4c25df 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -94,10 +94,9 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return bdrv_get_info(bs->file, bdi);
 }
 
-static int raw_refresh_limits(BlockDriverState *bs)
+static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     bs->bl = bs->file->bl;
-    return 0;
 }
 
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
diff --git a/block/rbd.c b/block/rbd.c
index 2b797d3e8b..4459102adf 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -617,7 +617,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
                                        RBDAIOCmd cmd)
 {
     RBDAIOCB *acb;
-    RADOSCB *rcb;
+    RADOSCB *rcb = NULL;
     rbd_completion_t c;
     int64_t off, size;
     char *buf;
@@ -631,7 +631,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
         acb->bounce = NULL;
     } else {
-        acb->bounce = qemu_blockalign(bs, qiov->size);
+        acb->bounce = qemu_try_blockalign(bs, qiov->size);
+        if (acb->bounce == NULL) {
+            goto failed;
+        }
     }
     acb->ret = 0;
     acb->error = 0;
diff --git a/block/stream.c b/block/stream.c
index 34de8ba0d9..cdea3e8d0c 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -76,7 +76,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
         bdrv_unref(unused);
     }
 
-    bdrv_refresh_limits(top);
+    bdrv_refresh_limits(top, NULL);
 }
 
 static void coroutine_fn stream_run(void *opaque)
diff --git a/block/vdi.c b/block/vdi.c
index 197bd77c97..adc6aa9a5f 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,13 +53,6 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
-#endif
-#endif
 
 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -299,7 +292,12 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
         return -ENOTSUP;
     }
 
-    bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
+    bmap = g_try_malloc(s->header.blocks_in_image * sizeof(uint32_t));
+    if (s->header.blocks_in_image && bmap == NULL) {
+        res->check_errors++;
+        return -ENOMEM;
+    }
+
     memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
 
     /* Check block map and value of blocks_allocated. */
@@ -357,23 +355,23 @@ static int vdi_make_empty(BlockDriverState *bs)
 static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     const VdiHeader *header = (const VdiHeader *)buf;
-    int result = 0;
+    int ret = 0;
 
     logout("\n");
 
     if (buf_size < sizeof(*header)) {
         /* Header too small, no VDI. */
     } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
-        result = 100;
+        ret = 100;
     }
 
-    if (result == 0) {
+    if (ret == 0) {
         logout("no vdi image\n");
     } else {
         logout("%s", header->text);
     }
 
-    return result;
+    return ret;
 }
 
 static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
@@ -478,7 +476,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
 
     bmap_size = header.blocks_in_image * sizeof(uint32_t);
     bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
-    s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
+    s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+    if (s->bmap == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
     ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
     if (ret < 0) {
         goto fail_free_bmap;
@@ -493,7 +496,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
     return 0;
 
  fail_free_bmap:
-    g_free(s->bmap);
+    qemu_vfree(s->bmap);
 
  fail:
     return ret;
@@ -681,8 +684,7 @@ static int vdi_co_write(BlockDriverState *bs,
 
 static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
 {
-    int fd;
-    int result = 0;
+    int ret = 0;
     uint64_t bytes = 0;
     uint32_t blocks;
     size_t block_size = DEFAULT_CLUSTER_SIZE;
@@ -690,7 +692,10 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     VdiHeader header;
     size_t i;
     size_t bmap_size;
-    bool nocow = false;
+    int64_t offset = 0;
+    Error *local_err = NULL;
+    BlockDriverState *bs = NULL;
+    uint32_t *bmap = NULL;
 
     logout("\n");
 
@@ -707,37 +712,25 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
         image_type = VDI_TYPE_STATIC;
     }
 #endif
-    nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
 
     if (bytes > VDI_DISK_SIZE_MAX) {
-        result = -ENOTSUP;
+        ret = -ENOTSUP;
         error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
                           ", max supported is 0x%" PRIx64 ")",
                           bytes, VDI_DISK_SIZE_MAX);
         goto exit;
     }
 
-    fd = qemu_open(filename,
-                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-                   0644);
-    if (fd < 0) {
-        result = -errno;
+    ret = bdrv_create_file(filename, opts, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
         goto exit;
     }
-
-    if (nocow) {
-#ifdef __linux__
-        /* Set NOCOW flag to solve performance issue on fs like btrfs.
-         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
-         * be ignored since any failure of this operation should not block the
-         * left work.
-         */
-        int attr;
-        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
-            attr |= FS_NOCOW_FL;
-            ioctl(fd, FS_IOC_SETFLAGS, &attr);
-        }
-#endif
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto exit;
     }
 
     /* We need enough blocks to store the given disk size,
@@ -769,13 +762,20 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     vdi_header_print(&header);
 #endif
     vdi_header_to_le(&header);
-    if (write(fd, &header, sizeof(header)) < 0) {
-        result = -errno;
-        goto close_and_exit;
+    ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+    if (ret < 0) {
+        error_setg(errp, "Error writing header to %s", filename);
+        goto exit;
     }
+    offset += sizeof(header);
 
     if (bmap_size > 0) {
-        uint32_t *bmap = g_malloc0(bmap_size);
+        bmap = g_try_malloc0(bmap_size);
+        if (bmap == NULL) {
+            ret = -ENOMEM;
+            error_setg(errp, "Could not allocate bmap");
+            goto exit;
+        }
         for (i = 0; i < blocks; i++) {
             if (image_type == VDI_TYPE_STATIC) {
                 bmap[i] = i;
@@ -783,35 +783,33 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
                 bmap[i] = VDI_UNALLOCATED;
             }
         }
-        if (write(fd, bmap, bmap_size) < 0) {
-            result = -errno;
-            g_free(bmap);
-            goto close_and_exit;
+        ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+        if (ret < 0) {
+            error_setg(errp, "Error writing bmap to %s", filename);
+            goto exit;
         }
-        g_free(bmap);
+        offset += bmap_size;
     }
 
     if (image_type == VDI_TYPE_STATIC) {
-        if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
-            result = -errno;
-            goto close_and_exit;
+        ret = bdrv_truncate(bs, offset + blocks * block_size);
+        if (ret < 0) {
+            error_setg(errp, "Failed to statically allocate %s", filename);
+            goto exit;
         }
     }
 
-close_and_exit:
-    if ((close(fd) < 0) && !result) {
-        result = -errno;
-    }
-
 exit:
-    return result;
+    bdrv_unref(bs);
+    g_free(bmap);
+    return ret;
 }
 
 static void vdi_close(BlockDriverState *bs)
 {
     BDRVVdiState *s = bs->opaque;
 
-    g_free(s->bmap);
+    qemu_vfree(s->bmap);
 
     migrate_del_blocker(s->migration_blocker);
     error_free(s->migration_blocker);
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
index fe879ed995..0640d3f4a9 100644
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -82,8 +82,6 @@ void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
     assert(d != NULL);
 
     le32_to_cpus(&d->signature);
-    le32_to_cpus(&d->trailing_bytes);
-    le64_to_cpus(&d->leading_bytes);
     le64_to_cpus(&d->file_offset);
     le64_to_cpus(&d->sequence_number);
 }
@@ -99,6 +97,15 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
     cpu_to_le64s(&d->sequence_number);
 }
 
+void vhdx_log_data_le_import(VHDXLogDataSector *d)
+{
+    assert(d != NULL);
+
+    le32_to_cpus(&d->data_signature);
+    le32_to_cpus(&d->sequence_high);
+    le32_to_cpus(&d->sequence_low);
+}
+
 void vhdx_log_data_le_export(VHDXLogDataSector *d)
 {
     assert(d != NULL);
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index a77c040ee0..eb5c7a097b 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -84,6 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
     if (ret < 0) {
         goto exit;
     }
+    vhdx_log_entry_hdr_le_import(hdr);
 
 exit:
     return ret;
@@ -211,7 +212,7 @@ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
 {
     int valid = false;
 
-    if (memcmp(&hdr->signature, "loge", 4)) {
+    if (hdr->signature != VHDX_LOG_SIGNATURE) {
         goto exit;
     }
 
@@ -275,12 +276,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
         goto exit;
     }
 
-    if (!memcmp(&desc->signature, "zero", 4)) {
+    if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
         if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
             /* valid */
             ret = true;
         }
-    } else if (!memcmp(&desc->signature, "desc", 4)) {
+    } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
             /* valid */
             ret = true;
     }
@@ -327,13 +328,15 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
  * passed into this function. Each descriptor will also be validated,
  * and error returned if any are invalid. */
 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
-                              VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+                              VHDXLogEntries *log, VHDXLogDescEntries **buffer,
+                              bool convert_endian)
 {
     int ret = 0;
     uint32_t desc_sectors;
     uint32_t sectors_read;
     VHDXLogEntryHeader hdr;
     VHDXLogDescEntries *desc_entries = NULL;
+    VHDXLogDescriptor desc;
     int i;
 
     assert(*buffer == NULL);
@@ -342,14 +345,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
     if (ret < 0) {
         goto exit;
     }
-    vhdx_log_entry_hdr_le_import(&hdr);
+
     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
         ret = -EINVAL;
         goto exit;
     }
 
     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
-    desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    desc_entries = qemu_try_blockalign(bs->file,
+                                       desc_sectors * VHDX_LOG_SECTOR_SIZE);
+    if (desc_entries == NULL) {
+        ret = -ENOMEM;
+        goto exit;
+    }
 
     ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
                                 desc_sectors, false);
@@ -363,12 +371,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
 
     /* put in proper endianness, and validate each desc */
     for (i = 0; i < hdr.descriptor_count; i++) {
-        vhdx_log_desc_le_import(&desc_entries->desc[i]);
-        if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+        desc = desc_entries->desc[i];
+        vhdx_log_desc_le_import(&desc);
+        if (convert_endian) {
+            desc_entries->desc[i] = desc;
+        }
+        if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
             ret = -EINVAL;
             goto free_and_exit;
         }
     }
+    if (convert_endian) {
+        desc_entries->hdr = hdr;
+    }
 
     *buffer = desc_entries;
     goto exit;
@@ -403,7 +418,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
 
     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
 
-    if (!memcmp(&desc->signature, "desc", 4)) {
+    if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
         /* data sector */
         if (data == NULL) {
             ret = -EFAULT;
@@ -431,10 +446,15 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
 
         memcpy(buffer+offset, &desc->trailing_bytes, 4);
 
-    } else if (!memcmp(&desc->signature, "zero", 4)) {
+    } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
         /* write 'count' sectors of sector */
         memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
         count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+    } else {
+        error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
+                      desc->signature);
+        ret = -EINVAL;
+        goto exit;
     }
 
     file_offset = desc->file_offset;
@@ -493,13 +513,13 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
             goto exit;
         }
 
-        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
         if (ret < 0) {
             goto exit;
         }
 
         for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
-            if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+            if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
                 /* data sector, so read a sector to flush */
                 ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
                                             data, 1, false);
@@ -510,6 +530,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                     ret = -EINVAL;
                     goto exit;
                 }
+                vhdx_log_data_le_import(data);
             }
 
             ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
@@ -558,9 +579,6 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
         goto inc_and_exit;
     }
 
-    vhdx_log_entry_hdr_le_import(&hdr);
-
-
     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
         goto inc_and_exit;
     }
@@ -573,13 +591,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
 
     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
 
-    /* Read desc sectors, and calculate log checksum */
+    /* Read all log sectors, and calculate log checksum */
 
     total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
 
 
     /* read_desc() will increment the read idx */
-    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
     if (ret < 0) {
         goto free_and_exit;
     }
@@ -602,7 +620,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
         }
     }
     crc ^= 0xffffffff;
-    if (crc != desc_buffer->hdr.checksum) {
+    if (crc != hdr.checksum) {
         goto free_and_exit;
     }
 
@@ -962,7 +980,6 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
      * last data sector */
     vhdx_update_checksum(buffer, total_length,
                          offsetof(VHDXLogEntryHeader, checksum));
-    cpu_to_le32s((uint32_t *)(buffer + 4));
 
     /* now write to the log */
     ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
diff --git a/block/vhdx.c b/block/vhdx.c
index fedcf9f9ca..f666940db7 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -135,10 +135,8 @@ typedef struct VHDXSectorInfo {
  * buf: buffer pointer
  * size: size of buffer (must be > crc_offset+4)
  *
- * Note: The resulting checksum is in the CPU endianness, not necessarily
- *       in the file format endianness (LE).  Any header export to disk should
- *       make sure that vhdx_header_le_export() is used to convert to the
- *       correct endianness
+ * Note: The buffer should have all multi-byte data in little-endian format,
+ *       and the resulting checksum is in little endian format.
  */
 uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
 {
@@ -149,6 +147,7 @@ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
 
     memset(buf + crc_offset, 0, sizeof(crc));
     crc =  crc32c(0xffffffff, buf, size);
+    cpu_to_le32s(&crc);
     memcpy(buf + crc_offset, &crc, sizeof(crc));
 
     return crc;
@@ -300,7 +299,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
 {
     uint8_t *buffer = NULL;
     int ret;
-    VHDXHeader header_le;
+    VHDXHeader *header_le;
 
     assert(bs_file != NULL);
     assert(hdr != NULL);
@@ -321,11 +320,12 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
     }
 
     /* overwrite the actual VHDXHeader portion */
-    memcpy(buffer, hdr, sizeof(VHDXHeader));
-    hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
-                                         offsetof(VHDXHeader, checksum));
-    vhdx_header_le_export(hdr, &header_le);
-    ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
+    header_le = (VHDXHeader *)buffer;
+    memcpy(header_le, hdr, sizeof(VHDXHeader));
+    vhdx_header_le_export(hdr, header_le);
+    vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+                         offsetof(VHDXHeader, checksum));
+    ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
 
 exit:
     qemu_vfree(buffer);
@@ -432,13 +432,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
     }
     /* copy over just the relevant portion that we need */
     memcpy(header1, buffer, sizeof(VHDXHeader));
-    vhdx_header_le_import(header1);
 
-    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
-        !memcmp(&header1->signature, "head", 4)             &&
-        header1->version == 1) {
-        h1_seq = header1->sequence_number;
-        h1_valid = true;
+    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
+        vhdx_header_le_import(header1);
+        if (header1->signature == VHDX_HEADER_SIGNATURE &&
+            header1->version == 1) {
+            h1_seq = header1->sequence_number;
+            h1_valid = true;
+        }
     }
 
     ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
@@ -447,13 +448,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
     }
     /* copy over just the relevant portion that we need */
     memcpy(header2, buffer, sizeof(VHDXHeader));
-    vhdx_header_le_import(header2);
 
-    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
-        !memcmp(&header2->signature, "head", 4)             &&
-        header2->version == 1) {
-        h2_seq = header2->sequence_number;
-        h2_valid = true;
+    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
+        vhdx_header_le_import(header2);
+        if (header2->signature == VHDX_HEADER_SIGNATURE &&
+            header2->version == 1) {
+            h2_seq = header2->sequence_number;
+            h2_valid = true;
+        }
     }
 
     /* If there is only 1 valid header (or no valid headers), we
@@ -519,15 +521,21 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
         goto fail;
     }
     memcpy(&s->rt, buffer, sizeof(s->rt));
-    vhdx_region_header_le_import(&s->rt);
     offset += sizeof(s->rt);
 
-    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
-        memcmp(&s->rt.signature, "regi", 4)) {
+    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) {
         ret = -EINVAL;
         goto fail;
     }
 
+    vhdx_region_header_le_import(&s->rt);
+
+    if (s->rt.signature != VHDX_REGION_SIGNATURE) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+
     /* Per spec, maximum region table entry count is 2047 */
     if (s->rt.entry_count > 2047) {
         ret = -EINVAL;
@@ -630,7 +638,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
 
     vhdx_metadata_header_le_import(&s->metadata_hdr);
 
-    if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
+    if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
         ret = -EINVAL;
         goto exit;
     }
@@ -950,7 +958,11 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* s->bat is freed in vhdx_close() */
-    s->bat = qemu_blockalign(bs, s->bat_rt.length);
+    s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+    if (s->bat == NULL) {
+        ret = -ENOMEM;
+        goto fail;
+    }
 
     ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
     if (ret < 0) {
@@ -1540,7 +1552,8 @@ exit:
  */
 static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
                            uint64_t image_size, VHDXImageType type,
-                           bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
+                           bool use_zero_blocks, uint64_t file_offset,
+                           uint32_t length)
 {
     int ret = 0;
     uint64_t data_file_offset;
@@ -1555,7 +1568,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
     /* this gives a data start after BAT/bitmap entries, and well
      * past any metadata entries (with a 4 MB buffer for future
      * expansion */
-    data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
+    data_file_offset = file_offset + length + 5 * MiB;
     total_sectors = image_size >> s->logical_sector_size_bits;
 
     if (type == VHDX_TYPE_DYNAMIC) {
@@ -1579,7 +1592,11 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
                 use_zero_blocks ||
                 bdrv_has_zero_init(bs) == 0) {
         /* for a fixed file, the default BAT entry is not zero */
-        s->bat = g_malloc0(rt_bat->length);
+        s->bat = g_try_malloc0(length);
+        if (length && s->bat != NULL) {
+            ret = -ENOMEM;
+            goto exit;
+        }
         block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
                                                 PAYLOAD_BLOCK_NOT_PRESENT;
         block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
@@ -1594,7 +1611,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
             cpu_to_le64s(&s->bat[sinfo.bat_idx]);
             sector_num += s->sectors_per_block;
         }
-        ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
+        ret = bdrv_pwrite(bs, file_offset, s->bat, length);
         if (ret < 0) {
             goto exit;
         }
@@ -1626,6 +1643,8 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
     int ret = 0;
     uint32_t offset = 0;
     void *buffer = NULL;
+    uint64_t bat_file_offset;
+    uint32_t bat_length;
     BDRVVHDXState *s = NULL;
     VHDXRegionTableHeader *region_table;
     VHDXRegionTableEntry *rt_bat;
@@ -1674,19 +1693,26 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
     rt_metadata->length      = 1 * MiB; /* min size, and more than enough */
     *metadata_offset = rt_metadata->file_offset;
 
+    bat_file_offset = rt_bat->file_offset;
+    bat_length = rt_bat->length;
+
+    vhdx_region_header_le_export(region_table);
+    vhdx_region_entry_le_export(rt_bat);
+    vhdx_region_entry_le_export(rt_metadata);
+
     vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
                          offsetof(VHDXRegionTableHeader, checksum));
 
 
     /* The region table gives us the data we need to create the BAT,
      * so do that now */
-    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);
+    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
+                          bat_file_offset, bat_length);
+    if (ret < 0) {
+        goto exit;
+    }
 
     /* Now write out the region headers to disk */
-    vhdx_region_header_le_export(region_table);
-    vhdx_region_entry_le_export(rt_bat);
-    vhdx_region_entry_le_export(rt_metadata);
-
     ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
                       VHDX_HEADER_BLOCK_SIZE);
     if (ret < 0) {
diff --git a/block/vhdx.h b/block/vhdx.h
index 5370010c59..b4a12a081e 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -435,6 +435,7 @@ void vhdx_header_le_import(VHDXHeader *h);
 void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
 void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
 void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
+void vhdx_log_data_le_import(VHDXLogDataSector *d);
 void vhdx_log_data_le_export(VHDXLogDataSector *d);
 void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
 void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
diff --git a/block/vmdk.c b/block/vmdk.c
index 27a78daa02..01412a8939 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -106,6 +106,7 @@ typedef struct VmdkExtent {
     uint32_t l2_cache_counts[L2_CACHE_SIZE];
 
     int64_t cluster_sectors;
+    int64_t next_cluster_sector;
     char *type;
 } VmdkExtent;
 
@@ -124,7 +125,6 @@ typedef struct BDRVVmdkState {
 } BDRVVmdkState;
 
 typedef struct VmdkMetaData {
-    uint32_t offset;
     unsigned int l1_index;
     unsigned int l2_index;
     unsigned int l2_offset;
@@ -397,6 +397,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
 {
     VmdkExtent *extent;
     BDRVVmdkState *s = bs->opaque;
+    int64_t length;
 
     if (cluster_sectors > 0x200000) {
         /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
@@ -412,6 +413,11 @@ static int vmdk_add_extent(BlockDriverState *bs,
         return -EFBIG;
     }
 
+    length = bdrv_getlength(file);
+    if (length < 0) {
+        return length;
+    }
+
     s->extents = g_realloc(s->extents,
                               (s->num_extents + 1) * sizeof(VmdkExtent));
     extent = &s->extents[s->num_extents];
@@ -427,6 +433,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
     extent->l1_entry_sectors = l2_size * cluster_sectors;
     extent->l2_size = l2_size;
     extent->cluster_sectors = flat ? sectors : cluster_sectors;
+    extent->next_cluster_sector =
+        ROUND_UP(DIV_ROUND_UP(length, BDRV_SECTOR_SIZE), cluster_sectors);
 
     if (s->num_extents > 1) {
         extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -448,7 +456,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
 
     /* read the L1 table */
     l1_size = extent->l1_size * sizeof(uint32_t);
-    extent->l1_table = g_malloc(l1_size);
+    extent->l1_table = g_try_malloc(l1_size);
+    if (l1_size && extent->l1_table == NULL) {
+        return -ENOMEM;
+    }
+
     ret = bdrv_pread(extent->file,
                      extent->l1_table_offset,
                      extent->l1_table,
@@ -464,7 +476,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
     }
 
     if (extent->l1_backup_table_offset) {
-        extent->l1_backup_table = g_malloc(l1_size);
+        extent->l1_backup_table = g_try_malloc(l1_size);
+        if (l1_size && extent->l1_backup_table == NULL) {
+            ret = -ENOMEM;
+            goto fail_l1;
+        }
         ret = bdrv_pread(extent->file,
                          extent->l1_backup_table_offset,
                          extent->l1_backup_table,
@@ -669,8 +685,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
         l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
     }
-    if (bdrv_getlength(file) <
-            le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+    if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
         error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
                    (int64_t)(le64_to_cpu(header.grain_offset)
                              * BDRV_SECTOR_SIZE));
@@ -938,7 +953,7 @@ fail:
 }
 
 
-static int vmdk_refresh_limits(BlockDriverState *bs)
+static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVVmdkState *s = bs->opaque;
     int i;
@@ -950,61 +965,99 @@ static int vmdk_refresh_limits(BlockDriverState *bs)
                     s->extents[i].cluster_sectors);
         }
     }
-
-    return 0;
 }
 
+/**
+ * get_whole_cluster
+ *
+ * Copy backing file's cluster that covers @sector_num, otherwise write zero,
+ * to the cluster at @cluster_sector_num.
+ *
+ * If @skip_start_sector < @skip_end_sector, the relative range
+ * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
+ * it for call to write user data in the request.
+ */
 static int get_whole_cluster(BlockDriverState *bs,
-                VmdkExtent *extent,
-                uint64_t cluster_offset,
-                uint64_t offset,
-                bool allocate)
+                             VmdkExtent *extent,
+                             uint64_t cluster_sector_num,
+                             uint64_t sector_num,
+                             uint64_t skip_start_sector,
+                             uint64_t skip_end_sector)
 {
     int ret = VMDK_OK;
-    uint8_t *whole_grain = NULL;
+    int64_t cluster_bytes;
+    uint8_t *whole_grain;
+
+    /* For COW, align request sector_num to cluster start */
+    sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
+    cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+    whole_grain = qemu_blockalign(bs, cluster_bytes);
 
+    if (!bs->backing_hd) {
+        memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
+        memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
+               cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+    }
+
+    assert(skip_end_sector <= extent->cluster_sectors);
     /* we will be here if it's first write on non-exist grain(cluster).
      * try to read from parent image, if exist */
-    if (bs->backing_hd) {
-        whole_grain =
-            qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
-        if (!vmdk_is_cid_valid(bs)) {
-            ret = VMDK_ERROR;
-            goto exit;
-        }
+    if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+        ret = VMDK_ERROR;
+        goto exit;
+    }
 
-        /* floor offset to cluster */
-        offset -= offset % (extent->cluster_sectors * 512);
-        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
-                extent->cluster_sectors);
+    /* Read backing data before skip range */
+    if (skip_start_sector > 0) {
+        if (bs->backing_hd) {
+            ret = bdrv_read(bs->backing_hd, sector_num,
+                            whole_grain, skip_start_sector);
+            if (ret < 0) {
+                ret = VMDK_ERROR;
+                goto exit;
+            }
+        }
+        ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+                         skip_start_sector);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
         }
-
-        /* Write grain only into the active image */
-        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
-                extent->cluster_sectors);
+    }
+    /* Read backing data after skip range */
+    if (skip_end_sector < extent->cluster_sectors) {
+        if (bs->backing_hd) {
+            ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+                            whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+                            extent->cluster_sectors - skip_end_sector);
+            if (ret < 0) {
+                ret = VMDK_ERROR;
+                goto exit;
+            }
+        }
+        ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+                         whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+                         extent->cluster_sectors - skip_end_sector);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
         }
     }
+
 exit:
     qemu_vfree(whole_grain);
     return ret;
 }
 
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
+                         uint32_t offset)
 {
-    uint32_t offset;
-    QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
-    offset = cpu_to_le32(m_data->offset);
+    offset = cpu_to_le32(offset);
     /* update L2 table */
     if (bdrv_pwrite_sync(
                 extent->file,
                 ((int64_t)m_data->l2_offset * 512)
-                    + (m_data->l2_index * sizeof(m_data->offset)),
+                    + (m_data->l2_index * sizeof(offset)),
                 &offset, sizeof(offset)) < 0) {
         return VMDK_ERROR;
     }
@@ -1014,7 +1067,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
         if (bdrv_pwrite_sync(
                     extent->file,
                     ((int64_t)m_data->l2_offset * 512)
-                        + (m_data->l2_index * sizeof(m_data->offset)),
+                        + (m_data->l2_index * sizeof(offset)),
                     &offset, sizeof(offset)) < 0) {
             return VMDK_ERROR;
         }
@@ -1026,17 +1079,41 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
     return VMDK_OK;
 }
 
+/**
+ * get_cluster_offset
+ *
+ * Look up cluster offset in extent file by sector number, and store in
+ * @cluster_offset.
+ *
+ * For flat extents, the start offset as parsed from the description file is
+ * returned.
+ *
+ * For sparse extents, look up in L1, L2 table. If allocate is true, return an
+ * offset for a new cluster and update L2 cache. If there is a backing file,
+ * COW is done before returning; otherwise, zeroes are written to the allocated
+ * cluster. Both COW and zero writing skips the sector range
+ * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
+ * has new data to write there.
+ *
+ * Returns: VMDK_OK if cluster exists and mapped in the image.
+ *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
+ *          VMDK_ERROR if failed.
+ */
 static int get_cluster_offset(BlockDriverState *bs,
-                                    VmdkExtent *extent,
-                                    VmdkMetaData *m_data,
-                                    uint64_t offset,
-                                    int allocate,
-                                    uint64_t *cluster_offset)
+                              VmdkExtent *extent,
+                              VmdkMetaData *m_data,
+                              uint64_t offset,
+                              bool allocate,
+                              uint64_t *cluster_offset,
+                              uint64_t skip_start_sector,
+                              uint64_t skip_end_sector)
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
     uint32_t min_count, *l2_table;
     bool zeroed = false;
+    int64_t ret;
+    int32_t cluster_sector;
 
     if (m_data) {
         m_data->valid = 0;
@@ -1090,52 +1167,41 @@ static int get_cluster_offset(BlockDriverState *bs,
     extent->l2_cache_counts[min_index] = 1;
  found:
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
-    *cluster_offset = le32_to_cpu(l2_table[l2_index]);
+    cluster_sector = le32_to_cpu(l2_table[l2_index]);
 
     if (m_data) {
         m_data->valid = 1;
         m_data->l1_index = l1_index;
         m_data->l2_index = l2_index;
-        m_data->offset = *cluster_offset;
         m_data->l2_offset = l2_offset;
         m_data->l2_cache_entry = &l2_table[l2_index];
     }
-    if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+    if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
         zeroed = true;
     }
 
-    if (!*cluster_offset || zeroed) {
+    if (!cluster_sector || zeroed) {
         if (!allocate) {
             return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
         }
 
-        /* Avoid the L2 tables update for the images that have snapshots. */
-        *cluster_offset = bdrv_getlength(extent->file);
-        if (!extent->compressed) {
-            bdrv_truncate(
-                extent->file,
-                *cluster_offset + (extent->cluster_sectors << 9)
-            );
-        }
-
-        *cluster_offset >>= 9;
-        l2_table[l2_index] = cpu_to_le32(*cluster_offset);
+        cluster_sector = extent->next_cluster_sector;
+        extent->next_cluster_sector += extent->cluster_sectors;
 
         /* First of all we write grain itself, to avoid race condition
          * that may to corrupt the image.
          * This problem may occur because of insufficient space on host disk
          * or inappropriate VM shutdown.
          */
-        if (get_whole_cluster(
-                bs, extent, *cluster_offset, offset, allocate) == -1) {
-            return VMDK_ERROR;
-        }
-
-        if (m_data) {
-            m_data->offset = *cluster_offset;
+        ret = get_whole_cluster(bs, extent,
+                                cluster_sector,
+                                offset >> BDRV_SECTOR_BITS,
+                                skip_start_sector, skip_end_sector);
+        if (ret) {
+            return ret;
         }
     }
-    *cluster_offset <<= 9;
+    *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
     return VMDK_OK;
 }
 
@@ -1170,7 +1236,8 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
     }
     qemu_co_mutex_lock(&s->lock);
     ret = get_cluster_offset(bs, extent, NULL,
-                            sector_num * 512, 0, &offset);
+                             sector_num * 512, false, &offset,
+                             0, 0);
     qemu_co_mutex_unlock(&s->lock);
 
     switch (ret) {
@@ -1323,9 +1390,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
         if (!extent) {
             return -EIO;
         }
-        ret = get_cluster_offset(
-                            bs, extent, NULL,
-                            sector_num << 9, 0, &cluster_offset);
+        ret = get_cluster_offset(bs, extent, NULL,
+                                 sector_num << 9, false, &cluster_offset,
+                                 0, 0);
         extent_begin_sector = extent->end_sector - extent->sectors;
         extent_relative_sector_num = sector_num - extent_begin_sector;
         index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
@@ -1406,12 +1473,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         if (!extent) {
             return -EIO;
         }
-        ret = get_cluster_offset(
-                                bs,
-                                extent,
-                                &m_data,
-                                sector_num << 9, !extent->compressed,
-                                &cluster_offset);
+        extent_begin_sector = extent->end_sector - extent->sectors;
+        extent_relative_sector_num = sector_num - extent_begin_sector;
+        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors) {
+            n = nb_sectors;
+        }
+        ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+                                 !(extent->compressed || zeroed),
+                                 &cluster_offset,
+                                 index_in_cluster, index_in_cluster + n);
         if (extent->compressed) {
             if (ret == VMDK_OK) {
                 /* Refuse write to allocated cluster for streamOptimized */
@@ -1420,24 +1492,13 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                 return -EIO;
             } else {
                 /* allocate */
-                ret = get_cluster_offset(
-                                        bs,
-                                        extent,
-                                        &m_data,
-                                        sector_num << 9, 1,
-                                        &cluster_offset);
+                ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+                                         true, &cluster_offset, 0, 0);
             }
         }
         if (ret == VMDK_ERROR) {
             return -EINVAL;
         }
-        extent_begin_sector = extent->end_sector - extent->sectors;
-        extent_relative_sector_num = sector_num - extent_begin_sector;
-        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
-        n = extent->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
         if (zeroed) {
             /* Do zeroed write, buf is ignored */
             if (extent->has_zero_grain &&
@@ -1445,9 +1506,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                     n >= extent->cluster_sectors) {
                 n = extent->cluster_sectors;
                 if (!zero_dry_run) {
-                    m_data.offset = VMDK_GTE_ZEROED;
                     /* update L2 tables */
-                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                    if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
+                            != VMDK_OK) {
                         return -EIO;
                     }
                 }
@@ -1463,7 +1524,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
             }
             if (m_data.valid) {
                 /* update L2 tables */
-                if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                if (vmdk_L2update(extent, &m_data,
+                                  cluster_offset >> BDRV_SECTOR_BITS)
+                        != VMDK_OK) {
                     return -EIO;
                 }
             }
@@ -2001,7 +2064,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
     int64_t sector_num = 0;
-    int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+    int64_t total_sectors = bdrv_nb_sectors(bs);
     int ret;
     uint64_t cluster_offset;
 
@@ -2022,7 +2085,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
         }
         ret = get_cluster_offset(bs, extent, NULL,
                                  sector_num << BDRV_SECTOR_BITS,
-                                 0, &cluster_offset);
+                                 false, &cluster_offset, 0, 0);
         if (ret == VMDK_ERROR) {
             fprintf(stderr,
                     "ERROR: could not get cluster_offset for sector %"
diff --git a/block/vpc.c b/block/vpc.c
index 8b376a40be..055efc42d2 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,13 +29,6 @@
 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
 #endif
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
-#endif
-#endif
 
 /**************************************************************/
 
@@ -276,7 +269,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
             goto fail;
         }
 
-        s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
+        s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4);
+        if (s->pagetable == NULL) {
+            ret = -ENOMEM;
+            goto fail;
+        }
 
         s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 
@@ -656,39 +653,41 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
     return 0;
 }
 
-static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
+static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+                               int64_t total_sectors)
 {
     VHDDynDiskHeader *dyndisk_header =
         (VHDDynDiskHeader *) buf;
     size_t block_size, num_bat_entries;
     int i;
-    int ret = -EIO;
+    int ret;
+    int64_t offset = 0;
 
     // Write the footer (twice: at the beginning and at the end)
     block_size = 0x200000;
     num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 
-    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+    if (ret) {
         goto fail;
     }
 
-    if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) {
-        goto fail;
-    }
-    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
+    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+    if (ret < 0) {
         goto fail;
     }
 
     // Write the initial BAT
-    if (lseek(fd, 3 * 512, SEEK_SET) < 0) {
-        goto fail;
-    }
+    offset = 3 * 512;
 
     memset(buf, 0xFF, 512);
     for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
-        if (write(fd, buf, 512) != 512) {
+        ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+        if (ret < 0) {
             goto fail;
         }
+        offset += 512;
     }
 
     // Prepare the Dynamic Disk Header
@@ -709,39 +708,35 @@ static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
     dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
 
     // Write the header
-    if (lseek(fd, 512, SEEK_SET) < 0) {
-        goto fail;
-    }
+    offset = 512;
 
-    if (write(fd, buf, 1024) != 1024) {
+    ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+    if (ret < 0) {
         goto fail;
     }
-    ret = 0;
 
  fail:
     return ret;
 }
 
-static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
+static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+                             int64_t total_size)
 {
-    int ret = -EIO;
+    int ret;
 
     /* Add footer to total size */
-    total_size += 512;
-    if (ftruncate(fd, total_size) != 0) {
-        ret = -errno;
-        goto fail;
-    }
-    if (lseek(fd, -512, SEEK_END) < 0) {
-        goto fail;
-    }
-    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
-        goto fail;
+    total_size += HEADER_SIZE;
+
+    ret = bdrv_truncate(bs, total_size);
+    if (ret < 0) {
+        return ret;
     }
 
-    ret = 0;
+    ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+    if (ret < 0) {
+        return ret;
+    }
 
- fail:
     return ret;
 }
 
@@ -750,7 +745,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     uint8_t buf[1024];
     VHDFooter *footer = (VHDFooter *) buf;
     char *disk_type_param;
-    int fd, i;
+    int i;
     uint16_t cyls = 0;
     uint8_t heads = 0;
     uint8_t secs_per_cyl = 0;
@@ -758,7 +753,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     int64_t total_size;
     int disk_type;
     int ret = -EIO;
-    bool nocow = false;
+    Error *local_err = NULL;
+    BlockDriverState *bs = NULL;
 
     /* Read out options */
     total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
@@ -775,28 +771,17 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     } else {
         disk_type = VHD_DYNAMIC;
     }
-    nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
 
-    /* Create the file */
-    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
-    if (fd < 0) {
-        ret = -EIO;
+    ret = bdrv_create_file(filename, opts, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
         goto out;
     }
-
-    if (nocow) {
-#ifdef __linux__
-        /* Set NOCOW flag to solve performance issue on fs like btrfs.
-         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
-         * be ignored since any failure of this operation should not block the
-         * left work.
-         */
-        int attr;
-        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
-            attr |= FS_NOCOW_FL;
-            ioctl(fd, FS_IOC_SETFLAGS, &attr);
-        }
-#endif
+    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
+                    NULL, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        goto out;
     }
 
     /*
@@ -810,7 +795,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
                                &secs_per_cyl))
         {
             ret = -EFBIG;
-            goto fail;
+            goto out;
         }
     }
 
@@ -856,14 +841,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
 
     if (disk_type == VHD_DYNAMIC) {
-        ret = create_dynamic_disk(fd, buf, total_sectors);
+        ret = create_dynamic_disk(bs, buf, total_sectors);
     } else {
-        ret = create_fixed_disk(fd, buf, total_size);
+        ret = create_fixed_disk(bs, buf, total_size);
     }
 
-fail:
-    qemu_close(fd);
 out:
+    bdrv_unref(bs);
     g_free(disk_type_param);
     return ret;
 }
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 8e417f70ae..5030e3274d 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -139,7 +139,10 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
     waiocb->is_read = (type == QEMU_AIO_READ);
 
     if (qiov->niov > 1) {
-        waiocb->buf = qemu_blockalign(bs, qiov->size);
+        waiocb->buf = qemu_try_blockalign(bs, qiov->size);
+        if (waiocb->buf == NULL) {
+            goto out;
+        }
         if (type & QEMU_AIO_WRITE) {
             iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
         }
@@ -168,6 +171,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
 
 out_dec_count:
     aio->count--;
+out:
     qemu_aio_release(waiocb);
     return NULL;
 }