28 files changed, 1054 insertions, 267 deletions
diff --git a/Makefile.objs b/Makefile.objs
index b39d76cbb6..5f0b3f7136 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -17,8 +17,12 @@ coroutine-obj-y += qemu-coroutine-sleep.o
 ifeq ($(CONFIG_UCONTEXT_COROUTINE),y)
 coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o
 else
+ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y)
+coroutine-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o
+else
 coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o
 endif
+endif
 coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 
 #######################################################################
diff --git a/block.c b/block.c
index 52ffe1494a..b88ee90817 100644
--- a/block.c
+++ b/block.c
@@ -1244,13 +1244,17 @@ ro_cleanup:
     return ret;
 }
 
-void bdrv_commit_all(void)
+int bdrv_commit_all(void)
 {
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        bdrv_commit(bs);
+        int ret = bdrv_commit(bs);
+        if (ret < 0) {
+            return ret;
+        }
     }
+    return 0;
 }
 
 struct BdrvTrackedRequest {
diff --git a/block.h b/block.h
index 48d0bf3592..415bb17c73 100644
--- a/block.h
+++ b/block.h
@@ -165,7 +165,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs);
 int bdrv_commit(BlockDriverState *bs);
-void bdrv_commit_all(void);
+int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 340a6f2b26..710d4b1828 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -25,6 +25,7 @@
 #include "block_int.h"
 #include "qemu-common.h"
 #include "qcow2.h"
+#include "trace.h"
 
 typedef struct Qcow2CachedTable {
     void*   table;
@@ -100,6 +101,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         return 0;
     }
 
+    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+                                  c == s->l2_table_cache, i);
+
     if (c->depends) {
         ret = qcow2_cache_flush_dependency(bs, c);
     } else if (c->depends_on_flush) {
@@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
 {
+    BDRVQcowState *s = bs->opaque;
     int result = 0;
     int ret;
     int i;
 
+    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
     for (i = 0; i < c->size; i++) {
         ret = qcow2_cache_entry_flush(bs, c, i);
         if (ret < 0 && result != -ENOSPC) {
@@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     int i;
     int ret;
 
+    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+                          offset, read_from_disk);
+
     /* Check if the table is already cached */
     for (i = 0; i < c->size; i++) {
         if (c->entries[i].offset == offset) {
@@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
 
     /* If not, write a table back and replace it */
     i = qcow2_cache_find_entry_to_replace(c);
+    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+                                        c == s->l2_table_cache, i);
     if (i < 0) {
         return i;
     }
@@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
         return ret;
     }
 
+    trace_qcow2_cache_get_read(qemu_coroutine_self(),
+                               c == s->l2_table_cache, i);
     c->entries[i].offset = 0;
     if (read_from_disk) {
         if (c == s->l2_table_cache) {
@@ -258,6 +272,10 @@ found:
     c->entries[i].cache_hits++;
     c->entries[i].ref++;
     *table = c->entries[i].table;
+
+    trace_qcow2_cache_get_done(qemu_coroutine_self(),
+                               c == s->l2_table_cache, i);
+
     return 0;
 }
 
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 07a2e936fd..e0fb90792f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -27,6 +27,7 @@
 #include "qemu-common.h"
 #include "block_int.h"
 #include "block/qcow2.h"
+#include "trace.h"
 
 int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
 {
@@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
     old_l2_offset = s->l1_table[l1_index];
 
+    trace_qcow2_l2_allocate(bs, l1_index);
+
     /* allocate a new l2 entry */
 
     l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
@@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
     /* allocate a new entry in the l2 cache */
 
+    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
     ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
     if (ret < 0) {
         return ret;
@@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     /* write the l2 table to the file */
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
+    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
@@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     }
 
     /* update the L1 entry */
+    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
     s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
     ret = write_l1_entry(bs, l1_index);
     if (ret < 0) {
@@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     }
 
     *table = l2_table;
+    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
     return 0;
 
 fail:
+    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
     qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
     s->l1_table[l1_index] = old_l2_offset;
     return ret;
@@ -581,9 +589,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     BDRVQcowState *s = bs->opaque;
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
-    uint64_t cluster_offset = m->cluster_offset;
+    uint64_t cluster_offset = m->alloc_offset;
     bool cow = false;
 
+    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+
     if (m->nb_clusters == 0)
         return 0;
 
@@ -667,64 +677,15 @@ err:
  }
 
 /*
- * alloc_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in qcow2 file.
- * If the offset is not found, allocate a new cluster.
- *
- * If the cluster was already allocated, m->nb_clusters is set to 0,
- * other fields in m are meaningless.
- *
- * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. In this case, the other
- * fields of m are valid and contain information about the first allocated
- * cluster.
- *
- * If the request conflicts with another write request in flight, the coroutine
- * is queued and will be reentered when the dependency has completed.
- *
- * Return 0 on success and -errno in error cases
+ * Returns the number of contiguous clusters that can be used for an allocating
+ * write, but require COW to be performed (this includes yet unallocated space,
+ * which must copy from the backing file)
  */
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, QCowL2Meta *m)
+static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+    uint64_t *l2_table, int l2_index)
 {
-    BDRVQcowState *s = bs->opaque;
-    int l2_index, ret;
-    uint64_t l2_offset, *l2_table;
-    int64_t cluster_offset;
-    unsigned int nb_clusters, i = 0;
-    QCowL2Meta *old_alloc;
-
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
-    if (ret < 0) {
-        return ret;
-    }
-
-again:
-    nb_clusters = size_to_clusters(s, n_end << 9);
-
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
-    /* We keep all QCOW_OFLAG_COPIED clusters */
-
-    if (cluster_offset & QCOW_OFLAG_COPIED) {
-        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], 0, 0);
-
-        cluster_offset &= ~QCOW_OFLAG_COPIED;
-        m->nb_clusters = 0;
-
-        goto out;
-    }
-
-    /* for the moment, multiple compressed clusters are not managed */
-
-    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
-        nb_clusters = 1;
-
-    /* how many available clusters ? */
+    int i = 0;
+    uint64_t cluster_offset;
 
     while (i < nb_clusters) {
         i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
@@ -745,8 +706,39 @@ again:
                 (cluster_offset & QCOW_OFLAG_COMPRESSED))
             break;
     }
+
     assert(i <= nb_clusters);
-    nb_clusters = i;
+    return i;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+    uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t cluster_offset;
+    QCowL2Meta *old_alloc;
+
+    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+                                         *host_offset, *nb_clusters);
 
     /*
      * Check if there already is an AIO write request in flight which allocates
@@ -755,8 +747,8 @@ again:
      */
     QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
 
-        uint64_t start = offset >> s->cluster_bits;
-        uint64_t end = start + nb_clusters;
+        uint64_t start = guest_offset >> s->cluster_bits;
+        uint64_t end = start + *nb_clusters;
         uint64_t old_start = old_alloc->offset >> s->cluster_bits;
         uint64_t old_end = old_start + old_alloc->nb_clusters;
 
@@ -765,58 +757,185 @@ again:
         } else {
             if (start < old_start) {
                 /* Stop at the start of a running allocation */
-                nb_clusters = old_start - start;
+                *nb_clusters = old_start - start;
             } else {
-                nb_clusters = 0;
+                *nb_clusters = 0;
             }
 
-            if (nb_clusters == 0) {
+            if (*nb_clusters == 0) {
                 /* Wait for the dependency to complete. We need to recheck
                  * the free/allocated clusters when we continue. */
                 qemu_co_mutex_unlock(&s->lock);
                 qemu_co_queue_wait(&old_alloc->dependent_requests);
                 qemu_co_mutex_lock(&s->lock);
-                goto again;
+                return -EAGAIN;
             }
         }
     }
 
-    if (!nb_clusters) {
+    if (!*nb_clusters) {
         abort();
     }
 
-    /* save info needed for meta data update */
-    m->offset = offset;
-    m->n_start = n_start;
-    m->nb_clusters = nb_clusters;
+    /* Allocate new clusters */
+    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+    if (*host_offset == 0) {
+        cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+    } else {
+        cluster_offset = *host_offset;
+        *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters);
+    }
+
+    if (cluster_offset < 0) {
+        return cluster_offset;
+    }
+    *host_offset = cluster_offset;
+    return 0;
+}
 
-    QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+/*
+ * alloc_cluster_offset
+ *
+ * For a given offset on the virtual disk, find the cluster offset in qcow2
+ * file. If the offset is not found, allocate a new cluster.
+ *
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
+ * other fields in m are meaningless.
+ *
+ * If the cluster is newly allocated, m->nb_clusters is set to the number of
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
+ *
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
+ *
+ * Return 0 on success and -errno in error cases
+ */
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+    int n_start, int n_end, int *num, QCowL2Meta *m)
+{
+    BDRVQcowState *s = bs->opaque;
+    int l2_index, ret, sectors;
+    uint64_t l2_offset, *l2_table;
+    unsigned int nb_clusters, keep_clusters;
+    uint64_t cluster_offset;
 
-    /* allocate a new cluster */
+    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
+                                      n_start, n_end);
 
-    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
-    if (cluster_offset < 0) {
-        ret = cluster_offset;
-        goto fail;
+    /* Find L2 entry for the first involved cluster */
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    if (ret < 0) {
+        return ret;
     }
 
-out:
+    /*
+     * Calculate the number of clusters to look for. We stop at L2 table
+     * boundaries to keep things simple.
+     */
+again:
+    nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS),
+                      s->l2_size - l2_index);
+
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+    /*
+     * Check how many clusters are already allocated and don't need COW, and how
+     * many need a new allocation.
+     */
+    if (cluster_offset & QCOW_OFLAG_COPIED) {
+        /* We keep all QCOW_OFLAG_COPIED clusters */
+        keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                                                  &l2_table[l2_index], 0, 0);
+        assert(keep_clusters <= nb_clusters);
+        nb_clusters -= keep_clusters;
+    } else {
+        /* For the moment, overwrite compressed clusters one by one */
+        if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+            nb_clusters = 1;
+        } else {
+            nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+        }
+
+        keep_clusters = 0;
+        cluster_offset = 0;
+    }
+
+    cluster_offset &= ~QCOW_OFLAG_COPIED;
+
+    /* If there is something left to allocate, do that now */
+    *m = (QCowL2Meta) {
+        .cluster_offset     = cluster_offset,
+        .nb_clusters        = 0,
+    };
+    qemu_co_queue_init(&m->dependent_requests);
+
+    if (nb_clusters > 0) {
+        uint64_t alloc_offset;
+        uint64_t alloc_cluster_offset;
+        uint64_t keep_bytes = keep_clusters * s->cluster_size;
+
+        /* Calculate start and size of allocation */
+        alloc_offset = offset + keep_bytes;
+
+        if (keep_clusters == 0) {
+            alloc_cluster_offset = 0;
+        } else {
+            alloc_cluster_offset = cluster_offset + keep_bytes;
+        }
+
+        /* Allocate, if necessary at a given offset in the image file */
+        ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset,
+                                      &nb_clusters, l2_table);
+        if (ret == -EAGAIN) {
+            goto again;
+        } else if (ret < 0) {
+            goto fail;
+        }
+
+        /* save info needed for meta data update */
+        if (nb_clusters > 0) {
+            int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
+            int avail_sectors = (keep_clusters + nb_clusters)
+                                << (s->cluster_bits - BDRV_SECTOR_BITS);
+
+            *m = (QCowL2Meta) {
+                .cluster_offset = keep_clusters == 0 ?
+                                  alloc_cluster_offset : cluster_offset,
+                .alloc_offset   = alloc_cluster_offset,
+                .offset         = alloc_offset,
+                .n_start        = keep_clusters == 0 ? n_start : 0,
+                .nb_clusters    = nb_clusters,
+                .nb_available   = MIN(requested_sectors, avail_sectors),
+            };
+            qemu_co_queue_init(&m->dependent_requests);
+            QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+        }
+    }
+
+    /* Some cleanup work */
     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
         goto fail_put;
     }
 
-    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
-    m->cluster_offset = cluster_offset;
+    sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
+    if (sectors > n_end) {
+        sectors = n_end;
+    }
 
-    *num = m->nb_available - n_start;
+    assert(sectors > n_start);
+    *num = sectors - n_start;
 
     return 0;
 
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
 fail_put:
-    QLIST_REMOVE(m, next_in_flight);
+    if (nb_clusters > 0) {
+        QLIST_REMOVE(m, next_in_flight);
+    }
     return ret;
 }
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2db2ede3d1..f39928a6bf 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -582,6 +582,34 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
     return offset;
 }
 
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+    int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t cluster_index;
+    int i, refcount, ret;
+
+    /* Check how many clusters there are free */
+    cluster_index = offset >> s->cluster_bits;
+    for(i = 0; i < nb_clusters; i++) {
+        refcount = get_refcount(bs, cluster_index++);
+
+        if (refcount < 0) {
+            return refcount;
+        } else if (refcount != 0) {
+            break;
+        }
+    }
+
+    /* And then allocate them */
+    ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return i;
+}
+
 /* only used to allocate compressed sectors. We try to allocate
    contiguous sectors. size must be <= cluster_size */
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
diff --git a/block/qcow2.c b/block/qcow2.c
index eb5ea485d9..7aece65406 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -29,6 +29,7 @@
 #include "block/qcow2.h"
 #include "qemu-error.h"
 #include "qerror.h"
+#include "trace.h"
 
 /*
   Differences with QCOW:
@@ -569,6 +570,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         .nb_clusters = 0,
     };
 
+    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
+                                 remaining_sectors);
+
     qemu_co_queue_init(&l2meta.dependent_requests);
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
@@ -579,6 +583,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
     while (remaining_sectors != 0) {
 
+        trace_qcow2_writev_start_part(qemu_coroutine_self());
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
         n_end = index_in_cluster + remaining_sectors;
         if (s->crypt_method &&
@@ -619,6 +624,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         qemu_co_mutex_unlock(&s->lock);
+        trace_qcow2_writev_data(qemu_coroutine_self(),
+                                (cluster_offset >> 9) + index_in_cluster);
         ret = bdrv_co_writev(bs->file,
                              (cluster_offset >> 9) + index_in_cluster,
                              cur_nr_sectors, &hd_qiov);
@@ -637,6 +644,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         remaining_sectors -= cur_nr_sectors;
         sector_num += cur_nr_sectors;
         bytes_done += cur_nr_sectors * 512;
+        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
     }
     ret = 0;
 
@@ -647,6 +655,7 @@ fail:
 
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
+    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
 
     return ret;
 }
@@ -1111,16 +1120,19 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
     int ret, new_l1_size;
 
     if (offset & 511) {
+        error_report("The new size must be a multiple of 512");
         return -EINVAL;
     }
 
     /* cannot proceed if image has snapshots */
     if (s->nb_snapshots) {
+        error_report("Can't resize an image which has snapshots");
         return -ENOTSUP;
     }
 
     /* shrinking is currently not supported */
     if (offset < bs->total_sectors * 512) {
+        error_report("qcow2 doesn't support shrinking images yet");
         return -ENOTSUP;
     }
 
diff --git a/block/qcow2.h b/block/qcow2.h
index fc35838175..e4ac366cfc 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -155,6 +155,7 @@ typedef struct QCowL2Meta
 {
     uint64_t offset;
     uint64_t cluster_offset;
+    uint64_t alloc_offset;
     int n_start;
     int nb_available;
     int nb_clusters;
@@ -193,6 +194,8 @@ int qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
 
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+    int nb_clusters);
 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
 void qcow2_free_clusters(BlockDriverState *bs,
     int64_t offset, int64_t size);
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index 02b81a2e33..e9b2aae44d 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -161,11 +161,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
         return;
     }
 
+    /* Evict an unused cache entry so we have space.  If all entries are in use
+     * we can grow the cache temporarily and we try to shrink back down later.
+     */
     if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
-        entry = QTAILQ_FIRST(&l2_cache->entries);
-        QTAILQ_REMOVE(&l2_cache->entries, entry, node);
-        l2_cache->n_entries--;
-        qed_unref_l2_cache_entry(entry);
+        CachedL2Table *next;
+        QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
+            if (entry->ref > 1) {
+                continue;
+            }
+
+            QTAILQ_REMOVE(&l2_cache->entries, entry, node);
+            l2_cache->n_entries--;
+            qed_unref_l2_cache_entry(entry);
+
+            /* Stop evicting when we've shrunk back to max size */
+            if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
+                break;
+            }
+        }
     }
 
     l2_cache->n_entries++;
diff --git a/blockdev.c b/blockdev.c
index d78aa51af5..1a500b830d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -627,12 +627,15 @@ void do_commit(Monitor *mon, const QDict *qdict)
 {
     const char *device = qdict_get_str(qdict, "device");
     BlockDriverState *bs;
+    int ret;
 
     if (!strcmp(device, "all")) {
-        bdrv_commit_all();
+        ret = bdrv_commit_all();
+        if (ret == -EBUSY) {
+            qerror_report(QERR_DEVICE_IN_USE, device);
+            return;
+        }
     } else {
-        int ret;
-
         bs = bdrv_find(device);
         if (!bs) {
             qerror_report(QERR_DEVICE_NOT_FOUND, device);
@@ -646,101 +649,55 @@ void do_commit(Monitor *mon, const QDict *qdict)
     }
 }
 
+static void blockdev_do_action(int kind, void *data, Error **errp)
+{
+    BlockdevAction action;
+    BlockdevActionList list;
+
+    action.kind = kind;
+    action.data = data;
+    list.value = &action;
+    list.next = NULL;
+    qmp_transaction(&list, errp);
+}
+
 void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
                                 bool has_format, const char *format,
+                                bool has_mode, enum NewImageMode mode,
                                 Error **errp)
 {
-    BlockDriverState *bs;
-    BlockDriver *drv, *old_drv, *proto_drv;
-    int ret = 0;
-    int flags;
-    char old_filename[1024];
-
-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
-        return;
-    }
-    if (bdrv_in_use(bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, device);
-        return;
-    }
-
-    pstrcpy(old_filename, sizeof(old_filename), bs->filename);
-
-    old_drv = bs->drv;
-    flags = bs->open_flags;
-
-    if (!has_format) {
-        format = "qcow2";
-    }
-
-    drv = bdrv_find_format(format);
-    if (!drv) {
-        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-        return;
-    }
-
-    proto_drv = bdrv_find_protocol(snapshot_file);
-    if (!proto_drv) {
-        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-        return;
-    }
-
-    ret = bdrv_img_create(snapshot_file, format, bs->filename,
-                          bs->drv->format_name, NULL, -1, flags);
-    if (ret) {
-        error_set(errp, QERR_UNDEFINED_ERROR);
-        return;
-    }
-
-    bdrv_drain_all();
-    bdrv_flush(bs);
-
-    bdrv_close(bs);
-    ret = bdrv_open(bs, snapshot_file, flags, drv);
-    /*
-     * If reopening the image file we just created fails, fall back
-     * and try to re-open the original image. If that fails too, we
-     * are in serious trouble.
-     */
-    if (ret != 0) {
-        ret = bdrv_open(bs, old_filename, flags, old_drv);
-        if (ret != 0) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
-        } else {
-            error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
-        }
-    }
+    BlockdevSnapshot snapshot = {
+        .device = (char *) device,
+        .snapshot_file = (char *) snapshot_file,
+        .has_format = has_format,
+        .format = (char *) format,
+        .has_mode = has_mode,
+        .mode = mode,
+    };
+    blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot,
+                       errp);
 }
 
 
 /* New and old BlockDriverState structs for group snapshots */
-typedef struct BlkGroupSnapshotStates {
+typedef struct BlkTransactionStates {
     BlockDriverState *old_bs;
     BlockDriverState *new_bs;
-    QSIMPLEQ_ENTRY(BlkGroupSnapshotStates) entry;
-} BlkGroupSnapshotStates;
+    QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
+} BlkTransactionStates;
 
 /*
  * 'Atomic' group snapshots.  The snapshots are taken as a set, and if any fail
  *  then we do not pivot any of the devices in the group, and abandon the
  *  snapshots
  */
-void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
-                                      Error **errp)
+void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
 {
     int ret = 0;
-    SnapshotDevList *dev_entry = dev_list;
-    SnapshotDev *dev_info = NULL;
-    BlkGroupSnapshotStates *states;
-    BlockDriver *proto_drv;
-    BlockDriver *drv;
-    int flags;
-    const char *format;
-    const char *snapshot_file;
-
-    QSIMPLEQ_HEAD(snap_bdrv_states, BlkGroupSnapshotStates) snap_bdrv_states;
+    BlockdevActionList *dev_entry = dev_list;
+    BlkTransactionStates *states, *next;
+
+    QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
     QSIMPLEQ_INIT(&snap_bdrv_states);
 
     /* drain all i/o before any snapshots */
@@ -748,21 +705,51 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
 
     /* We don't do anything in this loop that commits us to the snapshot */
     while (NULL != dev_entry) {
+        BlockdevAction *dev_info = NULL;
+        BlockDriver *proto_drv;
+        BlockDriver *drv;
+        int flags;
+        enum NewImageMode mode;
+        const char *new_image_file;
+        const char *device;
+        const char *format = "qcow2";
+
         dev_info = dev_entry->value;
         dev_entry = dev_entry->next;
 
-        states = g_malloc0(sizeof(BlkGroupSnapshotStates));
+        states = g_malloc0(sizeof(BlkTransactionStates));
         QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
 
-        states->old_bs = bdrv_find(dev_info->device);
+        switch (dev_info->kind) {
+        case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+            device = dev_info->blockdev_snapshot_sync->device;
+            if (!dev_info->blockdev_snapshot_sync->has_mode) {
+                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+            }
+            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
+            if (dev_info->blockdev_snapshot_sync->has_format) {
+                format = dev_info->blockdev_snapshot_sync->format;
+            }
+            mode = dev_info->blockdev_snapshot_sync->mode;
+            break;
+        default:
+            abort();
+        }
+
+        drv = bdrv_find_format(format);
+        if (!drv) {
+            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+            goto delete_and_fail;
+        }
 
+        states->old_bs = bdrv_find(device);
         if (!states->old_bs) {
-            error_set(errp, QERR_DEVICE_NOT_FOUND, dev_info->device);
+            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
             goto delete_and_fail;
         }
 
         if (bdrv_in_use(states->old_bs)) {
-            error_set(errp, QERR_DEVICE_IN_USE, dev_info->device);
+            error_set(errp, QERR_DEVICE_IN_USE, device);
             goto delete_and_fail;
         }
 
@@ -775,43 +762,32 @@ void qmp_blockdev_group_snapshot_sync(SnapshotDevList *dev_list,
             }
         }
 
-        snapshot_file = dev_info->snapshot_file;
-
         flags = states->old_bs->open_flags;
 
-        if (!dev_info->has_format) {
-            format = "qcow2";
-        } else {
-            format = dev_info->format;
-        }
-
-        drv = bdrv_find_format(format);
-        if (!drv) {
-            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-            goto delete_and_fail;
-        }
-
-        proto_drv = bdrv_find_protocol(snapshot_file);
+        proto_drv = bdrv_find_protocol(new_image_file);
         if (!proto_drv) {
             error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
             goto delete_and_fail;
         }
 
         /* create new image w/backing file */
-        ret = bdrv_img_create(snapshot_file, format,
-                              states->old_bs->filename,
-                              drv->format_name, NULL, -1, flags);
-        if (ret) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
-            goto delete_and_fail;
+        if (mode != NEW_IMAGE_MODE_EXISTING) {
+            ret = bdrv_img_create(new_image_file, format,
+                                  states->old_bs->filename,
+                                  states->old_bs->drv->format_name,
+                                  NULL, -1, flags);
+            if (ret) {
+                error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+                goto delete_and_fail;
+            }
         }
 
         /* We will manually add the backing_hd field to the bs later */
         states->new_bs = bdrv_new("");
-        ret = bdrv_open(states->new_bs, snapshot_file,
+        ret = bdrv_open(states->new_bs, new_image_file,
                         flags | BDRV_O_NO_BACKING, drv);
         if (ret != 0) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
             goto delete_and_fail;
         }
     }
@@ -838,7 +814,7 @@ delete_and_fail:
         }
     }
 exit:
-    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
+    QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
         g_free(states);
     }
     return;
diff --git a/configure b/configure
index 39d2b54519..ddb3e398fa 100755
--- a/configure
+++ b/configure
@@ -194,6 +194,7 @@ opengl=""
 zlib="yes"
 guest_agent="yes"
 libiscsi=""
+coroutine=""
 
 # parse CC options first
 for opt do
@@ -784,6 +785,8 @@ for opt do
   ;;
   --with-pkgversion=*) pkgversion=" ($optarg)"
   ;;
+  --with-coroutine=*) coroutine="$optarg"
+  ;;
   --disable-docs) docs="no"
   ;;
   --enable-docs) docs="yes"
@@ -1110,6 +1113,8 @@ echo "  --disable-usb-redir      disable usb network redirection support"
 echo "  --enable-usb-redir       enable usb network redirection support"
 echo "  --disable-guest-agent    disable building of the QEMU Guest Agent"
 echo "  --enable-guest-agent     enable building of the QEMU Guest Agent"
+echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
+echo "                           gthread, ucontext, sigaltstack, windows"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -2715,21 +2720,38 @@ EOF
 fi
 
 ##########################################
-# check if we have makecontext
-# (and that it's not a glibc stub which always returns -1)
+# check and set a backend for coroutine
 
-ucontext_coroutine=no
-if test "$darwin" != "yes"; then
-  cat > $TMPC << EOF
+# default is ucontext, but always fallback to gthread
+# windows autodetected by make
+if test "$coroutine" = "" -o "$coroutine" = "ucontext"; then
+  if test "$darwin" != "yes"; then
+    cat > $TMPC << EOF
 #include <ucontext.h>
 #ifdef __stub_makecontext
 #error Ignoring glibc stub makecontext which will always fail
 #endif
 int main(void) { makecontext(0, 0, 0); return 0; }
 EOF
-  if compile_prog "" "" ; then
-      ucontext_coroutine=yes
+    if compile_prog "" "" ; then
+        coroutine_backend=ucontext
+    else
+	coroutine_backend=gthread
+    fi
+  else
+    echo "Silently falling back into gthread backend under darwin"
   fi
+elif test "$coroutine" = "gthread" ; then
+  coroutine_backend=gthread
+elif test "$coroutine" = "windows" ; then
+  coroutine_backend=windows
+elif test "$coroutine" = "sigaltstack" ; then
+  coroutine_backend=sigaltstack
+else
+  echo
+  echo "Error: unknown coroutine backend $coroutine"
+  echo
+  exit 1
 fi
 
 ##########################################
@@ -2931,6 +2953,7 @@ echo "usb net redir     $usb_redir"
 echo "OpenGL support    $opengl"
 echo "libiscsi support  $libiscsi"
 echo "build guest agent $guest_agent"
+echo "coroutine backend $coroutine_backend"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3246,8 +3269,10 @@ if test "$rbd" = "yes" ; then
   echo "CONFIG_RBD=y" >> $config_host_mak
 fi
 
-if test "$ucontext_coroutine" = "yes" ; then
+if test "$coroutine_backend" = "ucontext" ; then
   echo "CONFIG_UCONTEXT_COROUTINE=y" >> $config_host_mak
+elif test "$coroutine_backend" = "sigaltstack" ; then
+  echo "CONFIG_SIGALTSTACK_COROUTINE=y" >> $config_host_mak
 fi
 
 if test "$open_by_handle_at" = "yes" ; then
diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
new file mode 100644
index 0000000000..7ff2d3379e
--- /dev/null
+++ b/coroutine-sigaltstack.c
@@ -0,0 +1,334 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu-coroutine-int.h"
+
+enum {
+    /* Maximum free pool size prevents holding too many freed coroutines */
+    POOL_MAX_SIZE = 64,
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    jmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+
+    /** Information for the signal handler (trampoline) */
+    jmp_buf tr_reenter;
+    volatile sig_atomic_t tr_called;
+    void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((destructor)) coroutine_cleanup(void)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
+        g_free(DO_UPCAST(CoroutineUContext, base, co)->stack);
+        g_free(co);
+    }
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+    /* Initialize longjmp environment and switch back the caller */
+    if (!setjmp(self->env)) {
+        longjmp(*(jmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+    CoroutineUContext *self;
+    Coroutine *co;
+    CoroutineThreadState *coTS;
+
+    /* Get the thread specific information */
+    coTS = coroutine_get_thread_state();
+    self = coTS->tr_handler;
+    coTS->tr_called = 1;
+    co = &self->base;
+
+    /*
+     * Here we have to do a bit of a ping pong between the caller, given that
+     * this is a signal handler and we have to do a return "soon". Then the
+     * caller can reestablish everything and do a longjmp here again.
+     */
+    if (!setjmp(coTS->tr_reenter)) {
+        return;
+    }
+
+    /*
+     * Ok, the caller has longjmp'ed back to us, so now prepare
+     * us for the real machine state switching. We have to jump
+     * into another function here to get a new stack context for
+     * the auto variables (which have to be auto-variables
+     * because the start of the thread happens later). Else with
+     * PIC (i.e. Position Independent Code which is used when PTH
+     * is built as a shared library) most platforms would
+     * horrible core dump as experience showed.
+     */
+    coroutine_bootstrap(self, co);
+}
+
+static Coroutine *coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    CoroutineThreadState *coTS;
+    struct sigaction sa;
+    struct sigaction osa;
+    struct sigaltstack ss;
+    struct sigaltstack oss;
+    sigset_t sigs;
+    sigset_t osigs;
+    jmp_buf old_env;
+
+    /* The way to manipulate stack is with the sigaltstack function. We
+     * prepare a stack, with it delivering a signal to ourselves and then
+     * put setjmp/longjmp where needed.
+     * This has been done keeping coroutine-ucontext as a model and with the
+     * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+     * of the coroutines and see pth_mctx.c (from the pth project) for the
+     * sigaltstack way of manipulating stacks.
+     */
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    coTS = coroutine_get_thread_state();
+    coTS->tr_handler = co;
+
+    /*
+     * Preserve the SIGUSR2 signal state, block SIGUSR2,
+     * and establish our signal handler. The signal will
+     * later transfer control onto the signal stack.
+     */
+    sigemptyset(&sigs);
+    sigaddset(&sigs, SIGUSR2);
+    pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+    sa.sa_handler = coroutine_trampoline;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK;
+    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+        abort();
+    }
+
+    /*
+     * Set the new stack.
+     */
+    ss.ss_sp = co->stack;
+    ss.ss_size = stack_size;
+    ss.ss_flags = 0;
+    if (sigaltstack(&ss, &oss) < 0) {
+        abort();
+    }
+
+    /*
+     * Now transfer control onto the signal stack and set it up.
+     * It will return immediately via "return" after the setjmp()
+     * was performed. Be careful here with race conditions.  The
+     * signal can be delivered the first time sigsuspend() is
+     * called.
+     */
+    coTS->tr_called = 0;
+    kill(getpid(), SIGUSR2);
+    sigfillset(&sigs);
+    sigdelset(&sigs, SIGUSR2);
+    while (!coTS->tr_called) {
+        sigsuspend(&sigs);
+    }
+
+    /*
+     * Inform the system that we are back off the signal stack by
+     * removing the alternative signal stack. Be careful here: It
+     * first has to be disabled, before it can be removed.
+     */
+    sigaltstack(NULL, &ss);
+    ss.ss_flags = SS_DISABLE;
+    if (sigaltstack(&ss, NULL) < 0) {
+        abort();
+    }
+    sigaltstack(NULL, &ss);
+    if (!(oss.ss_flags & SS_DISABLE)) {
+        sigaltstack(&oss, NULL);
+    }
+
+    /*
+     * Restore the old SIGUSR2 signal handler and mask
+     */
+    sigaction(SIGUSR2, &osa, NULL);
+    pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+    /*
+     * Now enter the trampoline again, but this time not as a signal
+     * handler. Instead we jump into it directly. The functionally
+     * redundant ping-pong pointer arithmentic is neccessary to avoid
+     * type-conversion warnings related to the `volatile' qualifier and
+     * the fact that `jmp_buf' usually is an array type.
+     */
+    if (!setjmp(old_env)) {
+        longjmp(coTS->tr_reenter, 1);
+    }
+
+    /*
+     * Ok, we returned again, so now we're finished
+     */
+
+    return &co->base;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    Coroutine *co;
+
+    co = QSLIST_FIRST(&pool);
+    if (co) {
+        QSLIST_REMOVE_HEAD(&pool, pool_next);
+        pool_size--;
+    } else {
+        co = coroutine_new();
+    }
+    return co;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+    if (pool_size < POOL_MAX_SIZE) {
+        QSLIST_INSERT_HEAD(&pool, &co->base, pool_next);
+        co->base.caller = NULL;
+        pool_size++;
+        return;
+    }
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = setjmp(from->env);
+    if (ret == 0) {
+        longjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
+
diff --git a/hmp-commands.hx b/hmp-commands.hx
index ed88877230..6980214a1a 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -882,14 +882,17 @@ ETEXI
 
     {
         .name       = "snapshot_blkdev",
-        .args_type  = "device:B,snapshot-file:s?,format:s?",
-        .params     = "device [new-image-file] [format]",
+        .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
+        .params     = "[-n] device [new-image-file] [format]",
         .help       = "initiates a live snapshot\n\t\t\t"
                       "of device. If a new image file is specified, the\n\t\t\t"
                       "new image file will become the new root image.\n\t\t\t"
                       "If format is specified, the snapshot file will\n\t\t\t"
                       "be created in that format. Otherwise the\n\t\t\t"
-                      "snapshot will be internal! (currently unsupported)",
+                      "snapshot will be internal! (currently unsupported).\n\t\t\t"
+                      "The default format is qcow2.  The -n flag requests QEMU\n\t\t\t"
+                      "to reuse the image found in new-image-file, instead of\n\t\t\t"
+                      "recreating it from scratch.",
         .mhandler.cmd = hmp_snapshot_blkdev,
     },
 
diff --git a/hmp.c b/hmp.c
index 3a54455574..290c43d03a 100644
--- a/hmp.c
+++ b/hmp.c
@@ -692,6 +692,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
     const char *device = qdict_get_str(qdict, "device");
     const char *filename = qdict_get_try_str(qdict, "snapshot-file");
     const char *format = qdict_get_try_str(qdict, "format");
+    int reuse = qdict_get_try_bool(qdict, "reuse", 0);
+    enum NewImageMode mode;
     Error *errp = NULL;
 
     if (!filename) {
@@ -702,7 +704,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    qmp_blockdev_snapshot_sync(device, filename, !!format, format, &errp);
+    mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+    qmp_blockdev_snapshot_sync(device, filename, !!format, format,
+                               true, mode, &errp);
     hmp_handle_error(mon, &errp);
 }
 
diff --git a/qapi-schema-test.json b/qapi-schema-test.json
index 2b38919001..8c7f9f79f4 100644
--- a/qapi-schema-test.json
+++ b/qapi-schema-test.json
@@ -22,6 +22,16 @@
                        'dict2': { 'userdef1': 'UserDefOne', 'string2': 'str' },
                        '*dict3': { 'userdef2': 'UserDefOne', 'string3': 'str' } } } }
 
+# for testing unions
+{ 'type': 'UserDefA',
+  'data': { 'boolean': 'bool' } }
+
+{ 'type': 'UserDefB',
+  'data': { 'integer': 'int' } }
+
+{ 'union': 'UserDefUnion',
+  'data': { 'a' : 'UserDefA', 'b' : 'UserDefB' } }
+
 # testing commands
 { 'command': 'user_def_cmd', 'data': {} }
 { 'command': 'user_def_cmd1', 'data': {'ud1a': 'UserDefOne'} }
diff --git a/qapi-schema.json b/qapi-schema.json
index dd9e0e5434..04fa84fbde 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1120,27 +1120,58 @@
 { 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }}
 
 ##
-# @SnapshotDev
+# @NewImageMode
+#
+# An enumeration that tells QEMU how to set the backing file path in
+# a new image file.
+#
+# @existing: QEMU should look for an existing image file.
+#
+# @absolute-paths: QEMU should create a new image with absolute paths
+# for the backing file.
+#
+# Since: 1.1
+##
+{ 'enum': 'NewImageMode'
+  'data': [ 'existing', 'absolute-paths' ] }
+
+##
+# @BlockdevSnapshot
 #
 # @device:  the name of the device to generate the snapshot from.
 #
 # @snapshot-file: the target of the new image. A new file will be created.
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
+##
+{ 'type': 'BlockdevSnapshot',
+  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
+            '*mode': 'NewImageMode' } }
+
+##
+# @BlockdevAction
+#
+# A discriminated record of operations that can be performed with
+# @transaction.
 ##
-{ 'type': 'SnapshotDev',
-  'data': {'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+{ 'union': 'BlockdevAction',
+  'data': {
+       'blockdev-snapshot-sync': 'BlockdevSnapshot',
+   } }
 
 ##
-# @blockdev-group-snapshot-sync
+# @transaction
 #
-# Generates a synchronous snapshot of a group of one or more block devices,
-# as atomically as possible.  If the snapshot of any device in the group
-# fails, then the entire group snapshot will be abandoned and the
-# appropriate error returned.
+# Atomically operate on a group of one or more block devices.  If
+# any operation fails, then the entire set of actions will be
+# abandoned and the appropriate error returned.  The only operation
+# supported is currently blockdev-snapshot-sync.
 #
 #  List of:
-#  @SnapshotDev: information needed for the device snapshot
+#  @BlockdevAction: information needed for the device snapshot
 #
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
@@ -1149,13 +1180,14 @@
 #          If @snapshot-file can't be opened, OpenFileFailed
 #          If @format is invalid, InvalidBlockFormat
 #
-# Note: The group snapshot attempt returns failure on the first snapshot
-# device failure.  Therefore, there will be only one device or snapshot file
-# returned in an error condition, and subsequent devices will not have been
-# attempted.
+# Note: The transaction aborts on the first failure.  Therefore, there will
+# be only one device or snapshot file returned in an error condition, and
+# subsequent actions will not have been attempted.
+#
+# Since 1.1
 ##
-{ 'command': 'blockdev-group-snapshot-sync',
-  'data': { 'devlist': [ 'SnapshotDev' ] } }
+{ 'command': 'transaction',
+  'data': { 'actions': [ 'BlockdevAction' ] } }
 
 ##
 # @blockdev-snapshot-sync
@@ -1170,21 +1202,19 @@
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
+# @mode: #optional whether and how QEMU should create a new image, default is
+# 'absolute-paths'.
+#
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
 #          If @snapshot-file can't be opened, OpenFileFailed
 #          If @format is invalid, InvalidBlockFormat
 #
-# Notes: One of the last steps taken by this command is to close the current
-#        image being used by @device and open the @snapshot-file one. If that
-#        fails, the command will try to reopen the original image file. If
-#        that also fails OpenFileFailed will be returned and the guest may get
-#        unexpected errors.
-#
 # Since 0.14.0
 ##
 { 'command': 'blockdev-snapshot-sync',
-  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
+            '*mode': 'NewImageMode'} }
 
 ##
 # @human-monitor-command:
diff --git a/qemu-img.c b/qemu-img.c
index 8df35648e9..0e48b35296 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1614,7 +1614,7 @@ static int img_resize(int argc, char **argv)
         printf("Image resized.\n");
         break;
     case -ENOTSUP:
-        error_report("This image format does not support resize");
+        error_report("This image does not support resize");
         break;
     case -EACCES:
         error_report("Image is read-only");
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 0c9bfac20d..dfe8a5b40b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -687,41 +687,55 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
     },
     {
-        .name       = "blockdev-group-snapshot-sync",
-        .args_type  = "devlist:O",
-        .params  = "device:B,snapshot-file:s,format:s?",
-        .mhandler.cmd_new = qmp_marshal_input_blockdev_group_snapshot_sync,
+        .name       = "transaction",
+        .args_type  = "actions:O",
+        .mhandler.cmd_new = qmp_marshal_input_transaction,
     },
 
 SQMP
-blockdev-group-snapshot-sync
-----------------------
+transaction
+-----------
 
-Synchronous snapshot of one or more block devices.  A list array input
-is accepted, that contains the device and snapshot file information for
-each device in group. The default format, if not specified, is qcow2.
+Atomically operate on one or more block devices.  The only supported
+operation for now is snapshotting.  If there is any failure performing
+any of the operations, all snapshots for the group are abandoned, and
+the original disks pre-snapshot attempt are used.
 
-If there is any failure creating or opening a new snapshot, all snapshots
-for the group are abandoned, and the original disks pre-snapshot attempt
-are used.
+A list of dictionaries is accepted, that contains the actions to be performed.
+For snapshots this is the device, the file to use for the new snapshot,
+and the format.  The default format, if not specified, is qcow2.
 
+Each new snapshot defaults to being created by QEMU (wiping any
+contents if the file already exists), but it is also possible to reuse
+an externally-created file.  In the latter case, you should ensure that
+the new image file has the same contents as the current one; QEMU cannot
+perform any meaningful check.  Typically this is achieved by using the
+current image file as the backing file for the new image.
 
 Arguments:
 
-devlist array:
-    - "device": device name to snapshot (json-string)
-    - "snapshot-file": name of new image file (json-string)
-    - "format": format of new image (json-string, optional)
+actions array:
+    - "type": the operation to perform.  The only supported
+      value is "blockdev-snapshot-sync". (json-string)
+    - "data": a dictionary.  The contents depend on the value
+      of "type".  When "type" is "blockdev-snapshot-sync":
+      - "device": device name to snapshot (json-string)
+      - "snapshot-file": name of new image file (json-string)
+      - "format": format of new image (json-string, optional)
+      - "mode": whether and how QEMU should create the snapshot file
+        (NewImageMode, optional, default "absolute-paths")
 
 Example:
 
--> { "execute": "blockdev-group-snapshot-sync", "arguments":
-                      { "devlist": [{ "device": "ide-hd0",
-                                      "snapshot-file": "/some/place/my-image",
-                                      "format": "qcow2" },
-                                    { "device": "ide-hd1",
-                                      "snapshot-file": "/some/place/my-image2",
-                                      "format": "qcow2" }] } }
+-> { "execute": "transaction",
+     "arguments": { "actions": [
+         { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0",
+                                         "snapshot-file": "/some/place/my-image",
+                                         "format": "qcow2" } },
+         { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1",
+                                         "snapshot-file": "/some/place/my-image2",
+                                         "mode": "existing",
+                                         "format": "qcow2" } } ] } }
 <- { "return": {} }
 
 EQMP
@@ -746,6 +760,8 @@ Arguments:
 
 - "device": device name to snapshot (json-string)
 - "snapshot-file": name of new image file (json-string)
+- "mode": whether and how QEMU should create the snapshot file
+  (NewImageMode, optional, default "absolute-paths")
 - "format": format of new image (json-string, optional)
 
 Example:
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index b56225bdaa..727fb77266 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -117,6 +117,7 @@ struct %(name)s
 {
     %(name)sKind kind;
     union {
+        void *data;
 ''',
                 name=name)
 
@@ -269,6 +270,7 @@ for expr in exprs:
     elif expr.has_key('union'):
         ret += generate_fwd_struct(expr['union'], expr['data']) + "\n"
         ret += generate_enum('%sKind' % expr['union'], expr['data'].keys())
+        fdef.write(generate_enum_lookup('%sKind' % expr['union'], expr['data'].keys()))
     else:
         continue
     fdecl.write(ret)
@@ -283,6 +285,10 @@ for expr in exprs:
         fdef.write(generate_type_cleanup(expr['type']) + "\n")
     elif expr.has_key('union'):
         ret += generate_union(expr['union'], expr['data'])
+        ret += generate_type_cleanup_decl(expr['union'] + "List")
+        fdef.write(generate_type_cleanup(expr['union'] + "List") + "\n")
+        ret += generate_type_cleanup_decl(expr['union'])
+        fdef.write(generate_type_cleanup(expr['union']) + "\n")
     else:
         continue
     fdecl.write(ret)
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index 5160d83c4f..54117d4d2b 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -110,10 +110,38 @@ def generate_visit_union(name, members):
 
 void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error **errp)
 {
-}
+    Error *err = NULL;
+
+    visit_start_struct(m, (void **)obj, "%(name)s", name, sizeof(%(name)s), &err);
+    visit_type_%(name)sKind(m, &(*obj)->kind, "type", &err);
+    if (err) {
+        error_propagate(errp, err);
+        goto end;
+    }
+    switch ((*obj)->kind) {
 ''',
                  name=name)
 
+    for key in members:
+        ret += mcgen('''
+    case %(abbrev)s_KIND_%(enum)s:
+        visit_type_%(c_type)s(m, &(*obj)->%(c_name)s, "data", errp);
+        break;
+''',
+                abbrev = de_camel_case(name).upper(),
+                enum = de_camel_case(key).upper(),
+                c_type=members[key],
+                c_name=c_var(key))
+
+    ret += mcgen('''
+    default:
+        abort();
+    }
+end:
+    visit_end_struct(m, errp);
+}
+''')
+
     return ret
 
 def generate_declaration(name, members, genlist=True):
@@ -242,6 +270,7 @@ for expr in exprs:
         fdecl.write(ret)
     elif expr.has_key('union'):
         ret = generate_visit_union(expr['union'], expr['data'])
+        ret += generate_visit_list(expr['union'], expr['data'])
         fdef.write(ret)
 
         ret = generate_decl_enum('%sKind' % expr['union'], expr['data'].keys())
diff --git a/test-coroutine.c b/test-coroutine.c
index bf9f3e91b5..e5d14eb696 100644
--- a/test-coroutine.c
+++ b/test-coroutine.c
@@ -177,6 +177,32 @@ static void perf_lifecycle(void)
     g_test_message("Lifecycle %u iterations: %f s\n", max, duration);
 }
 
+static void perf_nesting(void)
+{
+    unsigned int i, maxcycles, maxnesting;
+    double duration;
+
+    maxcycles = 100000000;
+    maxnesting = 20000;
+    Coroutine *root;
+    NestData nd = {
+        .n_enter  = 0,
+        .n_return = 0,
+        .max      = maxnesting,
+    };
+
+    g_test_timer_start();
+    for (i = 0; i < maxcycles; i++) {
+        root = qemu_coroutine_create(nest);
+        qemu_coroutine_enter(root, &nd);
+    }
+    duration = g_test_timer_elapsed();
+
+    g_test_message("Nesting %u iterations of %u depth each: %f s\n",
+        maxcycles, maxnesting, duration);
+}
+
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -187,6 +213,7 @@ int main(int argc, char **argv)
     g_test_add_func("/basic/in_coroutine", test_in_coroutine);
     if (g_test_perf()) {
         g_test_add_func("/perf/lifecycle", perf_lifecycle);
+        g_test_add_func("/perf/nesting", perf_nesting);
     }
     return g_test_run();
 }
diff --git a/test-qmp-input-visitor.c b/test-qmp-input-visitor.c
index 926db5cb91..1996e49576 100644
--- a/test-qmp-input-visitor.c
+++ b/test-qmp-input-visitor.c
@@ -234,6 +234,22 @@ static void test_visitor_in_list(TestInputVisitorData *data,
     qapi_free_UserDefOneList(head);
 }
 
+static void test_visitor_in_union(TestInputVisitorData *data,
+                                  const void *unused)
+{
+    Visitor *v;
+    Error *err = NULL;
+    UserDefUnion *tmp;
+
+    v = visitor_input_test_init(data, "{ 'type': 'b', 'data' : { 'integer': 42 } }");
+
+    visit_type_UserDefUnion(v, &tmp, NULL, &err);
+    g_assert(err == NULL);
+    g_assert_cmpint(tmp->kind, ==, USER_DEF_UNION_KIND_B);
+    g_assert_cmpint(tmp->b->integer, ==, 42);
+    qapi_free_UserDefUnion(tmp);
+}
+
 static void input_visitor_test_add(const char *testpath,
                                    TestInputVisitorData *data,
                                    void (*test_func)(TestInputVisitorData *data, const void *user_data))
@@ -264,6 +280,8 @@ int main(int argc, char **argv)
                             &in_visitor_data, test_visitor_in_struct_nested);
     input_visitor_test_add("/visitor/input/list",
                             &in_visitor_data, test_visitor_in_list);
+    input_visitor_test_add("/visitor/input/union",
+                            &in_visitor_data, test_visitor_in_union);
 
     g_test_run();
 
diff --git a/test-qmp-output-visitor.c b/test-qmp-output-visitor.c
index 5452cd43bc..4d6c4d4420 100644
--- a/test-qmp-output-visitor.c
+++ b/test-qmp-output-visitor.c
@@ -380,6 +380,38 @@ static void test_visitor_out_list_qapi_free(TestOutputVisitorData *data,
     qapi_free_UserDefNestedList(head);
 }
 
+static void test_visitor_out_union(TestOutputVisitorData *data,
+                                   const void *unused)
+{
+    QObject *arg, *qvalue;
+    QDict *qdict, *value;
+
+    Error *err = NULL;
+
+    UserDefUnion *tmp = g_malloc0(sizeof(UserDefUnion));
+    tmp->kind = USER_DEF_UNION_KIND_A;
+    tmp->a = g_malloc0(sizeof(UserDefA));
+    tmp->a->boolean = true;
+
+    visit_type_UserDefUnion(data->ov, &tmp, NULL, &err);
+    g_assert(err == NULL);
+    arg = qmp_output_get_qobject(data->qov);
+
+    g_assert(qobject_type(arg) == QTYPE_QDICT);
+    qdict = qobject_to_qdict(arg);
+
+    g_assert_cmpstr(qdict_get_str(qdict, "type"), ==, "a");
+
+    qvalue = qdict_get(qdict, "data");
+    g_assert(data != NULL);
+    g_assert(qobject_type(qvalue) == QTYPE_QDICT);
+    value = qobject_to_qdict(qvalue);
+    g_assert_cmpint(qdict_get_bool(value, "boolean"), ==, true);
+
+    qapi_free_UserDefUnion(tmp);
+    QDECREF(qdict);
+}
+
 static void output_visitor_test_add(const char *testpath,
                                     TestOutputVisitorData *data,
                                     void (*test_func)(TestOutputVisitorData *data, const void *user_data))
@@ -416,6 +448,8 @@ int main(int argc, char **argv)
                             &out_visitor_data, test_visitor_out_list);
     output_visitor_test_add("/visitor/output/list-qapi-free",
                             &out_visitor_data, test_visitor_out_list_qapi_free);
+    output_visitor_test_add("/visitor/output/union",
+                            &out_visitor_data, test_visitor_out_union);
 
     g_test_run();
 
diff --git a/tests/Makefile b/tests/Makefile
index 74b29dc076..c78ade122e 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,6 +1,9 @@
+export SRC_PATH
+
 CHECKS = check-qdict check-qfloat check-qint check-qstring check-qlist
 CHECKS += check-qjson test-qmp-output-visitor test-qmp-input-visitor
 CHECKS += test-string-input-visitor test-string-output-visitor test-coroutine
+CHECKS += $(SRC_PATH)/tests/qemu-iotests-quick.sh
 
 check-qint.o check-qstring.o check-qdict.o check-qlist.o check-qfloat.o check-qjson.o test-coroutine.o: $(GENERATED_HEADERS)
 
@@ -42,6 +45,13 @@ test-qmp-input-visitor: test-qmp-input-visitor.o $(qobject-obj-y) $(qapi-obj-y)
 test-qmp-commands.o: $(addprefix $(qapi-dir)/, test-qapi-types.c test-qapi-types.h test-qapi-visit.c test-qapi-visit.h test-qmp-marshal.c test-qmp-commands.h) $(qapi-obj-y)
 test-qmp-commands: test-qmp-commands.o $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) $(qapi-dir)/test-qapi-visit.o $(qapi-dir)/test-qapi-types.o $(qapi-dir)/test-qmp-marshal.o module.o
 
-.PHONY: check
+$(SRC_PATH)/tests/qemu-iotests-quick.sh: qemu-img qemu-io
+
+
+.PHONY: check check-block
+
 check: $(CHECKS)
 	$(call quiet-command, gtester $(CHECKS), "  CHECK")
+
+check-block:
+	$(call quiet-command, $(SHELL) $(SRC_PATH)/tests/check-block.sh , "  CHECK")
diff --git a/tests/check-block.sh b/tests/check-block.sh
new file mode 100755
index 0000000000..b9d9c6a9f6
--- /dev/null
+++ b/tests/check-block.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+export QEMU_PROG="$(pwd)/x86_64-softmmu/qemu-system-x86_64"
+export QEMU_IMG_PROG="$(pwd)/qemu-img"
+export QEMU_IO_PROG="$(pwd)/qemu-io"
+
+if [ ! -x $QEMU_PROG ]; then
+    echo "'make check-block' requires qemu-system-x86_64"
+    exit 1
+fi
+
+cd $SRC_PATH/tests/qemu-iotests
+
+ret=0
+./check -T -nocache -raw || ret=1
+./check -T -nocache -qcow2 || ret=1
+./check -T -nocache -qed|| ret=1
+./check -T -nocache -vmdk|| ret=1
+./check -T -nocache -vpc || ret=1
+
+exit $ret
diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
new file mode 100755
index 0000000000..cf90de0b8b
--- /dev/null
+++ b/tests/qemu-iotests-quick.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# We don't know which of the system emulator binaries there is (or if there is
+# any at all), so the 'quick' group doesn't contain any tests that require
+# running qemu proper. Assign a fake binary name so that qemu-iotests doesn't
+# complain about the missing binary.
+export QEMU_PROG="this_should_be_unused"
+
+export QEMU_IMG_PROG="$(pwd)/qemu-img"
+export QEMU_IO_PROG="$(pwd)/qemu-io"
+
+cd $SRC_PATH/tests/qemu-iotests
+
+ret=0
+./check -T -nocache -qcow2 -g quick || ret=1
+
+exit $ret
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index fcf869d36e..b549f10f17 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -8,32 +8,32 @@
 # test-group association ... one line per test
 #
 001 rw auto
-002 rw auto
+002 rw auto quick
 003 rw auto
-004 rw auto
+004 rw auto quick
 005 img auto
 006 img auto
 007 snapshot auto
 008 rw auto
 009 rw auto
 010 rw auto
-011 rw auto
-012 auto
+011 rw auto quick
+012 auto quick
 013 rw auto
 014 rw auto
 015 rw snapshot auto
-016 rw auto
-017 rw backing auto
+016 rw auto quick
+017 rw backing auto quick
 018 rw backing auto
-019 rw backing auto
-020 rw backing auto
+019 rw backing auto quick
+020 rw backing auto quick
 021 io auto
 022 rw snapshot auto
 023 rw auto
-024 rw backing auto
-025 rw auto
+024 rw backing auto quick
+025 rw auto quick
 026 rw blkdbg auto
-027 rw auto
+027 rw auto quick
 028 rw backing auto
-029 rw auto
+029 rw auto quick
 030 rw auto
diff --git a/trace-events b/trace-events
index dfe28ed42b..74bb92af4f 100644
--- a/trace-events
+++ b/trace-events
@@ -312,6 +312,31 @@ scsi_request_sense(int target, int lun, int tag) "target %d lun %d tag %d"
 # vl.c
 vm_state_notify(int running, int reason) "running %d reason %d"
 
+# block/qcow2.c
+qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d"
+qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
+qcow2_writev_start_part(void *co) "co %p"
+qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
+qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
+
+qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offet %" PRIx64 " n_start %d n_end %d"
+qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
+qcow2_cluster_alloc_phys(void *co) "co %p"
+qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d"
+
+qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d"
+
+qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset %" PRIx64 " read_from_disk %d"
+qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d"
+qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+
 # block/qed-l2-cache.c
 qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p"
 qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"