aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/backup.c105
-rw-r--r--block/commit.c2
-rw-r--r--block/curl.c25
-rw-r--r--block/gluster.c40
-rw-r--r--block/iscsi.c28
-rw-r--r--block/linux-aio.c18
-rw-r--r--block/mirror.c10
-rw-r--r--block/nbd.c18
-rw-r--r--block/qcow2.c7
-rw-r--r--block/qcow2.h8
-rw-r--r--block/qed.c10
-rw-r--r--block/raw.c23
-rw-r--r--block/rbd.c16
-rw-r--r--block/sheepdog.c65
-rw-r--r--block/ssh.c12
-rw-r--r--block/stream.c8
-rw-r--r--block/vhdx.h2
-rw-r--r--block/vmdk.c151
-rw-r--r--block/win32-aio.c10
19 files changed, 295 insertions, 263 deletions
diff --git a/block/backup.c b/block/backup.c
index 16105d40b1..e12b3b1461 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -37,6 +37,7 @@ typedef struct CowRequest {
typedef struct BackupBlockJob {
BlockJob common;
BlockDriverState *target;
+ MirrorSyncMode sync_mode;
RateLimit limit;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
@@ -247,40 +248,83 @@ static void coroutine_fn backup_run(void *opaque)
bdrv_add_before_write_notifier(bs, &before_write);
- for (; start < end; start++) {
- bool error_is_read;
-
- if (block_job_is_cancelled(&job->common)) {
- break;
+ if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
+ while (!block_job_is_cancelled(&job->common)) {
+ /* Yield until the job is cancelled. We just let our before_write
+ * notify callback service CoW requests. */
+ job->common.busy = false;
+ qemu_coroutine_yield();
+ job->common.busy = true;
}
+ } else {
+ /* Both FULL and TOP SYNC_MODE's require copying.. */
+ for (; start < end; start++) {
+ bool error_is_read;
- /* we need to yield so that qemu_aio_flush() returns.
- * (without, VM does not reboot)
- */
- if (job->common.speed) {
- uint64_t delay_ns = ratelimit_calculate_delay(
- &job->limit, job->sectors_read);
- job->sectors_read = 0;
- block_job_sleep_ns(&job->common, rt_clock, delay_ns);
- } else {
- block_job_sleep_ns(&job->common, rt_clock, 0);
- }
+ if (block_job_is_cancelled(&job->common)) {
+ break;
+ }
- if (block_job_is_cancelled(&job->common)) {
- break;
- }
+ /* we need to yield so that qemu_aio_flush() returns.
+ * (without, VM does not reboot)
+ */
+ if (job->common.speed) {
+ uint64_t delay_ns = ratelimit_calculate_delay(
+ &job->limit, job->sectors_read);
+ job->sectors_read = 0;
+ block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+ } else {
+ block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+ }
- ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
- if (ret < 0) {
- /* Depending on error action, fail now or retry cluster */
- BlockErrorAction action =
- backup_error_action(job, error_is_read, -ret);
- if (action == BDRV_ACTION_REPORT) {
+ if (block_job_is_cancelled(&job->common)) {
break;
- } else {
- start--;
- continue;
+ }
+
+ if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
+ int i, n;
+ int alloced = 0;
+
+ /* Check to see if these blocks are already in the
+ * backing file. */
+
+ for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
+ /* bdrv_co_is_allocated() only returns true/false based
+ * on the first set of sectors it comes accross that
+ * are are all in the same state.
+ * For that reason we must verify each sector in the
+ * backup cluster length. We end up copying more than
+ * needed but at some point that is always the case. */
+ alloced =
+ bdrv_co_is_allocated(bs,
+ start * BACKUP_SECTORS_PER_CLUSTER + i,
+ BACKUP_SECTORS_PER_CLUSTER - i, &n);
+ i += n;
+
+ if (alloced == 1) {
+ break;
+ }
+ }
+
+ /* If the above loop never found any sectors that are in
+ * the topmost image, skip this backup. */
+ if (alloced == 0) {
+ continue;
+ }
+ }
+ /* FULL sync mode we copy the whole drive. */
+ ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
+ BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+ if (ret < 0) {
+ /* Depending on error action, fail now or retry cluster */
+ BlockErrorAction action =
+ backup_error_action(job, error_is_read, -ret);
+ if (action == BDRV_ACTION_REPORT) {
+ break;
+ } else {
+ start--;
+ continue;
+ }
}
}
}
@@ -300,7 +344,7 @@ static void coroutine_fn backup_run(void *opaque)
}
void backup_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed,
+ int64_t speed, MirrorSyncMode sync_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb, void *opaque,
@@ -335,6 +379,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->target = target;
+ job->sync_mode = sync_mode;
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
qemu_coroutine_enter(job->common.co, job);
diff --git a/block/commit.c b/block/commit.c
index 2227fc2e6c..51a1ab3678 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -103,7 +103,7 @@ wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
- block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+ block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
diff --git a/block/curl.c b/block/curl.c
index 82d39ff53f..e566855f76 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -86,7 +86,6 @@ typedef struct BDRVCURLState {
static void curl_clean_state(CURLState *s);
static void curl_multi_do(void *arg);
-static int curl_aio_flush(void *opaque);
static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
void *s, void *sp)
@@ -94,17 +93,16 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
- qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s);
+ qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s);
break;
case CURL_POLL_OUT:
- qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s);
+ qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s);
break;
case CURL_POLL_INOUT:
- qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do,
- curl_aio_flush, s);
+ qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s);
break;
case CURL_POLL_REMOVE:
- qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
break;
}
@@ -495,21 +493,6 @@ out_noclean:
return -EINVAL;
}
-static int curl_aio_flush(void *opaque)
-{
- BDRVCURLState *s = opaque;
- int i, j;
-
- for (i=0; i < CURL_NUM_STATES; i++) {
- for(j=0; j < CURL_NUM_ACB; j++) {
- if (s->states[i].acb[j]) {
- return 1;
- }
- }
- }
- return 0;
-}
-
static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
{
// Do we have to implement canceling? Seems to work without...
diff --git a/block/gluster.c b/block/gluster.c
index 6de418c0bd..46f36f8cd6 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -32,7 +32,6 @@ typedef struct BDRVGlusterState {
struct glfs *glfs;
int fds[2];
struct glfs_fd *fd;
- int qemu_aio_count;
int event_reader_pos;
GlusterAIOCB *event_acb;
} BDRVGlusterState;
@@ -247,7 +246,6 @@ static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
ret = -EIO; /* Partial read/write - fail it */
}
- s->qemu_aio_count--;
qemu_aio_release(acb);
cb(opaque, ret);
if (finished) {
@@ -275,13 +273,6 @@ static void qemu_gluster_aio_event_reader(void *opaque)
} while (ret < 0 && errno == EINTR);
}
-static int qemu_gluster_aio_flush_cb(void *opaque)
-{
- BDRVGlusterState *s = opaque;
-
- return (s->qemu_aio_count > 0);
-}
-
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "gluster",
@@ -348,7 +339,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
}
fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
- qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+ qemu_gluster_aio_event_reader, NULL, s);
out:
qemu_opts_del(opts);
@@ -445,11 +436,9 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
qemu_mutex_lock_iothread(); /* We are in gluster thread context */
acb->common.cb(acb->common.opaque, -EIO);
qemu_aio_release(acb);
- s->qemu_aio_count--;
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
- qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
- NULL);
+ qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
bs->drv = NULL; /* Make the disk inaccessible */
qemu_mutex_unlock_iothread();
}
@@ -467,7 +456,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
offset = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
- s->qemu_aio_count++;
acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
acb->size = size;
@@ -488,11 +476,23 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
return &acb->common;
out:
- s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
+static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
+{
+ int ret;
+ BDRVGlusterState *s = bs->opaque;
+
+ ret = glfs_ftruncate(s->fd, offset);
+ if (ret < 0) {
+ return -errno;
+ }
+
+ return 0;
+}
+
static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
@@ -518,7 +518,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
acb->size = 0;
acb->ret = 0;
acb->finished = NULL;
- s->qemu_aio_count++;
ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -527,7 +526,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
return &acb->common;
out:
- s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
@@ -550,7 +548,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
acb->size = 0;
acb->ret = 0;
acb->finished = NULL;
- s->qemu_aio_count++;
ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -559,7 +556,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
return &acb->common;
out:
- s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
@@ -598,7 +594,7 @@ static void qemu_gluster_close(BlockDriverState *bs)
close(s->fds[GLUSTER_FD_READ]);
close(s->fds[GLUSTER_FD_WRITE]);
- qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
if (s->fd) {
glfs_close(s->fd);
@@ -631,6 +627,7 @@ static BlockDriver bdrv_gluster = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
@@ -650,6 +647,7 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
@@ -669,6 +667,7 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
@@ -688,6 +687,7 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_create = qemu_gluster_create,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_truncate = qemu_gluster_truncate,
.bdrv_aio_readv = qemu_gluster_aio_readv,
.bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush,
diff --git a/block/iscsi.c b/block/iscsi.c
index 5f28c6a2ea..2bbee1f6e5 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -146,13 +146,6 @@ static const AIOCBInfo iscsi_aiocb_info = {
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
-static int iscsi_process_flush(void *arg)
-{
- IscsiLun *iscsilun = arg;
-
- return iscsi_queue_length(iscsilun->iscsi) > 0;
-}
-
static void
iscsi_set_events(IscsiLun *iscsilun)
{
@@ -166,7 +159,6 @@ iscsi_set_events(IscsiLun *iscsilun)
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
iscsi_process_read,
(ev & POLLOUT) ? iscsi_process_write : NULL,
- iscsi_process_flush,
iscsilun);
}
@@ -247,7 +239,9 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
{
if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
(nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
- error_report("iSCSI misaligned request: iscsilun->block_size %u, sector_num %ld, nb_sectors %d",
+ error_report("iSCSI misaligned request: "
+ "iscsilun->block_size %u, sector_num %" PRIi64
+ ", nb_sectors %d",
iscsilun->block_size, sector_num, nb_sectors);
return 0;
}
@@ -966,7 +960,7 @@ static void iscsi_nop_timed_event(void *opaque)
return;
}
- qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
+ timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
}
#endif
@@ -1179,8 +1173,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags)
#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
/* Set up a timer for sending out iSCSI NOPs */
- iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun);
- qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL);
+ iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
+ timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
#endif
out:
@@ -1210,10 +1204,10 @@ static void iscsi_close(BlockDriverState *bs)
struct iscsi_context *iscsi = iscsilun->iscsi;
if (iscsilun->nop_timer) {
- qemu_del_timer(iscsilun->nop_timer);
- qemu_free_timer(iscsilun->nop_timer);
+ timer_del(iscsilun->nop_timer);
+ timer_free(iscsilun->nop_timer);
}
- qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
memset(iscsilun, 0, sizeof(IscsiLun));
}
@@ -1273,8 +1267,8 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
goto out;
}
if (iscsilun->nop_timer) {
- qemu_del_timer(iscsilun->nop_timer);
- qemu_free_timer(iscsilun->nop_timer);
+ timer_del(iscsilun->nop_timer);
+ timer_free(iscsilun->nop_timer);
}
if (iscsilun->type != TYPE_DISK) {
ret = -ENODEV;
diff --git a/block/linux-aio.c b/block/linux-aio.c
index ee0f8d10c9..53434e2df5 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -39,7 +39,6 @@ struct qemu_laiocb {
struct qemu_laio_state {
io_context_t ctx;
EventNotifier e;
- int count;
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -55,8 +54,6 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
{
int ret;
- s->count--;
-
ret = laiocb->ret;
if (ret != -ECANCELED) {
if (ret == laiocb->nbytes) {
@@ -101,13 +98,6 @@ static void qemu_laio_completion_cb(EventNotifier *e)
}
}
-static int qemu_laio_flush_cb(EventNotifier *e)
-{
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
- return (s->count > 0) ? 1 : 0;
-}
-
static void laio_cancel(BlockDriverAIOCB *blockacb)
{
struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
@@ -177,14 +167,11 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
goto out_free_aiocb;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
- s->count++;
if (io_submit(s->ctx, 1, &iocbs) < 0)
- goto out_dec_count;
+ goto out_free_aiocb;
return &laiocb->common;
-out_dec_count:
- s->count--;
out_free_aiocb:
qemu_aio_release(laiocb);
return NULL;
@@ -203,8 +190,7 @@ void *laio_init(void)
goto out_close_efd;
}
- qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
- qemu_laio_flush_cb);
+ qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
return s;
diff --git a/block/mirror.c b/block/mirror.c
index bed4a7eadd..86de4582b4 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -356,7 +356,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
bdrv_dirty_iter_init(bs, &s->hbi);
- last_pause_ns = qemu_get_clock_ns(rt_clock);
+ last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
for (;;) {
uint64_t delay_ns;
int64_t cnt;
@@ -374,7 +374,7 @@ static void coroutine_fn mirror_run(void *opaque)
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
- if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
+ if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
@@ -439,13 +439,13 @@ static void coroutine_fn mirror_run(void *opaque)
delay_ns = 0;
}
- block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+ block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
} else if (!should_complete) {
delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
- block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+ block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
} else if (cnt == 0) {
/* The two disks are in sync. Exit and report successful
* completion.
@@ -454,7 +454,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
- last_pause_ns = qemu_get_clock_ns(rt_clock);
+ last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
immediate_exit:
diff --git a/block/nbd.c b/block/nbd.c
index 9c480b8f26..691066f726 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -279,13 +279,6 @@ static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
request->handle = INDEX_TO_HANDLE(s, i);
}
-static int nbd_have_request(void *opaque)
-{
- BDRVNBDState *s = opaque;
-
- return s->in_flight > 0;
-}
-
static void nbd_reply_ready(void *opaque)
{
BDRVNBDState *s = opaque;
@@ -341,8 +334,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
- qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
- nbd_have_request, s);
+ qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
@@ -361,8 +353,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
} else {
rc = nbd_send_request(s->sock, request);
}
- qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
- nbd_have_request, s);
+ qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -438,8 +429,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
- qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
- nbd_have_request, s);
+ qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, s);
s->sock = sock;
s->size = size;
@@ -459,7 +449,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
request.len = 0;
nbd_send_request(s->sock, &request);
- qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
closesocket(s->sock);
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 0eceefe2cd..78097e5173 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -291,7 +291,7 @@ static QemuOptsList qcow2_runtime_opts = {
.head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
.desc = {
{
- .name = "lazy_refcounts",
+ .name = QCOW2_OPT_LAZY_REFCOUNTS,
.type = QEMU_OPT_BOOL,
.help = "Postpone refcount updates",
},
@@ -1402,7 +1402,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options)
int flags = 0;
size_t cluster_size = DEFAULT_CLUSTER_SIZE;
int prealloc = 0;
- int version = 2;
+ int version = 3;
/* Read out options */
while (options && options->name) {
@@ -1722,12 +1722,15 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
{
BDRVQcowState *s = bs->opaque;
int growable = bs->growable;
+ bool zero_beyond_eof = bs->zero_beyond_eof;
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
bs->growable = 1;
+ bs->zero_beyond_eof = false;
ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
bs->growable = growable;
+ bs->zero_beyond_eof = zero_beyond_eof;
return ret;
}
diff --git a/block/qcow2.h b/block/qcow2.h
index 3b2d5cda71..dba9771419 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -59,10 +59,10 @@
#define DEFAULT_CLUSTER_SIZE 65536
-#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
-#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
-#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
+#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
+#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
+#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
+#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
typedef struct QCowHeader {
uint32_t magic;
diff --git a/block/qed.c b/block/qed.c
index f767b0528c..cc904c4834 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -353,10 +353,10 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
{
trace_qed_start_need_check_timer(s);
- /* Use vm_clock so we don't alter the image file while suspended for
+ /* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for
* migration.
*/
- qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+ timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
}
@@ -364,7 +364,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
static void qed_cancel_need_check_timer(BDRVQEDState *s)
{
trace_qed_cancel_need_check_timer(s);
- qemu_del_timer(s->need_check_timer);
+ timer_del(s->need_check_timer);
}
static void bdrv_qed_rebind(BlockDriverState *bs)
@@ -494,7 +494,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
}
}
- s->need_check_timer = qemu_new_timer_ns(vm_clock,
+ s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
qed_need_check_timer_cb, s);
out:
@@ -518,7 +518,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
BDRVQEDState *s = bs->opaque;
qed_cancel_need_check_timer(s);
- qemu_free_timer(s->need_check_timer);
+ timer_free(s->need_check_timer);
/* Ensure writes reach stable storage */
bdrv_flush(bs->file);
diff --git a/block/raw.c b/block/raw.c
index f1682d4f32..47518253fe 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -1,3 +1,26 @@
+/*
+ * Block driver for RAW format
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
#include "qemu-common.h"
#include "block/block_int.h"
diff --git a/block/rbd.c b/block/rbd.c
index cb71751218..e798e19f81 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -100,7 +100,6 @@ typedef struct BDRVRBDState {
rados_ioctx_t io_ctx;
rbd_image_t image;
char name[RBD_MAX_IMAGE_NAME_SIZE];
- int qemu_aio_count;
char *snap;
int event_reader_pos;
RADOSCB *event_rcb;
@@ -428,19 +427,11 @@ static void qemu_rbd_aio_event_reader(void *opaque)
if (s->event_reader_pos == sizeof(s->event_rcb)) {
s->event_reader_pos = 0;
qemu_rbd_complete_aio(s->event_rcb);
- s->qemu_aio_count--;
}
}
} while (ret < 0 && errno == EINTR);
}
-static int qemu_rbd_aio_flush_cb(void *opaque)
-{
- BDRVRBDState *s = opaque;
-
- return (s->qemu_aio_count > 0);
-}
-
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "rbd",
@@ -554,7 +545,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags)
fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
- NULL, qemu_rbd_aio_flush_cb, s);
+ NULL, s);
qemu_opts_del(opts);
@@ -578,7 +569,7 @@ static void qemu_rbd_close(BlockDriverState *bs)
close(s->fds[0]);
close(s->fds[1]);
- qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL);
rbd_close(s->image);
rados_ioctx_destroy(s->io_ctx);
@@ -741,8 +732,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
off = sector_num * BDRV_SECTOR_SIZE;
size = nb_sectors * BDRV_SECTOR_SIZE;
- s->qemu_aio_count++; /* All the RADOSCB */
-
rcb = g_malloc(sizeof(RADOSCB));
rcb->done = 0;
rcb->acb = acb;
@@ -779,7 +768,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
failed:
g_free(rcb);
- s->qemu_aio_count--;
qemu_aio_release(acb);
return NULL;
}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 6a41ad9b3c..1ad4d070e7 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -242,14 +242,14 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
return !!inode->snap_ctime;
}
-#undef dprintf
+#undef DPRINTF
#ifdef DEBUG_SDOG
-#define dprintf(fmt, args...) \
+#define DPRINTF(fmt, args...) \
do { \
fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
} while (0)
#else
-#define dprintf(fmt, args...)
+#define DPRINTF(fmt, args...)
#endif
typedef struct SheepdogAIOCB SheepdogAIOCB;
@@ -509,13 +509,6 @@ static void restart_co_req(void *opaque)
qemu_coroutine_enter(co, NULL);
}
-static int have_co_req(void *opaque)
-{
- /* this handler is set only when there is a pending request, so
- * always returns 1. */
- return 1;
-}
-
typedef struct SheepdogReqCo {
int sockfd;
SheepdogReq *hdr;
@@ -538,14 +531,14 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, have_co_req, co);
+ qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, have_co_req, co);
+ qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret < sizeof(*hdr)) {
@@ -570,7 +563,7 @@ static coroutine_fn void do_co_req(void *opaque)
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
- qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
@@ -729,7 +722,7 @@ static void coroutine_fn aio_read_response(void *opaque)
break;
case AIOCB_FLUSH_CACHE:
if (rsp.result == SD_RES_INVALID_PARMS) {
- dprintf("disable cache since the server doesn't support it\n");
+ DPRINTF("disable cache since the server doesn't support it\n");
s->cache_flags = SD_FLAG_CMD_DIRECT;
rsp.result = SD_RES_SUCCESS;
}
@@ -796,14 +789,6 @@ static void co_write_request(void *opaque)
qemu_coroutine_enter(s->co_send, NULL);
}
-static int aio_flush_request(void *opaque)
-{
- BDRVSheepdogState *s = opaque;
-
- return !QLIST_EMPTY(&s->inflight_aio_head) ||
- !QLIST_EMPTY(&s->pending_aio_head);
-}
-
/*
* Return a socket discriptor to read/write objects.
*
@@ -819,7 +804,7 @@ static int get_sheep_fd(BDRVSheepdogState *s)
return fd;
}
- qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
+ qemu_aio_set_fd_handler(fd, co_read_response, NULL, s);
return fd;
}
@@ -1069,8 +1054,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
- qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request,
- aio_flush_request, s);
+ qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
/* send a header */
@@ -1091,8 +1075,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
socket_set_cork(s->fd, 0);
- qemu_aio_set_fd_handler(s->fd, co_read_response, NULL,
- aio_flush_request, s);
+ qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
qemu_co_mutex_unlock(&s->lock);
return 0;
@@ -1229,7 +1212,7 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
* the same object */
QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
if (areq != aio_req && areq->oid == aio_req->oid) {
- dprintf("simultaneous CoW to %" PRIx64 "\n", aio_req->oid);
+ DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid);
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
return SD_RES_SUCCESS;
@@ -1319,7 +1302,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags)
s->discard_supported = true;
if (snapid || tag[0] != '\0') {
- dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
+ DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = true;
}
@@ -1350,7 +1333,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags)
g_free(buf);
return 0;
out:
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1554,7 +1537,7 @@ static void sd_close(BlockDriverState *bs)
unsigned int wlen, rlen = 0;
int fd, ret;
- dprintf("%s\n", s->name);
+ DPRINTF("%s\n", s->name);
fd = connect_to_sdog(s);
if (fd < 0) {
@@ -1578,7 +1561,7 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}
@@ -1714,7 +1697,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
char *buf;
bool deleted;
- dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
+ DPRINTF("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
buf = g_malloc(SD_INODE_SIZE);
@@ -1730,7 +1713,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
goto out;
}
- dprintf("%" PRIx32 " is created.\n", vid);
+ DPRINTF("%" PRIx32 " is created.\n", vid);
fd = connect_to_sdog(s);
if (fd < 0) {
@@ -1751,7 +1734,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
s->is_snapshot = false;
ret = 0;
- dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
+ DPRINTF("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
out:
g_free(buf);
@@ -1841,11 +1824,11 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
if (create) {
- dprintf("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
+ DPRINTF("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
inode->vdi_id, oid,
vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
oid = vid_to_data_oid(inode->vdi_id, idx);
- dprintf("new oid %" PRIx64 "\n", oid);
+ DPRINTF("new oid %" PRIx64 "\n", oid);
}
aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
@@ -1978,7 +1961,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
SheepdogInode *inode;
unsigned int datalen;
- dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
+ DPRINTF("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
"is_snapshot %d\n", sn_info->name, sn_info->id_str,
s->name, sn_info->vm_state_size, s->is_snapshot);
@@ -1989,7 +1972,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
return -EINVAL;
}
- dprintf("%s %s\n", sn_info->name, sn_info->id_str);
+ DPRINTF("%s %s\n", sn_info->name, sn_info->id_str);
s->inode.vm_state_size = sn_info->vm_state_size;
s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
@@ -2033,7 +2016,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
memcpy(&s->inode, inode, datalen);
- dprintf("s->inode: name %s snap_id %x oid %x\n",
+ DPRINTF("s->inode: name %s snap_id %x oid %x\n",
s->inode.name, s->inode.snap_id, s->inode.vdi_id);
cleanup:
@@ -2347,6 +2330,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
@@ -2374,6 +2358,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_file_open = sd_open,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_truncate = sd_truncate,
diff --git a/block/ssh.c b/block/ssh.c
index d7e7bf8dd2..27691b4ad5 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -740,14 +740,6 @@ static void restart_coroutine(void *opaque)
qemu_coroutine_enter(co, NULL);
}
-/* Always true because when we have called set_fd_handler there is
- * always a request being processed.
- */
-static int return_true(void *opaque)
-{
- return 1;
-}
-
static coroutine_fn void set_fd_handler(BDRVSSHState *s)
{
int r;
@@ -766,13 +758,13 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
rd_handler, wr_handler);
- qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, return_true, co);
+ qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
{
DPRINTF("s->sock=%d", s->sock);
- qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
+ qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
diff --git a/block/stream.c b/block/stream.c
index 7fe9e486bf..99821252b1 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -57,6 +57,11 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
BlockDriverState *intermediate;
intermediate = top->backing_hd;
+ /* Must assign before bdrv_delete() to prevent traversing dangling pointer
+ * while we delete backing image instances.
+ */
+ top->backing_hd = base;
+
while (intermediate) {
BlockDriverState *unused;
@@ -70,7 +75,6 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
unused->backing_hd = NULL;
bdrv_delete(unused);
}
- top->backing_hd = base;
}
static void coroutine_fn stream_run(void *opaque)
@@ -110,7 +114,7 @@ wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
- block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+ block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
diff --git a/block/vhdx.h b/block/vhdx.h
index c3b64c6ff6..fb687ed2d6 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -168,7 +168,7 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader {
vhdx_header. If not found in
vhdx_header, it is invalid */
uint64_t flushed_file_offset; /* see spec for full details - this
- sould be vhdx file size in bytes */
+ should be vhdx file size in bytes */
uint64_t last_file_offset; /* size in bytes that all allocated
file structures fit into */
} VHDXLogEntryHeader;
diff --git a/block/vmdk.c b/block/vmdk.c
index 3756333c60..63b489d29e 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -62,19 +62,20 @@ typedef struct {
uint32_t cylinders;
uint32_t heads;
uint32_t sectors_per_track;
-} VMDK3Header;
+} QEMU_PACKED VMDK3Header;
typedef struct {
uint32_t version;
uint32_t flags;
- int64_t capacity;
- int64_t granularity;
- int64_t desc_offset;
- int64_t desc_size;
- int32_t num_gtes_per_gte;
- int64_t rgd_offset;
- int64_t gd_offset;
- int64_t grain_offset;
+ uint64_t capacity;
+ uint64_t granularity;
+ uint64_t desc_offset;
+ uint64_t desc_size;
+ /* Number of GrainTableEntries per GrainTable */
+ uint32_t num_gtes_per_gt;
+ uint64_t rgd_offset;
+ uint64_t gd_offset;
+ uint64_t grain_offset;
char filler[1];
char check_bytes[4];
uint16_t compressAlgorithm;
@@ -109,7 +110,7 @@ typedef struct VmdkExtent {
typedef struct BDRVVmdkState {
CoMutex lock;
- int desc_offset;
+ uint64_t desc_offset;
bool cid_updated;
uint32_t parent_cid;
int num_extents;
@@ -131,7 +132,7 @@ typedef struct VmdkGrainMarker {
uint64_t lba;
uint32_t size;
uint8_t data[0];
-} VmdkGrainMarker;
+} QEMU_PACKED VmdkGrainMarker;
enum {
MARKER_END_OF_STREAM = 0,
@@ -385,15 +386,30 @@ static int vmdk_parent_open(BlockDriverState *bs)
/* Create and append extent to the extent array. Return the added VmdkExtent
* address. return NULL if allocation failed. */
-static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
+static int vmdk_add_extent(BlockDriverState *bs,
BlockDriverState *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
- int l2_size, unsigned int cluster_sectors)
+ int l2_size, uint64_t cluster_sectors,
+ VmdkExtent **new_extent)
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
+ if (cluster_sectors > 0x200000) {
+ /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
+ error_report("invalid granularity, image may be corrupt");
+ return -EINVAL;
+ }
+ if (l1_size > 512 * 1024 * 1024) {
+ /* Although with big capacity and small l1_entry_sectors, we can get a
+ * big l1_size, we don't want unbounded value to allocate the table.
+ * Limit it to 512M, which is 16PB for default cluster and L2 table
+ * size */
+ error_report("L1 size too big");
+ return -EFBIG;
+ }
+
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
@@ -416,7 +432,10 @@ static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
extent->end_sector = extent->sectors;
}
bs->total_sectors = extent->end_sector;
- return extent;
+ if (new_extent) {
+ *new_extent = extent;
+ }
+ return 0;
}
static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
@@ -462,9 +481,9 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
return ret;
}
-static int vmdk_open_vmdk3(BlockDriverState *bs,
- BlockDriverState *file,
- int flags)
+static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
+ BlockDriverState *file,
+ int flags)
{
int ret;
uint32_t magic;
@@ -475,12 +494,17 @@ static int vmdk_open_vmdk3(BlockDriverState *bs,
if (ret < 0) {
return ret;
}
- extent = vmdk_add_extent(bs,
- bs->file, false,
- le32_to_cpu(header.disk_sectors),
- le32_to_cpu(header.l1dir_offset) << 9,
- 0, 1 << 6, 1 << 9,
- le32_to_cpu(header.granularity));
+ ret = vmdk_add_extent(bs, file, false,
+ le32_to_cpu(header.disk_sectors),
+ le32_to_cpu(header.l1dir_offset) << 9,
+ 0,
+ le32_to_cpu(header.l1dir_size),
+ 4096,
+ le32_to_cpu(header.granularity),
+ &extent);
+ if (ret < 0) {
+ return ret;
+ }
ret = vmdk_init_tables(bs, extent);
if (ret) {
/* free extent allocated by vmdk_add_extent */
@@ -490,7 +514,7 @@ static int vmdk_open_vmdk3(BlockDriverState *bs,
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
- int64_t desc_offset);
+ uint64_t desc_offset);
static int vmdk_open_vmdk4(BlockDriverState *bs,
BlockDriverState *file,
@@ -508,7 +532,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
return ret;
}
if (header.capacity == 0) {
- int64_t desc_offset = le64_to_cpu(header.desc_offset);
+ uint64_t desc_offset = le64_to_cpu(header.desc_offset);
if (desc_offset) {
return vmdk_open_desc_file(bs, flags, desc_offset << 9);
}
@@ -570,7 +594,12 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
return -ENOTSUP;
}
- l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
+ if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
+ error_report("L2 table size too big");
+ return -EINVAL;
+ }
+
+ l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
* le64_to_cpu(header.granularity);
if (l1_entry_sectors == 0) {
return -EINVAL;
@@ -580,13 +609,17 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- extent = vmdk_add_extent(bs, file, false,
+ ret = vmdk_add_extent(bs, file, false,
le64_to_cpu(header.capacity),
le64_to_cpu(header.gd_offset) << 9,
l1_backup_offset,
l1_size,
- le32_to_cpu(header.num_gtes_per_gte),
- le64_to_cpu(header.granularity));
+ le32_to_cpu(header.num_gtes_per_gt),
+ le64_to_cpu(header.granularity),
+ &extent);
+ if (ret < 0) {
+ return ret;
+ }
extent->compressed =
le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
@@ -641,7 +674,7 @@ static int vmdk_open_sparse(BlockDriverState *bs,
magic = be32_to_cpu(magic);
switch (magic) {
case VMDK3_MAGIC:
- return vmdk_open_vmdk3(bs, file, flags);
+ return vmdk_open_vmfs_sparse(bs, file, flags);
break;
case VMDK4_MAGIC:
return vmdk_open_vmdk4(bs, file, flags);
@@ -685,7 +718,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
}
if (sectors <= 0 ||
- (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
+ (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
(strcmp(access, "RW"))) {
goto next_line;
}
@@ -698,15 +732,18 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
}
/* save to extents array */
- if (!strcmp(type, "FLAT")) {
+ if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
/* FLAT extent */
VmdkExtent *extent;
- extent = vmdk_add_extent(bs, extent_file, true, sectors,
- 0, 0, 0, 0, sectors);
+ ret = vmdk_add_extent(bs, extent_file, true, sectors,
+ 0, 0, 0, 0, sectors, &extent);
+ if (ret < 0) {
+ return ret;
+ }
extent->flat_start_offset = flat_offset << 9;
- } else if (!strcmp(type, "SPARSE")) {
- /* SPARSE extent */
+ } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
+ /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
if (ret) {
bdrv_delete(extent_file);
@@ -728,7 +765,7 @@ next_line:
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
- int64_t desc_offset)
+ uint64_t desc_offset)
{
int ret;
char *buf = NULL;
@@ -753,6 +790,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
goto exit;
}
if (strcmp(ct, "monolithicFlat") &&
+ strcmp(ct, "vmfs") &&
+ strcmp(ct, "vmfsSparse") &&
strcmp(ct, "twoGbMaxExtentSparse") &&
strcmp(ct, "twoGbMaxExtentFlat")) {
fprintf(stderr,
@@ -807,16 +846,17 @@ static int get_whole_cluster(BlockDriverState *bs,
uint64_t offset,
bool allocate)
{
- /* 128 sectors * 512 bytes each = grain size 64KB */
- uint8_t whole_grain[extent->cluster_sectors * 512];
+ int ret = VMDK_OK;
+ uint8_t *whole_grain = NULL;
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
if (bs->backing_hd) {
- int ret;
-
+ whole_grain =
+ qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
if (!vmdk_is_cid_valid(bs)) {
- return VMDK_ERROR;
+ ret = VMDK_ERROR;
+ goto exit;
}
/* floor offset to cluster */
@@ -824,17 +864,21 @@ static int get_whole_cluster(BlockDriverState *bs,
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
- return VMDK_ERROR;
+ ret = VMDK_ERROR;
+ goto exit;
}
/* Write grain only into the active image */
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
- return VMDK_ERROR;
+ ret = VMDK_ERROR;
+ goto exit;
}
}
- return VMDK_OK;
+exit:
+ qemu_vfree(whole_grain);
+ return ret;
}
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
@@ -1200,8 +1244,10 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
/**
* vmdk_write:
* @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature
- * if possible, otherwise return -ENOTSUP.
- * @zero_dry_run: used for zeroed == true only, don't update L2 table, just
+ * if possible, otherwise return -ENOTSUP.
+ * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
+ * with each cluster. By dry run we can find if the zero write
+ * is possible without modifying image data.
*
* Returns: error code with 0 for success.
*/
@@ -1328,6 +1374,8 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
+ /* write zeroes could fail if sectors not aligned to cluster, test it with
+ * dry_run == true before really updating image */
ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
if (!ret) {
ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
@@ -1336,7 +1384,6 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
return ret;
}
-
static int vmdk_create_extent(const char *filename, int64_t filesize,
bool flat, bool compress, bool zeroed_grain)
{
@@ -1367,12 +1414,12 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
header.capacity = filesize / 512;
header.granularity = 128;
- header.num_gtes_per_gte = 512;
+ header.num_gtes_per_gt = 512;
grains = (filesize / 512 + header.granularity - 1) / header.granularity;
- gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
+ gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9;
gt_count =
- (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
+ (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt;
gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
header.desc_offset = 1;
@@ -1388,7 +1435,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.flags = cpu_to_le32(header.flags);
header.capacity = cpu_to_le64(header.capacity);
header.granularity = cpu_to_le64(header.granularity);
- header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
+ header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
header.desc_offset = cpu_to_le64(header.desc_offset);
header.desc_size = cpu_to_le64(header.desc_size);
header.rgd_offset = cpu_to_le64(header.rgd_offset);
diff --git a/block/win32-aio.c b/block/win32-aio.c
index fcb7c754da..5d1d199b61 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -105,13 +105,6 @@ static void win32_aio_completion_cb(EventNotifier *e)
}
}
-static int win32_aio_flush_cb(EventNotifier *e)
-{
- QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
-
- return (s->count > 0) ? 1 : 0;
-}
-
static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
{
QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
@@ -201,8 +194,7 @@ QEMUWin32AIOState *win32_aio_init(void)
goto out_close_efd;
}
- qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
- win32_aio_flush_cb);
+ qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
return s;