diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/backup.c | 105 | ||||
-rw-r--r-- | block/commit.c | 2 | ||||
-rw-r--r-- | block/curl.c | 25 | ||||
-rw-r--r-- | block/gluster.c | 40 | ||||
-rw-r--r-- | block/iscsi.c | 28 | ||||
-rw-r--r-- | block/linux-aio.c | 18 | ||||
-rw-r--r-- | block/mirror.c | 10 | ||||
-rw-r--r-- | block/nbd.c | 18 | ||||
-rw-r--r-- | block/qcow2.c | 7 | ||||
-rw-r--r-- | block/qcow2.h | 8 | ||||
-rw-r--r-- | block/qed.c | 10 | ||||
-rw-r--r-- | block/raw.c | 23 | ||||
-rw-r--r-- | block/rbd.c | 16 | ||||
-rw-r--r-- | block/sheepdog.c | 65 | ||||
-rw-r--r-- | block/ssh.c | 12 | ||||
-rw-r--r-- | block/stream.c | 8 | ||||
-rw-r--r-- | block/vhdx.h | 2 | ||||
-rw-r--r-- | block/vmdk.c | 151 | ||||
-rw-r--r-- | block/win32-aio.c | 10 |
19 files changed, 295 insertions, 263 deletions
diff --git a/block/backup.c b/block/backup.c index 16105d40b1..e12b3b1461 100644 --- a/block/backup.c +++ b/block/backup.c @@ -37,6 +37,7 @@ typedef struct CowRequest { typedef struct BackupBlockJob { BlockJob common; BlockDriverState *target; + MirrorSyncMode sync_mode; RateLimit limit; BlockdevOnError on_source_error; BlockdevOnError on_target_error; @@ -247,40 +248,83 @@ static void coroutine_fn backup_run(void *opaque) bdrv_add_before_write_notifier(bs, &before_write); - for (; start < end; start++) { - bool error_is_read; - - if (block_job_is_cancelled(&job->common)) { - break; + if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { + while (!block_job_is_cancelled(&job->common)) { + /* Yield until the job is cancelled. We just let our before_write + * notify callback service CoW requests. */ + job->common.busy = false; + qemu_coroutine_yield(); + job->common.busy = true; } + } else { + /* Both FULL and TOP SYNC_MODE's require copying.. */ + for (; start < end; start++) { + bool error_is_read; - /* we need to yield so that qemu_aio_flush() returns. - * (without, VM does not reboot) - */ - if (job->common.speed) { - uint64_t delay_ns = ratelimit_calculate_delay( - &job->limit, job->sectors_read); - job->sectors_read = 0; - block_job_sleep_ns(&job->common, rt_clock, delay_ns); - } else { - block_job_sleep_ns(&job->common, rt_clock, 0); - } + if (block_job_is_cancelled(&job->common)) { + break; + } - if (block_job_is_cancelled(&job->common)) { - break; - } + /* we need to yield so that qemu_aio_flush() returns. + * (without, VM does not reboot) + */ + if (job->common.speed) { + uint64_t delay_ns = ratelimit_calculate_delay( + &job->limit, job->sectors_read); + job->sectors_read = 0; + block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns); + } else { + block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0); + } - ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); - if (ret < 0) { - /* Depending on error action, fail now or retry cluster */ - BlockErrorAction action = - backup_error_action(job, error_is_read, -ret); - if (action == BDRV_ACTION_REPORT) { + if (block_job_is_cancelled(&job->common)) { break; - } else { - start--; - continue; + } + + if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { + int i, n; + int alloced = 0; + + /* Check to see if these blocks are already in the + * backing file. */ + + for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) { + /* bdrv_co_is_allocated() only returns true/false based + * on the first set of sectors it comes accross that + * are are all in the same state. + * For that reason we must verify each sector in the + * backup cluster length. We end up copying more than + * needed but at some point that is always the case. */ + alloced = + bdrv_co_is_allocated(bs, + start * BACKUP_SECTORS_PER_CLUSTER + i, + BACKUP_SECTORS_PER_CLUSTER - i, &n); + i += n; + + if (alloced == 1) { + break; + } + } + + /* If the above loop never found any sectors that are in + * the topmost image, skip this backup. */ + if (alloced == 0) { + continue; + } + } + /* FULL sync mode we copy the whole drive. */ + ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, + BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + if (ret < 0) { + /* Depending on error action, fail now or retry cluster */ + BlockErrorAction action = + backup_error_action(job, error_is_read, -ret); + if (action == BDRV_ACTION_REPORT) { + break; + } else { + start--; + continue; + } } } } @@ -300,7 +344,7 @@ static void coroutine_fn backup_run(void *opaque) } void backup_start(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, + int64_t speed, MirrorSyncMode sync_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, @@ -335,6 +379,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; + job->sync_mode = sync_mode; job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); qemu_coroutine_enter(job->common.co, job); diff --git a/block/commit.c b/block/commit.c index 2227fc2e6c..51a1ab3678 100644 --- a/block/commit.c +++ b/block/commit.c @@ -103,7 +103,7 @@ wait: /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ - block_job_sleep_ns(&s->common, rt_clock, delay_ns); + block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { break; } diff --git a/block/curl.c b/block/curl.c index 82d39ff53f..e566855f76 100644 --- a/block/curl.c +++ b/block/curl.c @@ -86,7 +86,6 @@ typedef struct BDRVCURLState { static void curl_clean_state(CURLState *s); static void curl_multi_do(void *arg); -static int curl_aio_flush(void *opaque); static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, void *s, void *sp) @@ -94,17 +93,16 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd); switch (action) { case CURL_POLL_IN: - qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s); + qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s); break; case CURL_POLL_OUT: - qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s); + qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s); break; case CURL_POLL_INOUT: - qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, - curl_aio_flush, s); + qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s); break; case CURL_POLL_REMOVE: - qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(fd, NULL, NULL, NULL); break; } @@ -495,21 +493,6 @@ out_noclean: return -EINVAL; } -static int curl_aio_flush(void *opaque) -{ - BDRVCURLState *s = opaque; - int i, j; - - for (i=0; i < CURL_NUM_STATES; i++) { - for(j=0; j < CURL_NUM_ACB; j++) { - if (s->states[i].acb[j]) { - return 1; - } - } - } - return 0; -} - static void curl_aio_cancel(BlockDriverAIOCB *blockacb) { // Do we have to implement canceling? Seems to work without... diff --git a/block/gluster.c b/block/gluster.c index 6de418c0bd..46f36f8cd6 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -32,7 +32,6 @@ typedef struct BDRVGlusterState { struct glfs *glfs; int fds[2]; struct glfs_fd *fd; - int qemu_aio_count; int event_reader_pos; GlusterAIOCB *event_acb; } BDRVGlusterState; @@ -247,7 +246,6 @@ static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) ret = -EIO; /* Partial read/write - fail it */ } - s->qemu_aio_count--; qemu_aio_release(acb); cb(opaque, ret); if (finished) { @@ -275,13 +273,6 @@ static void qemu_gluster_aio_event_reader(void *opaque) } while (ret < 0 && errno == EINTR); } -static int qemu_gluster_aio_flush_cb(void *opaque) -{ - BDRVGlusterState *s = opaque; - - return (s->qemu_aio_count > 0); -} - /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "gluster", @@ -348,7 +339,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, } fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], - qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + qemu_gluster_aio_event_reader, NULL, s); out: qemu_opts_del(opts); @@ -445,11 +436,9 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) qemu_mutex_lock_iothread(); /* We are in gluster thread context */ acb->common.cb(acb->common.opaque, -EIO); qemu_aio_release(acb); - s->qemu_aio_count--; close(s->fds[GLUSTER_FD_READ]); close(s->fds[GLUSTER_FD_WRITE]); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, - NULL); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL); bs->drv = NULL; /* Make the disk inaccessible */ qemu_mutex_unlock_iothread(); } @@ -467,7 +456,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, offset = sector_num * BDRV_SECTOR_SIZE; size = nb_sectors * BDRV_SECTOR_SIZE; - s->qemu_aio_count++; acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = size; @@ -488,11 +476,23 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, return &acb->common; out: - s->qemu_aio_count--; qemu_aio_release(acb); return NULL; } +static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) +{ + int ret; + BDRVGlusterState *s = bs->opaque; + + ret = glfs_ftruncate(s->fd, offset); + if (ret < 0) { + return -errno; + } + + return 0; +} + static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) @@ -518,7 +518,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, acb->size = 0; acb->ret = 0; acb->finished = NULL; - s->qemu_aio_count++; ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); if (ret < 0) { @@ -527,7 +526,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, return &acb->common; out: - s->qemu_aio_count--; qemu_aio_release(acb); return NULL; } @@ -550,7 +548,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs, acb->size = 0; acb->ret = 0; acb->finished = NULL; - s->qemu_aio_count++; ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); if (ret < 0) { @@ -559,7 +556,6 @@ static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs, return &acb->common; out: - s->qemu_aio_count--; qemu_aio_release(acb); return NULL; } @@ -598,7 +594,7 @@ static void qemu_gluster_close(BlockDriverState *bs) close(s->fds[GLUSTER_FD_READ]); close(s->fds[GLUSTER_FD_WRITE]); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL); if (s->fd) { glfs_close(s->fd); @@ -631,6 +627,7 @@ static BlockDriver bdrv_gluster = { .bdrv_create = qemu_gluster_create, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_truncate = qemu_gluster_truncate, .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, @@ -650,6 +647,7 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_create = qemu_gluster_create, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_truncate = qemu_gluster_truncate, .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, @@ -669,6 +667,7 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_create = qemu_gluster_create, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_truncate = qemu_gluster_truncate, .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, @@ -688,6 +687,7 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_create = qemu_gluster_create, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_truncate = qemu_gluster_truncate, .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, diff --git a/block/iscsi.c b/block/iscsi.c index 5f28c6a2ea..2bbee1f6e5 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -146,13 +146,6 @@ static const AIOCBInfo iscsi_aiocb_info = { static void iscsi_process_read(void *arg); static void iscsi_process_write(void *arg); -static int iscsi_process_flush(void *arg) -{ - IscsiLun *iscsilun = arg; - - return iscsi_queue_length(iscsilun->iscsi) > 0; -} - static void iscsi_set_events(IscsiLun *iscsilun) { @@ -166,7 +159,6 @@ iscsi_set_events(IscsiLun *iscsilun) qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read, (ev & POLLOUT) ? iscsi_process_write : NULL, - iscsi_process_flush, iscsilun); } @@ -247,7 +239,9 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors, { if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size || (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) { - error_report("iSCSI misaligned request: iscsilun->block_size %u, sector_num %ld, nb_sectors %d", + error_report("iSCSI misaligned request: " + "iscsilun->block_size %u, sector_num %" PRIi64 + ", nb_sectors %d", iscsilun->block_size, sector_num, nb_sectors); return 0; } @@ -966,7 +960,7 @@ static void iscsi_nop_timed_event(void *opaque) return; } - qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL); + timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); iscsi_set_events(iscsilun); } #endif @@ -1179,8 +1173,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags) #if defined(LIBISCSI_FEATURE_NOP_COUNTER) /* Set up a timer for sending out iSCSI NOPs */ - iscsilun->nop_timer = qemu_new_timer_ms(rt_clock, iscsi_nop_timed_event, iscsilun); - qemu_mod_timer(iscsilun->nop_timer, qemu_get_clock_ms(rt_clock) + NOP_INTERVAL); + iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun); + timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); #endif out: @@ -1210,10 +1204,10 @@ static void iscsi_close(BlockDriverState *bs) struct iscsi_context *iscsi = iscsilun->iscsi; if (iscsilun->nop_timer) { - qemu_del_timer(iscsilun->nop_timer); - qemu_free_timer(iscsilun->nop_timer); + timer_del(iscsilun->nop_timer); + timer_free(iscsilun->nop_timer); } - qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); iscsi_destroy_context(iscsi); memset(iscsilun, 0, sizeof(IscsiLun)); } @@ -1273,8 +1267,8 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options) goto out; } if (iscsilun->nop_timer) { - qemu_del_timer(iscsilun->nop_timer); - qemu_free_timer(iscsilun->nop_timer); + timer_del(iscsilun->nop_timer); + timer_free(iscsilun->nop_timer); } if (iscsilun->type != TYPE_DISK) { ret = -ENODEV; diff --git a/block/linux-aio.c b/block/linux-aio.c index ee0f8d10c9..53434e2df5 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -39,7 +39,6 @@ struct qemu_laiocb { struct qemu_laio_state { io_context_t ctx; EventNotifier e; - int count; }; static inline ssize_t io_event_ret(struct io_event *ev) @@ -55,8 +54,6 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, { int ret; - s->count--; - ret = laiocb->ret; if (ret != -ECANCELED) { if (ret == laiocb->nbytes) { @@ -101,13 +98,6 @@ static void qemu_laio_completion_cb(EventNotifier *e) } } -static int qemu_laio_flush_cb(EventNotifier *e) -{ - struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e); - - return (s->count > 0) ? 1 : 0; -} - static void laio_cancel(BlockDriverAIOCB *blockacb) { struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb; @@ -177,14 +167,11 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, goto out_free_aiocb; } io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); - s->count++; if (io_submit(s->ctx, 1, &iocbs) < 0) - goto out_dec_count; + goto out_free_aiocb; return &laiocb->common; -out_dec_count: - s->count--; out_free_aiocb: qemu_aio_release(laiocb); return NULL; @@ -203,8 +190,7 @@ void *laio_init(void) goto out_close_efd; } - qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb, - qemu_laio_flush_cb); + qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb); return s; diff --git a/block/mirror.c b/block/mirror.c index bed4a7eadd..86de4582b4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -356,7 +356,7 @@ static void coroutine_fn mirror_run(void *opaque) } bdrv_dirty_iter_init(bs, &s->hbi); - last_pause_ns = qemu_get_clock_ns(rt_clock); + last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); for (;;) { uint64_t delay_ns; int64_t cnt; @@ -374,7 +374,7 @@ static void coroutine_fn mirror_run(void *opaque) * We do so every SLICE_TIME nanoseconds, or when there is an error, * or when the source is clean, whichever comes first. */ - if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME && + if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME && s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { @@ -439,13 +439,13 @@ static void coroutine_fn mirror_run(void *opaque) delay_ns = 0; } - block_job_sleep_ns(&s->common, rt_clock, delay_ns); + block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { break; } } else if (!should_complete) { delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0); - block_job_sleep_ns(&s->common, rt_clock, delay_ns); + block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); } else if (cnt == 0) { /* The two disks are in sync. Exit and report successful * completion. @@ -454,7 +454,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.cancelled = false; break; } - last_pause_ns = qemu_get_clock_ns(rt_clock); + last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } immediate_exit: diff --git a/block/nbd.c b/block/nbd.c index 9c480b8f26..691066f726 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -279,13 +279,6 @@ static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request) request->handle = INDEX_TO_HANDLE(s, i); } -static int nbd_have_request(void *opaque) -{ - BDRVNBDState *s = opaque; - - return s->in_flight > 0; -} - static void nbd_reply_ready(void *opaque) { BDRVNBDState *s = opaque; @@ -341,8 +334,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, qemu_co_mutex_lock(&s->send_mutex); s->send_coroutine = qemu_coroutine_self(); - qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, - nbd_have_request, s); + qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s); if (qiov) { if (!s->is_unix) { socket_set_cork(s->sock, 1); @@ -361,8 +353,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, } else { rc = nbd_send_request(s->sock, request); } - qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, - nbd_have_request, s); + qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s); s->send_coroutine = NULL; qemu_co_mutex_unlock(&s->send_mutex); return rc; @@ -438,8 +429,7 @@ static int nbd_establish_connection(BlockDriverState *bs) /* Now that we're connected, set the socket to be non-blocking and * kick the reply mechanism. */ qemu_set_nonblock(sock); - qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, - nbd_have_request, s); + qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, s); s->sock = sock; s->size = size; @@ -459,7 +449,7 @@ static void nbd_teardown_connection(BlockDriverState *bs) request.len = 0; nbd_send_request(s->sock, &request); - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL); closesocket(s->sock); } diff --git a/block/qcow2.c b/block/qcow2.c index 0eceefe2cd..78097e5173 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -291,7 +291,7 @@ static QemuOptsList qcow2_runtime_opts = { .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), .desc = { { - .name = "lazy_refcounts", + .name = QCOW2_OPT_LAZY_REFCOUNTS, .type = QEMU_OPT_BOOL, .help = "Postpone refcount updates", }, @@ -1402,7 +1402,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) int flags = 0; size_t cluster_size = DEFAULT_CLUSTER_SIZE; int prealloc = 0; - int version = 2; + int version = 3; /* Read out options */ while (options && options->name) { @@ -1722,12 +1722,15 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, { BDRVQcowState *s = bs->opaque; int growable = bs->growable; + bool zero_beyond_eof = bs->zero_beyond_eof; int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); bs->growable = 1; + bs->zero_beyond_eof = false; ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); bs->growable = growable; + bs->zero_beyond_eof = zero_beyond_eof; return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index 3b2d5cda71..dba9771419 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -59,10 +59,10 @@ #define DEFAULT_CLUSTER_SIZE 65536 -#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts" -#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request" -#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot" -#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other" +#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts" +#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request" +#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" +#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other" typedef struct QCowHeader { uint32_t magic; diff --git a/block/qed.c b/block/qed.c index f767b0528c..cc904c4834 100644 --- a/block/qed.c +++ b/block/qed.c @@ -353,10 +353,10 @@ static void qed_start_need_check_timer(BDRVQEDState *s) { trace_qed_start_need_check_timer(s); - /* Use vm_clock so we don't alter the image file while suspended for + /* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for * migration. */ - qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) + + timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT); } @@ -364,7 +364,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s) static void qed_cancel_need_check_timer(BDRVQEDState *s) { trace_qed_cancel_need_check_timer(s); - qemu_del_timer(s->need_check_timer); + timer_del(s->need_check_timer); } static void bdrv_qed_rebind(BlockDriverState *bs) @@ -494,7 +494,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags) } } - s->need_check_timer = qemu_new_timer_ns(vm_clock, + s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, qed_need_check_timer_cb, s); out: @@ -518,7 +518,7 @@ static void bdrv_qed_close(BlockDriverState *bs) BDRVQEDState *s = bs->opaque; qed_cancel_need_check_timer(s); - qemu_free_timer(s->need_check_timer); + timer_free(s->need_check_timer); /* Ensure writes reach stable storage */ bdrv_flush(bs->file); diff --git a/block/raw.c b/block/raw.c index f1682d4f32..47518253fe 100644 --- a/block/raw.c +++ b/block/raw.c @@ -1,3 +1,26 @@ +/* + * Block driver for RAW format + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ #include "qemu-common.h" #include "block/block_int.h" diff --git a/block/rbd.c b/block/rbd.c index cb71751218..e798e19f81 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -100,7 +100,6 @@ typedef struct BDRVRBDState { rados_ioctx_t io_ctx; rbd_image_t image; char name[RBD_MAX_IMAGE_NAME_SIZE]; - int qemu_aio_count; char *snap; int event_reader_pos; RADOSCB *event_rcb; @@ -428,19 +427,11 @@ static void qemu_rbd_aio_event_reader(void *opaque) if (s->event_reader_pos == sizeof(s->event_rcb)) { s->event_reader_pos = 0; qemu_rbd_complete_aio(s->event_rcb); - s->qemu_aio_count--; } } } while (ret < 0 && errno == EINTR); } -static int qemu_rbd_aio_flush_cb(void *opaque) -{ - BDRVRBDState *s = opaque; - - return (s->qemu_aio_count > 0); -} - /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "rbd", @@ -554,7 +545,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags) fcntl(s->fds[0], F_SETFL, O_NONBLOCK); fcntl(s->fds[1], F_SETFL, O_NONBLOCK); qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader, - NULL, qemu_rbd_aio_flush_cb, s); + NULL, s); qemu_opts_del(opts); @@ -578,7 +569,7 @@ static void qemu_rbd_close(BlockDriverState *bs) close(s->fds[0]); close(s->fds[1]); - qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL); rbd_close(s->image); rados_ioctx_destroy(s->io_ctx); @@ -741,8 +732,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, off = sector_num * BDRV_SECTOR_SIZE; size = nb_sectors * BDRV_SECTOR_SIZE; - s->qemu_aio_count++; /* All the RADOSCB */ - rcb = g_malloc(sizeof(RADOSCB)); rcb->done = 0; rcb->acb = acb; @@ -779,7 +768,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, failed: g_free(rcb); - s->qemu_aio_count--; qemu_aio_release(acb); return NULL; } diff --git a/block/sheepdog.c b/block/sheepdog.c index 6a41ad9b3c..1ad4d070e7 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -242,14 +242,14 @@ static inline bool is_snapshot(struct SheepdogInode *inode) return !!inode->snap_ctime; } -#undef dprintf +#undef DPRINTF #ifdef DEBUG_SDOG -#define dprintf(fmt, args...) \ +#define DPRINTF(fmt, args...) \ do { \ fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \ } while (0) #else -#define dprintf(fmt, args...) +#define DPRINTF(fmt, args...) #endif typedef struct SheepdogAIOCB SheepdogAIOCB; @@ -509,13 +509,6 @@ static void restart_co_req(void *opaque) qemu_coroutine_enter(co, NULL); } -static int have_co_req(void *opaque) -{ - /* this handler is set only when there is a pending request, so - * always returns 1. */ - return 1; -} - typedef struct SheepdogReqCo { int sockfd; SheepdogReq *hdr; @@ -538,14 +531,14 @@ static coroutine_fn void do_co_req(void *opaque) unsigned int *rlen = srco->rlen; co = qemu_coroutine_self(); - qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, have_co_req, co); + qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co); ret = send_co_req(sockfd, hdr, data, wlen); if (ret < 0) { goto out; } - qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, have_co_req, co); + qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); if (ret < sizeof(*hdr)) { @@ -570,7 +563,7 @@ static coroutine_fn void do_co_req(void *opaque) out: /* there is at most one request for this sockfd, so it is safe to * set each handler to NULL. */ - qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL); srco->ret = ret; srco->finished = true; @@ -729,7 +722,7 @@ static void coroutine_fn aio_read_response(void *opaque) break; case AIOCB_FLUSH_CACHE: if (rsp.result == SD_RES_INVALID_PARMS) { - dprintf("disable cache since the server doesn't support it\n"); + DPRINTF("disable cache since the server doesn't support it\n"); s->cache_flags = SD_FLAG_CMD_DIRECT; rsp.result = SD_RES_SUCCESS; } @@ -796,14 +789,6 @@ static void co_write_request(void *opaque) qemu_coroutine_enter(s->co_send, NULL); } -static int aio_flush_request(void *opaque) -{ - BDRVSheepdogState *s = opaque; - - return !QLIST_EMPTY(&s->inflight_aio_head) || - !QLIST_EMPTY(&s->pending_aio_head); -} - /* * Return a socket discriptor to read/write objects. * @@ -819,7 +804,7 @@ static int get_sheep_fd(BDRVSheepdogState *s) return fd; } - qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s); + qemu_aio_set_fd_handler(fd, co_read_response, NULL, s); return fd; } @@ -1069,8 +1054,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, qemu_co_mutex_lock(&s->lock); s->co_send = qemu_coroutine_self(); - qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, - aio_flush_request, s); + qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s); socket_set_cork(s->fd, 1); /* send a header */ @@ -1091,8 +1075,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } socket_set_cork(s->fd, 0); - qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, - aio_flush_request, s); + qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); qemu_co_mutex_unlock(&s->lock); return 0; @@ -1229,7 +1212,7 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) * the same object */ QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { if (areq != aio_req && areq->oid == aio_req->oid) { - dprintf("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); + DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); return SD_RES_SUCCESS; @@ -1319,7 +1302,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags) s->discard_supported = true; if (snapid || tag[0] != '\0') { - dprintf("%" PRIx32 " snapshot inode was open.\n", vid); + DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid); s->is_snapshot = true; } @@ -1350,7 +1333,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags) g_free(buf); return 0; out: - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); if (s->fd >= 0) { closesocket(s->fd); } @@ -1554,7 +1537,7 @@ static void sd_close(BlockDriverState *bs) unsigned int wlen, rlen = 0; int fd, ret; - dprintf("%s\n", s->name); + DPRINTF("%s\n", s->name); fd = connect_to_sdog(s); if (fd < 0) { @@ -1578,7 +1561,7 @@ static void sd_close(BlockDriverState *bs) error_report("%s, %s", sd_strerror(rsp->result), s->name); } - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); closesocket(s->fd); g_free(s->host_spec); } @@ -1714,7 +1697,7 @@ static int sd_create_branch(BDRVSheepdogState *s) char *buf; bool deleted; - dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id); + DPRINTF("%" PRIx32 " is snapshot.\n", s->inode.vdi_id); buf = g_malloc(SD_INODE_SIZE); @@ -1730,7 +1713,7 @@ static int sd_create_branch(BDRVSheepdogState *s) goto out; } - dprintf("%" PRIx32 " is created.\n", vid); + DPRINTF("%" PRIx32 " is created.\n", vid); fd = connect_to_sdog(s); if (fd < 0) { @@ -1751,7 +1734,7 @@ static int sd_create_branch(BDRVSheepdogState *s) s->is_snapshot = false; ret = 0; - dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id); + DPRINTF("%" PRIx32 " was newly created.\n", s->inode.vdi_id); out: g_free(buf); @@ -1841,11 +1824,11 @@ static int coroutine_fn sd_co_rw_vector(void *p) } if (create) { - dprintf("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n", + DPRINTF("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n", inode->vdi_id, oid, vid_to_data_oid(inode->data_vdi_id[idx], idx), idx); oid = vid_to_data_oid(inode->vdi_id, idx); - dprintf("new oid %" PRIx64 "\n", oid); + DPRINTF("new oid %" PRIx64 "\n", oid); } aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); @@ -1978,7 +1961,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) SheepdogInode *inode; unsigned int datalen; - dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " + DPRINTF("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " "is_snapshot %d\n", sn_info->name, sn_info->id_str, s->name, sn_info->vm_state_size, s->is_snapshot); @@ -1989,7 +1972,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) return -EINVAL; } - dprintf("%s %s\n", sn_info->name, sn_info->id_str); + DPRINTF("%s %s\n", sn_info->name, sn_info->id_str); s->inode.vm_state_size = sn_info->vm_state_size; s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; @@ -2033,7 +2016,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } memcpy(&s->inode, inode, datalen); - dprintf("s->inode: name %s snap_id %x oid %x\n", + DPRINTF("s->inode: name %s snap_id %x oid %x\n", s->inode.name, s->inode.snap_id, s->inode.vdi_id); cleanup: @@ -2347,6 +2330,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_file_open = sd_open, .bdrv_close = sd_close, .bdrv_create = sd_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_truncate = sd_truncate, @@ -2374,6 +2358,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_file_open = sd_open, .bdrv_close = sd_close, .bdrv_create = sd_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_truncate = sd_truncate, diff --git a/block/ssh.c b/block/ssh.c index d7e7bf8dd2..27691b4ad5 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -740,14 +740,6 @@ static void restart_coroutine(void *opaque) qemu_coroutine_enter(co, NULL); } -/* Always true because when we have called set_fd_handler there is - * always a request being processed. - */ -static int return_true(void *opaque) -{ - return 1; -} - static coroutine_fn void set_fd_handler(BDRVSSHState *s) { int r; @@ -766,13 +758,13 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s) DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock, rd_handler, wr_handler); - qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, return_true, co); + qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co); } static coroutine_fn void clear_fd_handler(BDRVSSHState *s) { DPRINTF("s->sock=%d", s->sock); - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL); } /* A non-blocking call returned EAGAIN, so yield, ensuring the diff --git a/block/stream.c b/block/stream.c index 7fe9e486bf..99821252b1 100644 --- a/block/stream.c +++ b/block/stream.c @@ -57,6 +57,11 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base, BlockDriverState *intermediate; intermediate = top->backing_hd; + /* Must assign before bdrv_delete() to prevent traversing dangling pointer + * while we delete backing image instances. + */ + top->backing_hd = base; + while (intermediate) { BlockDriverState *unused; @@ -70,7 +75,6 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base, unused->backing_hd = NULL; bdrv_delete(unused); } - top->backing_hd = base; } static void coroutine_fn stream_run(void *opaque) @@ -110,7 +114,7 @@ wait: /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ - block_job_sleep_ns(&s->common, rt_clock, delay_ns); + block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); if (block_job_is_cancelled(&s->common)) { break; } diff --git a/block/vhdx.h b/block/vhdx.h index c3b64c6ff6..fb687ed2d6 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -168,7 +168,7 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader { vhdx_header. If not found in vhdx_header, it is invalid */ uint64_t flushed_file_offset; /* see spec for full details - this - sould be vhdx file size in bytes */ + should be vhdx file size in bytes */ uint64_t last_file_offset; /* size in bytes that all allocated file structures fit into */ } VHDXLogEntryHeader; diff --git a/block/vmdk.c b/block/vmdk.c index 3756333c60..63b489d29e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -62,19 +62,20 @@ typedef struct { uint32_t cylinders; uint32_t heads; uint32_t sectors_per_track; -} VMDK3Header; +} QEMU_PACKED VMDK3Header; typedef struct { uint32_t version; uint32_t flags; - int64_t capacity; - int64_t granularity; - int64_t desc_offset; - int64_t desc_size; - int32_t num_gtes_per_gte; - int64_t rgd_offset; - int64_t gd_offset; - int64_t grain_offset; + uint64_t capacity; + uint64_t granularity; + uint64_t desc_offset; + uint64_t desc_size; + /* Number of GrainTableEntries per GrainTable */ + uint32_t num_gtes_per_gt; + uint64_t rgd_offset; + uint64_t gd_offset; + uint64_t grain_offset; char filler[1]; char check_bytes[4]; uint16_t compressAlgorithm; @@ -109,7 +110,7 @@ typedef struct VmdkExtent { typedef struct BDRVVmdkState { CoMutex lock; - int desc_offset; + uint64_t desc_offset; bool cid_updated; uint32_t parent_cid; int num_extents; @@ -131,7 +132,7 @@ typedef struct VmdkGrainMarker { uint64_t lba; uint32_t size; uint8_t data[0]; -} VmdkGrainMarker; +} QEMU_PACKED VmdkGrainMarker; enum { MARKER_END_OF_STREAM = 0, @@ -385,15 +386,30 @@ static int vmdk_parent_open(BlockDriverState *bs) /* Create and append extent to the extent array. Return the added VmdkExtent * address. return NULL if allocation failed. */ -static VmdkExtent *vmdk_add_extent(BlockDriverState *bs, +static int vmdk_add_extent(BlockDriverState *bs, BlockDriverState *file, bool flat, int64_t sectors, int64_t l1_offset, int64_t l1_backup_offset, uint32_t l1_size, - int l2_size, unsigned int cluster_sectors) + int l2_size, uint64_t cluster_sectors, + VmdkExtent **new_extent) { VmdkExtent *extent; BDRVVmdkState *s = bs->opaque; + if (cluster_sectors > 0x200000) { + /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */ + error_report("invalid granularity, image may be corrupt"); + return -EINVAL; + } + if (l1_size > 512 * 1024 * 1024) { + /* Although with big capacity and small l1_entry_sectors, we can get a + * big l1_size, we don't want unbounded value to allocate the table. + * Limit it to 512M, which is 16PB for default cluster and L2 table + * size */ + error_report("L1 size too big"); + return -EFBIG; + } + s->extents = g_realloc(s->extents, (s->num_extents + 1) * sizeof(VmdkExtent)); extent = &s->extents[s->num_extents]; @@ -416,7 +432,10 @@ static VmdkExtent *vmdk_add_extent(BlockDriverState *bs, extent->end_sector = extent->sectors; } bs->total_sectors = extent->end_sector; - return extent; + if (new_extent) { + *new_extent = extent; + } + return 0; } static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) @@ -462,9 +481,9 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent) return ret; } -static int vmdk_open_vmdk3(BlockDriverState *bs, - BlockDriverState *file, - int flags) +static int vmdk_open_vmfs_sparse(BlockDriverState *bs, + BlockDriverState *file, + int flags) { int ret; uint32_t magic; @@ -475,12 +494,17 @@ static int vmdk_open_vmdk3(BlockDriverState *bs, if (ret < 0) { return ret; } - extent = vmdk_add_extent(bs, - bs->file, false, - le32_to_cpu(header.disk_sectors), - le32_to_cpu(header.l1dir_offset) << 9, - 0, 1 << 6, 1 << 9, - le32_to_cpu(header.granularity)); + ret = vmdk_add_extent(bs, file, false, + le32_to_cpu(header.disk_sectors), + le32_to_cpu(header.l1dir_offset) << 9, + 0, + le32_to_cpu(header.l1dir_size), + 4096, + le32_to_cpu(header.granularity), + &extent); + if (ret < 0) { + return ret; + } ret = vmdk_init_tables(bs, extent); if (ret) { /* free extent allocated by vmdk_add_extent */ @@ -490,7 +514,7 @@ static int vmdk_open_vmdk3(BlockDriverState *bs, } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, - int64_t desc_offset); + uint64_t desc_offset); static int vmdk_open_vmdk4(BlockDriverState *bs, BlockDriverState *file, @@ -508,7 +532,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, return ret; } if (header.capacity == 0) { - int64_t desc_offset = le64_to_cpu(header.desc_offset); + uint64_t desc_offset = le64_to_cpu(header.desc_offset); if (desc_offset) { return vmdk_open_desc_file(bs, flags, desc_offset << 9); } @@ -570,7 +594,12 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, return -ENOTSUP; } - l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte) + if (le32_to_cpu(header.num_gtes_per_gt) > 512) { + error_report("L2 table size too big"); + return -EINVAL; + } + + l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt) * le64_to_cpu(header.granularity); if (l1_entry_sectors == 0) { return -EINVAL; @@ -580,13 +609,17 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } - extent = vmdk_add_extent(bs, file, false, + ret = vmdk_add_extent(bs, file, false, le64_to_cpu(header.capacity), le64_to_cpu(header.gd_offset) << 9, l1_backup_offset, l1_size, - le32_to_cpu(header.num_gtes_per_gte), - le64_to_cpu(header.granularity)); + le32_to_cpu(header.num_gtes_per_gt), + le64_to_cpu(header.granularity), + &extent); + if (ret < 0) { + return ret; + } extent->compressed = le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; @@ -641,7 +674,7 @@ static int vmdk_open_sparse(BlockDriverState *bs, magic = be32_to_cpu(magic); switch (magic) { case VMDK3_MAGIC: - return vmdk_open_vmdk3(bs, file, flags); + return vmdk_open_vmfs_sparse(bs, file, flags); break; case VMDK4_MAGIC: return vmdk_open_vmdk4(bs, file, flags); @@ -685,7 +718,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, } if (sectors <= 0 || - (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) || + (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && + strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) || (strcmp(access, "RW"))) { goto next_line; } @@ -698,15 +732,18 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, } /* save to extents array */ - if (!strcmp(type, "FLAT")) { + if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) { /* FLAT extent */ VmdkExtent *extent; - extent = vmdk_add_extent(bs, extent_file, true, sectors, - 0, 0, 0, 0, sectors); + ret = vmdk_add_extent(bs, extent_file, true, sectors, + 0, 0, 0, 0, sectors, &extent); + if (ret < 0) { + return ret; + } extent->flat_start_offset = flat_offset << 9; - } else if (!strcmp(type, "SPARSE")) { - /* SPARSE extent */ + } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) { + /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/ ret = vmdk_open_sparse(bs, extent_file, bs->open_flags); if (ret) { bdrv_delete(extent_file); @@ -728,7 +765,7 @@ next_line: } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, - int64_t desc_offset) + uint64_t desc_offset) { int ret; char *buf = NULL; @@ -753,6 +790,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, goto exit; } if (strcmp(ct, "monolithicFlat") && + strcmp(ct, "vmfs") && + strcmp(ct, "vmfsSparse") && strcmp(ct, "twoGbMaxExtentSparse") && strcmp(ct, "twoGbMaxExtentFlat")) { fprintf(stderr, @@ -807,16 +846,17 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t offset, bool allocate) { - /* 128 sectors * 512 bytes each = grain size 64KB */ - uint8_t whole_grain[extent->cluster_sectors * 512]; + int ret = VMDK_OK; + uint8_t *whole_grain = NULL; /* we will be here if it's first write on non-exist grain(cluster). * try to read from parent image, if exist */ if (bs->backing_hd) { - int ret; - + whole_grain = + qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS); if (!vmdk_is_cid_valid(bs)) { - return VMDK_ERROR; + ret = VMDK_ERROR; + goto exit; } /* floor offset to cluster */ @@ -824,17 +864,21 @@ static int get_whole_cluster(BlockDriverState *bs, ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, extent->cluster_sectors); if (ret < 0) { - return VMDK_ERROR; + ret = VMDK_ERROR; + goto exit; } /* Write grain only into the active image */ ret = bdrv_write(extent->file, cluster_offset, whole_grain, extent->cluster_sectors); if (ret < 0) { - return VMDK_ERROR; + ret = VMDK_ERROR; + goto exit; } } - return VMDK_OK; +exit: + qemu_vfree(whole_grain); + return ret; } static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) @@ -1200,8 +1244,10 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, /** * vmdk_write: * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature - * if possible, otherwise return -ENOTSUP. - * @zero_dry_run: used for zeroed == true only, don't update L2 table, just + * if possible, otherwise return -ENOTSUP. + * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try + * with each cluster. By dry run we can find if the zero write + * is possible without modifying image data. * * Returns: error code with 0 for success. */ @@ -1328,6 +1374,8 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, int ret; BDRVVmdkState *s = bs->opaque; qemu_co_mutex_lock(&s->lock); + /* write zeroes could fail if sectors not aligned to cluster, test it with + * dry_run == true before really updating image */ ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true); if (!ret) { ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false); @@ -1336,7 +1384,6 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, return ret; } - static int vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress, bool zeroed_grain) { @@ -1367,12 +1414,12 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; header.capacity = filesize / 512; header.granularity = 128; - header.num_gtes_per_gte = 512; + header.num_gtes_per_gt = 512; grains = (filesize / 512 + header.granularity - 1) / header.granularity; - gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; + gt_size = ((header.num_gtes_per_gt * sizeof(uint32_t)) + 511) >> 9; gt_count = - (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; + (grains + header.num_gtes_per_gt - 1) / header.num_gtes_per_gt; gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; header.desc_offset = 1; @@ -1388,7 +1435,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, header.flags = cpu_to_le32(header.flags); header.capacity = cpu_to_le64(header.capacity); header.granularity = cpu_to_le64(header.granularity); - header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte); + header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt); header.desc_offset = cpu_to_le64(header.desc_offset); header.desc_size = cpu_to_le64(header.desc_size); header.rgd_offset = cpu_to_le64(header.rgd_offset); diff --git a/block/win32-aio.c b/block/win32-aio.c index fcb7c754da..5d1d199b61 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -105,13 +105,6 @@ static void win32_aio_completion_cb(EventNotifier *e) } } -static int win32_aio_flush_cb(EventNotifier *e) -{ - QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); - - return (s->count > 0) ? 1 : 0; -} - static void win32_aio_cancel(BlockDriverAIOCB *blockacb) { QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb; @@ -201,8 +194,7 @@ QEMUWin32AIOState *win32_aio_init(void) goto out_close_efd; } - qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb, - win32_aio_flush_cb); + qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb); return s; |