author     Anthony Liguori <anthony@codemonkey.ws>   2013-10-31 17:02:26 +0100
committer  Anthony Liguori <anthony@codemonkey.ws>   2013-10-31 17:02:26 +0100
commit     a126050a103c924b03388a9a64ce9af8c96b0969 (patch)
tree       c93b9c5cdb8b2a0256845e596e198f1c65c4fcde /block
parent     ef5cfe5bbd8bb05a51afaf7ab313769eb9ef44b6 (diff)
parent     f4c129a38a5430b7342a7a23f53a22831154612f (diff)
Merge remote-tracking branch 'kwolf/tags/for-anthony' into staging
Block patches for 1.7.0-rc0 (v2)
# gpg: Signature made Thu 31 Oct 2013 04:44:39 PM CET using RSA key ID C88F2FD6
# gpg: Can't check signature: public key not found
* kwolf/tags/for-anthony: (30 commits)
vmdk: Implment bdrv_get_specific_info
qapi: Add optional field 'compressed' to ImageInfo
qemu-iotests: prefill some data to test image
sheepdog: check simultaneous create in resend_aioreq
sheepdog: cancel aio requests if possible
sheepdog: make add_aio_request and send_aioreq void functions
sheepdog: try to reconnect to sheepdog after network error
coroutine: add co_aio_sleep_ns() to allow sleep in block drivers
sheepdog: reload inode outside of resend_aioreq
sheepdog: handle vdi objects in resend_aio_req
sheepdog: check return values of qemu_co_recv/send correctly
qemu-iotests: Test case for backing file deletion
qemu-iotests: drop duplicated "create_image"
qemu-iotests: Fix 051 reference output
block: Avoid unecessary drv->bdrv_getlength() calls
block: Disable BDRV_O_COPY_ON_READ for the backing file
ahci: fix win7 hang on boot
sheepdog: pass copy_policy in the request
sheepdog: explicitly set copies as type uint8_t
block: Don't copy backing file name on error
...
Message-id: 1383064269-27720-1-git-send-email-kwolf@redhat.com
Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
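
The headline change in this series is the sheepdog reconnect support ("sheepdog: try to reconnect to sheepdog after network error", built on the new co_aio_sleep_ns() helper): when the socket to the sheepdog server breaks, reconnect_to_sdog() closes the old fd, retries the connection once per second, and then resends every request that was in flight. The following standalone C sketch mirrors only the retry loop; connect_once() and its "succeeds on the third attempt" behaviour are illustrative stand-ins for get_sheep_fd(), and plain sleep() stands in for co_aio_sleep_ns(), which in the real driver yields the coroutine instead of blocking the main loop.

#include <stdio.h>
#include <unistd.h>

/* Illustrative stand-in for get_sheep_fd(): pretend the server becomes
 * reachable again after two failed attempts. */
static int connect_once(void)
{
    static int attempts;
    return ++attempts < 3 ? -1 : 42;   /* 42: dummy "connected" fd */
}

/* Retry until the connection comes back, sleeping one second between
 * attempts.  The real reconnect_to_sdog() runs in a coroutine and uses
 * co_aio_sleep_ns() so the rest of QEMU keeps running while it waits. */
static int reconnect(void)
{
    int fd = -1;

    while (fd < 0) {
        fd = connect_once();
        if (fd < 0) {
            fprintf(stderr, "waiting for connection to be established\n");
            sleep(1);
        }
    }
    return fd;
}

int main(void)
{
    printf("reconnected, fd=%d\n", reconnect());
    return 0;
}

Once reconnected, the real driver additionally moves every inflight request onto the new failed queue and resends it through resend_aioreq(), as the sheepdog.c hunks below show.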
Diffstat (limited to 'block')
-rw-r--r--  block/qcow2.c      |  19
-rw-r--r--  block/raw-posix.c  |   9
-rw-r--r--  block/raw-win32.c  |   4
-rw-r--r--  block/raw_bsd.c    |   1
-rw-r--r--  block/sheepdog.c   | 352
-rw-r--r--  block/vmdk.c       |  68
-rw-r--r--  block/vpc.c        |   7
7 files changed, 325 insertions, 135 deletions
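
Several of the hunks below add a has_variable_length flag to the host device drivers (raw-posix, raw-win32, raw_bsd), matching the "block: Avoid unecessary drv->bdrv_getlength() calls" commit in the list above: the block layer can keep using its cached size and only re-query the driver for media whose size can change underneath it (host CD-ROM, floppy). A minimal sketch of that caching idea, using hypothetical Device/get_length names rather than the real BlockDriverState API:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified analogue of BlockDriverState/BlockDriver. */
typedef struct Device {
    int has_variable_length;                 /* removable media may resize */
    int64_t cached_length;                   /* -1 until first query */
    int64_t (*get_length)(struct Device *);  /* potentially expensive call */
} Device;

/* Only re-query the driver when the size can change behind our back. */
static int64_t device_length(Device *dev)
{
    if (dev->cached_length < 0 || dev->has_variable_length) {
        dev->cached_length = dev->get_length(dev);
    }
    return dev->cached_length;
}

static int64_t fixed_1g(Device *dev)
{
    (void)dev;
    puts("driver asked for length");         /* printed only once */
    return (int64_t)1 << 30;
}

int main(void)
{
    Device d = { .has_variable_length = 0, .cached_length = -1,
                 .get_length = fixed_1g };
    device_length(&d);
    device_length(&d);                        /* served from the cache */
    return 0;
}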
diff --git a/block/qcow2.c b/block/qcow2.c index c1abaffa19..6e5d98dc48 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1584,6 +1584,16 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } + bdrv_close(bs); + + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ + ret = bdrv_open(bs, filename, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + ret = 0; out: bdrv_unref(bs); @@ -1939,13 +1949,22 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BDRVQcowState *s = bs->opaque; + int64_t total_sectors = bs->total_sectors; int growable = bs->growable; + bool zero_beyond_eof = bs->zero_beyond_eof; int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); bs->growable = 1; + bs->zero_beyond_eof = false; ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); bs->growable = growable; + bs->zero_beyond_eof = zero_beyond_eof; + + /* bdrv_co_do_writev will have increased the total_sectors value to include + * the VM state - the VM state is however not an actual part of the block + * device, therefore, we need to restore the old value. */ + bs->total_sectors = total_sectors; return ret; } diff --git a/block/raw-posix.c b/block/raw-posix.c index 6f03fbf793..f6d48bbdb2 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1715,7 +1715,8 @@ static BlockDriver bdrv_host_floppy = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, @@ -1824,7 +1825,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, @@ -1951,7 +1953,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, diff --git a/block/raw-win32.c b/block/raw-win32.c index 676b5701db..2741e4de10 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -616,7 +616,9 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, + .bdrv_get_allocated_file_size = raw_get_allocated_file_size, }; diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 0078c1baeb..2265dcc03f 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -178,6 +178,7 @@ static BlockDriver bdrv_raw = { .bdrv_co_get_block_status = &raw_co_get_block_status, .bdrv_truncate = &raw_truncate, .bdrv_getlength = &raw_getlength, + .has_variable_length = true, .bdrv_get_info = &raw_get_info, .bdrv_is_inserted = &raw_is_inserted, .bdrv_media_changed = &raw_media_changed, diff --git a/block/sheepdog.c b/block/sheepdog.c index 5f81c93ee3..ef387de71f 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -125,8 +125,9 @@ typedef struct SheepdogObjReq { uint32_t data_length; uint64_t oid; uint64_t cow_oid; - uint32_t copies; - uint32_t rsvd; + uint8_t copies; + uint8_t copy_policy; + uint8_t reserved[6]; uint64_t offset; } SheepdogObjReq; @@ -138,7 +139,9 @@ typedef struct 
SheepdogObjRsp { uint32_t id; uint32_t data_length; uint32_t result; - uint32_t copies; + uint8_t copies; + uint8_t copy_policy; + uint8_t reserved[2]; uint32_t pad[6]; } SheepdogObjRsp; @@ -151,7 +154,9 @@ typedef struct SheepdogVdiReq { uint32_t data_length; uint64_t vdi_size; uint32_t vdi_id; - uint32_t copies; + uint8_t copies; + uint8_t copy_policy; + uint8_t reserved[2]; uint32_t snapid; uint32_t pad[3]; } SheepdogVdiReq; @@ -222,6 +227,11 @@ static inline uint64_t data_oid_to_idx(uint64_t oid) return oid & (MAX_DATA_OBJS - 1); } +static inline uint32_t oid_to_vid(uint64_t oid) +{ + return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; +} + static inline uint64_t vid_to_vdi_oid(uint32_t vid) { return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); @@ -289,11 +299,14 @@ struct SheepdogAIOCB { Coroutine *coroutine; void (*aio_done_func)(SheepdogAIOCB *); - bool canceled; + bool cancelable; + bool *finished; int nr_pending; }; typedef struct BDRVSheepdogState { + BlockDriverState *bs; + SheepdogInode inode; uint32_t min_dirty_data_idx; @@ -313,8 +326,11 @@ typedef struct BDRVSheepdogState { Coroutine *co_recv; uint32_t aioreq_seq_num; + + /* Every aio request must be linked to either of these queues. */ QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head; + QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; } BDRVSheepdogState; static const char * sd_strerror(int err) @@ -403,6 +419,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; + acb->cancelable = false; QLIST_REMOVE(aio_req, aio_siblings); g_free(aio_req); @@ -411,23 +428,68 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) { - if (!acb->canceled) { - qemu_coroutine_enter(acb->coroutine, NULL); + qemu_coroutine_enter(acb->coroutine, NULL); + if (acb->finished) { + *acb->finished = true; } qemu_aio_release(acb); } +/* + * Check whether the specified acb can be canceled + * + * We can cancel aio when any request belonging to the acb is: + * - Not processed by the sheepdog server. + * - Not linked to the inflight queue. + */ +static bool sd_acb_cancelable(const SheepdogAIOCB *acb) +{ + BDRVSheepdogState *s = acb->common.bs->opaque; + AIOReq *aioreq; + + if (!acb->cancelable) { + return false; + } + + QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) { + if (aioreq->aiocb == acb) { + return false; + } + } + + return true; +} + static void sd_aio_cancel(BlockDriverAIOCB *blockacb) { SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; + BDRVSheepdogState *s = acb->common.bs->opaque; + AIOReq *aioreq, *next; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + if (sd_acb_cancelable(acb)) { + /* Remove outstanding requests from pending and failed queues. */ + QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings, + next) { + if (aioreq->aiocb == acb) { + free_aio_req(s, aioreq); + } + } + QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, + next) { + if (aioreq->aiocb == acb) { + free_aio_req(s, aioreq); + } + } - /* - * Sheepdog cannot cancel the requests which are already sent to - * the servers, so we just complete the request with -EIO here. 
- */ - acb->ret = -EIO; - qemu_coroutine_enter(acb->coroutine, NULL); - acb->canceled = true; + assert(acb->nr_pending == 0); + sd_finish_aiocb(acb); + return; + } + qemu_aio_wait(); + } } static const AIOCBInfo sd_aiocb_info = { @@ -448,7 +510,8 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->nb_sectors = nb_sectors; acb->aio_done_func = NULL; - acb->canceled = false; + acb->cancelable = true; + acb->finished = NULL; acb->coroutine = qemu_coroutine_self(); acb->ret = 0; acb->nr_pending = 0; @@ -489,13 +552,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, int ret; ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); - if (ret < sizeof(*hdr)) { + if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); return ret; } ret = qemu_co_send(sockfd, data, *wlen); - if (ret < *wlen) { + if (ret != *wlen) { error_report("failed to send a req, %s", strerror(errno)); } @@ -541,7 +604,7 @@ static coroutine_fn void do_co_req(void *opaque) qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); - if (ret < sizeof(*hdr)) { + if (ret != sizeof(*hdr)) { error_report("failed to get a rsp, %s", strerror(errno)); ret = -errno; goto out; @@ -553,7 +616,7 @@ static coroutine_fn void do_co_req(void *opaque) if (*rlen) { ret = qemu_co_recv(sockfd, data, *rlen); - if (ret < *rlen) { + if (ret != *rlen) { error_report("failed to get the data, %s", strerror(errno)); ret = -errno; goto out; @@ -596,11 +659,13 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, return srco.ret; } -static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type); -static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); - +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); +static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); +static int get_sheep_fd(BDRVSheepdogState *s); +static void co_write_request(void *opaque); static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) { @@ -623,22 +688,59 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) { AIOReq *aio_req; SheepdogAIOCB *acb; - int ret; while ((aio_req = find_pending_req(s, oid)) != NULL) { acb = aio_req->aiocb; /* move aio_req from pending list to inflight one */ QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, acb->qiov->iov, - acb->qiov->niov, false, acb->aiocb_type); - if (ret < 0) { - error_report("add_aio_request is failed"); - free_aio_req(s, aio_req); - if (!acb->nr_pending) { - sd_finish_aiocb(acb); - } + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false, + acb->aiocb_type); + } +} + +static coroutine_fn void reconnect_to_sdog(void *opaque) +{ + BDRVSheepdogState *s = opaque; + AIOReq *aio_req, *next; + + qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + close(s->fd); + s->fd = -1; + + /* Wait for outstanding write requests to be completed. */ + while (s->co_send != NULL) { + co_write_request(opaque); + } + + /* Try to reconnect the sheepdog server every one second. 
*/ + while (s->fd < 0) { + s->fd = get_sheep_fd(s); + if (s->fd < 0) { + DPRINTF("Wait for connection to be established\n"); + co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME, + 1000000000ULL); } + }; + + /* + * Now we have to resend all the request in the inflight queue. However, + * resend_aioreq() can yield and newly created requests can be added to the + * inflight queue before the coroutine is resumed. To avoid mixing them, we + * have to move all the inflight requests to the failed queue before + * resend_aioreq() is called. + */ + QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) { + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings); + } + + /* Resend all the failed aio requests. */ + while (!QLIST_EMPTY(&s->failed_aio_head)) { + aio_req = QLIST_FIRST(&s->failed_aio_head); + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); + resend_aioreq(s, aio_req); } } @@ -658,15 +760,11 @@ static void coroutine_fn aio_read_response(void *opaque) SheepdogAIOCB *acb; uint64_t idx; - if (QLIST_EMPTY(&s->inflight_aio_head)) { - goto out; - } - /* read a header */ ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); - if (ret < 0) { + if (ret != sizeof(rsp)) { error_report("failed to get the header, %s", strerror(errno)); - goto out; + goto err; } /* find the right aio_req from the inflight aio list */ @@ -677,7 +775,7 @@ static void coroutine_fn aio_read_response(void *opaque) } if (!aio_req) { error_report("cannot find aio_req %x", rsp.id); - goto out; + goto err; } acb = aio_req->aiocb; @@ -715,9 +813,9 @@ static void coroutine_fn aio_read_response(void *opaque) case AIOCB_READ_UDATA: ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, aio_req->iov_offset, rsp.data_length); - if (ret < 0) { + if (ret != rsp.data_length) { error_report("failed to get the data, %s", strerror(errno)); - goto out; + goto err; } break; case AIOCB_FLUSH_CACHE: @@ -748,11 +846,20 @@ static void coroutine_fn aio_read_response(void *opaque) case SD_RES_SUCCESS: break; case SD_RES_READONLY: - ret = resend_aioreq(s, aio_req); - if (ret == SD_RES_SUCCESS) { - goto out; + if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { + ret = reload_inode(s, 0, ""); + if (ret < 0) { + goto err; + } } - /* fall through */ + if (is_data_obj(aio_req->oid)) { + aio_req->oid = vid_to_data_oid(s->inode.vdi_id, + data_oid_to_idx(aio_req->oid)); + } else { + aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); + } + resend_aioreq(s, aio_req); + goto out; default: acb->ret = -EIO; error_report("%s", sd_strerror(rsp.result)); @@ -769,6 +876,10 @@ static void coroutine_fn aio_read_response(void *opaque) } out: s->co_recv = NULL; + return; +err: + s->co_recv = NULL; + reconnect_to_sdog(opaque); } static void co_read_response(void *opaque) @@ -997,7 +1108,7 @@ out: return ret; } -static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type) { @@ -1059,29 +1170,25 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, /* send a header */ ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); - if (ret < 0) { - qemu_co_mutex_unlock(&s->lock); + if (ret != sizeof(hdr)) { error_report("failed to send a req, %s", strerror(errno)); - return -errno; + goto out; } if (wlen) { ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); - if 
(ret < 0) { - qemu_co_mutex_unlock(&s->lock); + if (ret != wlen) { error_report("failed to send a data, %s", strerror(errno)); - return -errno; } } - +out: socket_set_cork(s->fd, 0); qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); + s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); - - return 0; } -static int read_write_object(int fd, char *buf, uint64_t oid, int copies, +static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool write, bool create, uint32_t cache_flags) { @@ -1129,7 +1236,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, } } -static int read_object(int fd, char *buf, uint64_t oid, int copies, +static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, uint32_t cache_flags) { @@ -1137,7 +1244,7 @@ static int read_object(int fd, char *buf, uint64_t oid, int copies, false, cache_flags); } -static int write_object(int fd, char *buf, uint64_t oid, int copies, +static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool create, uint32_t cache_flags) { @@ -1181,51 +1288,62 @@ out: return ret; } -static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) +/* Return true if the specified request is linked to the pending list. */ +static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) { - SheepdogAIOCB *acb = aio_req->aiocb; - bool create = false; - int ret; - - ret = reload_inode(s, 0, ""); - if (ret < 0) { - return ret; + AIOReq *areq; + QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { + if (areq != aio_req && areq->oid == aio_req->oid) { + /* + * Sheepdog cannot handle simultaneous create requests to the same + * object, so we cannot send the request until the previous request + * finishes. 
+ */ + DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid); + aio_req->flags = 0; + aio_req->base_oid = 0; + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); + return true; + } } - aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); + return false; +} + +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) +{ + SheepdogAIOCB *acb = aio_req->aiocb; + bool create = false; /* check whether this request becomes a CoW one */ - if (acb->aiocb_type == AIOCB_WRITE_UDATA) { + if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { int idx = data_oid_to_idx(aio_req->oid); - AIOReq *areq; - if (s->inode.data_vdi_id[idx] == 0) { - create = true; - goto out; - } if (is_data_obj_writable(&s->inode, idx)) { goto out; } - /* link to the pending list if there is another CoW request to - * the same object */ - QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { - if (areq != aio_req && areq->oid == aio_req->oid) { - DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); - QLIST_REMOVE(aio_req, aio_siblings); - QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); - return SD_RES_SUCCESS; - } + if (check_simultaneous_create(s, aio_req)) { + return; } - aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); - aio_req->flags |= SD_FLAG_CMD_COW; + if (s->inode.data_vdi_id[idx]) { + aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); + aio_req->flags |= SD_FLAG_CMD_COW; + } create = true; } out: - return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); + if (is_data_obj(aio_req->oid)) { + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + acb->aiocb_type); + } else { + struct iovec iov; + iov.iov_base = &s->inode; + iov.iov_len = sizeof(s->inode); + add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + } } /* TODO Convert to fine grained options */ @@ -1255,6 +1373,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; const char *filename; + s->bs = bs; + opts = qemu_opts_create_nofail(&runtime_opts); qemu_opts_absorb_qdict(opts, options, &local_err); if (error_is_set(&local_err)) { @@ -1268,6 +1388,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, QLIST_INIT(&s->inflight_aio_head); QLIST_INIT(&s->pending_aio_head); + QLIST_INIT(&s->failed_aio_head); s->fd = -1; memset(vdi, 0, sizeof(vdi)); @@ -1344,7 +1465,8 @@ out: } static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, - uint32_t base_vid, uint32_t *vdi_id, int snapshot) + uint32_t base_vid, uint32_t *vdi_id, int snapshot, + uint8_t copy_policy) { SheepdogVdiReq hdr; SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; @@ -1374,6 +1496,7 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, hdr.data_length = wlen; hdr.vdi_size = vdi_size; + hdr.copy_policy = copy_policy; ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); @@ -1526,7 +1649,8 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, bdrv_unref(bs); } - ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0); + /* TODO: allow users to specify copy number */ + ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0, 0); if (!prealloc || ret) { goto out; } @@ -1621,7 +1745,6 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) */ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) { - int 
ret; BDRVSheepdogState *s = acb->common.bs->opaque; struct iovec iov; AIOReq *aio_req; @@ -1643,18 +1766,13 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), data_len, offset, 0, 0, offset); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); - if (ret) { - free_aio_req(s, aio_req); - acb->ret = -EIO; - goto out; - } + add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); acb->aio_done_func = sd_finish_aiocb; acb->aiocb_type = AIOCB_WRITE_UDATA; return; } -out: + sd_finish_aiocb(acb); } @@ -1716,7 +1834,7 @@ static int sd_create_branch(BDRVSheepdogState *s) */ deleted = sd_delete(s); ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, - !deleted); + !deleted, s->inode.copy_policy); if (ret) { goto out; } @@ -1840,35 +1958,16 @@ static int coroutine_fn sd_co_rw_vector(void *p) } aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); if (create) { - AIOReq *areq; - QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { - if (areq->oid == oid) { - /* - * Sheepdog cannot handle simultaneous create - * requests to the same object. So we cannot send - * the request until the previous request - * finishes. - */ - aio_req->flags = 0; - aio_req->base_oid = 0; - QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, - aio_siblings); - goto done; - } + if (check_simultaneous_create(s, aio_req)) { + goto done; } } - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); - if (ret < 0) { - error_report("add_aio_request is failed"); - free_aio_req(s, aio_req); - acb->ret = -EIO; - goto out; - } + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + acb->aiocb_type); done: offset = 0; idx++; @@ -1936,7 +2035,6 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) BDRVSheepdogState *s = bs->opaque; SheepdogAIOCB *acb; AIOReq *aio_req; - int ret; if (s->cache_flags != SD_FLAG_CMD_CACHE) { return 0; @@ -1949,13 +2047,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), 0, 0, 0, 0, 0); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); - if (ret < 0) { - error_report("add_aio_request is failed"); - free_aio_req(s, aio_req); - qemu_aio_release(acb); - return ret; - } + add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); qemu_coroutine_yield(); return acb->ret; @@ -2006,7 +2098,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, - 1); + 1, s->inode.copy_policy); if (ret < 0) { error_report("failed to create inode for snapshot. 
%s", strerror(errno)); diff --git a/block/vmdk.c b/block/vmdk.c index 32ec8b7766..a7ebd0f125 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -106,6 +106,7 @@ typedef struct VmdkExtent { uint32_t l2_cache_counts[L2_CACHE_SIZE]; int64_t cluster_sectors; + char *type; } VmdkExtent; typedef struct BDRVVmdkState { @@ -113,11 +114,13 @@ typedef struct BDRVVmdkState { uint64_t desc_offset; bool cid_updated; bool cid_checked; + uint32_t cid; uint32_t parent_cid; int num_extents; /* Extent array with num_extents entries, ascend ordered by address */ VmdkExtent *extents; Error *migration_blocker; + char *create_type; } BDRVVmdkState; typedef struct VmdkMetaData { @@ -214,6 +217,7 @@ static void vmdk_free_extents(BlockDriverState *bs) g_free(e->l1_table); g_free(e->l2_cache); g_free(e->l1_backup_table); + g_free(e->type); if (e->file != bs->file) { bdrv_unref(e->file); } @@ -534,6 +538,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, uint32_t l1_size, l1_entry_sectors; VMDK4Header header; VmdkExtent *extent; + BDRVVmdkState *s = bs->opaque; int64_t l1_backup_offset = 0; ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); @@ -549,6 +554,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, } } + if (!s->create_type) { + s->create_type = g_strdup("monolithicSparse"); + } + if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { /* * The footer takes precedence over the header, so read it in. The @@ -709,6 +718,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, int64_t flat_offset; char extent_path[PATH_MAX]; BlockDriverState *extent_file; + BDRVVmdkState *s = bs->opaque; + VmdkExtent *extent; while (*p) { /* parse extent line: @@ -751,7 +762,6 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, /* save to extents array */ if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) { /* FLAT extent */ - VmdkExtent *extent; ret = vmdk_add_extent(bs, extent_file, true, sectors, 0, 0, 0, 0, 0, &extent, errp); @@ -766,10 +776,12 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, bdrv_unref(extent_file); return ret; } + extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); return -ENOTSUP; } + extent->type = g_strdup(type); next_line: /* move to next line */ while (*p) { @@ -817,6 +829,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, ret = -ENOTSUP; goto exit; } + s->create_type = g_strdup(ct); s->desc_offset = 0; ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp); exit: @@ -843,6 +856,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, if (ret) { goto fail; } + s->cid = vmdk_read_cid(bs, 0); s->parent_cid = vmdk_read_cid(bs, 1); qemu_co_mutex_init(&s->lock); @@ -855,6 +869,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, return 0; fail: + g_free(s->create_type); + s->create_type = NULL; vmdk_free_extents(bs); return ret; } @@ -1766,6 +1782,7 @@ static void vmdk_close(BlockDriverState *bs) BDRVVmdkState *s = bs->opaque; vmdk_free_extents(bs); + g_free(s->create_type); migrate_del_blocker(s->migration_blocker); error_free(s->migration_blocker); @@ -1827,6 +1844,54 @@ static int vmdk_has_zero_init(BlockDriverState *bs) return 1; } +static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) +{ + ImageInfo *info = g_new0(ImageInfo, 1); + + *info = (ImageInfo){ + .filename = g_strdup(extent->file->filename), + .format = g_strdup(extent->type), + .virtual_size = extent->sectors * BDRV_SECTOR_SIZE, + 
.compressed = extent->compressed, + .has_compressed = extent->compressed, + .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE, + .has_cluster_size = !extent->flat, + }; + + return info; +} + +static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) +{ + int i; + BDRVVmdkState *s = bs->opaque; + ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); + ImageInfoList **next; + + *spec_info = (ImageInfoSpecific){ + .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK, + { + .vmdk = g_new0(ImageInfoSpecificVmdk, 1), + }, + }; + + *spec_info->vmdk = (ImageInfoSpecificVmdk) { + .create_type = g_strdup(s->create_type), + .cid = s->cid, + .parent_cid = s->parent_cid, + }; + + next = &spec_info->vmdk->extents; + for (i = 0; i < s->num_extents; i++) { + *next = g_new0(ImageInfoList, 1); + (*next)->value = vmdk_get_extent_info(&s->extents[i]); + (*next)->next = NULL; + next = &(*next)->next; + } + + return spec_info; +} + static QEMUOptionParameter vmdk_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1879,6 +1944,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_co_get_block_status = vmdk_co_get_block_status, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, + .bdrv_get_specific_info = vmdk_get_specific_info, .create_options = vmdk_create_options, }; diff --git a/block/vpc.c b/block/vpc.c index b5dca3961e..627d11cb9b 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -260,6 +260,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } } + if (s->free_data_block_offset > bdrv_getlength(bs->file)) { + error_setg(errp, "block-vpc: free_data_block_offset points after " + "the end of file. The image has been truncated."); + ret = -EINVAL; + goto fail; + } + s->last_bitmap_offset = (int64_t) -1; #ifdef CACHE |
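
The new vmdk_get_specific_info() above builds the ImageInfoList of extents with a pointer-to-pointer cursor (next = &(*next)->next), which appends in order without keeping a separate tail pointer, rescanning from the head, or reversing the list afterwards. A standalone sketch of the same idiom, using a hypothetical IntList in place of ImageInfoList:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for ImageInfoList: a singly linked list of ints. */
typedef struct IntList {
    int value;
    struct IntList *next;
} IntList;

/* Append in input order by keeping 'next' pointed at the link the next
 * node has to fill - no tail pointer and no final reversal. */
static IntList *build_list(const int *values, size_t n)
{
    IntList *head = NULL;
    IntList **next = &head;

    for (size_t i = 0; i < n; i++) {
        *next = calloc(1, sizeof(**next));  /* g_new0() aborts on OOM in QEMU;
                                               error handling elided here */
        (*next)->value = values[i];
        (*next)->next = NULL;
        next = &(*next)->next;
    }
    return head;
}

int main(void)
{
    int values[] = { 19, 9, 4 };

    for (IntList *p = build_list(values, 3); p; p = p->next) {
        printf("%d\n", p->value);
    }
    return 0;
}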