diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/backup.c | 20 | ||||
-rw-r--r-- | block/io.c | 12 | ||||
-rw-r--r-- | block/nfs.c | 38 | ||||
-rw-r--r-- | block/sheepdog.c | 168 |
4 files changed, 193 insertions, 45 deletions
diff --git a/block/backup.c b/block/backup.c index 965654d521..5696431711 100644 --- a/block/backup.c +++ b/block/backup.c @@ -89,7 +89,8 @@ static void cow_request_end(CowRequest *req) static int coroutine_fn backup_do_cow(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - bool *error_is_read) + bool *error_is_read, + bool is_write_notifier) { BackupBlockJob *job = (BackupBlockJob *)bs->job; CowRequest cow_request; @@ -129,8 +130,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, iov.iov_len = n * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, - &bounce_qiov); + if (is_write_notifier) { + ret = bdrv_co_no_copy_on_readv(bs, + start * BACKUP_SECTORS_PER_CLUSTER, + n, &bounce_qiov); + } else { + ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, + &bounce_qiov); + } if (ret < 0) { trace_backup_do_cow_read_fail(job, start, ret); if (error_is_read) { @@ -190,7 +197,7 @@ static int coroutine_fn backup_before_write_notify( assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - return backup_do_cow(req->bs, sector_num, nb_sectors, NULL); + return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -303,7 +310,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) return ret; } ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + BACKUP_SECTORS_PER_CLUSTER, &error_is_read, + false); if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) { @@ -408,7 +416,7 @@ static void coroutine_fn backup_run(void *opaque) } /* FULL sync mode we copy the whole drive. */ ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false); if (ret < 0) { /* Depending on error action, fail now or retry cluster */ BlockErrorAction action = diff --git a/block/io.c b/block/io.c index d4bc83b33b..94e18e6a9d 100644 --- a/block/io.c +++ b/block/io.c @@ -932,7 +932,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, return ret; } - if (bs->copy_on_read) { + /* Don't do copy-on-read if we read data before write operation */ + if (bs->copy_on_read && !(flags & BDRV_REQ_NO_COPY_ON_READ)) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -1001,6 +1002,15 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); } +int coroutine_fn bdrv_co_no_copy_on_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + trace_bdrv_co_no_copy_on_readv(bs, sector_num, nb_sectors); + + return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, + BDRV_REQ_NO_COPY_ON_READ); +} + int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { diff --git a/block/nfs.c b/block/nfs.c index c026ff6883..887a98e3fc 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -43,6 +43,7 @@ typedef struct NFSClient { int events; bool has_zero_init; AioContext *aio_context; + blkcnt_t st_blocks; } NFSClient; typedef struct NFSRPC { @@ -374,6 +375,7 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, } ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE); + client->st_blocks = st.st_blocks; client->has_zero_init = S_ISREG(st.st_mode); goto out; fail: @@ -464,6 +466,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) NFSRPC task = {0}; struct stat st; + if (bdrv_is_read_only(bs) && + !(bs->open_flags & BDRV_O_NOCACHE)) { + return client->st_blocks * 512; + } + task.st = &st; if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb, &task) != 0) { @@ -475,7 +482,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) aio_poll(client->aio_context, true); } - return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize); + return (task.ret < 0 ? task.ret : st.st_blocks * 512); } static int nfs_file_truncate(BlockDriverState *bs, int64_t offset) @@ -484,6 +491,34 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset) return nfs_ftruncate(client->context, client->fh, offset); } +/* Note that this will not re-establish a connection with the NFS server + * - it is effectively a NOP. */ +static int nfs_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + NFSClient *client = state->bs->opaque; + struct stat st; + int ret = 0; + + if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) { + error_setg(errp, "Cannot open a read-only mount as read-write"); + return -EACCES; + } + + /* Update cache for read-only reopens */ + if (!(state->flags & BDRV_O_RDWR)) { + ret = nfs_fstat(client->context, client->fh, &st); + if (ret < 0) { + error_setg(errp, "Failed to fstat file: %s", + nfs_get_error(client->context)); + return ret; + } + client->st_blocks = st.st_blocks; + } + + return 0; +} + static BlockDriver bdrv_nfs = { .format_name = "nfs", .protocol_name = "nfs", @@ -499,6 +534,7 @@ static BlockDriver bdrv_nfs = { .bdrv_file_open = nfs_file_open, .bdrv_close = nfs_file_close, .bdrv_create = nfs_file_create, + .bdrv_reopen_prepare = nfs_reopen_prepare, .bdrv_co_readv = nfs_co_readv, .bdrv_co_writev = nfs_co_writev, diff --git a/block/sheepdog.c b/block/sheepdog.c index 67ca788d5c..e7e58b782c 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -28,7 +28,6 @@ #define SD_OP_READ_OBJ 0x02 #define SD_OP_WRITE_OBJ 0x03 /* 0x04 is used internally by Sheepdog */ -#define SD_OP_DISCARD_OBJ 0x05 #define SD_OP_NEW_VDI 0x11 #define SD_OP_LOCK_VDI 0x12 @@ -318,7 +317,7 @@ enum AIOCBState { AIOCB_DISCARD_OBJ, }; -#define AIOCBOverwrapping(x, y) \ +#define AIOCBOverlapping(x, y) \ (!(x->max_affect_data_idx < y->min_affect_data_idx \ || y->max_affect_data_idx < x->min_affect_data_idx)) @@ -342,6 +341,15 @@ struct SheepdogAIOCB { uint32_t min_affect_data_idx; uint32_t max_affect_data_idx; + /* + * The difference between affect_data_idx and dirty_data_idx: + * affect_data_idx represents range of index of all request types. + * dirty_data_idx represents range of index updated by COW requests. + * dirty_data_idx is used for updating an inode object. + */ + uint32_t min_dirty_data_idx; + uint32_t max_dirty_data_idx; + QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; }; @@ -351,9 +359,6 @@ typedef struct BDRVSheepdogState { SheepdogInode inode; - uint32_t min_dirty_data_idx; - uint32_t max_dirty_data_idx; - char name[SD_MAX_VDI_LEN]; bool is_snapshot; uint32_t cache_flags; @@ -373,10 +378,15 @@ typedef struct BDRVSheepdogState { QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; - CoQueue overwrapping_queue; + CoQueue overlapping_queue; QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head; } BDRVSheepdogState; +typedef struct BDRVSheepdogReopenState { + int fd; + int cache_flags; +} BDRVSheepdogReopenState; + static const char * sd_strerror(int err) { int i; @@ -556,6 +566,9 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; + return acb; } @@ -819,8 +832,8 @@ static void coroutine_fn aio_read_response(void *opaque) */ if (rsp.result == SD_RES_SUCCESS) { s->inode.data_vdi_id[idx] = s->inode.vdi_id; - s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); - s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); + acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx); + acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx); } } break; @@ -847,10 +860,6 @@ static void coroutine_fn aio_read_response(void *opaque) rsp.result = SD_RES_SUCCESS; s->discard_supported = false; break; - case SD_RES_SUCCESS: - idx = data_oid_to_idx(aio_req->oid); - s->inode.data_vdi_id[idx] = 0; - break; default: break; } @@ -1165,7 +1174,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, hdr.flags = SD_FLAG_CMD_WRITE | flags; break; case AIOCB_DISCARD_OBJ: - hdr.opcode = SD_OP_DISCARD_OBJ; + hdr.opcode = SD_OP_WRITE_OBJ; + hdr.flags = SD_FLAG_CMD_WRITE | flags; + s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0; + offset = offsetof(SheepdogInode, + data_vdi_id[data_oid_to_idx(oid)]); + oid = vid_to_vdi_oid(s->inode.vdi_id); + wlen = datalen = sizeof(uint32_t); break; } @@ -1466,13 +1481,11 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, } memcpy(&s->inode, buf, sizeof(s->inode)); - s->min_dirty_data_idx = UINT32_MAX; - s->max_dirty_data_idx = 0; bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); - qemu_co_queue_init(&s->overwrapping_queue); + qemu_co_queue_init(&s->overlapping_queue); qemu_opts_del(opts); g_free(buf); return 0; @@ -1486,6 +1499,68 @@ out: return ret; } +static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, + Error **errp) +{ + BDRVSheepdogState *s = state->bs->opaque; + BDRVSheepdogReopenState *re_s; + int ret = 0; + + re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1); + + re_s->cache_flags = SD_FLAG_CMD_CACHE; + if (state->flags & BDRV_O_NOCACHE) { + re_s->cache_flags = SD_FLAG_CMD_DIRECT; + } + + re_s->fd = get_sheep_fd(s, errp); + if (re_s->fd < 0) { + ret = re_s->fd; + return ret; + } + + return ret; +} + +static void sd_reopen_commit(BDRVReopenState *state) +{ + BDRVSheepdogReopenState *re_s = state->opaque; + BDRVSheepdogState *s = state->bs->opaque; + + if (s->fd) { + aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); + closesocket(s->fd); + } + + s->fd = re_s->fd; + s->cache_flags = re_s->cache_flags; + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + +static void sd_reopen_abort(BDRVReopenState *state) +{ + BDRVSheepdogReopenState *re_s = state->opaque; + BDRVSheepdogState *s = state->bs->opaque; + + if (re_s == NULL) { + return; + } + + if (re_s->fd) { + aio_set_fd_handler(s->aio_context, re_s->fd, NULL, NULL, NULL); + closesocket(re_s->fd); + } + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, Error **errp) { @@ -1922,16 +1997,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) AIOReq *aio_req; uint32_t offset, data_len, mn, mx; - mn = s->min_dirty_data_idx; - mx = s->max_dirty_data_idx; + mn = acb->min_dirty_data_idx; + mx = acb->max_dirty_data_idx; if (mn <= mx) { /* we need to update the vdi object. */ offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + mn * sizeof(s->inode.data_vdi_id[0]); data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); - s->min_dirty_data_idx = UINT32_MAX; - s->max_dirty_data_idx = 0; + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); @@ -2140,7 +2215,9 @@ static int coroutine_fn sd_co_rw_vector(void *p) } aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, - old_oid, done); + old_oid, + acb->aiocb_type == AIOCB_DISCARD_OBJ ? + 0 : done); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, @@ -2157,12 +2234,12 @@ out: return 1; } -static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) +static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) { SheepdogAIOCB *cb; QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { - if (AIOCBOverwrapping(aiocb, cb)) { + if (AIOCBOverlapping(aiocb, cb)) { return true; } } @@ -2191,15 +2268,15 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, acb->aiocb_type = AIOCB_WRITE_UDATA; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2207,7 +2284,7 @@ retry: qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2224,15 +2301,15 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, acb->aio_done_func = sd_finish_aiocb; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2240,7 +2317,7 @@ retry: qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2576,28 +2653,36 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { SheepdogAIOCB *acb; - QEMUIOVector dummy; BDRVSheepdogState *s = bs->opaque; int ret; + QEMUIOVector discard_iov; + struct iovec iov; + uint32_t zero = 0; if (!s->discard_supported) { return 0; } - acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors); + memset(&discard_iov, 0, sizeof(discard_iov)); + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &zero; + iov.iov_len = sizeof(zero); + discard_iov.iov = &iov; + discard_iov.niov = 1; + acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors); acb->aiocb_type = AIOCB_DISCARD_OBJ; acb->aio_done_func = sd_finish_aiocb; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2605,7 +2690,7 @@ retry: qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2702,6 +2787,9 @@ static BlockDriver bdrv_sheepdog = { .instance_size = sizeof(BDRVSheepdogState), .bdrv_needs_filename = true, .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, @@ -2735,6 +2823,9 @@ static BlockDriver bdrv_sheepdog_tcp = { .instance_size = sizeof(BDRVSheepdogState), .bdrv_needs_filename = true, .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, @@ -2768,6 +2859,9 @@ static BlockDriver bdrv_sheepdog_unix = { .instance_size = sizeof(BDRVSheepdogState), .bdrv_needs_filename = true, .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, |