diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2012-09-25 16:06:16 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-09-25 16:06:16 -0500 |
commit | 444dbc381b94f5b54da521df61e751f28b00ce88 (patch) | |
tree | 81801cec4db20dfd55d32159b6ae673244c4fd57 | |
parent | 3988475b9b7fa251b00a29b076761d8c1c7e64dc (diff) | |
parent | dc1c13d96912731d4c7c7e13d31c93b8735f1203 (diff) |
Merge remote-tracking branch 'kwolf/for-anthony' into staging
* kwolf/for-anthony:
block: remove keep_read_only flag from BlockDriverState struct
block: convert bdrv_commit() to use bdrv_reopen()
block: vpc image file reopen
block: vdi image file reopen
block: vmdk image file reopen
block: qcow image file reopen
block: qcow2 image file reopen
block: qed image file reopen
block: raw image file reopen
block: raw-posix image file reopen
block: purge s->aligned_buf and s->aligned_buf_size from raw-posix.c
block: use BDRV_O_NOCACHE instead of s->aligned_buf in raw-posix.c
block: do not parse BDRV_O_CACHE_WB in block drivers
block: move open flag parsing in raw block drivers to helper functions
block: move aio initialization into a helper function
block: Framework for reopening files safely
block: make bdrv_set_enable_write_cache() modify open_flags
block: correctly set the keep_read_only flag
blockdev: preserve readonly and snapshot states across media changes
-rw-r--r-- | block.c | 299 | ||||
-rw-r--r-- | block.h | 18 | ||||
-rw-r--r-- | block/iscsi.c | 4 | ||||
-rw-r--r-- | block/qcow.c | 10 | ||||
-rw-r--r-- | block/qcow2.c | 10 | ||||
-rw-r--r-- | block/qed.c | 9 | ||||
-rw-r--r-- | block/raw-posix.c | 225 | ||||
-rw-r--r-- | block/raw-win32.c | 40 | ||||
-rw-r--r-- | block/raw.c | 10 | ||||
-rw-r--r-- | block/rbd.c | 6 | ||||
-rw-r--r-- | block/sheepdog.c | 14 | ||||
-rw-r--r-- | block/vdi.c | 7 | ||||
-rw-r--r-- | block/vmdk.c | 35 | ||||
-rw-r--r-- | block/vpc.c | 7 | ||||
-rw-r--r-- | block_int.h | 9 | ||||
-rw-r--r-- | blockdev.c | 2 |
16 files changed, 563 insertions, 142 deletions
@@ -668,7 +668,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, open_flags |= BDRV_O_RDWR; } - bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR); + bs->read_only = !(open_flags & BDRV_O_RDWR); /* Open the image, either directly or using a protocol */ if (drv->bdrv_file_open) { @@ -808,6 +808,10 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, goto unlink_and_fail; } + if (flags & BDRV_O_RDWR) { + flags |= BDRV_O_ALLOW_RDWR; + } + /* Open the image */ ret = bdrv_open_common(bs, filename, flags, drv); if (ret < 0) { @@ -837,12 +841,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, bdrv_close(bs); return ret; } - if (bs->is_temporary) { - bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR); - } else { - /* base image inherits from "parent" */ - bs->backing_hd->keep_read_only = bs->keep_read_only; - } } if (!bdrv_key_required(bs)) { @@ -863,6 +861,238 @@ unlink_and_fail: return ret; } +typedef struct BlockReopenQueueEntry { + bool prepared; + BDRVReopenState state; + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; +} BlockReopenQueueEntry; + +/* + * Adds a BlockDriverState to a simple queue for an atomic, transactional + * reopen of multiple devices. + * + * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT + * already performed, or alternatively may be NULL a new BlockReopenQueue will + * be created and initialized. This newly created BlockReopenQueue should be + * passed back in for subsequent calls that are intended to be of the same + * atomic 'set'. + * + * bs is the BlockDriverState to add to the reopen queue. + * + * flags contains the open flags for the associated bs + * + * returns a pointer to bs_queue, which is either the newly allocated + * bs_queue, or the existing bs_queue being used. + * + */ +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, int flags) +{ + assert(bs != NULL); + + BlockReopenQueueEntry *bs_entry; + if (bs_queue == NULL) { + bs_queue = g_new0(BlockReopenQueue, 1); + QSIMPLEQ_INIT(bs_queue); + } + + if (bs->file) { + bdrv_reopen_queue(bs_queue, bs->file, flags); + } + + bs_entry = g_new0(BlockReopenQueueEntry, 1); + QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); + + bs_entry->state.bs = bs; + bs_entry->state.flags = flags; + + return bs_queue; +} + +/* + * Reopen multiple BlockDriverStates atomically & transactionally. + * + * The queue passed in (bs_queue) must have been built up previous + * via bdrv_reopen_queue(). + * + * Reopens all BDS specified in the queue, with the appropriate + * flags. All devices are prepared for reopen, and failure of any + * device will cause all device changes to be abandonded, and intermediate + * data cleaned up. + * + * If all devices prepare successfully, then the changes are committed + * to all devices. + * + */ +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) +{ + int ret = -1; + BlockReopenQueueEntry *bs_entry, *next; + Error *local_err = NULL; + + assert(bs_queue != NULL); + + bdrv_drain_all(); + + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { + error_propagate(errp, local_err); + goto cleanup; + } + bs_entry->prepared = true; + } + + /* If we reach this point, we have success and just need to apply the + * changes + */ + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + bdrv_reopen_commit(&bs_entry->state); + } + + ret = 0; + +cleanup: + QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (ret && bs_entry->prepared) { + bdrv_reopen_abort(&bs_entry->state); + } + g_free(bs_entry); + } + g_free(bs_queue); + return ret; +} + + +/* Reopen a single BlockDriverState with the specified flags. */ +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) +{ + int ret = -1; + Error *local_err = NULL; + BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); + + ret = bdrv_reopen_multiple(queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + } + return ret; +} + + +/* + * Prepares a BlockDriverState for reopen. All changes are staged in the + * 'opaque' field of the BDRVReopenState, which is used and allocated by + * the block driver layer .bdrv_reopen_prepare() + * + * bs is the BlockDriverState to reopen + * flags are the new open flags + * queue is the reopen queue + * + * Returns 0 on success, non-zero on error. On error errp will be set + * as well. + * + * On failure, bdrv_reopen_abort() will be called to clean up any data. + * It is the responsibility of the caller to then call the abort() or + * commit() for any other BDS that have been left in a prepare() state + * + */ +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, + Error **errp) +{ + int ret = -1; + Error *local_err = NULL; + BlockDriver *drv; + + assert(reopen_state != NULL); + assert(reopen_state->bs->drv != NULL); + drv = reopen_state->bs->drv; + + /* if we are to stay read-only, do not allow permission change + * to r/w */ + if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && + reopen_state->flags & BDRV_O_RDWR) { + error_set(errp, QERR_DEVICE_IS_READ_ONLY, + reopen_state->bs->device_name); + goto error; + } + + + ret = bdrv_flush(reopen_state->bs); + if (ret) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", + strerror(-ret)); + goto error; + } + + if (drv->bdrv_reopen_prepare) { + ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); + if (ret) { + if (local_err != NULL) { + error_propagate(errp, local_err); + } else { + error_set(errp, QERR_OPEN_FILE_FAILED, + reopen_state->bs->filename); + } + goto error; + } + } else { + /* It is currently mandatory to have a bdrv_reopen_prepare() + * handler for each supported drv. */ + error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, + drv->format_name, reopen_state->bs->device_name, + "reopening of file"); + ret = -1; + goto error; + } + + ret = 0; + +error: + return ret; +} + +/* + * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and + * makes them final by swapping the staging BlockDriverState contents into + * the active BlockDriverState contents. + */ +void bdrv_reopen_commit(BDRVReopenState *reopen_state) +{ + BlockDriver *drv; + + assert(reopen_state != NULL); + drv = reopen_state->bs->drv; + assert(drv != NULL); + + /* If there are any driver level actions to take */ + if (drv->bdrv_reopen_commit) { + drv->bdrv_reopen_commit(reopen_state); + } + + /* set BDS specific flags now */ + reopen_state->bs->open_flags = reopen_state->flags; + reopen_state->bs->enable_write_cache = !!(reopen_state->flags & + BDRV_O_CACHE_WB); + reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); +} + +/* + * Abort the reopen, and delete and free the staged changes in + * reopen_state + */ +void bdrv_reopen_abort(BDRVReopenState *reopen_state) +{ + BlockDriver *drv; + + assert(reopen_state != NULL); + drv = reopen_state->bs->drv; + assert(drv != NULL); + + if (drv->bdrv_reopen_abort) { + drv->bdrv_reopen_abort(reopen_state); + } +} + + void bdrv_close(BlockDriverState *bs) { bdrv_flush(bs); @@ -1269,13 +1499,11 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) int bdrv_commit(BlockDriverState *bs) { BlockDriver *drv = bs->drv; - BlockDriver *backing_drv; int64_t sector, total_sectors; int n, ro, open_flags; - int ret = 0, rw_ret = 0; + int ret = 0; uint8_t *buf; char filename[1024]; - BlockDriverState *bs_rw, *bs_ro; if (!drv) return -ENOMEDIUM; @@ -1284,42 +1512,18 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } - if (bs->backing_hd->keep_read_only) { - return -EACCES; - } - if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) { return -EBUSY; } - backing_drv = bs->backing_hd->drv; ro = bs->backing_hd->read_only; strncpy(filename, bs->backing_hd->filename, sizeof(filename)); open_flags = bs->backing_hd->open_flags; if (ro) { - /* re-open as RW */ - bdrv_delete(bs->backing_hd); - bs->backing_hd = NULL; - bs_rw = bdrv_new(""); - rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, - backing_drv); - if (rw_ret < 0) { - bdrv_delete(bs_rw); - /* try to re-open read-only */ - bs_ro = bdrv_new(""); - ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, - backing_drv); - if (ret < 0) { - bdrv_delete(bs_ro); - /* drive not functional anymore */ - bs->drv = NULL; - return ret; - } - bs->backing_hd = bs_ro; - return rw_ret; + if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { + return -EACCES; } - bs->backing_hd = bs_rw; } total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; @@ -1356,20 +1560,8 @@ ro_cleanup: g_free(buf); if (ro) { - /* re-open as RO */ - bdrv_delete(bs->backing_hd); - bs->backing_hd = NULL; - bs_ro = bdrv_new(""); - ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, - backing_drv); - if (ret < 0) { - bdrv_delete(bs_ro); - /* drive not functional anymore */ - bs->drv = NULL; - return ret; - } - bs->backing_hd = bs_ro; - bs->backing_hd->keep_read_only = 0; + /* ignoring error return here */ + bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); } return ret; @@ -2168,6 +2360,13 @@ int bdrv_enable_write_cache(BlockDriverState *bs) void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) { bs->enable_write_cache = wce; + + /* so a reopen() will preserve wce */ + if (wce) { + bs->open_flags |= BDRV_O_CACHE_WB; + } else { + bs->open_flags &= ~BDRV_O_CACHE_WB; + } } int bdrv_is_encrypted(BlockDriverState *bs) @@ -80,6 +80,7 @@ typedef struct BlockDevOps { #define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ #define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */ #define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) @@ -96,6 +97,15 @@ typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP } BlockQMPEventAction; +typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + void *opaque; +} BDRVReopenState; + + void bdrv_iostatus_enable(BlockDriverState *bs); void bdrv_iostatus_reset(BlockDriverState *bs); void bdrv_iostatus_disable(BlockDriverState *bs); @@ -130,6 +140,14 @@ int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); int bdrv_open(BlockDriverState *bs, const char *filename, int flags, BlockDriver *drv); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, int flags); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); +void bdrv_reopen_commit(BDRVReopenState *reopen_state); +void bdrv_reopen_abort(BDRVReopenState *reopen_state); void bdrv_close(BlockDriverState *bs); int bdrv_attach_dev(BlockDriverState *bs, void *dev); void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev); diff --git a/block/iscsi.c b/block/iscsi.c index fb001b955c..d0b1a10ee4 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -262,10 +262,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, acb->task->xfer_dir = SCSI_XFER_WRITE; acb->task->cdb_size = 16; acb->task->cdb[0] = 0x8a; - if (!(bs->open_flags & BDRV_O_CACHE_WB)) { - /* set FUA on writes when cache mode is write through */ - acb->task->cdb[1] |= 0x04; - } lba = sector_qemu2lun(sector_num, iscsilun); *(uint32_t *)&acb->task->cdb[2] = htonl(lba >> 32); *(uint32_t *)&acb->task->cdb[6] = htonl(lba & 0xffffffff); diff --git a/block/qcow.c b/block/qcow.c index 7b5ab87d2d..b239c82ae0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -197,6 +197,15 @@ static int qcow_open(BlockDriverState *bs, int flags) return ret; } + +/* We have nothing to do for QCOW reopen, stubs just return + * success */ +static int qcow_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int qcow_set_key(BlockDriverState *bs, const char *key) { BDRVQcowState *s = bs->opaque; @@ -868,6 +877,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_co_readv = qcow_co_readv, diff --git a/block/qcow2.c b/block/qcow2.c index 8f183f1465..aa5e603cd3 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -52,6 +52,7 @@ typedef struct { uint32_t magic; uint32_t len; } QCowExtension; + #define QCOW2_EXT_MAGIC_END 0 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 @@ -558,6 +559,14 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key) return 0; } +/* We have nothing to do for QCOW2 reopen, stubs just return + * success */ +static int qcow2_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -1679,6 +1688,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_probe = qcow2_probe, .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_create = qcow2_create, .bdrv_co_is_allocated = qcow2_co_is_allocated, .bdrv_set_key = qcow2_set_key, diff --git a/block/qed.c b/block/qed.c index 21cb239870..6c182ca917 100644 --- a/block/qed.c +++ b/block/qed.c @@ -505,6 +505,14 @@ out: return ret; } +/* We have nothing to do for QED reopen, stubs just return + * success */ +static int bdrv_qed_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static void bdrv_qed_close(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; @@ -1564,6 +1572,7 @@ static BlockDriver bdrv_qed = { .bdrv_rebind = bdrv_qed_rebind, .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, + .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, .bdrv_create = bdrv_qed_create, .bdrv_co_is_allocated = bdrv_qed_co_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty, diff --git a/block/raw-posix.c b/block/raw-posix.c index 6be20b1925..28d439fa81 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -133,13 +133,19 @@ typedef struct BDRVRawState { int use_aio; void *aio_ctx; #endif - uint8_t *aligned_buf; - unsigned aligned_buf_size; #ifdef CONFIG_XFS bool is_xfs : 1; #endif } BDRVRawState; +typedef struct BDRVRawReopenState { + int fd; + int open_flags; +#ifdef CONFIG_LINUX_AIO + int use_aio; +#endif +} BDRVRawReopenState; + static int fd_open(BlockDriverState *bs); static int64_t raw_getlength(BlockDriverState *bs); @@ -185,6 +191,57 @@ static int raw_normalize_devicepath(const char **filename) } #endif +static void raw_parse_flags(int bdrv_flags, int *open_flags) +{ + assert(open_flags != NULL); + + *open_flags |= O_BINARY; + *open_flags &= ~O_ACCMODE; + if (bdrv_flags & BDRV_O_RDWR) { + *open_flags |= O_RDWR; + } else { + *open_flags |= O_RDONLY; + } + + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((bdrv_flags & BDRV_O_NOCACHE)) { + *open_flags |= O_DIRECT; + } +} + +#ifdef CONFIG_LINUX_AIO +static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags) +{ + int ret = -1; + assert(aio_ctx != NULL); + assert(use_aio != NULL); + /* + * Currently Linux do AIO only for files opened with O_DIRECT + * specified so check NOCACHE flag too + */ + if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == + (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { + + /* if non-NULL, laio_init() has already been run */ + if (*aio_ctx == NULL) { + *aio_ctx = laio_init(); + if (!*aio_ctx) { + goto error; + } + } + *use_aio = 1; + } else { + *use_aio = 0; + } + + ret = 0; + +error: + return ret; +} +#endif + static int raw_open_common(BlockDriverState *bs, const char *filename, int bdrv_flags, int open_flags) { @@ -196,20 +253,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return ret; } - s->open_flags = open_flags | O_BINARY; - s->open_flags &= ~O_ACCMODE; - if (bdrv_flags & BDRV_O_RDWR) { - s->open_flags |= O_RDWR; - } else { - s->open_flags |= O_RDONLY; - } - - /* Use O_DSYNC for write-through caching, no flags for write-back caching, - * and O_DIRECT for no caching. */ - if ((bdrv_flags & BDRV_O_NOCACHE)) - s->open_flags |= O_DIRECT; - if (!(bdrv_flags & BDRV_O_CACHE_WB)) - s->open_flags |= O_DSYNC; + s->open_flags = open_flags; + raw_parse_flags(bdrv_flags, &s->open_flags); s->fd = -1; fd = qemu_open(filename, s->open_flags, 0644); @@ -220,45 +265,17 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return ret; } s->fd = fd; - s->aligned_buf = NULL; - - if ((bdrv_flags & BDRV_O_NOCACHE)) { - /* - * Allocate a buffer for read/modify/write cycles. Chose the size - * pessimistically as we don't know the block size yet. - */ - s->aligned_buf_size = 32 * MAX_BLOCKSIZE; - s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size); - if (s->aligned_buf == NULL) { - goto out_close; - } - } /* We're falling back to POSIX AIO in some cases so init always */ if (paio_init() < 0) { - goto out_free_buf; + goto out_close; } #ifdef CONFIG_LINUX_AIO - /* - * Currently Linux do AIO only for files opened with O_DIRECT - * specified so check NOCACHE flag too - */ - if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) { - - s->aio_ctx = laio_init(); - if (!s->aio_ctx) { - goto out_free_buf; - } - s->use_aio = 1; - } else -#endif - { -#ifdef CONFIG_LINUX_AIO - s->use_aio = 0; -#endif + if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) { + goto out_close; } +#endif #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -268,8 +285,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, return 0; -out_free_buf: - qemu_vfree(s->aligned_buf); out_close: qemu_close(fd); return -errno; @@ -283,6 +298,109 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return raw_open_common(bs, filename, flags, 0); } +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRawState *s; + BDRVRawReopenState *raw_s; + int ret = 0; + + assert(state != NULL); + assert(state->bs != NULL); + + s = state->bs->opaque; + + state->opaque = g_malloc0(sizeof(BDRVRawReopenState)); + raw_s = state->opaque; + +#ifdef CONFIG_LINUX_AIO + raw_s->use_aio = s->use_aio; + + /* we can use s->aio_ctx instead of a copy, because the use_aio flag is + * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio() + * won't override aio_ctx if aio_ctx is non-NULL */ + if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) { + return -1; + } +#endif + + raw_parse_flags(state->flags, &raw_s->open_flags); + + raw_s->fd = -1; + + int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK; +#ifdef O_NOATIME + fcntl_flags |= O_NOATIME; +#endif + + if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) { + /* dup the original fd */ + /* TODO: use qemu fcntl wrapper */ +#ifdef F_DUPFD_CLOEXEC + raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0); +#else + raw_s->fd = dup(s->fd); + if (raw_s->fd != -1) { + qemu_set_cloexec(raw_s->fd); + } +#endif + if (raw_s->fd >= 0) { + ret = fcntl_setfl(raw_s->fd, raw_s->open_flags); + if (ret) { + qemu_close(raw_s->fd); + raw_s->fd = -1; + } + } + } + + /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ + if (raw_s->fd == -1) { + assert(!(raw_s->open_flags & O_CREAT)); + raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags); + if (raw_s->fd == -1) { + ret = -1; + } + } + return ret; +} + + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawReopenState *raw_s = state->opaque; + BDRVRawState *s = state->bs->opaque; + + s->open_flags = raw_s->open_flags; + + qemu_close(s->fd); + s->fd = raw_s->fd; +#ifdef CONFIG_LINUX_AIO + s->use_aio = raw_s->use_aio; +#endif + + g_free(state->opaque); + state->opaque = NULL; +} + + +static void raw_reopen_abort(BDRVReopenState *state) +{ + BDRVRawReopenState *raw_s = state->opaque; + + /* nothing to do if NULL, we didn't get far enough */ + if (raw_s == NULL) { + return; + } + + if (raw_s->fd >= 0) { + qemu_close(raw_s->fd); + raw_s->fd = -1; + } + g_free(state->opaque); + state->opaque = NULL; +} + + /* XXX: use host sector size if necessary with: #ifdef DIOCGSECTORSIZE { @@ -330,7 +448,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * boundary. Check if this is the case or tell the low-level * driver that it needs to copy the buffer. */ - if (s->aligned_buf) { + if ((bs->open_flags & BDRV_O_NOCACHE)) { if (!qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO @@ -378,8 +496,6 @@ static void raw_close(BlockDriverState *bs) if (s->fd >= 0) { qemu_close(s->fd); s->fd = -1; - if (s->aligned_buf != NULL) - qemu_vfree(s->aligned_buf); } } @@ -735,6 +851,9 @@ static BlockDriver bdrv_file = { .instance_size = sizeof(BDRVRawState), .bdrv_probe = NULL, /* no probe for protocols */ .bdrv_file_open = raw_open, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, .bdrv_co_discard = raw_co_discard, diff --git a/block/raw-win32.c b/block/raw-win32.c index c56bf83375..78c830648b 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -77,6 +77,23 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) +{ + assert(access_flags != NULL); + assert(overlapped != NULL); + + if (flags & BDRV_O_RDWR) { + *access_flags = GENERIC_READ | GENERIC_WRITE; + } else { + *access_flags = GENERIC_READ; + } + + *overlapped = FILE_ATTRIBUTE_NORMAL; + if (flags & BDRV_O_NOCACHE) { + *overlapped |= FILE_FLAG_NO_BUFFERING; + } +} + static int raw_open(BlockDriverState *bs, const char *filename, int flags) { BDRVRawState *s = bs->opaque; @@ -85,17 +102,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) s->type = FTYPE_FILE; - if (flags & BDRV_O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } + raw_parse_flags(flags, &access_flags, &overlapped); - overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NOCACHE) - overlapped |= FILE_FLAG_NO_BUFFERING; - if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, overlapped, NULL); @@ -374,18 +382,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags) } s->type = find_device_type(bs, filename); - if (flags & BDRV_O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } + raw_parse_flags(flags, &access_flags, &overlapped); + create_flags = OPEN_EXISTING; - overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NOCACHE) - overlapped |= FILE_FLAG_NO_BUFFERING; - if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); diff --git a/block/raw.c b/block/raw.c index ff34ea41e7..253e949b8c 100644 --- a/block/raw.c +++ b/block/raw.c @@ -9,6 +9,14 @@ static int raw_open(BlockDriverState *bs, int flags) return 0; } +/* We have nothing to do for raw reopen, stubs just return + * success */ +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -115,6 +123,8 @@ static BlockDriver bdrv_raw = { .bdrv_open = raw_open, .bdrv_close = raw_close, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_co_readv = raw_co_readv, .bdrv_co_writev = raw_co_writev, .bdrv_co_is_allocated = raw_co_is_allocated, diff --git a/block/rbd.c b/block/rbd.c index 5a0f79fc8f..015a9db0ad 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -487,12 +487,6 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) rados_conf_set(s->cluster, "rbd_cache", "false"); } else { rados_conf_set(s->cluster, "rbd_cache", "true"); - if (!(flags & BDRV_O_CACHE_WB)) { - r = rados_conf_set(s->cluster, "rbd_cache_max_dirty", "0"); - if (r < 0) { - rados_conf_set(s->cluster, "rbd_cache", "false"); - } - } } if (strstr(conf, "conf=") == NULL) { diff --git a/block/sheepdog.c b/block/sheepdog.c index e0753ee9e5..4742f8ae6f 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1114,14 +1114,12 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) goto out; } - if (flags & BDRV_O_CACHE_WB) { - s->cache_enabled = 1; - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; - } + s->cache_enabled = 1; + s->flush_fd = connect_to_sdog(s->addr, s->port); + if (s->flush_fd < 0) { + error_report("failed to connect"); + ret = s->flush_fd; + goto out; } if (snapid || tag[0] != '\0') { diff --git a/block/vdi.c b/block/vdi.c index 550cf58a39..f35b12ec98 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -454,6 +454,12 @@ static int vdi_open(BlockDriverState *bs, int flags) return -1; } +static int vdi_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -761,6 +767,7 @@ static BlockDriver bdrv_vdi = { .bdrv_probe = vdi_probe, .bdrv_open = vdi_open, .bdrv_close = vdi_close, + .bdrv_reopen_prepare = vdi_reopen_prepare, .bdrv_create = vdi_create, .bdrv_co_is_allocated = vdi_co_is_allocated, .bdrv_make_empty = vdi_make_empty, diff --git a/block/vmdk.c b/block/vmdk.c index bba4c61a74..f2e861b074 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -300,6 +300,40 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) return 1; } +/* Queue extents, if any, for reopen() */ +static int vmdk_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVVmdkState *s; + int ret = -1; + int i; + VmdkExtent *e; + + assert(state != NULL); + assert(state->bs != NULL); + + if (queue == NULL) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "No reopen queue for VMDK extents"); + goto exit; + } + + s = state->bs->opaque; + + assert(s != NULL); + + for (i = 0; i < s->num_extents; i++) { + e = &s->extents[i]; + if (e->file != state->bs->file) { + bdrv_reopen_queue(queue, e->file, state->flags); + } + } + ret = 0; + +exit: + return ret; +} + static int vmdk_parent_open(BlockDriverState *bs) { char *p_name; @@ -1646,6 +1680,7 @@ static BlockDriver bdrv_vmdk = { .instance_size = sizeof(BDRVVmdkState), .bdrv_probe = vmdk_probe, .bdrv_open = vmdk_open, + .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_read = vmdk_co_read, .bdrv_write = vmdk_co_write, .bdrv_close = vmdk_close, diff --git a/block/vpc.c b/block/vpc.c index c0b82c4f57..b6bf52f140 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -265,6 +265,12 @@ static int vpc_open(BlockDriverState *bs, int flags) return err; } +static int vpc_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + /* * Returns the absolute byte offset of the given sector in the image file. * If the sector is not allocated, -1 is returned instead. @@ -783,6 +789,7 @@ static BlockDriver bdrv_vpc = { .bdrv_probe = vpc_probe, .bdrv_open = vpc_open, .bdrv_close = vpc_close, + .bdrv_reopen_prepare = vpc_reopen_prepare, .bdrv_create = vpc_create, .bdrv_read = vpc_co_read, diff --git a/block_int.h b/block_int.h index 4452f6f398..ac4245cb18 100644 --- a/block_int.h +++ b/block_int.h @@ -139,6 +139,13 @@ struct BlockDriver { int instance_size; int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); int (*bdrv_probe_device)(const char *filename); + + /* For handling image reopen for split or non-split files */ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + int (*bdrv_open)(BlockDriverState *bs, int flags); int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags); int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num, @@ -268,7 +275,6 @@ struct BlockDriverState { int64_t total_sectors; /* if we are reading a disk image, give its size in sectors */ int read_only; /* if true, the media is read only */ - int keep_read_only; /* if true, the media was requested to stay read only */ int open_flags; /* flags used to open the file, re-used for re-open */ int encrypted; /* if true, the media is encrypted */ int valid_key; /* if true, a valid encryption key has been set */ @@ -336,6 +342,7 @@ struct BlockDriverState { /* long-running background operation */ BlockJob *job; + }; int get_tmp_filename(char *filename, int size); diff --git a/blockdev.c b/blockdev.c index 7c83baa353..e5d450f0bb 100644 --- a/blockdev.c +++ b/blockdev.c @@ -527,6 +527,8 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) if_name[type], mediastr, unit_id); } dinfo->bdrv = bdrv_new(dinfo->id); + dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0; + dinfo->bdrv->read_only = ro; dinfo->devaddr = devaddr; dinfo->type = type; dinfo->bus = bus_id; |