aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-05-14 17:52:46 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-05-14 17:52:46 +0100
commita9cb55a3562a16f7a4c22290f52e2313a3c05b6a (patch)
tree2aa25480f650c01fad03433f7abb864ad68b23e3 /block
parent7a9180b77eca258ab418ec00ab397392e70e55d9 (diff)
parent31be8a2a97ecba7d31a82932286489cac318e9e9 (diff)
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Block pull request * Support -drive cache.direct=off live migration for POSIX files # gpg: Signature made Sat 12 May 2018 10:27:51 BST # gpg: using RSA key 9CA4ABB381AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" # Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8 * remotes/stefanha/tags/block-pull-request: block/file-posix: add x-check-page-cache=on|off option block/file-posix: implement bdrv_co_invalidate_cache() on Linux checkpatch: reduce MAINTAINERS update message frequency checkpatch: emit a warning on file add/move/delete checkpatch: ignore email headers better checkpatch: check utf-8 content from a commit log when it's missing from charset checkpatch: add a --strict check for utf-8 in commit logs blockjob: drop block_job_pause/resume_all() Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r--block/file-posix.c146
1 files changed, 144 insertions, 2 deletions
diff --git a/block/file-posix.c b/block/file-posix.c
index 3794c0007a..5a602cfe37 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -161,6 +161,7 @@ typedef struct BDRVRawState {
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
+ bool check_cache_dropped;
PRManager *pr_mgr;
} BDRVRawState;
@@ -168,6 +169,7 @@ typedef struct BDRVRawState {
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
+ bool check_cache_dropped;
} BDRVRawReopenState;
static int fd_open(BlockDriverState *bs);
@@ -415,6 +417,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "id of persistent reservation manager object (default: none)",
},
+ {
+ .name = "x-check-cache-dropped",
+ .type = QEMU_OPT_BOOL,
+ .help = "check that page cache was dropped on live migration (default: off)"
+ },
{ /* end of list */ }
},
};
@@ -500,6 +507,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
}
+ s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
+ false);
+
s->open_flags = open_flags;
raw_parse_flags(bdrv_flags, &s->open_flags);
@@ -777,6 +787,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
{
BDRVRawState *s;
BDRVRawReopenState *rs;
+ QemuOpts *opts;
int ret = 0;
Error *local_err = NULL;
@@ -787,6 +798,19 @@ static int raw_reopen_prepare(BDRVReopenState *state,
state->opaque = g_new0(BDRVRawReopenState, 1);
rs = state->opaque;
+ rs->fd = -1;
+
+ /* Handle options changes */
+ opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, state->options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rs->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
+ s->check_cache_dropped);
if (s->type == FTYPE_CD) {
rs->open_flags |= O_NONBLOCK;
@@ -794,8 +818,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
raw_parse_flags(state->flags, &rs->open_flags);
- rs->fd = -1;
-
int fcntl_flags = O_APPEND | O_NONBLOCK;
#ifdef O_NOATIME
fcntl_flags |= O_NOATIME;
@@ -850,6 +872,8 @@ static int raw_reopen_prepare(BDRVReopenState *state,
}
}
+out:
+ qemu_opts_del(opts);
return ret;
}
@@ -858,6 +882,7 @@ static void raw_reopen_commit(BDRVReopenState *state)
BDRVRawReopenState *rs = state->opaque;
BDRVRawState *s = state->bs->opaque;
+ s->check_cache_dropped = rs->check_cache_dropped;
s->open_flags = rs->open_flags;
qemu_close(s->fd);
@@ -2236,6 +2261,120 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
return ret | BDRV_BLOCK_OFFSET_VALID;
}
+#if defined(__linux__)
+/* Verify that the file is not in the page cache */
+static void check_cache_dropped(BlockDriverState *bs, Error **errp)
+{
+ const size_t window_size = 128 * 1024 * 1024;
+ BDRVRawState *s = bs->opaque;
+ void *window = NULL;
+ size_t length = 0;
+ unsigned char *vec;
+ size_t page_size;
+ off_t offset;
+ off_t end;
+
+ /* mincore(2) page status information requires 1 byte per page */
+ page_size = sysconf(_SC_PAGESIZE);
+ vec = g_malloc(DIV_ROUND_UP(window_size, page_size));
+
+ end = raw_getlength(bs);
+
+ for (offset = 0; offset < end; offset += window_size) {
+ void *new_window;
+ size_t new_length;
+ size_t vec_end;
+ size_t i;
+ int ret;
+
+ /* Unmap previous window if size has changed */
+ new_length = MIN(end - offset, window_size);
+ if (new_length != length) {
+ munmap(window, length);
+ window = NULL;
+ length = 0;
+ }
+
+ new_window = mmap(window, new_length, PROT_NONE, MAP_PRIVATE,
+ s->fd, offset);
+ if (new_window == MAP_FAILED) {
+ error_setg_errno(errp, errno, "mmap failed");
+ break;
+ }
+
+ window = new_window;
+ length = new_length;
+
+ ret = mincore(window, length, vec);
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "mincore failed");
+ break;
+ }
+
+ vec_end = DIV_ROUND_UP(length, page_size);
+ for (i = 0; i < vec_end; i++) {
+ if (vec[i] & 0x1) {
+ error_setg(errp, "page cache still in use!");
+ break;
+ }
+ }
+ }
+
+ if (window) {
+ munmap(window, length);
+ }
+
+ g_free(vec);
+}
+#endif /* __linux__ */
+
+static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
+{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "The file descriptor is not open");
+ return;
+ }
+
+ if (s->open_flags & O_DIRECT) {
+ return; /* No host kernel page cache */
+ }
+
+#if defined(__linux__)
+ /* This sets the scene for the next syscall... */
+ ret = bdrv_co_flush(bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "flush failed");
+ return;
+ }
+
+ /* Linux does not invalidate pages that are dirty, locked, or mmapped by a
+ * process. These limitations are okay because we just fsynced the file,
+ * we don't use mmap, and the file should not be in use by other processes.
+ */
+ ret = posix_fadvise(s->fd, 0, 0, POSIX_FADV_DONTNEED);
+ if (ret != 0) { /* the return value is a positive errno */
+ error_setg_errno(errp, ret, "fadvise failed");
+ return;
+ }
+
+ if (s->check_cache_dropped) {
+ check_cache_dropped(bs, errp);
+ }
+#else /* __linux__ */
+ /* Do nothing. Live migration to a remote host with cache.direct=off is
+ * unsupported on other host operating systems. Cache consistency issues
+ * may occur but no error is reported here, partly because that's the
+ * historical behavior and partly because it's hard to differentiate valid
+ * configurations that should not cause errors.
+ */
+#endif /* !__linux__ */
+}
+
static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque)
@@ -2328,6 +2467,7 @@ BlockDriver bdrv_file = {
.bdrv_co_create_opts = raw_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_block_status = raw_co_block_status,
+ .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
.bdrv_co_preadv = raw_co_preadv,
@@ -2805,6 +2945,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
.bdrv_co_preadv = raw_co_preadv,
@@ -2927,6 +3068,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_preadv = raw_co_preadv,