aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-04-09 12:05:00 +0100
committerPeter Maydell <peter.maydell@linaro.org>2015-04-09 12:05:00 +0100
commita6f2cb037a82fb8679e70e175cfbc879dd829e06 (patch)
tree2474354310450cbe8857870456a83a0eb92fedc0
parentcf811fff2ae20008f00455d0ab2212a4dea0b56f (diff)
parent05b685fbabb7fdcab72cb42b27db916fd74b2265 (diff)
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu Apr 9 10:55:11 2015 BST using RSA key ID 81AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" * remotes/stefanha/tags/block-pull-request: block/iscsi: handle zero events from iscsi_which_events aio: strengthen memory barriers for bottom half scheduling virtio-blk: correctly dirty guest memory qcow2: Fix header update with overridden backing file Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--async.c28
-rw-r--r--block/iscsi.c33
-rw-r--r--block/qcow2.c29
-rw-r--r--block/qcow2.h6
-rw-r--r--hw/block/dataplane/virtio-blk.c3
-rw-r--r--hw/block/virtio-blk.c13
-rw-r--r--include/hw/virtio/virtio-blk.h1
-rwxr-xr-xtests/qemu-iotests/13095
-rw-r--r--tests/qemu-iotests/130.out43
-rw-r--r--tests/qemu-iotests/group1
10 files changed, 220 insertions, 32 deletions
diff --git a/async.c b/async.c
index 2be88cc9e9..2b51e87679 100644
--- a/async.c
+++ b/async.c
@@ -72,12 +72,13 @@ int aio_bh_poll(AioContext *ctx)
/* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends();
next = bh->next;
- if (!bh->deleted && bh->scheduled) {
- bh->scheduled = 0;
- /* Paired with write barrier in bh schedule to ensure reading for
- * idle & callbacks coming after bh's scheduling.
- */
- smp_rmb();
+ /* The atomic_xchg is paired with the one in qemu_bh_schedule. The
+ * implicit memory barrier ensures that the callback sees all writes
+ * done by the scheduling thread. It also ensures that the scheduling
+ * thread sees the zero before bh->cb has run, and thus will call
+ * aio_notify again if necessary.
+ */
+ if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
if (!bh->idle)
ret = 1;
bh->idle = 0;
@@ -108,33 +109,28 @@ int aio_bh_poll(AioContext *ctx)
void qemu_bh_schedule_idle(QEMUBH *bh)
{
- if (bh->scheduled)
- return;
bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
- smp_wmb();
- bh->scheduled = 1;
+ atomic_mb_set(&bh->scheduled, 1);
}
void qemu_bh_schedule(QEMUBH *bh)
{
AioContext *ctx;
- if (bh->scheduled)
- return;
ctx = bh->ctx;
bh->idle = 0;
- /* Make sure that:
+ /* The memory barrier implicit in atomic_xchg makes sure that:
* 1. idle & any writes needed by the callback are done before the
* locations are read in the aio_bh_poll.
* 2. ctx is loaded before scheduled is set and the callback has a chance
* to execute.
*/
- smp_mb();
- bh->scheduled = 1;
- aio_notify(ctx);
+ if (atomic_xchg(&bh->scheduled, 1) == 0) {
+ aio_notify(ctx);
+ }
}
diff --git a/block/iscsi.c b/block/iscsi.c
index 3e34b1f3a2..ba33290000 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,7 @@ typedef struct IscsiLun {
uint64_t num_blocks;
int events;
QEMUTimer *nop_timer;
+ QEMUTimer *event_timer;
uint8_t lbpme;
uint8_t lbprz;
uint8_t has_write_same;
@@ -95,6 +96,7 @@ typedef struct IscsiAIOCB {
#endif
} IscsiAIOCB;
+#define EVENT_INTERVAL 250
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
@@ -256,21 +258,30 @@ static void
iscsi_set_events(IscsiLun *iscsilun)
{
struct iscsi_context *iscsi = iscsilun->iscsi;
- int ev;
+ int ev = iscsi_which_events(iscsi);
- /* We always register a read handler. */
- ev = POLLIN;
- ev |= iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
aio_set_fd_handler(iscsilun->aio_context,
iscsi_get_fd(iscsi),
- iscsi_process_read,
+ (ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun);
+ iscsilun->events = ev;
+ }
+ /* newer versions of libiscsi may return zero events. In this
+ * case start a timer to ensure we are able to return to service
+ * once this situation changes. */
+ if (!ev) {
+ timer_mod(iscsilun->event_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}
+}
- iscsilun->events = ev;
+static void iscsi_timed_set_events(void *opaque)
+{
+ IscsiLun *iscsilun = opaque;
+ iscsi_set_events(iscsilun);
}
static void
@@ -1214,6 +1225,11 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
timer_free(iscsilun->nop_timer);
iscsilun->nop_timer = NULL;
}
+ if (iscsilun->event_timer) {
+ timer_del(iscsilun->event_timer);
+ timer_free(iscsilun->event_timer);
+ iscsilun->event_timer = NULL;
+ }
}
static void iscsi_attach_aio_context(BlockDriverState *bs,
@@ -1230,6 +1246,11 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
iscsi_nop_timed_event, iscsilun);
timer_mod(iscsilun->nop_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+
+ /* Prepare a timer for a delayed call to iscsi_set_events */
+ iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
+ QEMU_CLOCK_REALTIME, SCALE_MS,
+ iscsi_timed_set_events, iscsilun);
}
static bool iscsi_is_write_protected(IscsiLun *iscsilun)
diff --git a/block/qcow2.c b/block/qcow2.c
index 32bdf756ca..316a8db22b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -140,6 +140,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
return 3;
}
bs->backing_format[ext.len] = '\0';
+ s->image_backing_format = g_strdup(bs->backing_format);
#ifdef DEBUG_EXT
printf("Qcow2: Got format extension %s\n", bs->backing_format);
#endif
@@ -884,6 +885,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
bs->backing_file[len] = '\0';
+ s->image_backing_file = g_strdup(bs->backing_file);
}
/* Internal snapshots */
@@ -1457,6 +1459,9 @@ static void qcow2_close(BlockDriverState *bs)
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
+ g_free(s->image_backing_file);
+ g_free(s->image_backing_format);
+
g_free(s->cluster_cache);
qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs);
@@ -1622,9 +1627,10 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Backing file format header extension */
- if (*bs->backing_format) {
+ if (s->image_backing_format) {
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
- bs->backing_format, strlen(bs->backing_format),
+ s->image_backing_format,
+ strlen(s->image_backing_format),
buflen);
if (ret < 0) {
goto fail;
@@ -1682,8 +1688,8 @@ int qcow2_update_header(BlockDriverState *bs)
buflen -= ret;
/* Backing file name */
- if (*bs->backing_file) {
- size_t backing_file_len = strlen(bs->backing_file);
+ if (s->image_backing_file) {
+ size_t backing_file_len = strlen(s->image_backing_file);
if (buflen < backing_file_len) {
ret = -ENOSPC;
@@ -1691,7 +1697,7 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Using strncpy is ok here, since buf is not NUL-terminated. */
- strncpy(buf, bs->backing_file, buflen);
+ strncpy(buf, s->image_backing_file, buflen);
header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
header->backing_file_size = cpu_to_be32(backing_file_len);
@@ -1712,9 +1718,17 @@ fail:
static int qcow2_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt)
{
+ BDRVQcowState *s = bs->opaque;
+
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+ g_free(s->image_backing_file);
+ g_free(s->image_backing_format);
+
+ s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
+ s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
+
return qcow2_update_header(bs);
}
@@ -2751,8 +2765,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
if (backing_file || backing_format) {
- ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
- backing_format ?: bs->backing_format);
+ ret = qcow2_change_backing_file(bs,
+ backing_file ?: s->image_backing_file,
+ backing_format ?: s->image_backing_format);
if (ret < 0) {
return ret;
}
diff --git a/block/qcow2.h b/block/qcow2.h
index aa6d367818..422b825138 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -283,6 +283,12 @@ typedef struct BDRVQcowState {
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards;
+
+ /* Backing file path and format as stored in the image (this is not the
+ * effective path/format, which may be the result of a runtime option
+ * override) */
+ char *image_backing_file;
+ char *image_backing_format;
} BDRVQcowState;
struct QCowAIOCB;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd41478b08..3db139b8a4 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
VirtIOBlockDataPlane *s = req->dev->dataplane;
stb_p(&req->in->status, status);
- vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
- req->qiov.size + sizeof(*req->in));
+ vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
/* Suppress notification to guest by BH and its scheduled
* flag because requests are completed as a batch after io
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d2a1..9546fd2919 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
req->dev = s;
req->qiov.size = 0;
+ req->in_len = 0;
req->next = NULL;
req->mr_next = NULL;
return req;
@@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status);
- virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+ virtqueue_push(s->vq, &req->elem, req->in_len);
virtio_notify(vdev, s->vq);
}
@@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
if (ret) {
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT);
+ /* Note that memory may be dirtied on read failure. If the
+ * virtio request is not completed here, as is the case for
+ * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+ * correctly during live migration. While this is ugly,
+ * it is acceptable because the device is free to write to
+ * the memory until the request is completed (which will
+ * happen on the other side of the migration).
+ */
if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
continue;
}
@@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
exit(1);
}
+ /* We always touch the last byte, so just see how big in_iov is. */
+ req->in_len = iov_size(in_iov, in_num);
req->in = (void *)in_iov[in_num - 1].iov_base
+ in_iov[in_num - 1].iov_len
- sizeof(struct virtio_blk_inhdr);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3ffcd96b8..6bf5905c52 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out;
QEMUIOVector qiov;
+ size_t in_len;
struct VirtIOBlockReq *next;
struct VirtIOBlockReq *mr_next;
BlockAcctCookie acct;
diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130
new file mode 100755
index 0000000000..bc26247e3f
--- /dev/null
+++ b/tests/qemu-iotests/130
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# Test that temporary backing file overrides (on the command line or in
+# blockdev-add) don't replace the original path stored in the image during
+# header updates.
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+qemu_comm_method="monitor"
+
+
+TEST_IMG="$TEST_IMG.orig" _make_test_img 64M
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img 64M
+_img_info | _filter_img_info
+
+echo
+echo "=== HMP commit ==="
+echo
+# bdrv_make_empty() involves a header update for qcow2
+
+# Test that a backing file isn't written
+_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base"
+_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
+_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
+_cleanup_qemu
+_img_info | _filter_img_info
+
+# Make sure that if there was a backing file that was just overridden on the
+# command line, that backing file is retained, with the right format
+_make_test_img -F raw -b "$TEST_IMG.orig" 64M
+_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT
+_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
+_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
+_cleanup_qemu
+_img_info | _filter_img_info
+
+echo
+echo "=== Marking image dirty (lazy refcounts) ==="
+echo
+
+# Test that a backing file isn't written
+_make_test_img 64M
+$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
+_img_info | _filter_img_info
+
+# Make sure that if there was a backing file that was just overridden on the
+# command line, that backing file is retained, with the right format
+_make_test_img -F raw -b "$TEST_IMG.orig" 64M
+$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,backing.driver=$IMGFMT,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
+_img_info | _filter_img_info
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/130.out b/tests/qemu-iotests/130.out
new file mode 100644
index 0000000000..ea68b5d283
--- /dev/null
+++ b/tests/qemu-iotests/130.out
@@ -0,0 +1,43 @@
+QA output created by 130
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+
+=== HMP commit ===
+
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
+(qemu)
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
+(qemu)
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+backing file: TEST_DIR/t.IMGFMT.orig
+backing file format: raw
+
+=== Marking image dirty (lazy refcounts) ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+backing file: TEST_DIR/t.IMGFMT.orig
+backing file format: raw
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 62621278e2..bcf25786ab 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -124,3 +124,4 @@
121 rw auto
123 rw auto quick
128 rw auto quick
+130 rw auto quick