diff options
-rw-r--r-- | async.c | 28 | ||||
-rw-r--r-- | block/iscsi.c | 33 | ||||
-rw-r--r-- | block/qcow2.c | 29 | ||||
-rw-r--r-- | block/qcow2.h | 6 | ||||
-rw-r--r-- | hw/block/dataplane/virtio-blk.c | 3 | ||||
-rw-r--r-- | hw/block/virtio-blk.c | 13 | ||||
-rw-r--r-- | include/hw/virtio/virtio-blk.h | 1 | ||||
-rwxr-xr-x | tests/qemu-iotests/130 | 95 | ||||
-rw-r--r-- | tests/qemu-iotests/130.out | 43 | ||||
-rw-r--r-- | tests/qemu-iotests/group | 1 |
10 files changed, 220 insertions, 32 deletions
@@ -72,12 +72,13 @@ int aio_bh_poll(AioContext *ctx) /* Make sure that fetching bh happens before accessing its members */ smp_read_barrier_depends(); next = bh->next; - if (!bh->deleted && bh->scheduled) { - bh->scheduled = 0; - /* Paired with write barrier in bh schedule to ensure reading for - * idle & callbacks coming after bh's scheduling. - */ - smp_rmb(); + /* The atomic_xchg is paired with the one in qemu_bh_schedule. The + * implicit memory barrier ensures that the callback sees all writes + * done by the scheduling thread. It also ensures that the scheduling + * thread sees the zero before bh->cb has run, and thus will call + * aio_notify again if necessary. + */ + if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) { if (!bh->idle) ret = 1; bh->idle = 0; @@ -108,33 +109,28 @@ int aio_bh_poll(AioContext *ctx) void qemu_bh_schedule_idle(QEMUBH *bh) { - if (bh->scheduled) - return; bh->idle = 1; /* Make sure that idle & any writes needed by the callback are done * before the locations are read in the aio_bh_poll. */ - smp_wmb(); - bh->scheduled = 1; + atomic_mb_set(&bh->scheduled, 1); } void qemu_bh_schedule(QEMUBH *bh) { AioContext *ctx; - if (bh->scheduled) - return; ctx = bh->ctx; bh->idle = 0; - /* Make sure that: + /* The memory barrier implicit in atomic_xchg makes sure that: * 1. idle & any writes needed by the callback are done before the * locations are read in the aio_bh_poll. * 2. ctx is loaded before scheduled is set and the callback has a chance * to execute. */ - smp_mb(); - bh->scheduled = 1; - aio_notify(ctx); + if (atomic_xchg(&bh->scheduled, 1) == 0) { + aio_notify(ctx); + } } diff --git a/block/iscsi.c b/block/iscsi.c index 3e34b1f3a2..ba33290000 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -56,6 +56,7 @@ typedef struct IscsiLun { uint64_t num_blocks; int events; QEMUTimer *nop_timer; + QEMUTimer *event_timer; uint8_t lbpme; uint8_t lbprz; uint8_t has_write_same; @@ -95,6 +96,7 @@ typedef struct IscsiAIOCB { #endif } IscsiAIOCB; +#define EVENT_INTERVAL 250 #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times) @@ -256,21 +258,30 @@ static void iscsi_set_events(IscsiLun *iscsilun) { struct iscsi_context *iscsi = iscsilun->iscsi; - int ev; + int ev = iscsi_which_events(iscsi); - /* We always register a read handler. */ - ev = POLLIN; - ev |= iscsi_which_events(iscsi); if (ev != iscsilun->events) { aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi), - iscsi_process_read, + (ev & POLLIN) ? iscsi_process_read : NULL, (ev & POLLOUT) ? iscsi_process_write : NULL, iscsilun); + iscsilun->events = ev; + } + /* newer versions of libiscsi may return zero events. In this + * case start a timer to ensure we are able to return to service + * once this situation changes. */ + if (!ev) { + timer_mod(iscsilun->event_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL); } +} - iscsilun->events = ev; +static void iscsi_timed_set_events(void *opaque) +{ + IscsiLun *iscsilun = opaque; + iscsi_set_events(iscsilun); } static void @@ -1214,6 +1225,11 @@ static void iscsi_detach_aio_context(BlockDriverState *bs) timer_free(iscsilun->nop_timer); iscsilun->nop_timer = NULL; } + if (iscsilun->event_timer) { + timer_del(iscsilun->event_timer); + timer_free(iscsilun->event_timer); + iscsilun->event_timer = NULL; + } } static void iscsi_attach_aio_context(BlockDriverState *bs, @@ -1230,6 +1246,11 @@ static void iscsi_attach_aio_context(BlockDriverState *bs, iscsi_nop_timed_event, iscsilun); timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); + + /* Prepare a timer for a delayed call to iscsi_set_events */ + iscsilun->event_timer = aio_timer_new(iscsilun->aio_context, + QEMU_CLOCK_REALTIME, SCALE_MS, + iscsi_timed_set_events, iscsilun); } static bool iscsi_is_write_protected(IscsiLun *iscsilun) diff --git a/block/qcow2.c b/block/qcow2.c index 32bdf756ca..316a8db22b 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -140,6 +140,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, return 3; } bs->backing_format[ext.len] = '\0'; + s->image_backing_format = g_strdup(bs->backing_format); #ifdef DEBUG_EXT printf("Qcow2: Got format extension %s\n", bs->backing_format); #endif @@ -884,6 +885,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } bs->backing_file[len] = '\0'; + s->image_backing_file = g_strdup(bs->backing_file); } /* Internal snapshots */ @@ -1457,6 +1459,9 @@ static void qcow2_close(BlockDriverState *bs) g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); + g_free(s->image_backing_file); + g_free(s->image_backing_format); + g_free(s->cluster_cache); qemu_vfree(s->cluster_data); qcow2_refcount_close(bs); @@ -1622,9 +1627,10 @@ int qcow2_update_header(BlockDriverState *bs) } /* Backing file format header extension */ - if (*bs->backing_format) { + if (s->image_backing_format) { ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, - bs->backing_format, strlen(bs->backing_format), + s->image_backing_format, + strlen(s->image_backing_format), buflen); if (ret < 0) { goto fail; @@ -1682,8 +1688,8 @@ int qcow2_update_header(BlockDriverState *bs) buflen -= ret; /* Backing file name */ - if (*bs->backing_file) { - size_t backing_file_len = strlen(bs->backing_file); + if (s->image_backing_file) { + size_t backing_file_len = strlen(s->image_backing_file); if (buflen < backing_file_len) { ret = -ENOSPC; @@ -1691,7 +1697,7 @@ int qcow2_update_header(BlockDriverState *bs) } /* Using strncpy is ok here, since buf is not NUL-terminated. */ - strncpy(buf, bs->backing_file, buflen); + strncpy(buf, s->image_backing_file, buflen); header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); header->backing_file_size = cpu_to_be32(backing_file_len); @@ -1712,9 +1718,17 @@ fail: static int qcow2_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt) { + BDRVQcowState *s = bs->opaque; + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); + g_free(s->image_backing_file); + g_free(s->image_backing_format); + + s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; + s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; + return qcow2_update_header(bs); } @@ -2751,8 +2765,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (backing_file || backing_format) { - ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file, - backing_format ?: bs->backing_format); + ret = qcow2_change_backing_file(bs, + backing_file ?: s->image_backing_file, + backing_format ?: s->image_backing_format); if (ret < 0) { return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index aa6d367818..422b825138 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -283,6 +283,12 @@ typedef struct BDRVQcowState { QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; QTAILQ_HEAD (, Qcow2DiscardRegion) discards; bool cache_discards; + + /* Backing file path and format as stored in the image (this is not the + * effective path/format, which may be the result of a runtime option + * override) */ + char *image_backing_file; + char *image_backing_format; } BDRVQcowState; struct QCowAIOCB; diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index cd41478b08..3db139b8a4 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) VirtIOBlockDataPlane *s = req->dev->dataplane; stb_p(&req->in->status, status); - vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, - req->qiov.size + sizeof(*req->in)); + vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); /* Suppress notification to guest by BH and its scheduled * flag because requests are completed as a batch after io diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 000c38d2a1..9546fd2919 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; req->qiov.size = 0; + req->in_len = 0; req->next = NULL; req->mr_next = NULL; return req; @@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, trace_virtio_blk_req_complete(req, status); stb_p(&req->in->status, status); - virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); + virtqueue_push(s->vq, &req->elem, req->in_len); virtio_notify(vdev, s->vq); } @@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret) if (ret) { int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); bool is_read = !(p & VIRTIO_BLK_T_OUT); + /* Note that memory may be dirtied on read failure. If the + * virtio request is not completed here, as is the case for + * BLOCK_ERROR_ACTION_STOP, the memory may not be copied + * correctly during live migration. While this is ugly, + * it is acceptable because the device is free to write to + * the memory until the request is completed (which will + * happen on the other side of the migration). + */ if (virtio_blk_handle_rw_error(req, -ret, is_read)) { continue; } @@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) exit(1); } + /* We always touch the last byte, so just see how big in_iov is. */ + req->in_len = iov_size(in_iov, in_num); req->in = (void *)in_iov[in_num - 1].iov_base + in_iov[in_num - 1].iov_len - sizeof(struct virtio_blk_inhdr); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index b3ffcd96b8..6bf5905c52 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq { struct virtio_blk_inhdr *in; struct virtio_blk_outhdr out; QEMUIOVector qiov; + size_t in_len; struct VirtIOBlockReq *next; struct VirtIOBlockReq *mr_next; BlockAcctCookie acct; diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130 new file mode 100755 index 0000000000..bc26247e3f --- /dev/null +++ b/tests/qemu-iotests/130 @@ -0,0 +1,95 @@ +#!/bin/bash +# +# Test that temporary backing file overrides (on the command line or in +# blockdev-add) don't replace the original path stored in the image during +# header updates. +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +qemu_comm_method="monitor" + + +TEST_IMG="$TEST_IMG.orig" _make_test_img 64M +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +_make_test_img 64M +_img_info | _filter_img_info + +echo +echo "=== HMP commit ===" +echo +# bdrv_make_empty() involves a header update for qcow2 + +# Test that a backing file isn't written +_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base" +_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)" +_send_qemu_cmd $QEMU_HANDLE '' '(qemu)' +_cleanup_qemu +_img_info | _filter_img_info + +# Make sure that if there was a backing file that was just overridden on the +# command line, that backing file is retained, with the right format +_make_test_img -F raw -b "$TEST_IMG.orig" 64M +_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT +_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)" +_send_qemu_cmd $QEMU_HANDLE '' '(qemu)' +_cleanup_qemu +_img_info | _filter_img_info + +echo +echo "=== Marking image dirty (lazy refcounts) ===" +echo + +# Test that a backing file isn't written +_make_test_img 64M +$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io +_img_info | _filter_img_info + +# Make sure that if there was a backing file that was just overridden on the +# command line, that backing file is retained, with the right format +_make_test_img -F raw -b "$TEST_IMG.orig" 64M +$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,backing.driver=$IMGFMT,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io +_img_info | _filter_img_info + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/130.out b/tests/qemu-iotests/130.out new file mode 100644 index 0000000000..ea68b5d283 --- /dev/null +++ b/tests/qemu-iotests/130.out @@ -0,0 +1,43 @@ +QA output created by 130 +Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) + +=== HMP commit === + +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) c[K[Dco[K[D[Dcom[K[D[D[Dcomm[K[D[D[D[Dcommi[K[D[D[D[D[Dcommit[K[D[D[D[D[D[Dcommit [K[D[D[D[D[D[D[Dcommit i[K[D[D[D[D[D[D[D[Dcommit id[K[D[D[D[D[D[D[D[D[Dcommit ide[K[D[D[D[D[D[D[D[D[D[Dcommit ide0[K[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-[K[D[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-h[K[D[D[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-hd[K[D[D[D[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-hd0[K +(qemu) +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw' +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) c[K[Dco[K[D[Dcom[K[D[D[Dcomm[K[D[D[D[Dcommi[K[D[D[D[D[Dcommit[K[D[D[D[D[D[Dcommit [K[D[D[D[D[D[D[Dcommit i[K[D[D[D[D[D[D[D[Dcommit id[K[D[D[D[D[D[D[D[D[Dcommit ide[K[D[D[D[D[D[D[D[D[D[Dcommit ide0[K[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-[K[D[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-h[K[D[D[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-hd[K[D[D[D[D[D[D[D[D[D[D[D[D[D[Dcommit ide0-hd0[K +(qemu) +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +backing file: TEST_DIR/t.IMGFMT.orig +backing file format: raw + +=== Marking image dirty (lazy refcounts) === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw' +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +backing file: TEST_DIR/t.IMGFMT.orig +backing file format: raw +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 62621278e2..bcf25786ab 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -124,3 +124,4 @@ 121 rw auto 123 rw auto quick 128 rw auto quick +130 rw auto quick |