author     Peter Maydell <peter.maydell@linaro.org>    2022-02-12 22:04:07 +0000
committer  Peter Maydell <peter.maydell@linaro.org>    2022-02-12 22:04:07 +0000
commit     48033ad678ae2def43bf0d543a2c4c3d2a93feaf (patch)
tree       2bcf336c3e01c23c74f63861130861376efc45b2
parent     da36afa2d8dc9c778292ff172083caba9558b4fa (diff)
parent     8cfbe929e8c26050f0a4580a1606a370a947d4ce (diff)
Merge remote-tracking branch 'remotes/vsementsov/tags/pull-nbd-2022-02-09-v2' into staging
nbd: handle AioContext change correctly

v2: add my s-o-b marks to each commit

# gpg: Signature made Fri 11 Feb 2022 13:14:55 GMT
# gpg:                using RSA key 8B9C26CDB2FD147C880E86A1561F24C1F19F79FB
# gpg: Good signature from "Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 8B9C 26CD B2FD 147C 880E 86A1 561F 24C1 F19F 79FB

* remotes/vsementsov/tags/pull-nbd-2022-02-09-v2:
  iotests/281: Let NBD connection yield in iothread
  block/nbd: Move s->ioc on AioContext change
  iotests/281: Test lingering timers
  iotests.py: Add QemuStorageDaemon class
  block/nbd: Assert there are no timers when closed
  block/nbd: Delete open timer when done
  block/nbd: Delete reconnect delay timer when done

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--   block/nbd.c                     64
-rwxr-xr-x   tests/qemu-iotests/281         101
-rw-r--r--   tests/qemu-iotests/281.out       4
-rw-r--r--   tests/qemu-iotests/iotests.py   40
4 files changed, 205 insertions(+), 4 deletions(-)
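For orientation, the scenario this series fixes can be reproduced by hand from the qemu-iotests environment; the sketch below is a simplified version of what the new TestYieldingAndTimers case in tests/qemu-iotests/281 (further down in this diff) automates. It assumes the iotests harness is available, so iotests.VM and the new QemuStorageDaemon helper can be imported, and uses illustrative node, export, and socket names.

import os
import time

import iotests
from iotests import QemuStorageDaemon

sock = os.path.join(iotests.sock_dir, 'nbd.sock')

# Export a null-co node over NBD via the storage daemon.
qsd = QemuStorageDaemon(
    '--blockdev', 'null-co,node-name=null,read-zeroes=true',
    '--nbd-server', f'addr.type=unix,addr.path={sock}',
    '--export', 'nbd,id=exp,node-name=null,name=exp,writable=true')

# Connect a client whose non-zero open-timeout makes nbd_open() arm a timer.
vm = iotests.VM()
vm.add_blockdev('nbd,node-name=nbd,server.type=unix,'
                f'server.path={sock},export=exp,'
                'reconnect-delay=1,open-timeout=1')
vm.launch()

# Before this series, the open timer could stay armed after a successful
# open; deleting the node then freed the state the timer would later touch.
vm.qmp('blockdev-del', node_name='nbd')
time.sleep(2)  # give a leaked timer time to fire (both timeouts are 1 s)

vm.shutdown()
qsd.stop()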
diff --git a/block/nbd.c b/block/nbd.c
index 63dbfa807d..5853d85d60 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -110,6 +110,10 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs)
yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));
+ /* Must not leave timers behind that would access freed data */
+ assert(!s->reconnect_delay_timer);
+ assert(!s->open_timer);
+
object_unref(OBJECT(s->tlscreds));
qapi_free_SocketAddress(s->saddr);
s->saddr = NULL;
@@ -381,6 +385,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
}
nbd_co_do_establish_connection(s->bs, NULL);
+
+ /*
+ * The reconnect attempt is done (maybe successfully, maybe not), so
+ * we no longer need this timer. Delete it so it will not outlive
+ * this I/O request (so draining removes all timers).
+ */
+ reconnect_delay_timer_del(s);
}
static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle)
@@ -1878,11 +1889,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ /*
+ * The connect attempt is done, so we no longer need this timer.
+ * Delete it, because we do not want it to be around when this node
+ * is drained or closed.
+ */
+ open_timer_del(s);
+
nbd_client_connection_enable_retry(s->conn);
return 0;
fail:
+ open_timer_del(s);
nbd_clear_bdrvstate(bs);
return ret;
}
@@ -2036,6 +2055,42 @@ static void nbd_cancel_in_flight(BlockDriverState *bs)
nbd_co_establish_connection_cancel(s->conn);
}
+static void nbd_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ /* The open_timer is used only during nbd_open() */
+ assert(!s->open_timer);
+
+ /*
+ * The reconnect_delay_timer is scheduled in I/O paths when the
+ * connection is lost, to cancel the reconnection attempt after a
+ * given time. Once this attempt is done (successfully or not),
+ * nbd_reconnect_attempt() ensures the timer is deleted before the
+ * respective I/O request is resumed.
+ * Since the AioContext can only be changed when a node is drained,
+ * the reconnect_delay_timer cannot be active here.
+ */
+ assert(!s->reconnect_delay_timer);
+
+ if (s->ioc) {
+ qio_channel_attach_aio_context(s->ioc, new_context);
+ }
+}
+
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ assert(!s->open_timer);
+ assert(!s->reconnect_delay_timer);
+
+ if (s->ioc) {
+ qio_channel_detach_aio_context(s->ioc);
+ }
+}
+
static BlockDriver bdrv_nbd = {
.format_name = "nbd",
.protocol_name = "nbd",
@@ -2059,6 +2114,9 @@ static BlockDriver bdrv_nbd = {
.bdrv_dirname = nbd_dirname,
.strong_runtime_opts = nbd_strong_runtime_opts,
.bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
};
static BlockDriver bdrv_nbd_tcp = {
@@ -2084,6 +2142,9 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_dirname = nbd_dirname,
.strong_runtime_opts = nbd_strong_runtime_opts,
.bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
};
static BlockDriver bdrv_nbd_unix = {
@@ -2109,6 +2170,9 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_dirname = nbd_dirname,
.strong_runtime_opts = nbd_strong_runtime_opts,
.bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
};
static void bdrv_nbd_init(void)
diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281
index 318e333939..5e1339bd75 100755
--- a/tests/qemu-iotests/281
+++ b/tests/qemu-iotests/281
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
-# group: rw quick
+# group: rw
#
# Test cases for blockdev + IOThread interactions
#
@@ -20,8 +20,9 @@
#
import os
+import time
import iotests
-from iotests import qemu_img
+from iotests import qemu_img, QemuStorageDaemon
image_len = 64 * 1024 * 1024
@@ -243,6 +244,102 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase):
# Hangs on failure, we expect this error.
self.assert_qmp(result, 'error/class', 'GenericError')
+# Test for RHBZ#2033626
+class TestYieldingAndTimers(iotests.QMPTestCase):
+ sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+ qsd = None
+
+ def setUp(self):
+ self.create_nbd_export()
+
+ # Simple VM with an NBD block device connected to the NBD export
+ # provided by the QSD, and an (initially unused) iothread
+ self.vm = iotests.VM()
+ self.vm.add_object('iothread,id=iothr')
+ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' +
+ f'server.path={self.sock},export=exp,' +
+ 'reconnect-delay=1,open-timeout=1')
+
+ self.vm.launch()
+
+ def tearDown(self):
+ self.stop_nbd_export()
+ self.vm.shutdown()
+
+ def test_timers_with_blockdev_del(self):
+ # The NBD BDS will have had an active open timer, because setUp() gave
+ # a positive value for @open-timeout. It should be gone once the BDS
+ # has been opened.
+ # (But there used to be a bug where it remained active, which will
+ # become important below.)
+
+ # Stop and restart the NBD server, and do some I/O on the client to
+ # trigger a reconnect and start the reconnect delay timer
+ self.stop_nbd_export()
+ self.create_nbd_export()
+
+ result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io nbd "write 0 512"')
+ self.assert_qmp(result, 'return', '')
+
+ # Reconnect is done, so the reconnect delay timer should be gone.
+ # (This is similar to how the open timer should be gone after open,
+ # and similarly there used to be a bug where it was not gone.)
+
+ # Delete the BDS to see whether both timers are gone. If they are not,
+ # they will remain active, fire later, and then access freed data.
+ # (Or, with "block/nbd: Assert there are no timers when closed"
+ # applied, the assertions added in that patch will fail.)
+ result = self.vm.qmp('blockdev-del', node_name='nbd')
+ self.assert_qmp(result, 'return', {})
+
+ # Give the timers some time to fire (both have a timeout of 1 s).
+ # (Sleeping in an iotest may ring some alarm bells, but note that if
+ # the timing is off here, the test will just always pass. If we kill
+ # the VM too early, then we just kill the timers before they can fire,
+ # thus not see the error, and so the test will pass.)
+ time.sleep(2)
+
+ def test_yield_in_iothread(self):
+ # Move the NBD node to the I/O thread; the NBD block driver should
+ # attach the connection's QIOChannel to that thread's AioContext, too
+ result = self.vm.qmp('x-blockdev-set-iothread',
+ node_name='nbd', iothread='iothr')
+ self.assert_qmp(result, 'return', {})
+
+ # Do some I/O that will be throttled by the QSD, so that the network
+ # connection hopefully will yield here. When it is resumed, it must
+ # then be resumed in the I/O thread's AioContext.
+ result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io nbd "read 0 128K"')
+ self.assert_qmp(result, 'return', '')
+
+ def create_nbd_export(self):
+ assert self.qsd is None
+
+ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s),
+ # writes are not.
+ self.qsd = QemuStorageDaemon(
+ '--object',
+ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536',
+
+ '--blockdev',
+ 'null-co,node-name=null,read-zeroes=true',
+
+ '--blockdev',
+ 'throttle,node-name=thr,file=null,throttle-group=thrgr',
+
+ '--nbd-server',
+ f'addr.type=unix,addr.path={self.sock}',
+
+ '--export',
+ 'nbd,id=exp,node-name=thr,name=exp,writable=true'
+ )
+
+ def stop_nbd_export(self):
+ self.qsd.stop()
+ self.qsd = None
+
if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2'],
supported_protocols=['file'],
diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out
index 89968f35d7..3f8a935a08 100644
--- a/tests/qemu-iotests/281.out
+++ b/tests/qemu-iotests/281.out
@@ -1,5 +1,5 @@
-....
+......
----------------------------------------------------------------------
-Ran 4 tests
+Ran 6 tests
OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 8cdb381f2a..6ba65eb1ff 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -73,6 +73,8 @@ if os.environ.get('QEMU_NBD_OPTIONS'):
qemu_prog = os.environ.get('QEMU_PROG', 'qemu')
qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ')
+qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon')
+
gdb_qemu_env = os.environ.get('GDB_OPTIONS')
qemu_gdb = []
if gdb_qemu_env:
@@ -345,6 +347,44 @@ class QemuIoInteractive:
return self._read_output()
+class QemuStorageDaemon:
+ def __init__(self, *args: str, instance_id: str = 'a'):
+ assert '--pidfile' not in args
+ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
+ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
+
+ # Cannot use with here, we want the subprocess to stay around
+ # pylint: disable=consider-using-with
+ self._p = subprocess.Popen(all_args)
+ while not os.path.exists(self.pidfile):
+ if self._p.poll() is not None:
+ cmd = ' '.join(all_args)
+ raise RuntimeError(
+ 'qemu-storage-daemon terminated with exit code ' +
+ f'{self._p.returncode}: {cmd}')
+
+ time.sleep(0.01)
+
+ with open(self.pidfile, encoding='utf-8') as f:
+ self._pid = int(f.read().strip())
+
+ assert self._pid == self._p.pid
+
+ def stop(self, kill_signal=15):
+ self._p.send_signal(kill_signal)
+ self._p.wait()
+ self._p = None
+
+ try:
+ os.remove(self.pidfile)
+ except OSError:
+ pass
+
+ def __del__(self):
+ if self._p is not None:
+ self.stop(kill_signal=9)
+
+
def qemu_nbd(*args):
'''Run qemu-nbd in daemon mode and return the parent's exit code'''
return subprocess.call(qemu_nbd_args + ['--fork'] + list(args))
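As a usage note on the helper above: each QemuStorageDaemon instance writes its pidfile to test_dir/qsd-<instance_id>-pid, so tests that start more than one daemon should pass distinct instance_id values. A minimal sketch, with illustrative arguments and assuming the iotests environment:

from iotests import QemuStorageDaemon

qsd_a = QemuStorageDaemon('--blockdev', 'null-co,node-name=null',
                          instance_id='a')
qsd_b = QemuStorageDaemon('--blockdev', 'null-co,node-name=null',
                          instance_id='b')
try:
    pass  # ... point NBD clients, QMP commands, etc. at the daemons here ...
finally:
    # stop() sends SIGTERM (signal 15) and removes the pidfile;
    # __del__() falls back to SIGKILL if stop() was never called.
    qsd_a.stop()
    qsd_b.stop()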