diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2021-12-23 07:56:01 -0800 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2021-12-23 07:56:01 -0800 |
commit | 1bd88c4542e97f49955c142f8dc04dd32df9e91f (patch) | |
tree | db76f6a4ac0ebefcf14e4e7e189f1ec34b3c16e6 | |
parent | 6f016a2f7909eb6d595436a2f56cdf7cdf3e3c68 (diff) | |
parent | ab7f7e67a7e7b49964109501dfcde4ec29bae60e (diff) |
Merge tag 'pull-nbd-2021-12-22-v2' of https://src.openvz.org/scm/~vsementsov/qemu into staging
nbd: reconnect-on-open feature
v2: simple fix for mypy and pylint complaints on patch 04
# gpg: Signature made Thu 23 Dec 2021 12:45:20 AM PST
# gpg: using RSA key 8B9C26CDB2FD147C880E86A1561F24C1F19F79FB
# gpg: Good signature from "Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 8B9C 26CD B2FD 147C 880E 86A1 561F 24C1 F19F 79FB
* tag 'pull-nbd-2021-12-22-v2' of https://src.openvz.org/scm/~vsementsov/qemu:
iotests: add nbd-reconnect-on-open test
iotests.py: add qemu_io_popen()
iotests.py: add and use qemu_io_wrap_args()
iotests.py: add qemu_tool_popen()
nbd/client-connection: improve error message of cancelled attempt
nbd/client-connection: nbd_co_establish_connection(): return real error
nbd: allow reconnect on open, with corresponding new options
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | block/nbd.c | 45 | ||||
-rw-r--r-- | nbd/client-connection.c | 57 | ||||
-rw-r--r-- | qapi/block-core.json | 9 | ||||
-rw-r--r-- | tests/qemu-iotests/iotests.py | 37 | ||||
-rwxr-xr-x | tests/qemu-iotests/tests/nbd-reconnect-on-open | 71 | ||||
-rw-r--r-- | tests/qemu-iotests/tests/nbd-reconnect-on-open.out | 11 |
6 files changed, 199 insertions, 31 deletions
diff --git a/block/nbd.c b/block/nbd.c index 5ef462db1b..63dbfa807d 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -80,6 +80,7 @@ typedef struct BDRVNBDState { NBDClientState state; QEMUTimer *reconnect_delay_timer; + QEMUTimer *open_timer; NBDClientRequest requests[MAX_NBD_REQUESTS]; NBDReply reply; @@ -87,6 +88,7 @@ typedef struct BDRVNBDState { /* Connection parameters */ uint32_t reconnect_delay; + uint32_t open_timeout; SocketAddress *saddr; char *export, *tlscredsid; QCryptoTLSCreds *tlscreds; @@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs) s->state = NBD_CLIENT_QUIT; } +static void open_timer_del(BDRVNBDState *s) +{ + if (s->open_timer) { + timer_free(s->open_timer); + s->open_timer = NULL; + } +} + +static void open_timer_cb(void *opaque) +{ + BDRVNBDState *s = opaque; + + nbd_co_establish_connection_cancel(s->conn); + open_timer_del(s); +} + +static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) +{ + assert(!s->open_timer); + s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs), + QEMU_CLOCK_REALTIME, + SCALE_NS, + open_timer_cb, s); + timer_mod(s->open_timer, expire_time_ns); +} + static bool nbd_client_connecting(BDRVNBDState *s) { NBDClientState state = qatomic_load_acquire(&s->state); @@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = { "future requests before a successful reconnect will " "immediately fail. Default 0", }, + { + .name = "open-timeout", + .type = QEMU_OPT_NUMBER, + .help = "In seconds. If zero, the nbd driver tries the connection " + "only once, and fails to open if the connection fails. " + "If non-zero, the nbd driver will repeat connection " + "attempts until successful or until @open-timeout seconds " + "have elapsed. 
Default 0", + }, { /* end of list */ } }, }; @@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options, } s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0); + s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0); ret = 0; @@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, s->conn = nbd_client_connection_new(s->saddr, true, s->export, s->x_dirty_bitmap, s->tlscreds); - /* TODO: Configurable retry-until-timeout behaviour. */ + if (s->open_timeout) { + nbd_client_connection_enable_retry(s->conn); + open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + s->open_timeout * NANOSECONDS_PER_SECOND); + } + s->state = NBD_CLIENT_CONNECTING_WAIT; ret = nbd_do_establish_connection(bs, errp); if (ret < 0) { diff --git a/nbd/client-connection.c b/nbd/client-connection.c index 695f855754..2bda42641d 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -39,16 +39,18 @@ struct NBDClientConnection { QemuMutex mutex; + NBDExportInfo updated_info; /* - * @sioc and @err represent a connection attempt. While running - * is true, they are only used by the connection thread, and mutex - * locking is not needed. Once the thread finishes, - * nbd_co_establish_connection then steals these pointers while - * under the mutex. + * @sioc represents a successful result. While thread is running, @sioc is + * used only by thread and not protected by mutex. When thread is not + * running, @sioc is stolen by nbd_co_establish_connection() under mutex. */ - NBDExportInfo updated_info; QIOChannelSocket *sioc; QIOChannel *ioc; + /* + * @err represents previous attempt. It may be copied by + * nbd_co_establish_connection() when it reports failure. 
+ */ Error *err; /* All further fields are accessed only under mutex */ @@ -170,18 +172,18 @@ static void *connect_thread_func(void *opaque) qemu_mutex_lock(&conn->mutex); while (!conn->detached) { + Error *local_err = NULL; + assert(!conn->sioc); conn->sioc = qio_channel_socket_new(); qemu_mutex_unlock(&conn->mutex); - error_free(conn->err); - conn->err = NULL; conn->updated_info = conn->initial_info; ret = nbd_connect(conn->sioc, conn->saddr, conn->do_negotiation ? &conn->updated_info : NULL, - conn->tlscreds, &conn->ioc, &conn->err); + conn->tlscreds, &conn->ioc, &local_err); /* * conn->updated_info will finally be returned to the user. Clear the @@ -194,6 +196,10 @@ static void *connect_thread_func(void *opaque) qemu_mutex_lock(&conn->mutex); + error_free(conn->err); + conn->err = NULL; + error_propagate(&conn->err, local_err); + if (ret < 0) { object_unref(OBJECT(conn->sioc)); conn->sioc = NULL; @@ -311,14 +317,17 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, } conn->running = true; - error_free(conn->err); - conn->err = NULL; qemu_thread_create(&thread, "nbd-connect", connect_thread_func, conn, QEMU_THREAD_DETACHED); } if (!blocking) { - error_setg(errp, "No connection at the moment"); + if (conn->err) { + error_propagate(errp, error_copy(conn->err)); + } else { + error_setg(errp, "No connection at the moment"); + } + return NULL; } @@ -339,14 +348,30 @@ nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, * attempt as failed, but leave the connection thread running, * to reuse it for the next connection attempt. */ - error_setg(errp, "Connection attempt cancelled by other operation"); + if (conn->err) { + error_propagate(errp, error_copy(conn->err)); + } else { + /* + * The only possible case here is cancelling by open_timer + * during nbd_open(). So, the error message is for that case. 
+ * If we have more use cases, we can refactor + * nbd_co_establish_connection_cancel() to take an additional + * parameter cancel_reason, that would be passed than to the + * caller of cancelled nbd_co_establish_connection(). + */ + error_setg(errp, "Connection attempt cancelled by timeout"); + } + return NULL; } else { - error_propagate(errp, conn->err); - conn->err = NULL; - if (!conn->sioc) { + /* Thread finished. There must be either error or sioc */ + assert(!conn->err != !conn->sioc); + + if (conn->err) { + error_propagate(errp, error_copy(conn->err)); return NULL; } + if (conn->do_negotiation) { memcpy(info, &conn->updated_info, sizeof(*info)); if (conn->ioc) { diff --git a/qapi/block-core.json b/qapi/block-core.json index 1d3dd9cb48..bd0b285245 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4096,6 +4096,12 @@ # future requests before a successful reconnect will # immediately fail. Default 0 (Since 4.2) # +# @open-timeout: In seconds. If zero, the nbd driver tries the connection +# only once, and fails to open if the connection fails. +# If non-zero, the nbd driver will repeat connection attempts +# until successful or until @open-timeout seconds have elapsed. +# Default 0 (Since 7.0) +# # Features: # @unstable: Member @x-dirty-bitmap is experimental. 
# @@ -4106,7 +4112,8 @@ '*export': 'str', '*tls-creds': 'str', '*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] }, - '*reconnect-delay': 'uint32' } } + '*reconnect-delay': 'uint32', + '*open-timeout': 'uint32' } } ## # @BlockdevOptionsRaw: diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 83bfedb902..1e2f2391d1 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -138,14 +138,22 @@ def unarchive_sample_image(sample, fname): shutil.copyfileobj(f_in, f_out) +def qemu_tool_popen(args: Sequence[str], + connect_stderr: bool = True) -> 'subprocess.Popen[str]': + stderr = subprocess.STDOUT if connect_stderr else None + # pylint: disable=consider-using-with + return subprocess.Popen(args, + stdout=subprocess.PIPE, + stderr=stderr, + universal_newlines=True) + + def qemu_tool_pipe_and_status(tool: str, args: Sequence[str], connect_stderr: bool = True) -> Tuple[str, int]: """ Run a tool and return both its output and its exit code """ - stderr = subprocess.STDOUT if connect_stderr else None - with subprocess.Popen(args, stdout=subprocess.PIPE, - stderr=stderr, universal_newlines=True) as subp: + with qemu_tool_popen(args, connect_stderr) as subp: output = subp.communicate()[0] if subp.returncode < 0: cmd = ' '.join(args) @@ -233,10 +241,18 @@ def img_info_log(filename, filter_path=None, imgopts=False, extra_args=()): filter_path = filename log(filter_img_info(output, filter_path)) +def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: + if '-f' in args or '--image-opts' in args: + return qemu_io_args_no_fmt + list(args) + else: + return qemu_io_args + list(args) + +def qemu_io_popen(*args): + return qemu_tool_popen(qemu_io_wrap_args(args)) + def qemu_io(*args): '''Run qemu-io and return the stdout data''' - args = qemu_io_args + list(args) - return qemu_tool_pipe_and_status('qemu-io', args)[0] + return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0] def qemu_io_log(*args): result = 
qemu_io(*args) @@ -245,12 +261,7 @@ def qemu_io_log(*args): def qemu_io_silent(*args): '''Run qemu-io and return the exit code, suppressing stdout''' - if '-f' in args or '--image-opts' in args: - default_args = qemu_io_args_no_fmt - else: - default_args = qemu_io_args - - args = default_args + list(args) + args = qemu_io_wrap_args(args) result = subprocess.run(args, stdout=subprocess.DEVNULL, check=False) if result.returncode < 0: sys.stderr.write('qemu-io received signal %i: %s\n' % @@ -259,14 +270,14 @@ def qemu_io_silent(*args): def qemu_io_silent_check(*args): '''Run qemu-io and return the true if subprocess returned 0''' - args = qemu_io_args + list(args) + args = qemu_io_wrap_args(args) result = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, check=False) return result.returncode == 0 class QemuIoInteractive: def __init__(self, *args): - self.args = qemu_io_args_no_fmt + list(args) + self.args = qemu_io_wrap_args(args) # We need to keep the Popen objext around, and not # close it immediately. Therefore, disable the pylint check: # pylint: disable=consider-using-with diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open b/tests/qemu-iotests/tests/nbd-reconnect-on-open new file mode 100755 index 0000000000..8be721a24f --- /dev/null +++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# +# Test nbd reconnect on open +# +# Copyright (c) 2020 Virtuozzo International GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import time + +import iotests +from iotests import qemu_img_create, file_path, qemu_io_popen, qemu_nbd, \ + qemu_io_log, log + +iotests.script_initialize(supported_fmts=['qcow2']) + +disk, nbd_sock = file_path('disk', 'nbd-sock') + + +def create_args(open_timeout): + return ['--image-opts', '-c', 'read 0 1M', + f'driver=nbd,open-timeout={open_timeout},' + f'server.type=unix,server.path={nbd_sock}'] + + +def check_fail_to_connect(open_timeout): + log(f'Check fail to connect with {open_timeout} seconds of timeout') + + start_t = time.time() + qemu_io_log(*create_args(open_timeout)) + delta_t = time.time() - start_t + + max_delta = open_timeout + 0.2 + if open_timeout <= delta_t <= max_delta: + log(f'qemu_io finished in {open_timeout}..{max_delta} seconds, OK') + else: + note = 'too early' if delta_t < open_timeout else 'too long' + log(f'qemu_io finished in {delta_t:.1f} seconds, {note}') + + +qemu_img_create('-f', iotests.imgfmt, disk, '1M') + +# Start NBD client when NBD server is not yet running. It should not fail, but +# wait for 5 seconds for the server to be available. +client = qemu_io_popen(*create_args(5)) + +time.sleep(1) +qemu_nbd('-k', nbd_sock, '-f', iotests.imgfmt, disk) + +# client should succeed +log(client.communicate()[0], filters=[iotests.filter_qemu_io]) + +# Server was started without --persistent flag, so it should be off now. Let's +# check it and at the same time check that with open-timeout=0 client fails +# immediately. 
+check_fail_to_connect(0) + +# Check that we will fail after non-zero timeout if server is still unavailable +check_fail_to_connect(1) diff --git a/tests/qemu-iotests/tests/nbd-reconnect-on-open.out b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out new file mode 100644 index 0000000000..a35ae30ea4 --- /dev/null +++ b/tests/qemu-iotests/tests/nbd-reconnect-on-open.out @@ -0,0 +1,11 @@ +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +Check fail to connect with 0 seconds of timeout +qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory + +qemu_io finished in 0..0.2 seconds, OK +Check fail to connect with 1 seconds of timeout +qemu-io: can't open: Failed to connect to 'TEST_DIR/PID-nbd-sock': No such file or directory + +qemu_io finished in 1..1.2 seconds, OK |