From 066eb006b54308be60fc2a435a04cde8f4187502 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 1 Feb 2018 21:21:27 +0100 Subject: 9pfs: drop v9fs_register_transport() No good reasons to do this outside of v9fs_device_realize_common(). Signed-off-by: Greg Kurz Reviewed-by: Stefano Stabellini --- hw/9pfs/9p.c | 6 +++++- hw/9pfs/9p.h | 10 ++-------- hw/9pfs/virtio-9p-device.c | 8 ++------ hw/9pfs/xen-9p-backend.c | 3 +-- 4 files changed, 10 insertions(+), 17 deletions(-) (limited to 'hw') diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 909a611394..364c7cb446 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -3485,7 +3485,8 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) } /* Returns 0 on success, 1 on failure. */ -int v9fs_device_realize_common(V9fsState *s, Error **errp) +int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, + Error **errp) { int i, len; struct stat stat; @@ -3493,6 +3494,9 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) V9fsPath path; int rc = 1; + assert(!s->transport); + s->transport = t; + /* initialize pdu allocator */ QLIST_INIT(&s->free_list); QLIST_INIT(&s->active_list); diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index ffe658ab89..5ced427d86 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -346,7 +346,8 @@ void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...); void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs); int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath, const char *name, V9fsPath *path); -int v9fs_device_realize_common(V9fsState *s, Error **errp); +int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, + Error **errp); void v9fs_device_unrealize_common(V9fsState *s, Error **errp); V9fsPDU *pdu_alloc(V9fsState *s); @@ -366,11 +367,4 @@ struct V9fsTransport { void (*push_and_notify)(V9fsPDU *pdu); }; -static inline int v9fs_register_transport(V9fsState *s, const V9fsTransport *t) -{ - assert(!s->transport); - s->transport = t; - return 0; -} - #endif diff --git 
a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 43f4e53f33..775e8ff766 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -198,17 +198,13 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) V9fsVirtioState *v = VIRTIO_9P(dev); V9fsState *s = &v->state; - if (v9fs_device_realize_common(s, errp)) { - goto out; + if (v9fs_device_realize_common(s, &virtio_9p_transport, errp)) { + return; } v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag); virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size); v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output); - v9fs_register_transport(s, &virtio_9p_transport); - -out: - return; } static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp) diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index df2a4100bf..14f0d6a50e 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -446,7 +446,6 @@ static int xen_9pfs_connect(struct XenDevice *xendev) xen_9pdev->id = s->fsconf.fsdev_id = g_strdup_printf("xen9p%d", xendev->dev); xen_9pdev->tag = s->fsconf.tag = xenstore_read_fe_str(xendev, "tag"); - v9fs_register_transport(s, &xen_9p_transport); fsdev = qemu_opts_create(qemu_find_opts("fsdev"), s->fsconf.tag, 1, NULL); @@ -455,7 +454,7 @@ static int xen_9pfs_connect(struct XenDevice *xendev) qemu_opt_set(fsdev, "security_model", xen_9pdev->security_model, NULL); qemu_opts_set_id(fsdev, s->fsconf.fsdev_id); qemu_fsdev_add(fsdev); - v9fs_device_realize_common(s, NULL); + v9fs_device_realize_common(s, &xen_9p_transport, NULL); return 0; -- cgit v1.2.3 From fc78d5ee7622342699d9d9626c8df712f1486e07 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 1 Feb 2018 21:21:27 +0100 Subject: 9pfs: Correctly handle cancelled requests # Background I was investigating spurious non-deterministic EINTR returns from various 9p file system operations in a Linux guest served from the qemu 9p server. 
## EINTR, ERESTARTSYS and the Linux kernel When a signal arrives that the Linux kernel needs to deliver to user-space while a given thread is blocked (in the 9p case waiting for a reply to its request in 9p_client_rpc -> wait_event_interruptible), it asks whatever driver is currently running to abort its current operation (in the 9p case causing the submission of a TFLUSH message) and return to user space. In these situations, the error message reported is generally ERESTARTSYS. If the userspace process specified SA_RESTART, this means that the system call will get restarted upon completion of the signal handler delivery (assuming the signal handler doesn't modify the process state in complicated ways not relevant here). If SA_RESTART is not specified, ERESTARTSYS gets translated to EINTR and user space is expected to handle the restart itself. ## The 9p TFLUSH command The 9p TFLUSH command requests that the server abort an ongoing operation. The man page [1] specifies: ``` If it recognizes oldtag as the tag of a pending transaction, it should abort any pending response and discard that tag. [...] When the client sends a Tflush, it must wait to receive the corresponding Rflush before reusing oldtag for subsequent messages. If a response to the flushed request is received before the Rflush, the client must honor the response as if it had not been flushed, since the completed request may signify a state change in the server ``` In particular, this means that the server must not send a reply with the original tag in response to the cancellation request, because the client is obligated to interpret such a reply as a coincidental reply to the original request. # The bug When qemu receives a TFlush request, it sets the `cancelled` flag on the relevant pdu. This flag is periodically checked, e.g. in `v9fs_co_name_to_path`, and if set, the operation is aborted and the error is set to EINTR.
However, the server then violates the spec by returning to the client an Rerror response, rather than discarding the message entirely. As a result, the client is required to assume that said Rerror response is a result of the original request, not a result of the cancellation and thus passes the EINTR error back to user space. This is not the worst thing it could do; however, as discussed above, the correct error code would have been ERESTARTSYS, such that user space programs with SA_RESTART set get correctly restarted upon completion of the signal handler. Instead, such programs get spurious EINTR results that they were not expecting to handle. It should be noted that there are plenty of user space programs that do not set SA_RESTART and do not correctly handle EINTR either. However, that is then a userspace bug. It should also be noted that this bug has been mitigated by a recent commit to the Linux kernel [2], which essentially prevents the kernel from sending Tflush requests unless the process is about to die (in which case the process likely doesn't care about the response). Nevertheless, for older kernels and to comply with the spec, I believe this change is beneficial. # Implementation The fix is fairly simple, just skipping notification of a reply if the pdu was previously cancelled. We do, however, also notify the transport layer that we're doing this, so it can clean up any resources it may be holding. I also added a new trace event to distinguish operations that caused an error reply from those that were cancelled. One complication is that we only omit sending the message on EINTR errors in order to avoid confusing the rest of the code (which may assume that a client knows about a fid if it successfully passed it off to pdu_complete without checking for cancellation status). This does mean that if the server acts upon the cancellation flag, it always needs to set err to EINTR. I believe this is true of the current code.
[1] https://9fans.github.io/plan9port/man/man9/flush.html [2] https://github.com/torvalds/linux/commit/9523feac272ccad2ad8186ba4fcc891 Signed-off-by: Keno Fischer Reviewed-by: Greg Kurz [groug, send a zero-sized reply instead of detaching the buffer] Signed-off-by: Greg Kurz Acked-by: Michael S. Tsirkin Reviewed-by: Stefano Stabellini --- hw/9pfs/9p.c | 18 ++++++++++++++++++ hw/9pfs/trace-events | 1 + 2 files changed, 19 insertions(+) (limited to 'hw') diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 364c7cb446..e88bb50f13 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -630,6 +630,24 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) V9fsState *s = pdu->s; int ret; + /* + * The 9p spec requires that successfully cancelled pdus receive no reply. + * Sending a reply would confuse clients because they would + * assume that any EINTR is the actual result of the operation, + * rather than a consequence of the cancellation. However, if + * the operation completed (succesfully or with an error other + * than caused be cancellation), we do send out that reply, both + * for efficiency and to avoid confusing the rest of the state machine + * that assumes passing a non-error here will mean a successful + * transmission of the reply. + */ + bool discard = pdu->cancelled && len == -EINTR; + if (discard) { + trace_v9fs_rcancel(pdu->tag, pdu->id); + pdu->size = 0; + goto out_notify; + } + if (len < 0) { int err = -len; len = 7; diff --git a/hw/9pfs/trace-events b/hw/9pfs/trace-events index 08a4abf22e..1aee350c42 100644 --- a/hw/9pfs/trace-events +++ b/hw/9pfs/trace-events @@ -1,6 +1,7 @@ # See docs/devel/tracing.txt for syntax documentation. 
# hw/9pfs/virtio-9p.c +v9fs_rcancel(uint16_t tag, uint8_t id) "tag %d id %d" v9fs_rerror(uint16_t tag, uint8_t id, int err) "tag %d id %d err %d" v9fs_version(uint16_t tag, uint8_t id, int32_t msize, char* version) "tag %d id %d msize %d version %s" v9fs_version_return(uint16_t tag, uint8_t id, int32_t msize, char* version) "tag %d id %d msize %d version %s" -- cgit v1.2.3 From 2893ddd5988a38196e3ca72853985814de831672 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 1 Feb 2018 21:21:27 +0100 Subject: tests: virtio-9p: use the synth backend The purpose of virtio-9p-test is to test the virtio-9p device, especially the 9p server state machine. We don't really care what fsdev backend we're using. Moreover, if we want to be able to test the flush request or a device reset with in-flight I/O, it is close to impossible to achieve with a physical backend because we cannot reliably ask it to put an I/O on hold at a specific point in time. Fortunately, we can do that with the synthetic backend, which allows registering callbacks on read/write accesses to a specific file. This will be used by a later patch to test the 9P flush request. The walk request test is converted to using the synth backend.
Signed-off-by: Greg Kurz Reviewed-by: Stefan Hajnoczi --- hw/9pfs/9p-synth.c | 16 ++++++++++++++++ hw/9pfs/9p-synth.h | 4 ++++ 2 files changed, 20 insertions(+) (limited to 'hw') diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index 8f255e91c0..dcbd320da1 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -19,6 +19,7 @@ #include "qemu/rcu.h" #include "qemu/rcu_queue.h" #include "qemu/cutils.h" +#include "sysemu/qtest.h" /* Root node for synth file system */ static V9fsSynthNode synth_root = { @@ -527,6 +528,21 @@ static int synth_init(FsContext *ctx, Error **errp) /* Mark the subsystem is ready for use */ synth_fs = 1; + + if (qtest_enabled()) { + V9fsSynthNode *node = NULL; + int i, ret; + + /* Directory hierarchy for WALK test */ + for (i = 0; i < P9_MAXWELEM; i++) { + char *name = g_strdup_printf(QTEST_V9FS_SYNTH_WALK_FILE, i); + + ret = qemu_v9fs_synth_mkdir(node, 0700, name, &node); + assert(!ret); + g_free(name); + } + } + return 0; } diff --git a/hw/9pfs/9p-synth.h b/hw/9pfs/9p-synth.h index 49c2fc7b27..876b4ef582 100644 --- a/hw/9pfs/9p-synth.h +++ b/hw/9pfs/9p-synth.h @@ -49,4 +49,8 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode, const char *name, v9fs_synth_read read, v9fs_synth_write write, void *arg); +/* qtest stuff */ + +#define QTEST_V9FS_SYNTH_WALK_FILE "WALK%d" + #endif -- cgit v1.2.3 From 82469aaefea4f8e7a4469c3ec1f680bbf0341c98 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 1 Feb 2018 21:21:28 +0100 Subject: tests: virtio-9p: add LOPEN operation test Trivial test of a successful open. 
Signed-off-by: Greg Kurz Reviewed-by: Stefan Hajnoczi --- hw/9pfs/9p-synth.c | 5 +++++ hw/9pfs/9p-synth.h | 1 + 2 files changed, 6 insertions(+) (limited to 'hw') diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index dcbd320da1..f17b74f444 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -541,6 +541,11 @@ static int synth_init(FsContext *ctx, Error **errp) assert(!ret); g_free(name); } + + /* File for LOPEN test */ + ret = qemu_v9fs_synth_add_file(NULL, 0, QTEST_V9FS_SYNTH_LOPEN_FILE, + NULL, NULL, ctx); + assert(!ret); } return 0; diff --git a/hw/9pfs/9p-synth.h b/hw/9pfs/9p-synth.h index 876b4ef582..2a8d6fd00d 100644 --- a/hw/9pfs/9p-synth.h +++ b/hw/9pfs/9p-synth.h @@ -52,5 +52,6 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode, /* qtest stuff */ #define QTEST_V9FS_SYNTH_WALK_FILE "WALK%d" +#define QTEST_V9FS_SYNTH_LOPEN_FILE "LOPEN" #endif -- cgit v1.2.3 From 354b86f85f516fecb60185f9c2b8e5933177b300 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 1 Feb 2018 21:21:28 +0100 Subject: tests: virtio-9p: add WRITE operation test Trivial test of a successful write. 
Signed-off-by: Greg Kurz (groug, handle potential overflow when computing request size, add missing g_free(buf), backend handles one written byte at a time to validate the server doesn't do short-reads) Reviewed-by: Stefan Hajnoczi --- hw/9pfs/9p-synth.c | 11 +++++++++++ hw/9pfs/9p-synth.h | 1 + 2 files changed, 12 insertions(+) (limited to 'hw') diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index f17b74f444..ade3460706 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -515,6 +515,12 @@ static int synth_unlinkat(FsContext *ctx, V9fsPath *dir, return -1; } +static ssize_t v9fs_synth_qtest_write(void *buf, int len, off_t offset, + void *arg) +{ + return 1; +} + static int synth_init(FsContext *ctx, Error **errp) { QLIST_INIT(&synth_root.child); @@ -546,6 +552,11 @@ static int synth_init(FsContext *ctx, Error **errp) ret = qemu_v9fs_synth_add_file(NULL, 0, QTEST_V9FS_SYNTH_LOPEN_FILE, NULL, NULL, ctx); assert(!ret); + + /* File for WRITE test */ + ret = qemu_v9fs_synth_add_file(NULL, 0, QTEST_V9FS_SYNTH_WRITE_FILE, + NULL, v9fs_synth_qtest_write, ctx); + assert(!ret); } return 0; diff --git a/hw/9pfs/9p-synth.h b/hw/9pfs/9p-synth.h index 2a8d6fd00d..a74032d7bd 100644 --- a/hw/9pfs/9p-synth.h +++ b/hw/9pfs/9p-synth.h @@ -53,5 +53,6 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode, #define QTEST_V9FS_SYNTH_WALK_FILE "WALK%d" #define QTEST_V9FS_SYNTH_LOPEN_FILE "LOPEN" +#define QTEST_V9FS_SYNTH_WRITE_FILE "WRITE" #endif -- cgit v1.2.3 From 357e2f7f4e4dc68f01d5b81f5cd669874314e14a Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 1 Feb 2018 21:21:28 +0100 Subject: tests: virtio-9p: add FLUSH operation test The idea is to send a victim request that will possibly block in the server and to send a flush request to cancel the victim request. 
This patch adds two tests to verify that: - the server does not reply to a victim request that was actually cancelled - the server replies to the flush request after replying to the victim request if it could not cancel it 9p request cancellation reference: http://man.cat-v.org/plan_9/5/flush Signed-off-by: Greg Kurz Reviewed-by: Stefan Hajnoczi (groug, change the test to only write a single byte to avoid any alignment or endianness consideration) --- hw/9pfs/9p-synth.c | 20 ++++++++++++++++++++ hw/9pfs/9p-synth.h | 7 +++++++ hw/9pfs/9p.c | 1 + 3 files changed, 28 insertions(+) (limited to 'hw') diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index ade3460706..18082dffe8 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -521,6 +521,20 @@ static ssize_t v9fs_synth_qtest_write(void *buf, int len, off_t offset, return 1; } +static ssize_t v9fs_synth_qtest_flush_write(void *buf, int len, off_t offset, + void *arg) +{ + bool should_block = !!*(uint8_t *)buf; + + if (should_block) { + /* This will cause the server to call us again until we're cancelled */ + errno = EINTR; + return -1; + } + + return 1; +} + static int synth_init(FsContext *ctx, Error **errp) { QLIST_INIT(&synth_root.child); @@ -557,6 +571,12 @@ static int synth_init(FsContext *ctx, Error **errp) ret = qemu_v9fs_synth_add_file(NULL, 0, QTEST_V9FS_SYNTH_WRITE_FILE, NULL, v9fs_synth_qtest_write, ctx); assert(!ret); + + /* File for FLUSH test */ + ret = qemu_v9fs_synth_add_file(NULL, 0, QTEST_V9FS_SYNTH_FLUSH_FILE, + NULL, v9fs_synth_qtest_flush_write, + ctx); + assert(!ret); } return 0; diff --git a/hw/9pfs/9p-synth.h b/hw/9pfs/9p-synth.h index a74032d7bd..af7a993a1e 100644 --- a/hw/9pfs/9p-synth.h +++ b/hw/9pfs/9p-synth.h @@ -55,4 +55,11 @@ int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode, #define QTEST_V9FS_SYNTH_LOPEN_FILE "LOPEN" #define QTEST_V9FS_SYNTH_WRITE_FILE "WRITE" +/* Any write to the "FLUSH" file is handled one byte at a time by the + * backend.
If the byte is zero, the backend returns success (ie, 1), + * otherwise it forces the server to try again forever. Thus allowing + * the client to cancel the request. + */ +#define QTEST_V9FS_SYNTH_FLUSH_FILE "FLUSH" + #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index e88bb50f13..85a1ed8171 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -24,6 +24,7 @@ #include "coth.h" #include "trace.h" #include "migration/blocker.h" +#include "sysemu/qtest.h" int open_fd_hw; int total_open_fd; -- cgit v1.2.3