diff options
-rw-r--r-- | docs/COLO-FT.txt | 4 | ||||
-rw-r--r-- | docs/about/deprecated.rst | 10 | ||||
-rw-r--r-- | docs/system/ppc/powernv.rst | 2 | ||||
-rw-r--r-- | ebpf/ebpf_rss-stub.c | 8 | ||||
-rw-r--r-- | ebpf/ebpf_rss.c | 118 | ||||
-rw-r--r-- | ebpf/ebpf_rss.h | 10 | ||||
-rw-r--r-- | ebpf/trace-events | 8 | ||||
-rw-r--r-- | hw/net/trace-events | 8 | ||||
-rw-r--r-- | hw/net/virtio-net.c | 75 | ||||
-rw-r--r-- | meson.build | 10 | ||||
-rw-r--r-- | net/colo-compare.c | 3 | ||||
-rw-r--r-- | net/meson.build | 2 | ||||
-rw-r--r-- | net/net.c | 4 | ||||
-rw-r--r-- | net/stream.c | 34 | ||||
-rw-r--r-- | net/tap-win32.c | 15 | ||||
-rw-r--r-- | qapi/net.json | 13 | ||||
-rw-r--r-- | qemu-options.hx | 46 | ||||
-rw-r--r-- | tests/qtest/ipmi-bt-test.c | 2 | ||||
-rw-r--r-- | tests/qtest/netdev-socket.c | 2 | ||||
-rw-r--r-- | tests/qtest/vhost-user-test.c | 2 | ||||
-rw-r--r-- | tests/unit/test-char.c | 8 |
21 files changed, 245 insertions, 139 deletions
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 2e760a4aee..2283a09c08 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -193,8 +193,8 @@ any IP's here, except for the $primary_ip variable. -device piix3-usb-uhci -device usb-tablet -name secondary \ -netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \ -device rtl8139,id=e0,netdev=hn0 \ - -chardev socket,id=red0,host=$primary_ip,port=9003,reconnect=1 \ - -chardev socket,id=red1,host=$primary_ip,port=9004,reconnect=1 \ + -chardev socket,id=red0,host=$primary_ip,port=9003,reconnect-ms=1000 \ + -chardev socket,id=red1,host=$primary_ip,port=9004,reconnect-ms=1000 \ -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \ -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \ -object filter-rewriter,id=rew0,netdev=hn0,queue=all \ diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index ce38a3d0cf..1e1e9f5f18 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -400,6 +400,16 @@ Backend ``memory`` (since 9.0) The ``reconnect`` option only allows specifiying second granularity timeouts, which is not enough for all types of use cases, use ``reconnect-ms`` instead. + +Net device options +'''''''''''''''''' + +Stream ``reconnect`` (since 9.2) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``reconnect`` option only allows specifiying second granularity timeouts, +which is not enough for all types of use cases, use ``reconnect-ms`` instead. + CPU device properties ''''''''''''''''''''' diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst index 09f3965858..de7a807ac7 100644 --- a/docs/system/ppc/powernv.rst +++ b/docs/system/ppc/powernv.rst @@ -181,7 +181,7 @@ connected to a remote QEMU machine acting as BMC, using these options .. code-block:: bash - -chardev socket,id=ipmi0,host=localhost,port=9002,reconnect=10 \ + -chardev socket,id=ipmi0,host=localhost,port=9002,reconnect-ms=10000 \ -device ipmi-bmc-extern,id=bmc0,chardev=ipmi0 \ -device isa-ipmi-bt,bmc=bmc0,irq=10 \ -nodefaults diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c index 8d7fae2ad9..d0e7f99fb9 100644 --- a/ebpf/ebpf_rss-stub.c +++ b/ebpf/ebpf_rss-stub.c @@ -23,19 +23,21 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) return false; } -bool ebpf_rss_load(struct EBPFRSSContext *ctx) +bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp) { return false; } bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, - int config_fd, int toeplitz_fd, int table_fd) + int config_fd, int toeplitz_fd, int table_fd, + Error **errp) { return false; } bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, - uint16_t *indirections_table, uint8_t *toeplitz_key) + uint16_t *indirections_table, uint8_t *toeplitz_key, + Error **errp) { return false; } diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c index dcaa80f380..e793786c17 100644 --- a/ebpf/ebpf_rss.c +++ b/ebpf/ebpf_rss.c @@ -47,34 +47,37 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1); } -static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx, Error **errp) { - if (!ebpf_rss_is_loaded(ctx)) { - return false; - } - ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_configuration, 0); if (ctx->mmap_configuration == MAP_FAILED) { - trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array"); + trace_ebpf_rss_mmap_error(ctx, "configuration"); + error_setg(errp, "Unable to map eBPF configuration array"); return false; } ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_toeplitz_key, 0); if (ctx->mmap_toeplitz_key == MAP_FAILED) { - trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key"); + trace_ebpf_rss_mmap_error(ctx, "toeplitz key"); + error_setg(errp, "Unable to map eBPF toeplitz array"); goto toeplitz_fail; } ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, ctx->map_indirections_table, 0); if (ctx->mmap_indirections_table == MAP_FAILED) { - trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table"); + trace_ebpf_rss_mmap_error(ctx, "indirections table"); + error_setg(errp, "Unable to map eBPF indirection array"); goto indirection_fail; } + trace_ebpf_rss_mmap(ctx, + ctx->mmap_configuration, + ctx->mmap_toeplitz_key, + ctx->mmap_indirections_table); return true; indirection_fail: @@ -90,10 +93,6 @@ toeplitz_fail: static void ebpf_rss_munmap(struct EBPFRSSContext *ctx) { - if (!ebpf_rss_is_loaded(ctx)) { - return; - } - munmap(ctx->mmap_indirections_table, qemu_real_host_page_size()); munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size()); munmap(ctx->mmap_configuration, qemu_real_host_page_size()); @@ -103,7 +102,7 @@ static void ebpf_rss_munmap(struct EBPFRSSContext *ctx) ctx->mmap_indirections_table = NULL; } -bool ebpf_rss_load(struct EBPFRSSContext *ctx) +bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp) { struct rss_bpf *rss_bpf_ctx; @@ -113,14 +112,16 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) rss_bpf_ctx = rss_bpf__open(); if (rss_bpf_ctx == NULL) { - trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object"); + trace_ebpf_rss_open_error(ctx); + error_setg(errp, "Unable to open eBPF RSS object"); goto error; } bpf_program__set_type(rss_bpf_ctx->progs.tun_rss_steering_prog, BPF_PROG_TYPE_SOCKET_FILTER); if (rss_bpf__load(rss_bpf_ctx)) { - trace_ebpf_error("eBPF RSS", "can not load RSS program"); + trace_ebpf_rss_load_error(ctx); + error_setg(errp, "Unable to load eBPF program"); goto error; } @@ -134,7 +135,12 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) ctx->map_toeplitz_key = bpf_map__fd( rss_bpf_ctx->maps.tap_rss_map_toeplitz_key); - if (!ebpf_rss_mmap(ctx)) { + trace_ebpf_rss_load(ctx, + ctx->program_fd, + ctx->map_configuration, + ctx->map_indirections_table, + ctx->map_toeplitz_key); + if (!ebpf_rss_mmap(ctx, errp)) { goto error; } @@ -151,13 +157,28 @@ error: } bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, - int config_fd, int toeplitz_fd, int table_fd) + int config_fd, int toeplitz_fd, int table_fd, + Error **errp) { if (ebpf_rss_is_loaded(ctx)) { + error_setg(errp, "eBPF program is already loaded"); return false; } - if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) { + if (program_fd < 0) { + error_setg(errp, "eBPF program FD is not open"); + return false; + } + if (config_fd < 0) { + error_setg(errp, "eBPF config FD is not open"); + return false; + } + if (toeplitz_fd < 0) { + error_setg(errp, "eBPF toeplitz FD is not open"); + return false; + } + if (table_fd < 0) { + error_setg(errp, "eBPF indirection FD is not open"); return false; } @@ -166,7 +187,13 @@ bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, ctx->map_toeplitz_key = toeplitz_fd; ctx->map_indirections_table = table_fd; - if (!ebpf_rss_mmap(ctx)) { + trace_ebpf_rss_load(ctx, + ctx->program_fd, + ctx->map_configuration, + ctx->map_indirections_table, + ctx->map_toeplitz_key); + + if (!ebpf_rss_mmap(ctx, errp)) { ctx->program_fd = -1; ctx->map_configuration = -1; ctx->map_toeplitz_key = -1; @@ -177,25 +204,22 @@ bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, return true; } -static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx, +static void ebpf_rss_set_config(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config) { - if (!ebpf_rss_is_loaded(ctx)) { - return false; - } - memcpy(ctx->mmap_configuration, config, sizeof(*config)); - return true; } static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, uint16_t *indirections_table, - size_t len) + size_t len, + Error **errp) { char *cursor = ctx->mmap_indirections_table; - if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL || - len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + if (len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + error_setg(errp, "Indirections table length %zu exceeds limit %d", + len, VIRTIO_NET_RSS_MAX_TABLE_LEN); return false; } @@ -207,43 +231,51 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, return true; } -static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, +static void ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, uint8_t *toeplitz_key) { /* prepare toeplitz key */ uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {}; - if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) { - return false; - } memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE); *(uint32_t *)toe = ntohl(*(uint32_t *)toe); memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE); - return true; } bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, - uint16_t *indirections_table, uint8_t *toeplitz_key) + uint16_t *indirections_table, uint8_t *toeplitz_key, + Error **errp) { - if (!ebpf_rss_is_loaded(ctx) || config == NULL || - indirections_table == NULL || toeplitz_key == NULL) { + if (!ebpf_rss_is_loaded(ctx)) { + error_setg(errp, "eBPF program is not loaded"); return false; } - - if (!ebpf_rss_set_config(ctx, config)) { + if (config == NULL) { + error_setg(errp, "eBPF config table is NULL"); return false; } - - if (!ebpf_rss_set_indirections_table(ctx, indirections_table, - config->indirections_len)) { + if (indirections_table == NULL) { + error_setg(errp, "eBPF indirections table is NULL"); + return false; + } + if (toeplitz_key == NULL) { + error_setg(errp, "eBPF toeplitz key is NULL"); return false; } - if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) { + ebpf_rss_set_config(ctx, config); + + if (!ebpf_rss_set_indirections_table(ctx, indirections_table, + config->indirections_len, + errp)) { return false; } + ebpf_rss_set_toepliz_key(ctx, toeplitz_key); + + trace_ebpf_rss_set_data(ctx, config, indirections_table, toeplitz_key); + return true; } @@ -253,6 +285,8 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx) return; } + trace_ebpf_rss_unload(ctx); + ebpf_rss_munmap(ctx); if (ctx->obj) { diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h index 239242b0d2..86a5787789 100644 --- a/ebpf/ebpf_rss.h +++ b/ebpf/ebpf_rss.h @@ -14,6 +14,8 @@ #ifndef QEMU_EBPF_RSS_H #define QEMU_EBPF_RSS_H +#include "qapi/error.h" + #define EBPF_RSS_MAX_FDS 4 struct EBPFRSSContext { @@ -41,13 +43,15 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx); bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx); -bool ebpf_rss_load(struct EBPFRSSContext *ctx); +bool ebpf_rss_load(struct EBPFRSSContext *ctx, Error **errp); bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd, - int config_fd, int toeplitz_fd, int table_fd); + int config_fd, int toeplitz_fd, int table_fd, + Error **errp); bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, - uint16_t *indirections_table, uint8_t *toeplitz_key); + uint16_t *indirections_table, uint8_t *toeplitz_key, + Error **errp); void ebpf_rss_unload(struct EBPFRSSContext *ctx); diff --git a/ebpf/trace-events b/ebpf/trace-events index b3ad1a35f2..bf3d9b6451 100644 --- a/ebpf/trace-events +++ b/ebpf/trace-events @@ -1,4 +1,10 @@ # See docs/devel/tracing.rst for syntax documentation. # ebpf-rss.c -ebpf_error(const char *s1, const char *s2) "error in %s: %s" +ebpf_rss_load(void *ctx, int progfd, int cfgfd, int toepfd, int indirfd) "ctx=%p program-fd=%d config-fd=%d toeplitz-fd=%d indirection-fd=%d" +ebpf_rss_load_error(void *ctx) "ctx=%p" +ebpf_rss_mmap(void *ctx, void *cfgptr, void *toepptr, void *indirptr) "ctx=%p config-ptr=%p toeplitz-ptr=%p indirection-ptr=%p" +ebpf_rss_mmap_error(void *ctx, const char *object) "ctx=%p object=%s" +ebpf_rss_open_error(void *ctx) "ctx=%p" +ebpf_rss_set_data(void *ctx, void *cfgptr, void *toepptr, void *indirptr) "ctx=%p config-ptr=%p toeplitz-ptr=%p indirection-ptr=%p" +ebpf_rss_unload(void *ctx) "rss unload ctx=%p" diff --git a/hw/net/trace-events b/hw/net/trace-events index 4c6687923e..91a3d0c054 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -394,9 +394,11 @@ virtio_net_announce_notify(void) "" virtio_net_announce_timer(int round) "%d" virtio_net_handle_announce(int round) "%d" virtio_net_post_load_device(void) -virtio_net_rss_disable(void) -virtio_net_rss_error(const char *msg, uint32_t value) "%s, value 0x%08x" -virtio_net_rss_enable(uint32_t p1, uint16_t p2, uint8_t p3) "hashes 0x%x, table of %d, key of %d" +virtio_net_rss_load(void *nic, size_t nfds, void *fds) "nic=%p nfds=%zu fds=%p" +virtio_net_rss_attach_ebpf(void *nic, int prog_fd) "nic=%p prog-fd=%d" +virtio_net_rss_disable(void *nic) "nic=%p" +virtio_net_rss_error(void *nic, const char *msg, uint32_t value) "nic=%p msg=%s, value 0x%08x" +virtio_net_rss_enable(void *nic, uint32_t p1, uint16_t p2, uint8_t p3) "nic=%p hashes 0x%x, table of %d, key of %d" # tulip.c tulip_reg_write(uint64_t addr, const char *name, int size, uint64_t val) "addr 0x%02"PRIx64" (%s) size %d value 0x%08"PRIx64 diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index fb84d142ee..f2104ed364 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1241,6 +1241,7 @@ static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) return false; } + trace_virtio_net_rss_attach_ebpf(nic, prog_fd); return nc->info->set_steering_ebpf(nc, prog_fd); } @@ -1254,7 +1255,7 @@ static void rss_data_to_rss_config(struct VirtioNetRssData *data, config->default_queue = data->default_queue; } -static bool virtio_net_attach_epbf_rss(VirtIONet *n) +static bool virtio_net_attach_ebpf_rss(VirtIONet *n) { struct EBPFRSSConfig config = {}; @@ -1265,7 +1266,8 @@ static bool virtio_net_attach_epbf_rss(VirtIONet *n) rss_data_to_rss_config(&n->rss_data, &config); if (!ebpf_rss_set_all(&n->ebpf_rss, &config, - n->rss_data.indirections_table, n->rss_data.key)) { + n->rss_data.indirections_table, n->rss_data.key, + NULL)) { return false; } @@ -1276,7 +1278,7 @@ static bool virtio_net_attach_epbf_rss(VirtIONet *n) return true; } -static void virtio_net_detach_epbf_rss(VirtIONet *n) +static void virtio_net_detach_ebpf_rss(VirtIONet *n) { virtio_net_attach_ebpf_to_backend(n->nic, -1); } @@ -1286,8 +1288,8 @@ static void virtio_net_commit_rss_config(VirtIONet *n) if (n->rss_data.enabled) { n->rss_data.enabled_software_rss = n->rss_data.populate_hash; if (n->rss_data.populate_hash) { - virtio_net_detach_epbf_rss(n); - } else if (!virtio_net_attach_epbf_rss(n)) { + virtio_net_detach_ebpf_rss(n); + } else if (!virtio_net_attach_ebpf_rss(n)) { if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { warn_report("Can't load eBPF RSS for vhost"); } else { @@ -1296,12 +1298,13 @@ static void virtio_net_commit_rss_config(VirtIONet *n) } } - trace_virtio_net_rss_enable(n->rss_data.hash_types, + trace_virtio_net_rss_enable(n, + n->rss_data.hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); } else { - virtio_net_detach_epbf_rss(n); - trace_virtio_net_rss_disable(); + virtio_net_detach_ebpf_rss(n); + trace_virtio_net_rss_disable(n); } } @@ -1315,28 +1318,27 @@ static void virtio_net_disable_rss(VirtIONet *n) virtio_net_commit_rss_config(n); } -static bool virtio_net_load_ebpf_fds(VirtIONet *n) +static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp) { int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1}; int ret = true; int i = 0; if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) { - warn_report("Expected %d file descriptors but got %d", - EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); - return false; - } + error_setg(errp, "Expected %d file descriptors but got %d", + EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); + return false; + } for (i = 0; i < n->nr_ebpf_rss_fds; i++) { - fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], - &error_warn); + fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp); if (fds[i] < 0) { ret = false; goto exit; } } - ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]); + ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp); exit: if (!ret) { @@ -1348,13 +1350,16 @@ exit: return ret; } -static bool virtio_net_load_ebpf(VirtIONet *n) +static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp) { bool ret = false; if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { - if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) { - ret = ebpf_rss_load(&n->ebpf_rss); + trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds); + if (n->ebpf_rss_fds) { + ret = virtio_net_load_ebpf_fds(n, errp); + } else { + ret = ebpf_rss_load(&n->ebpf_rss, errp); } } @@ -1401,17 +1406,17 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); n->rss_data.indirections_len = virtio_lduw_p(vdev, &cfg.indirection_table_mask); - n->rss_data.indirections_len++; if (!do_rss) { - n->rss_data.indirections_len = 1; + n->rss_data.indirections_len = 0; } - if (!is_power_of_2(n->rss_data.indirections_len)) { - err_msg = "Invalid size of indirection table"; + if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) { + err_msg = "Too large indirection table"; err_value = n->rss_data.indirections_len; goto error; } - if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { - err_msg = "Too large indirection table"; + n->rss_data.indirections_len++; + if (!is_power_of_2(n->rss_data.indirections_len)) { + err_msg = "Invalid size of indirection table"; err_value = n->rss_data.indirections_len; goto error; } @@ -1482,7 +1487,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, virtio_net_commit_rss_config(n); return queue_pairs; error: - trace_virtio_net_rss_error(err_msg, err_value); + trace_virtio_net_rss_error(n, err_msg, err_value); virtio_net_disable_rss(n); return 0; } @@ -3760,7 +3765,23 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) net_rx_pkt_init(&n->rx_pkt); if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { - virtio_net_load_ebpf(n); + Error *err = NULL; + if (!virtio_net_load_ebpf(n, &err)) { + /* + * If user explicitly gave QEMU RSS FDs to use, then + * failing to use them must be considered a fatal + * error. If no RSS FDs were provided, QEMU is trying + * eBPF on a "best effort" basis only, so report a + * warning and allow fallback to software RSS. + */ + if (n->ebpf_rss_fds) { + error_propagate(errp, err); + } else { + warn_report("unable to load eBPF RSS: %s", + error_get_pretty(err)); + error_free(err); + } + } } } diff --git a/meson.build b/meson.build index 85594fd3f1..f7d4517521 100644 --- a/meson.build +++ b/meson.build @@ -2172,8 +2172,14 @@ endif # libxdp libxdp = not_found if not get_option('af_xdp').auto() or have_system - libxdp = dependency('libxdp', required: get_option('af_xdp'), - version: '>=1.4.0', method: 'pkg-config') + if libbpf.found() + libxdp = dependency('libxdp', required: get_option('af_xdp'), + version: '>=1.4.0', method: 'pkg-config') + else + if get_option('af_xdp').enabled() + error('libxdp requested, but libbpf is not available') + endif + endif endif # libdw diff --git a/net/colo-compare.c b/net/colo-compare.c index c4ad0ab71f..39f90c4065 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -412,8 +412,7 @@ static void colo_compare_tcp(CompareState *s, Connection *conn) * can ensure that the packet's payload is acknowledged by * primary and secondary. */ - uint32_t min_ack = conn->pack - conn->sack > 0 ? - conn->sack : conn->pack; + uint32_t min_ack = MIN(conn->pack, conn->sack); pri: if (g_queue_is_empty(&conn->primary_list)) { diff --git a/net/meson.build b/net/meson.build index e0cd71470e..bb97b4dcbe 100644 --- a/net/meson.build +++ b/net/meson.build @@ -39,7 +39,7 @@ if have_netmap system_ss.add(files('netmap.c')) endif -system_ss.add(when: libxdp, if_true: files('af-xdp.c')) +system_ss.add(when: [libxdp, libbpf], if_true: files('af-xdp.c')) if have_vhost_net_user system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) @@ -542,6 +542,10 @@ void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int qemu_get_vnet_hdr_len(NetClientState *nc) { + if (!nc) { + return 0; + } + return nc->vnet_hdr_len; } diff --git a/net/stream.c b/net/stream.c index 97e6ec6679..4de5613844 100644 --- a/net/stream.c +++ b/net/stream.c @@ -51,7 +51,7 @@ typedef struct NetStreamState { guint ioc_write_tag; SocketReadState rs; unsigned int send_index; /* number of bytes sent*/ - uint32_t reconnect; + uint32_t reconnect_ms; guint timer_tag; SocketAddress *addr; } NetStreamState; @@ -387,10 +387,9 @@ static gboolean net_stream_reconnect(gpointer data) static void net_stream_arm_reconnect(NetStreamState *s) { - if (s->reconnect && s->timer_tag == 0) { + if (s->reconnect_ms && s->timer_tag == 0) { qemu_set_info_str(&s->nc, "connecting"); - s->timer_tag = g_timeout_add_seconds(s->reconnect, - net_stream_reconnect, s); + s->timer_tag = g_timeout_add(s->reconnect_ms, net_stream_reconnect, s); } } @@ -398,7 +397,7 @@ static int net_stream_client_init(NetClientState *peer, const char *model, const char *name, SocketAddress *addr, - uint32_t reconnect, + uint32_t reconnect_ms, Error **errp) { NetStreamState *s; @@ -412,8 +411,8 @@ static int net_stream_client_init(NetClientState *peer, s->ioc = QIO_CHANNEL(sioc); s->nc.link_down = true; - s->reconnect = reconnect; - if (reconnect) { + s->reconnect_ms = reconnect_ms; + if (reconnect_ms) { s->addr = QAPI_CLONE(SocketAddress, addr); } qio_channel_socket_connect_async(sioc, addr, @@ -432,13 +431,24 @@ int net_init_stream(const Netdev *netdev, const char *name, sock = &netdev->u.stream; if (!sock->has_server || !sock->server) { + uint32_t reconnect_ms = 0; + + if (sock->has_reconnect && sock->has_reconnect_ms) { + error_setg(errp, "'reconnect' and 'reconnect-ms' are mutually " + "exclusive"); + return -1; + } else if (sock->has_reconnect_ms) { + reconnect_ms = sock->reconnect_ms; + } else if (sock->has_reconnect) { + reconnect_ms = sock->reconnect * 1000u; + } + return net_stream_client_init(peer, "stream", name, sock->addr, - sock->has_reconnect ? sock->reconnect : 0, - errp); + reconnect_ms, errp); } - if (sock->has_reconnect) { - error_setg(errp, "'reconnect' option is incompatible with " - "socket in server mode"); + if (sock->has_reconnect || sock->has_reconnect_ms) { + error_setg(errp, "'reconnect' and 'reconnect-ms' options are " + "incompatible with socket in server mode"); return -1; } return net_stream_server_init(peer, "stream", name, sock->addr, errp); diff --git a/net/tap-win32.c b/net/tap-win32.c index 7edbd71633..671dee970f 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -214,7 +214,7 @@ static int is_tap_win32_dev(const char *guid) for (;;) { char enum_name[256]; - char unit_string[256]; + g_autofree char *unit_string = NULL; HKEY unit_key; char component_id_string[] = "ComponentId"; char component_id[256]; @@ -239,8 +239,7 @@ static int is_tap_win32_dev(const char *guid) return FALSE; } - snprintf (unit_string, sizeof(unit_string), "%s\\%s", - ADAPTER_KEY, enum_name); + unit_string = g_strdup_printf("%s\\%s", ADAPTER_KEY, enum_name); status = RegOpenKeyEx( HKEY_LOCAL_MACHINE, @@ -315,7 +314,7 @@ static int get_device_guid( while (!stop) { char enum_name[256]; - char connection_string[256]; + g_autofree char *connection_string = NULL; HKEY connection_key; char name_data[256]; DWORD name_type; @@ -338,9 +337,7 @@ static int get_device_guid( return -1; } - snprintf(connection_string, - sizeof(connection_string), - "%s\\%s\\Connection", + connection_string = g_strdup_printf("%s\\%s\\Connection", NETWORK_CONNECTIONS_KEY, enum_name); status = RegOpenKeyEx( @@ -595,7 +592,7 @@ static void tap_win32_free_buffer(tap_win32_overlapped_t *overlapped, static int tap_win32_open(tap_win32_overlapped_t **phandle, const char *preferred_name) { - char device_path[256]; + g_autofree char *device_path = NULL; char device_guid[0x100]; int rc; HANDLE handle; @@ -617,7 +614,7 @@ static int tap_win32_open(tap_win32_overlapped_t **phandle, if (rc) return -1; - snprintf (device_path, sizeof(device_path), "%s%s%s", + device_path = g_strdup_printf("%s%s%s", USERMODEDEVICEDIR, device_guid, TAPSUFFIX); diff --git a/qapi/net.json b/qapi/net.json index 87fc0d0b28..2739a2f423 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -650,15 +650,26 @@ # attempt a reconnect after the given number of seconds. Setting # this to zero disables this function. (default: 0) (since 8.0) # +# @reconnect-ms: For a client socket, if a socket is disconnected, then +# attempt a reconnect after the given number of milliseconds. Setting +# this to zero disables this function. This member is mutually +# exclusive with @reconnect. (default: 0) (Since: 9.2) +# # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. # +# Features: +# +# @deprecated: Member @reconnect is deprecated. Use @reconnect-ms +# instead. +# # Since: 7.2 ## { 'struct': 'NetdevStreamOptions', 'data': { 'addr': 'SocketAddress', '*server': 'bool', - '*reconnect': 'uint32' } } + '*reconnect': { 'type': 'int', 'features': [ 'deprecated' ] }, + '*reconnect-ms': 'int' } } ## # @NetdevDgramOptions: diff --git a/qemu-options.hx b/qemu-options.hx index daae494147..dacc9790a4 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1102,7 +1102,7 @@ SRST external entity that provides the IPMI services. A connection is made to an external BMC simulator. If you do this, - it is strongly recommended that you use the "reconnect=" chardev + it is strongly recommended that you use the "reconnect-ms=" chardev option to reconnect to the simulator if the connection is lost. Note that if this is not used carefully, it can be a security issue, as the interface has the ability to send resets, NMIs, and power off @@ -2833,9 +2833,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" - "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" - "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" - "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect-ms=milliseconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect-ms=milliseconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect-ms=milliseconds]\n" " configure a network backend to connect to another network\n" " using a socket connection in stream mode.\n" "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" @@ -3291,7 +3291,7 @@ SRST -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\ -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4 -``-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]`` +``-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect-ms=milliseconds]`` Configure a network backend to connect to another QEMU virtual machine or a proxy using a TCP/IP socket. ``server=on|off`` @@ -3318,8 +3318,8 @@ SRST ``ipv6=on|off`` whether to accept IPv6 addresses, default to try both IPv4 and IPv6 - ``reconnect=seconds`` - for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of seconds. + ``reconnect-ms=milliseconds`` + for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of milliseconds. Setting this to zero disables this function. (default: 0) Example (two guests connected using a TCP/IP socket): @@ -3333,9 +3333,9 @@ SRST # second VM |qemu_system| linux.img \\ -device virtio-net,netdev=net0,mac=52:54:00:12:34:57 \\ - -netdev stream,id=net0,server=off,addr.type=inet,addr.host=localhost,addr.port=1234,reconnect=5 + -netdev stream,id=net0,server=off,addr.type=inet,addr.host=localhost,addr.port=1234,reconnect-ms=5000 -``-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]`` +``-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect-ms=milliseconds]`` Configure a network backend to connect to another QEMU virtual machine or a proxy using a stream oriented unix domain socket. ``server=on|off`` @@ -3350,8 +3350,8 @@ SRST ``tight=on|off`` if false, pad an abstract socket address with enough null bytes to make it fill struct sockaddr_un member sun_path. - ``reconnect=seconds`` - for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of seconds. + ``reconnect-ms=milliseconds`` + for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of milliseconds. Setting this to zero disables this function. (default: 0) Example (using passt as a replacement of -netdev user): @@ -3377,9 +3377,9 @@ SRST # second VM |qemu_system| linux.img \\ -device virtio-net,netdev=net0,mac=52:54:00:12:34:57 \\ - -netdev stream,id=net0,server=off,addr.type=unix,addr.path=/tmp/qemu0,reconnect=5 + -netdev stream,id=net0,server=off,addr.type=unix,addr.path=/tmp/qemu0,reconnect-ms=5000 -``-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]`` +``-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect-ms=milliseconds]`` Configure a network backend to connect to another QEMU virtual machine or a proxy using a stream oriented socket file descriptor. ``server=on|off`` @@ -3388,8 +3388,8 @@ SRST ``addr.str=file-descriptor`` file descriptor number to use as a socket - ``reconnect=seconds`` - for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of seconds. + ``reconnect-ms=milliseconds`` + for a client socket, if a socket is disconnected, then attempt a reconnect after the given number of milliseconds. Setting this to zero disables this function. (default: 0) ``-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]`` @@ -3679,9 +3679,9 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, "-chardev help\n" "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4=on|off][,ipv6=on|off][,nodelay=on|off]\n" - " [,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds][,mux=on|off]\n" + " [,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect-ms=milliseconds][,mux=on|off]\n" " [,logfile=PATH][,logappend=on|off][,tls-creds=ID][,tls-authz=ID] (tcp)\n" - "-chardev socket,id=id,path=path[,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds]\n" + "-chardev socket,id=id,path=path[,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect-ms=milliseconds]\n" " [,mux=on|off][,logfile=PATH][,logappend=on|off][,abstract=on|off][,tight=on|off] (unix)\n" "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n" " [,localport=localport][,ipv4=on|off][,ipv6=on|off][,mux=on|off]\n" @@ -3792,7 +3792,7 @@ The available backends are: A void device. This device will not emit any data, and will drop any data it receives. The null backend does not take any options. -``-chardev socket,id=id[,TCP options or unix options][,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds][,tls-creds=id][,tls-authz=id]`` +``-chardev socket,id=id[,TCP options or unix options][,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect-ms=milliseconds][,tls-creds=id][,tls-authz=id]`` Create a two-way stream socket, which can be either a TCP or a unix socket. A unix socket will be created if ``path`` is specified. Behaviour is undefined if TCP options are specified for a unix @@ -3809,9 +3809,9 @@ The available backends are: ``websocket=on|off`` specifies that the socket uses WebSocket protocol for communication. - ``reconnect`` sets the timeout for reconnecting on non-server + ``reconnect-ms`` sets the timeout for reconnecting on non-server sockets when the remote end goes away. qemu will delay this many - seconds and then attempt to reconnect. Zero disables reconnecting, + milliseconds and then attempt to reconnect. Zero disables reconnecting, and is the default. ``tls-creds`` requests enablement of the TLS protocol for @@ -4390,14 +4390,14 @@ SRST ``telnet options:`` localhost 5555 - ``tcp:[host]:port[,server=on|off][,wait=on|off][,nodelay=on|off][,reconnect=seconds]`` + ``tcp:[host]:port[,server=on|off][,wait=on|off][,nodelay=on|off][,reconnect-ms=milliseconds]`` The TCP Net Console has two modes of operation. It can send the serial I/O to a location or wait for a connection from a location. By default the TCP Net Console is sent to host at the port. If you use the ``server=on`` option QEMU will wait for a client socket application to connect to the port before continuing, unless the ``wait=on|off`` option was specified. The ``nodelay=on|off`` - option disables the Nagle buffering algorithm. The ``reconnect=on`` + option disables the Nagle buffering algorithm. The ``reconnect-ms`` option only applies if ``server=no`` is set, if the connection goes down it will attempt to reconnect at the given interval. If host is omitted, 0.0.0.0 is assumed. Only one TCP connection at a @@ -4427,7 +4427,7 @@ SRST The WebSocket protocol is used instead of raw tcp socket. The port acts as a WebSocket server. Client mode is not supported. - ``unix:path[,server=on|off][,wait=on|off][,reconnect=seconds]`` + ``unix:path[,server=on|off][,wait=on|off][,reconnect-ms=milliseconds]`` A unix domain socket is used instead of a tcp socket. The option works the same as if you had specified ``-serial tcp`` except the unix domain socket path is used for connections. diff --git a/tests/qtest/ipmi-bt-test.c b/tests/qtest/ipmi-bt-test.c index 13f7c841f5..637732fd5a 100644 --- a/tests/qtest/ipmi-bt-test.c +++ b/tests/qtest/ipmi-bt-test.c @@ -411,7 +411,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); global_qtest = qtest_initf( - " -chardev socket,id=ipmi0,host=127.0.0.1,port=%d,reconnect=10" + " -chardev socket,id=ipmi0,host=127.0.0.1,port=%d,reconnect-ms=10000" " -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", emu_port); qtest_irq_intercept_in(global_qtest, "ioapic"); diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c index fc7d11961e..317af03817 100644 --- a/tests/qtest/netdev-socket.c +++ b/tests/qtest/netdev-socket.c @@ -204,7 +204,7 @@ static void test_stream_unix_reconnect(void) qts1 = qtest_initf("-nodefaults -M none " "-netdev stream,server=false,id=st0,addr.type=unix," - "addr.path=%s,reconnect=1", path); + "addr.path=%s,reconnect-ms=1000", path); wait_stream_connected(qts0, "st0", &addr); g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_UNIX); diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index d6075001e7..8948fb81ef 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -920,7 +920,7 @@ static void wait_for_rings_started(TestServer *s, size_t count) static inline void test_server_connect(TestServer *server) { - test_server_create_chr(server, ",reconnect=1"); + test_server_create_chr(server, ",reconnect-ms=1000"); } static gboolean diff --git a/tests/unit/test-char.c b/tests/unit/test-char.c index a1c6bb874c..a6e8753e1c 100644 --- a/tests/unit/test-char.c +++ b/tests/unit/test-char.c @@ -1545,18 +1545,18 @@ int main(int argc, char **argv) static CharSocketClientTestConfig client2 ## name = \ { addr, NULL, true, false, char_socket_event }; \ static CharSocketClientTestConfig client3 ## name = \ - { addr, ",reconnect=1", false, false, char_socket_event }; \ + { addr, ",reconnect-ms=1000", false, false, char_socket_event }; \ static CharSocketClientTestConfig client4 ## name = \ - { addr, ",reconnect=1", true, false, char_socket_event }; \ + { addr, ",reconnect-ms=1000", true, false, char_socket_event }; \ static CharSocketClientTestConfig client5 ## name = \ { addr, NULL, false, true, char_socket_event }; \ static CharSocketClientTestConfig client6 ## name = \ { addr, NULL, true, true, char_socket_event }; \ static CharSocketClientTestConfig client7 ## name = \ - { addr, ",reconnect=1", true, false, \ + { addr, ",reconnect-ms=1000", true, false, \ char_socket_event_with_error }; \ static CharSocketClientTestConfig client8 ## name = \ - { addr, ",reconnect=1", false, false, char_socket_event }; \ + { addr, ",reconnect-ms=1000", false, false, char_socket_event };\ g_test_add_data_func("/char/socket/client/mainloop/" # name, \ &client1 ##name, char_socket_client_test); \ g_test_add_data_func("/char/socket/client/wait-conn/" # name, \ |