aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-07-16 13:12:05 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-07-16 13:12:05 +0100
commitee5128bb00f90dd301991d80d1db5224ce924c84 (patch)
tree334afadd110954829ca0d7ac09b83c8f3b0d4d9e
parent8746309137ba470d1b2e8f5ce86ac228625db940 (diff)
parenta134321ef676723768973537bb9b49365ae2062e (diff)
Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging
# gpg: Signature made Wed 15 Jul 2020 14:49:07 BST # gpg: using RSA key EF04965B398D6211 # gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal] # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211 * remotes/jasowang/tags/net-pull-request: ftgmac100: fix dblac write test net: detect errors from probing vnet hdr flag for TAP devices net: check if the file descriptor is valid before using it qemu-options.hx: Clean up and fix typo for colo-compare net/colo-compare.c: Expose compare "max_queue_size" to users hw/net: Added CSO for IPv6 virtio-net: fix removal of failover device Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/net/ftgmac100.c14
-rw-r--r--hw/net/net_tx_pkt.c15
-rw-r--r--hw/net/virtio-net.c1
-rw-r--r--include/qemu/sockets.h1
-rw-r--r--net/colo-compare.c43
-rw-r--r--net/socket.c9
-rw-r--r--net/tap-bsd.c2
-rw-r--r--net/tap-linux.c8
-rw-r--r--net/tap-solaris.c2
-rw-r--r--net/tap-stub.c2
-rw-r--r--net/tap.c50
-rw-r--r--net/tap_int.h2
-rw-r--r--qemu-options.hx33
-rw-r--r--util/oslib-posix.c26
-rw-r--r--util/oslib-win32.c57
15 files changed, 188 insertions, 77 deletions
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 043ba61b86..5f4b26fc5f 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -810,16 +810,18 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
s->phydata = value & 0xffff;
break;
case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
- if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+ if (FTGMAC100_DBLAC_TXDES_SIZE(value) < sizeof(FTGMAC100Desc)) {
qemu_log_mask(LOG_GUEST_ERROR,
- "%s: transmit descriptor too small : %d bytes\n",
- __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
+ "%s: transmit descriptor too small: %" PRIx64
+ " bytes\n", __func__,
+ FTGMAC100_DBLAC_TXDES_SIZE(value));
break;
}
- if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+ if (FTGMAC100_DBLAC_RXDES_SIZE(value) < sizeof(FTGMAC100Desc)) {
qemu_log_mask(LOG_GUEST_ERROR,
- "%s: receive descriptor too small : %d bytes\n",
- __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
+ "%s: receive descriptor too small : %" PRIx64
+ " bytes\n", __func__,
+ FTGMAC100_DBLAC_RXDES_SIZE(value));
break;
}
s->dblac = value;
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index 162f802dd7..331c73cfc0 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -468,8 +468,8 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
/* num of iovec without vhdr */
uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1;
uint16_t csl;
- struct ip_header *iphdr;
size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
+ uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);
/* Put zero to checksum field */
iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
@@ -477,9 +477,18 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
/* Calculate L4 TCP/UDP checksum */
csl = pkt->payload_len;
+ csum_cntr = 0;
+ cso = 0;
/* add pseudo header to csum */
- iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
- csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso);
+ if (l3_proto == ETH_P_IP) {
+ csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
+ csl, &cso);
+ } else if (l3_proto == ETH_P_IPV6) {
+ csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
+ csl, pkt->l4proto, &cso);
+ }
/* data checksum */
csum_cntr +=
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 10cc958396..4895af1cbe 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3416,6 +3416,7 @@ static void virtio_net_device_unrealize(DeviceState *dev)
g_free(n->vlans);
if (n->failover) {
+ device_listener_unregister(&n->primary_listener);
g_free(n->primary_device_id);
g_free(n->standby_id);
qobject_unref(n->primary_device_dict);
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 57cd049d6e..7d1f813576 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -18,6 +18,7 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
int socket_set_cork(int fd, int v);
int socket_set_nodelay(int fd);
void qemu_set_block(int fd);
+int qemu_try_set_nonblock(int fd);
void qemu_set_nonblock(int fd);
int socket_set_fast_reuse(int fd);
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 398b7546ff..cc15f23dea 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -59,6 +59,7 @@ static bool colo_compare_active;
static QemuMutex event_mtx;
static QemuCond event_complete_cond;
static int event_unhandled_count;
+static uint32_t max_queue_size;
/*
* + CompareState ++
@@ -222,7 +223,7 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
*/
static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
{
- if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
+ if (g_queue_get_length(queue) <= max_queue_size) {
if (pkt->ip->ip_p == IPPROTO_TCP) {
fill_pkt_tcp_info(pkt, max_ack);
g_queue_insert_sorted(queue,
@@ -1134,6 +1135,37 @@ static void compare_set_expired_scan_cycle(Object *obj, Visitor *v,
s->expired_scan_cycle = value;
}
+static void get_max_queue_size(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ uint32_t value = max_queue_size;
+
+ visit_type_uint32(v, name, &value, errp);
+}
+
+static void set_max_queue_size(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ Error *local_err = NULL;
+ uint32_t value;
+
+ visit_type_uint32(v, name, &value, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ if (!value) {
+ error_setg(&local_err, "Property '%s.%s' requires a positive value",
+ object_get_typename(obj), name);
+ goto out;
+ }
+ max_queue_size = value;
+
+out:
+ error_propagate(errp, local_err);
+}
+
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
{
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
@@ -1251,6 +1283,11 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS;
}
+ if (!max_queue_size) {
+ /* Set default queue size to 1024 */
+ max_queue_size = MAX_QUEUE_SIZE;
+ }
+
if (find_and_check_chardev(&chr, s->pri_indev, errp) ||
!qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) {
return;
@@ -1370,6 +1407,10 @@ static void colo_compare_init(Object *obj)
compare_get_expired_scan_cycle,
compare_set_expired_scan_cycle, NULL, NULL);
+ object_property_add(obj, "max_queue_size", "uint32",
+ get_max_queue_size,
+ set_max_queue_size, NULL, NULL);
+
s->vnet_hdr = false;
object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
compare_set_vnet_hdr);
diff --git a/net/socket.c b/net/socket.c
index c92354049b..2d21fddd9c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -725,13 +725,18 @@ int net_init_socket(const Netdev *netdev, const char *name,
}
if (sock->has_fd) {
- int fd;
+ int fd, ret;
fd = monitor_fd_param(cur_mon, sock->fd, errp);
if (fd == -1) {
return -1;
}
- qemu_set_nonblock(fd);
+ ret = qemu_try_set_nonblock(fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
+ name, fd);
+ return -1;
+ }
if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast,
errp)) {
return -1;
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index a5c3707f80..77aaf674b1 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -211,7 +211,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
{
}
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
{
return 0;
}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index e0dd442ee3..b0635e9e32 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -147,13 +147,15 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
}
}
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
{
struct ifreq ifr;
if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
- error_report("TUNGETIFF ioctl() failed: %s", strerror(errno));
- return 0;
+ /* TUNGETIFF is available since kernel v2.6.27 */
+ error_setg_errno(errp, errno,
+ "Unable to query TUNGETIFF on FD %d", fd);
+ return -1;
}
return ifr.ifr_flags & IFF_VNET_HDR;
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index d03165c57c..0475a58207 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -207,7 +207,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
{
}
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
{
return 0;
}
diff --git a/net/tap-stub.c b/net/tap-stub.c
index a9ab8f8293..de525a2e69 100644
--- a/net/tap-stub.c
+++ b/net/tap-stub.c
@@ -37,7 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
{
}
-int tap_probe_vnet_hdr(int fd)
+int tap_probe_vnet_hdr(int fd, Error **errp)
{
return 0;
}
diff --git a/net/tap.c b/net/tap.c
index f9dcc2ef51..14dc904fca 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -598,7 +598,11 @@ int net_init_bridge(const Netdev *netdev, const char *name,
}
qemu_set_nonblock(fd);
- vnet_hdr = tap_probe_vnet_hdr(fd);
+ vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+ if (vnet_hdr < 0) {
+ close(fd);
+ return -1;
+ }
s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
@@ -690,6 +694,8 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
if (vhostfdname) {
+ int ret;
+
vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
if (vhostfd == -1) {
if (tap->has_vhostforce && tap->vhostforce) {
@@ -699,7 +705,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
return;
}
- qemu_set_nonblock(vhostfd);
+ ret = qemu_try_set_nonblock(vhostfd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
+ name, fd);
+ return;
+ }
} else {
vhostfd = open("/dev/vhost-net", O_RDWR);
if (vhostfd < 0) {
@@ -767,6 +778,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
Error *err = NULL;
const char *vhostfdname;
char ifname[128];
+ int ret = 0;
assert(netdev->type == NET_CLIENT_DRIVER_TAP);
tap = &netdev->u.tap;
@@ -795,9 +807,18 @@ int net_init_tap(const Netdev *netdev, const char *name,
return -1;
}
- qemu_set_nonblock(fd);
+ ret = qemu_try_set_nonblock(fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
+ name, fd);
+ return -1;
+ }
- vnet_hdr = tap_probe_vnet_hdr(fd);
+ vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+ if (vnet_hdr < 0) {
+ close(fd);
+ return -1;
+ }
net_init_tap_one(tap, peer, "tap", name, NULL,
script, downscript,
@@ -810,7 +831,6 @@ int net_init_tap(const Netdev *netdev, const char *name,
char **fds;
char **vhost_fds;
int nfds = 0, nvhosts = 0;
- int ret = 0;
if (tap->has_ifname || tap->has_script || tap->has_downscript ||
tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
@@ -842,11 +862,19 @@ int net_init_tap(const Netdev *netdev, const char *name,
goto free_fail;
}
- qemu_set_nonblock(fd);
+ ret = qemu_try_set_nonblock(fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
+ name, fd);
+ goto free_fail;
+ }
if (i == 0) {
- vnet_hdr = tap_probe_vnet_hdr(fd);
- } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
+ vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+ if (vnet_hdr < 0) {
+ goto free_fail;
+ }
+ } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
error_setg(errp,
"vnet_hdr not consistent across given tap fds");
ret = -1;
@@ -891,7 +919,11 @@ free_fail:
}
qemu_set_nonblock(fd);
- vnet_hdr = tap_probe_vnet_hdr(fd);
+ vnet_hdr = tap_probe_vnet_hdr(fd, errp);
+ if (vnet_hdr < 0) {
+ close(fd);
+ return -1;
+ }
net_init_tap_one(tap, peer, "bridge", name, ifname,
script, downscript, vhostfdname,
diff --git a/net/tap_int.h b/net/tap_int.h
index e3194b23f4..225a49ea48 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -34,7 +34,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
-int tap_probe_vnet_hdr(int fd);
+int tap_probe_vnet_hdr(int fd, Error **errp);
int tap_probe_vnet_hdr_len(int fd, int len);
int tap_probe_has_ufo(int fd);
void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
diff --git a/qemu-options.hx b/qemu-options.hx
index d2c1e95bcf..65147ad971 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4695,24 +4695,25 @@ SRST
stored. The file format is libpcap, so it can be analyzed with
tools such as tcpdump or Wireshark.
- ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}``
- Colo-compare gets packet from primary\_inchardevid and
- secondary\_inchardevid, than compare primary packet with
- secondary packet. If the packets are same, we will output
- primary packet to outdevchardevid, else we will notify
- colo-frame do checkpoint and send primary packet to
- outdevchardevid. In order to improve efficiency, we need to put
- the task of comparison in another thread. If it has the
- vnet\_hdr\_support flag, colo compare will send/recv packet with
- vnet\_hdr\_len. Then compare\_timeout=@var{ms} determines the
- maximum delay colo-compare wait for the packet.
- The expired\_scan\_cycle=@var{ms} to set the period of scanning
- expired primary node network packets.
- If you want to use Xen COLO, will need the notify\_dev to
+ ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}][,max_queue_size=@var{size}]``
+ Colo-compare gets packet from primary\_in chardevid and
+ secondary\_in, then compare whether the payload of primary packet
+ and secondary packet are the same. If same, it will output
+ primary packet to out\_dev, else it will notify COLO-framework to do
+ checkpoint and send primary packet to out\_dev. In order to
+ improve efficiency, we need to put the task of comparison in
+ another iothread. If it has the vnet\_hdr\_support flag,
+ colo compare will send/recv packet with vnet\_hdr\_len.
+ The compare\_timeout=@var{ms} determines the maximum time of the
+ colo-compare hold the packet. The expired\_scan\_cycle=@var{ms}
+ is to set the period of scanning expired primary node network packets.
+ The max\_queue\_size=@var{size} is to set the max compare queue
+ size depend on user environment.
+ If user want to use Xen COLO, need to add the notify\_dev to
notify Xen colo-frame to do checkpoint.
- we must use it with the help of filter-mirror and
- filter-redirector.
+ COLO-compare must be used with the help of filter-mirror,
+ filter-redirector and filter-rewriter.
::
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index e60aea85b6..36bf8593f8 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -260,25 +260,35 @@ void qemu_set_block(int fd)
assert(f != -1);
}
-void qemu_set_nonblock(int fd)
+int qemu_try_set_nonblock(int fd)
{
int f;
f = fcntl(fd, F_GETFL);
- assert(f != -1);
- f = fcntl(fd, F_SETFL, f | O_NONBLOCK);
-#ifdef __OpenBSD__
if (f == -1) {
+ return -errno;
+ }
+ if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) {
+#ifdef __OpenBSD__
/*
* Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on
* memory devices and sets errno to ENODEV.
* It's OK if we fail to set O_NONBLOCK on devices like /dev/null,
* because they will never block anyway.
*/
- assert(errno == ENODEV);
- }
-#else
- assert(f != -1);
+ if (errno == ENODEV) {
+ return 0;
+ }
#endif
+ return -errno;
+ }
+ return 0;
+}
+
+void qemu_set_nonblock(int fd)
+{
+ int f;
+ f = qemu_try_set_nonblock(fd);
+ assert(f == 0);
}
int socket_set_fast_reuse(int fd)
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 3b49d27297..7eedbe5859 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -132,31 +132,6 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
}
#endif /* CONFIG_LOCALTIME_R */
-void qemu_set_block(int fd)
-{
- unsigned long opt = 0;
- WSAEventSelect(fd, NULL, 0);
- ioctlsocket(fd, FIONBIO, &opt);
-}
-
-void qemu_set_nonblock(int fd)
-{
- unsigned long opt = 1;
- ioctlsocket(fd, FIONBIO, &opt);
- qemu_fd_register(fd);
-}
-
-int socket_set_fast_reuse(int fd)
-{
- /* Enabling the reuse of an endpoint that was used by a socket still in
- * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows
- * fast reuse is the default and SO_REUSEADDR does strange things. So we
- * don't have to do anything here. More info can be found at:
- * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */
- return 0;
-}
-
-
static int socket_error(void)
{
switch (WSAGetLastError()) {
@@ -233,6 +208,38 @@ static int socket_error(void)
}
}
+void qemu_set_block(int fd)
+{
+ unsigned long opt = 0;
+ WSAEventSelect(fd, NULL, 0);
+ ioctlsocket(fd, FIONBIO, &opt);
+}
+
+int qemu_try_set_nonblock(int fd)
+{
+ unsigned long opt = 1;
+ if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) {
+ return -socket_error();
+ }
+ qemu_fd_register(fd);
+ return 0;
+}
+
+void qemu_set_nonblock(int fd)
+{
+ (void)qemu_try_set_nonblock(fd);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+ /* Enabling the reuse of an endpoint that was used by a socket still in
+ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows
+ * fast reuse is the default and SO_REUSEADDR does strange things. So we
+ * don't have to do anything here. More info can be found at:
+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */
+ return 0;
+}
+
int inet_aton(const char *cp, struct in_addr *ia)
{
uint32_t addr = inet_addr(cp);