diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/Makefile.objs | 1 | ||||
-rw-r--r-- | net/announce.c | 140 | ||||
-rw-r--r-- | net/colo-compare.c | 8 | ||||
-rw-r--r-- | net/netmap.c | 110 | ||||
-rw-r--r-- | net/trace-events | 3 |
5 files changed, 191 insertions, 71 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs index df2b409066..8262f033b9 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o common-obj-y += socket.o common-obj-y += dump.o common-obj-y += eth.o +common-obj-y += announce.o common-obj-$(CONFIG_L2TPV3) += l2tpv3.o common-obj-$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET_USER)) += vhost-user.o common-obj-$(call land,$(call lnot,$(CONFIG_VIRTIO_NET)),$(CONFIG_VHOST_NET_USER)) += vhost-user-stub.o diff --git a/net/announce.c b/net/announce.c new file mode 100644 index 0000000000..91e9a6e267 --- /dev/null +++ b/net/announce.c @@ -0,0 +1,140 @@ +/* + * Self-announce + * (c) 2017-2019 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "net/announce.h" +#include "net/net.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-net.h" +#include "qapi/qapi-commands-net.h" +#include "trace.h" + +int64_t qemu_announce_timer_step(AnnounceTimer *timer) +{ + int64_t step; + + step = timer->params.initial + + (timer->params.rounds - timer->round - 1) * + timer->params.step; + + if (step < 0 || step > timer->params.max) { + step = timer->params.max; + } + timer_mod(timer->tm, qemu_clock_get_ms(timer->type) + step); + + return step; +} + +void qemu_announce_timer_del(AnnounceTimer *timer) +{ + if (timer->tm) { + timer_del(timer->tm); + timer_free(timer->tm); + timer->tm = NULL; + } +} + +/* + * Under BQL/main thread + * Reset the timer to the given parameters/type/notifier. + */ +void qemu_announce_timer_reset(AnnounceTimer *timer, + AnnounceParameters *params, + QEMUClockType type, + QEMUTimerCB *cb, + void *opaque) +{ + /* + * We're under the BQL, so the current timer can't + * be firing, so we should be able to delete it. + */ + qemu_announce_timer_del(timer); + + QAPI_CLONE_MEMBERS(AnnounceParameters, &timer->params, params); + timer->round = params->rounds; + timer->type = type; + timer->tm = timer_new_ms(type, cb, opaque); +} + +#ifndef ETH_P_RARP +#define ETH_P_RARP 0x8035 +#endif +#define ARP_HTYPE_ETH 0x0001 +#define ARP_PTYPE_IP 0x0800 +#define ARP_OP_REQUEST_REV 0x3 + +static int announce_self_create(uint8_t *buf, + uint8_t *mac_addr) +{ + /* Ethernet header. */ + memset(buf, 0xff, 6); /* destination MAC addr */ + memcpy(buf + 6, mac_addr, 6); /* source MAC addr */ + *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */ + + /* RARP header. */ + *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */ + *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */ + *(buf + 18) = 6; /* hardware addr length (ethernet) */ + *(buf + 19) = 4; /* protocol addr length (IPv4) */ + *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */ + memcpy(buf + 22, mac_addr, 6); /* source hw addr */ + memset(buf + 28, 0x00, 4); /* source protocol addr */ + memcpy(buf + 32, mac_addr, 6); /* target hw addr */ + memset(buf + 38, 0x00, 4); /* target protocol addr */ + + /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */ + memset(buf + 42, 0x00, 18); + + return 60; /* len (FCS will be added by hardware) */ +} + +static void qemu_announce_self_iter(NICState *nic, void *opaque) +{ + uint8_t buf[60]; + int len; + + trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr)); + len = announce_self_create(buf, nic->conf->macaddr.a); + + qemu_send_packet_raw(qemu_get_queue(nic), buf, len); + + /* if the NIC provides it's own announcement support, use it as well */ + if (nic->ncs->info->announce) { + nic->ncs->info->announce(nic->ncs); + } +} +static void qemu_announce_self_once(void *opaque) +{ + AnnounceTimer *timer = (AnnounceTimer *)opaque; + + qemu_foreach_nic(qemu_announce_self_iter, NULL); + + if (--timer->round) { + qemu_announce_timer_step(timer); + } else { + qemu_announce_timer_del(timer); + } +} + +void qemu_announce_self(AnnounceTimer *timer, AnnounceParameters *params) +{ + qemu_announce_timer_reset(timer, params, QEMU_CLOCK_REALTIME, + qemu_announce_self_once, timer); + if (params->rounds) { + qemu_announce_self_once(timer); + } else { + qemu_announce_timer_del(timer); + } +} + +void qmp_announce_self(AnnounceParameters *params, Error **errp) +{ + static AnnounceTimer announce_timer; + qemu_announce_self(&announce_timer, params); +} diff --git a/net/colo-compare.c b/net/colo-compare.c index 3e515f3023..bf10526f05 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -294,14 +294,6 @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt, return true; } } - if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) { - if (colo_compare_packet_payload(ppkt, spkt, - ppkt->header_size, spkt->header_size, - ppkt->payload_size)) { - *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY; - return true; - } - } /* one part of secondary packet payload still need to be compared */ if (!after(ppkt->seq_end, spkt->seq_end)) { diff --git a/net/netmap.c b/net/netmap.c index 2d11a8f4be..0cc8f545c5 100644 --- a/net/netmap.c +++ b/net/netmap.c @@ -154,65 +154,27 @@ static void netmap_writable(void *opaque) qemu_flush_queued_packets(&s->nc); } -static ssize_t netmap_receive(NetClientState *nc, - const uint8_t *buf, size_t size) -{ - NetmapState *s = DO_UPCAST(NetmapState, nc, nc); - struct netmap_ring *ring = s->tx; - uint32_t i; - uint32_t idx; - uint8_t *dst; - - if (unlikely(!ring)) { - /* Drop. */ - return size; - } - - if (unlikely(size > ring->nr_buf_size)) { - RD(5, "[netmap_receive] drop packet of size %d > %d\n", - (int)size, ring->nr_buf_size); - return size; - } - - if (nm_ring_empty(ring)) { - /* No available slots in the netmap TX ring. */ - netmap_write_poll(s, true); - return 0; - } - - i = ring->cur; - idx = ring->slot[i].buf_idx; - dst = (uint8_t *)NETMAP_BUF(ring, idx); - - ring->slot[i].len = size; - ring->slot[i].flags = 0; - pkt_copy(buf, dst, size); - ring->cur = ring->head = nm_ring_next(ring, i); - ioctl(s->nmd->fd, NIOCTXSYNC, NULL); - - return size; -} - static ssize_t netmap_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { NetmapState *s = DO_UPCAST(NetmapState, nc, nc); struct netmap_ring *ring = s->tx; + unsigned int tail = ring->tail; + ssize_t totlen = 0; uint32_t last; uint32_t idx; uint8_t *dst; int j; uint32_t i; - if (unlikely(!ring)) { - /* Drop the packet. */ - return iov_size(iov, iovcnt); - } - - last = i = ring->cur; + last = i = ring->head; if (nm_ring_space(ring) < iovcnt) { - /* Not enough netmap slots. */ + /* Not enough netmap slots. Tell the kernel that we have seen the new + * available slots (so that it notifies us again when it has more + * ones), but without publishing any new slots to be processed + * (e.g., we don't advance ring->head). */ + ring->cur = tail; netmap_write_poll(s, true); return 0; } @@ -222,14 +184,17 @@ static ssize_t netmap_receive_iov(NetClientState *nc, int offset = 0; int nm_frag_size; + totlen += iov_frag_size; + /* Split each iovec fragment over more netmap slots, if necessary. */ while (iov_frag_size) { nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size); - if (unlikely(nm_ring_empty(ring))) { - /* We run out of netmap slots while splitting the + if (unlikely(i == tail)) { + /* We ran out of netmap slots while splitting the iovec fragments. */ + ring->cur = tail; netmap_write_poll(s, true); return 0; } @@ -251,12 +216,24 @@ static ssize_t netmap_receive_iov(NetClientState *nc, /* The last slot must not have NS_MOREFRAG set. */ ring->slot[last].flags &= ~NS_MOREFRAG; - /* Now update ring->cur and ring->head. */ - ring->cur = ring->head = i; + /* Now update ring->head and ring->cur to publish the new slots and + * the new wakeup point. */ + ring->head = ring->cur = i; ioctl(s->nmd->fd, NIOCTXSYNC, NULL); - return iov_size(iov, iovcnt); + return totlen; +} + +static ssize_t netmap_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + struct iovec iov; + + iov.iov_base = (void *)buf; + iov.iov_len = size; + + return netmap_receive_iov(nc, &iov, 1); } /* Complete a previous send (backend --> guest) and enable the @@ -272,39 +249,46 @@ static void netmap_send(void *opaque) { NetmapState *s = opaque; struct netmap_ring *ring = s->rx; + unsigned int tail = ring->tail; - /* Keep sending while there are available packets into the netmap + /* Keep sending while there are available slots in the netmap RX ring and the forwarding path towards the peer is open. */ - while (!nm_ring_empty(ring)) { - uint32_t i; + while (ring->head != tail) { + uint32_t i = ring->head; uint32_t idx; bool morefrag; int iovcnt = 0; int iovsize; + /* Get a (possibly multi-slot) packet. */ do { - i = ring->cur; idx = ring->slot[i].buf_idx; morefrag = (ring->slot[i].flags & NS_MOREFRAG); - s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx); + s->iov[iovcnt].iov_base = (void *)NETMAP_BUF(ring, idx); s->iov[iovcnt].iov_len = ring->slot[i].len; iovcnt++; + i = nm_ring_next(ring, i); + } while (i != tail && morefrag); - ring->cur = ring->head = nm_ring_next(ring, i); - } while (!nm_ring_empty(ring) && morefrag); + /* Advance ring->cur to tell the kernel that we have seen the slots. */ + ring->cur = i; - if (unlikely(nm_ring_empty(ring) && morefrag)) { - RD(5, "[netmap_send] ran out of slots, with a pending" - "incomplete packet\n"); + if (unlikely(morefrag)) { + /* This is a truncated packet, so we can stop without releasing the + * incomplete slots by updating ring->head. We will hopefully + * re-read the complete packet the next time we are called. */ + break; } iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, netmap_send_completed); + /* Release the slots to the kernel. */ + ring->head = i; + if (iovsize == 0) { /* The peer does not receive anymore. Packet is queued, stop - * reading from the backend until netmap_send_completed() - */ + * reading from the backend until netmap_send_completed(). */ netmap_read_poll(s, false); break; } diff --git a/net/trace-events b/net/trace-events index 7b594cfdd2..3417ac05b0 100644 --- a/net/trace-events +++ b/net/trace-events @@ -1,5 +1,8 @@ # See docs/devel/tracing.txt for syntax documentation. +# net/announce.c +qemu_announce_self_iter(const char *mac) "%s" + # net/vhost-user.c vhost_user_event(const char *chr, int event) "chr: %s got event: %d" |