aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/Makefile.objs 1
-rw-r--r-- net/announce.c 140
-rw-r--r-- net/colo-compare.c 8
-rw-r--r-- net/netmap.c 110
-rw-r--r-- net/trace-events 3
5 files changed, 191 insertions, 71 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs
index df2b409066..8262f033b9 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
common-obj-y += socket.o
common-obj-y += dump.o
common-obj-y += eth.o
+common-obj-y += announce.o
common-obj-$(CONFIG_L2TPV3) += l2tpv3.o
common-obj-$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET_USER)) += vhost-user.o
common-obj-$(call land,$(call lnot,$(CONFIG_VIRTIO_NET)),$(CONFIG_VHOST_NET_USER)) += vhost-user-stub.o
diff --git a/net/announce.c b/net/announce.c
new file mode 100644
index 0000000000..91e9a6e267
--- /dev/null
+++ b/net/announce.c
@@ -0,0 +1,140 @@
+/*
+ * Self-announce
+ * (c) 2017-2019 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "net/announce.h"
+#include "net/net.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-net.h"
+#include "qapi/qapi-commands-net.h"
+#include "trace.h"
+
+/*
+ * Compute the delay until the next announcement, re-arm the timer with
+ * it and return it.
+ *
+ * The delay is params.initial plus params.step multiplied by
+ * (params.rounds - round - 1).  A negative result, or one larger than
+ * params.max, is replaced by params.max.  The delay is added to the
+ * current value of qemu_clock_get_ms() for the timer's clock type.
+ */
+int64_t qemu_announce_timer_step(AnnounceTimer *timer)
+{
+    int64_t delay = timer->params.initial +
+                    (timer->params.rounds - timer->round - 1) *
+                    timer->params.step;
+
+    /* Anything outside [0, params.max] falls back to params.max. */
+    if (!(delay >= 0 && delay <= timer->params.max)) {
+        delay = timer->params.max;
+    }
+
+    timer_mod(timer->tm, qemu_clock_get_ms(timer->type) + delay);
+    return delay;
+}
+
+/*
+ * Stop and free the underlying timer (timer->tm), if there is one, and
+ * leave timer->tm set to NULL.  Does nothing when timer->tm is already
+ * NULL.
+ */
+void qemu_announce_timer_del(AnnounceTimer *timer)
+{
+    if (!timer->tm) {
+        return;
+    }
+
+    timer_del(timer->tm);
+    timer_free(timer->tm);
+    timer->tm = NULL;
+}
+
+/*
+ * Under BQL/main thread
+ * Re-initialise @timer: discard any existing underlying timer, create a
+ * new millisecond timer on clock @type that calls @cb with @opaque, copy
+ * @params into timer->params and start the round counter at
+ * params->rounds.  The new timer is created here but not armed.
+ */
+void qemu_announce_timer_reset(AnnounceTimer *timer,
+                               AnnounceParameters *params,
+                               QEMUClockType type,
+                               QEMUTimerCB *cb,
+                               void *opaque)
+{
+    /*
+     * Holding the BQL means the old timer cannot be in the middle of
+     * firing, so it is safe to tear it down here.
+     */
+    qemu_announce_timer_del(timer);
+
+    timer->type = type;
+    timer->tm = timer_new_ms(type, cb, opaque);
+
+    QAPI_CLONE_MEMBERS(AnnounceParameters, &timer->params, params);
+    timer->round = params->rounds;
+}
+
+/* Ethertype written into the announce frame; defined here only if missing. */
+#ifndef ETH_P_RARP
+#define ETH_P_RARP 0x8035
+#endif
+/* Values for the RARP header built by announce_self_create(): */
+/* hardware address space */
+#define ARP_HTYPE_ETH 0x0001
+/* protocol address space */
+#define ARP_PTYPE_IP 0x0800
+/* opcode */
+#define ARP_OP_REQUEST_REV 0x3
+
+/*
+ * Build a broadcast RARP frame announcing @mac_addr.
+ *
+ * @buf:      output buffer; exactly 60 bytes are written
+ * @mac_addr: 6-byte MAC address, used as the Ethernet source address and
+ *            as both the source and target hardware address
+ *
+ * Returns the frame length, which is always 60.
+ */
+static int announce_self_create(uint8_t *buf,
+                                uint8_t *mac_addr)
+{
+    /*
+     * 16-bit header fields, already in network byte order.  They are
+     * copied in with memcpy() instead of being stored through a
+     * uint16_t pointer: @buf is a plain byte buffer with no alignment
+     * guarantee, so a typed store could be misaligned and would violate
+     * the aliasing rules.
+     */
+    const uint16_t ethertype = htons(ETH_P_RARP);
+    const uint16_t htype = htons(ARP_HTYPE_ETH);
+    const uint16_t ptype = htons(ARP_PTYPE_IP);
+    const uint16_t opcode = htons(ARP_OP_REQUEST_REV);
+
+    /* Ethernet header. */
+    memset(buf, 0xff, 6);                    /* destination MAC addr */
+    memcpy(buf + 6, mac_addr, 6);            /* source MAC addr */
+    memcpy(buf + 12, &ethertype, sizeof(ethertype)); /* ethertype */
+
+    /* RARP header. */
+    memcpy(buf + 14, &htype, sizeof(htype)); /* hardware addr space */
+    memcpy(buf + 16, &ptype, sizeof(ptype)); /* protocol addr space */
+    buf[18] = 6;                             /* hardware addr length (ethernet) */
+    buf[19] = 4;                             /* protocol addr length (IPv4) */
+    memcpy(buf + 20, &opcode, sizeof(opcode)); /* opcode */
+    memcpy(buf + 22, mac_addr, 6);           /* source hw addr */
+    memset(buf + 28, 0x00, 4);               /* source protocol addr */
+    memcpy(buf + 32, mac_addr, 6);           /* target hw addr */
+    memset(buf + 38, 0x00, 4);               /* target protocol addr */
+
+    /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
+    memset(buf + 42, 0x00, 18);
+
+    return 60; /* len (FCS will be added by hardware) */
+}
+
+/*
+ * Per-NIC callback passed to qemu_foreach_nic(): build an announce frame
+ * for @nic's MAC address, send it on the NIC's queue, then call the
+ * NIC's own announce hook if one is set.  @opaque is not used.
+ */
+static void qemu_announce_self_iter(NICState *nic, void *opaque)
+{
+    uint8_t frame[60];
+    int frame_len;
+
+    trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
+
+    frame_len = announce_self_create(frame, nic->conf->macaddr.a);
+    qemu_send_packet_raw(qemu_get_queue(nic), frame, frame_len);
+
+    /* If the NIC provides its own announcement support, use it as well. */
+    if (!nic->ncs->info->announce) {
+        return;
+    }
+    nic->ncs->info->announce(nic->ncs);
+}
+
+/*
+ * Run one announcement round: announce on every NIC, then decrement the
+ * round counter.  If rounds remain the timer is re-armed via
+ * qemu_announce_timer_step(); otherwise it is deleted.
+ * @opaque is the AnnounceTimer.
+ */
+static void qemu_announce_self_once(void *opaque)
+{
+    AnnounceTimer *timer = opaque;
+
+    qemu_foreach_nic(qemu_announce_self_iter, NULL);
+
+    timer->round--;
+    if (timer->round == 0) {
+        /* That was the last round. */
+        qemu_announce_timer_del(timer);
+        return;
+    }
+    qemu_announce_timer_step(timer);
+}
+
+/*
+ * Start a self-announce sequence on @timer using @params.
+ *
+ * The timer is reset onto QEMU_CLOCK_REALTIME with
+ * qemu_announce_self_once() as its callback.  When params->rounds is
+ * non-zero the first round runs immediately; when it is zero the timer
+ * is deleted again and nothing is announced.
+ */
+void qemu_announce_self(AnnounceTimer *timer, AnnounceParameters *params)
+{
+    qemu_announce_timer_reset(timer, params, QEMU_CLOCK_REALTIME,
+                              qemu_announce_self_once, timer);
+
+    if (!params->rounds) {
+        qemu_announce_timer_del(timer);
+        return;
+    }
+    qemu_announce_self_once(timer);
+}
+
+/*
+ * Start a self-announce sequence with @params, using one function-local
+ * static timer that is shared by every call.  @errp is never set here.
+ */
+void qmp_announce_self(AnnounceParameters *params, Error **errp)
+{
+    static AnnounceTimer qmp_timer;
+
+    qemu_announce_self(&qmp_timer, params);
+}
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 3e515f3023..bf10526f05 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -294,14 +294,6 @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
return true;
}
}
- if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
- if (colo_compare_packet_payload(ppkt, spkt,
- ppkt->header_size, spkt->header_size,
- ppkt->payload_size)) {
- *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
- return true;
- }
- }
/* one part of secondary packet payload still need to be compared */
if (!after(ppkt->seq_end, spkt->seq_end)) {
diff --git a/net/netmap.c b/net/netmap.c
index 2d11a8f4be..0cc8f545c5 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -154,65 +154,27 @@ static void netmap_writable(void *opaque)
qemu_flush_queued_packets(&s->nc);
}
-static ssize_t netmap_receive(NetClientState *nc,
- const uint8_t *buf, size_t size)
-{
- NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
- struct netmap_ring *ring = s->tx;
- uint32_t i;
- uint32_t idx;
- uint8_t *dst;
-
- if (unlikely(!ring)) {
- /* Drop. */
- return size;
- }
-
- if (unlikely(size > ring->nr_buf_size)) {
- RD(5, "[netmap_receive] drop packet of size %d > %d\n",
- (int)size, ring->nr_buf_size);
- return size;
- }
-
- if (nm_ring_empty(ring)) {
- /* No available slots in the netmap TX ring. */
- netmap_write_poll(s, true);
- return 0;
- }
-
- i = ring->cur;
- idx = ring->slot[i].buf_idx;
- dst = (uint8_t *)NETMAP_BUF(ring, idx);
-
- ring->slot[i].len = size;
- ring->slot[i].flags = 0;
- pkt_copy(buf, dst, size);
- ring->cur = ring->head = nm_ring_next(ring, i);
- ioctl(s->nmd->fd, NIOCTXSYNC, NULL);
-
- return size;
-}
-
static ssize_t netmap_receive_iov(NetClientState *nc,
const struct iovec *iov, int iovcnt)
{
NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
struct netmap_ring *ring = s->tx;
+ unsigned int tail = ring->tail;
+ ssize_t totlen = 0;
uint32_t last;
uint32_t idx;
uint8_t *dst;
int j;
uint32_t i;
- if (unlikely(!ring)) {
- /* Drop the packet. */
- return iov_size(iov, iovcnt);
- }
-
- last = i = ring->cur;
+ last = i = ring->head;
if (nm_ring_space(ring) < iovcnt) {
- /* Not enough netmap slots. */
+ /* Not enough netmap slots. Tell the kernel that we have seen the new
+ * available slots (so that it notifies us again when it has more
+ * ones), but without publishing any new slots to be processed
+ * (e.g., we don't advance ring->head). */
+ ring->cur = tail;
netmap_write_poll(s, true);
return 0;
}
@@ -222,14 +184,17 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
int offset = 0;
int nm_frag_size;
+ totlen += iov_frag_size;
+
/* Split each iovec fragment over more netmap slots, if
necessary. */
while (iov_frag_size) {
nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);
- if (unlikely(nm_ring_empty(ring))) {
- /* We run out of netmap slots while splitting the
+ if (unlikely(i == tail)) {
+ /* We ran out of netmap slots while splitting the
iovec fragments. */
+ ring->cur = tail;
netmap_write_poll(s, true);
return 0;
}
@@ -251,12 +216,24 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
/* The last slot must not have NS_MOREFRAG set. */
ring->slot[last].flags &= ~NS_MOREFRAG;
- /* Now update ring->cur and ring->head. */
- ring->cur = ring->head = i;
+ /* Now update ring->head and ring->cur to publish the new slots and
+ * the new wakeup point. */
+ ring->head = ring->cur = i;
ioctl(s->nmd->fd, NIOCTXSYNC, NULL);
- return iov_size(iov, iovcnt);
+ return totlen;
+}
+
+/*
+ * Single-buffer receive path: wrap @buf/@size in a one-element iovec and
+ * delegate to netmap_receive_iov(), returning its result.
+ */
+static ssize_t netmap_receive(NetClientState *nc,
+                              const uint8_t *buf, size_t size)
+{
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = size,
+    };
+
+    return netmap_receive_iov(nc, &iov, 1);
+}
/* Complete a previous send (backend --> guest) and enable the
@@ -272,39 +249,46 @@ static void netmap_send(void *opaque)
{
NetmapState *s = opaque;
struct netmap_ring *ring = s->rx;
+ unsigned int tail = ring->tail;
- /* Keep sending while there are available packets into the netmap
+ /* Keep sending while there are available slots in the netmap
RX ring and the forwarding path towards the peer is open. */
- while (!nm_ring_empty(ring)) {
- uint32_t i;
+ while (ring->head != tail) {
+ uint32_t i = ring->head;
uint32_t idx;
bool morefrag;
int iovcnt = 0;
int iovsize;
+ /* Get a (possibly multi-slot) packet. */
do {
- i = ring->cur;
idx = ring->slot[i].buf_idx;
morefrag = (ring->slot[i].flags & NS_MOREFRAG);
- s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx);
+ s->iov[iovcnt].iov_base = (void *)NETMAP_BUF(ring, idx);
s->iov[iovcnt].iov_len = ring->slot[i].len;
iovcnt++;
+ i = nm_ring_next(ring, i);
+ } while (i != tail && morefrag);
- ring->cur = ring->head = nm_ring_next(ring, i);
- } while (!nm_ring_empty(ring) && morefrag);
+ /* Advance ring->cur to tell the kernel that we have seen the slots. */
+ ring->cur = i;
- if (unlikely(nm_ring_empty(ring) && morefrag)) {
- RD(5, "[netmap_send] ran out of slots, with a pending"
- "incomplete packet\n");
+ if (unlikely(morefrag)) {
+ /* This is a truncated packet, so we can stop without releasing the
+ * incomplete slots by updating ring->head. We will hopefully
+ * re-read the complete packet the next time we are called. */
+ break;
}
iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt,
netmap_send_completed);
+ /* Release the slots to the kernel. */
+ ring->head = i;
+
if (iovsize == 0) {
/* The peer does not receive anymore. Packet is queued, stop
- * reading from the backend until netmap_send_completed()
- */
+ * reading from the backend until netmap_send_completed(). */
netmap_read_poll(s, false);
break;
}
diff --git a/net/trace-events b/net/trace-events
index 7b594cfdd2..3417ac05b0 100644
--- a/net/trace-events
+++ b/net/trace-events
@@ -1,5 +1,8 @@
# See docs/devel/tracing.txt for syntax documentation.
+# net/announce.c
+qemu_announce_self_iter(const char *mac) "%s"
+
# net/vhost-user.c
vhost_user_event(const char *chr, int event) "chr: %s got event: %d"