aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2010-09-08 14:26:14 -0500
committerAnthony Liguori <aliguori@us.ibm.com>2010-09-08 14:26:14 -0500
commitdccbe6fbab47c9a2589f436e0592933b47cbe40b (patch)
treee5a851b48a3801dd28282eb17533975cade7ac23
parent630c26893d6dc7713c0fcfc3c09d6bfe536a6ce3 (diff)
parenta697a334b3c4d3250e6420f5d38550ea10eb5319 (diff)
Merge remote branch 'mst/for_anthony' into staging
-rw-r--r--hw/e1000.c11
-rw-r--r--hw/s390-virtio-bus.c8
-rw-r--r--hw/s390-virtio-bus.h1
-rw-r--r--hw/syborg_virtio.c8
-rw-r--r--hw/vhost_net.c24
-rw-r--r--hw/virtio-net.c129
-rw-r--r--hw/virtio-net.h14
-rw-r--r--hw/virtio-pci.c8
-rw-r--r--hw/virtio.h4
-rw-r--r--net/tap-aix.c9
-rw-r--r--net/tap-bsd.c9
-rw-r--r--net/tap-linux.c29
-rw-r--r--net/tap-linux.h8
-rw-r--r--net/tap-solaris.c9
-rw-r--r--net/tap-win32.c9
-rw-r--r--net/tap.c49
-rw-r--r--net/tap.h4
17 files changed, 287 insertions, 46 deletions
diff --git a/hw/e1000.c b/hw/e1000.c
index 80b78bc618..7d7d14002f 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -345,7 +345,7 @@ is_vlan_txd(uint32_t txd_lower)
/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
* fill it in, just pad descriptor length by 4 bytes unless guest
- * told us to trip it off the packet. */
+ * told us to strip it off the packet. */
static inline int
fcs_len(E1000State *s)
{
@@ -690,9 +690,14 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
s->mac_reg[GPRC]++;
s->mac_reg[TPR]++;
- n = s->mac_reg[TORL];
- if ((s->mac_reg[TORL] += size) < n)
+ /* TOR - Total Octets Received:
+ * This register includes bytes received in a packet from the <Destination
+ * Address> field through the <CRC> field, inclusively.
+ */
+ n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
+ if (n < s->mac_reg[TORL])
s->mac_reg[TORH]++;
+ s->mac_reg[TORL] = n;
n = E1000_ICS_RXT0;
if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index fe6884d47d..784dc01b97 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -27,6 +27,7 @@
#include "elf.h"
#include "hw/virtio.h"
#include "hw/virtio-serial.h"
+#include "hw/virtio-net.h"
#include "hw/sysbus.h"
#include "kvm.h"
@@ -110,7 +111,7 @@ static int s390_virtio_net_init(VirtIOS390Device *dev)
{
VirtIODevice *vdev;
- vdev = virtio_net_init((DeviceState *)dev, &dev->nic);
+ vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net);
if (!vdev) {
return -1;
}
@@ -327,6 +328,11 @@ static VirtIOS390DeviceInfo s390_virtio_net = {
.qdev.size = sizeof(VirtIOS390Device),
.qdev.props = (Property[]) {
DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
+ DEFINE_PROP_UINT32("x-txtimer", VirtIOS390Device,
+ net.txtimer, TX_TIMER_INTERVAL),
+ DEFINE_PROP_INT32("x-txburst", VirtIOS390Device,
+ net.txburst, TX_BURST),
+ DEFINE_PROP_STRING("tx", VirtIOS390Device, net.tx),
DEFINE_PROP_END_OF_LIST(),
},
};
diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
index 333fea8963..41558c9c67 100644
--- a/hw/s390-virtio-bus.h
+++ b/hw/s390-virtio-bus.h
@@ -43,6 +43,7 @@ typedef struct VirtIOS390Device {
uint32_t host_features;
/* Max. number of ports we can have for a the virtio-serial device */
uint32_t max_virtserial_ports;
+ virtio_net_conf net;
} VirtIOS390Device;
typedef struct VirtIOS390Bus {
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index abf0370107..4dfd1a87b9 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -68,6 +68,7 @@ typedef struct {
uint32_t id;
NICConf nic;
uint32_t host_features;
+ virtio_net_conf net;
} SyborgVirtIOProxy;
static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset)
@@ -284,7 +285,7 @@ static int syborg_virtio_net_init(SysBusDevice *dev)
VirtIODevice *vdev;
SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev);
- vdev = virtio_net_init(&dev->qdev, &proxy->nic);
+ vdev = virtio_net_init(&dev->qdev, &proxy->nic, &proxy->net);
return syborg_virtio_init(proxy, vdev);
}
@@ -295,6 +296,11 @@ static SysBusDeviceInfo syborg_virtio_net_info = {
.qdev.props = (Property[]) {
DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic),
DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features),
+ DEFINE_PROP_UINT32("x-txtimer", SyborgVirtIOProxy,
+ net.txtimer, TX_TIMER_INTERVAL),
+ DEFINE_PROP_INT32("x-txburst", SyborgVirtIOProxy,
+ net.txburst, TX_BURST),
+ DEFINE_PROP_STRING("tx", SyborgVirtIOProxy, net.tx),
DEFINE_PROP_END_OF_LIST(),
}
};
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index 0c00de272a..4a7b8194f2 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -50,7 +50,9 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) {
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
}
- features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) {
+ features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
return features;
}
@@ -63,6 +65,9 @@ void vhost_net_ack_features(struct vhost_net *net, unsigned features)
if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
}
+ if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+ net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
}
static int vhost_net_get_fd(VLANClientState *backend)
@@ -97,6 +102,10 @@ struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd)
if (r < 0) {
goto fail;
}
+ if (!tap_has_vnet_hdr_len(backend,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+ net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
if (~net->dev.features & net->dev.backend_features) {
fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
(uint64_t)(~net->dev.features & net->dev.backend_features));
@@ -117,6 +126,10 @@ int vhost_net_start(struct vhost_net *net,
{
struct vhost_vring_file file = { };
int r;
+ if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+ tap_set_vnet_hdr_len(net->vc,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf));
+ }
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
@@ -144,6 +157,9 @@ fail:
}
net->vc->info->poll(net->vc, true);
vhost_dev_stop(&net->dev, dev);
+ if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+ tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+ }
return r;
}
@@ -158,11 +174,17 @@ void vhost_net_stop(struct vhost_net *net,
}
net->vc->info->poll(net->vc, true);
vhost_dev_stop(&net->dev, dev);
+ if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+ tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+ }
}
void vhost_net_cleanup(struct vhost_net *net)
{
vhost_dev_cleanup(&net->dev);
+ if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+ tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
+ }
qemu_free(net);
}
#else
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 075f72df2d..0a9cae22c4 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -36,7 +36,10 @@ typedef struct VirtIONet
VirtQueue *ctrl_vq;
NICState *nic;
QEMUTimer *tx_timer;
- int tx_timer_active;
+ QEMUBH *tx_bh;
+ uint32_t tx_timeout;
+ int32_t tx_burst;
+ int tx_waiting;
uint32_t has_vnet_hdr;
uint8_t has_ufo;
struct {
@@ -619,7 +622,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_
return size;
}
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
{
@@ -635,16 +638,18 @@ static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
}
/* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
VirtQueueElement elem;
+ int32_t num_packets = 0;
- if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
+ if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ return num_packets;
+ }
if (n->async_tx.elem.out_num) {
virtio_queue_set_notification(n->tx_vq, 0);
- return;
+ return num_packets;
}
while (virtqueue_pop(vq, &elem)) {
@@ -681,38 +686,55 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
virtio_queue_set_notification(n->tx_vq, 0);
n->async_tx.elem = elem;
n->async_tx.len = len;
- return;
+ return -EBUSY;
}
len += ret;
virtqueue_push(vq, &elem, len);
virtio_notify(&n->vdev, vq);
+
+ if (++num_packets >= n->tx_burst) {
+ break;
+ }
}
+ return num_packets;
}
-static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIONet *n = to_virtio_net(vdev);
- if (n->tx_timer_active) {
+ if (n->tx_waiting) {
virtio_queue_set_notification(vq, 1);
qemu_del_timer(n->tx_timer);
- n->tx_timer_active = 0;
+ n->tx_waiting = 0;
virtio_net_flush_tx(n, vq);
} else {
qemu_mod_timer(n->tx_timer,
- qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
- n->tx_timer_active = 1;
+ qemu_get_clock(vm_clock) + n->tx_timeout);
+ n->tx_waiting = 1;
virtio_queue_set_notification(vq, 0);
}
}
+static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+
+ if (unlikely(n->tx_waiting)) {
+ return;
+ }
+ virtio_queue_set_notification(vq, 0);
+ qemu_bh_schedule(n->tx_bh);
+ n->tx_waiting = 1;
+}
+
static void virtio_net_tx_timer(void *opaque)
{
VirtIONet *n = opaque;
- n->tx_timer_active = 0;
+ n->tx_waiting = 0;
/* Just in case the driver is not ready on more */
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
@@ -722,6 +744,41 @@ static void virtio_net_tx_timer(void *opaque)
virtio_net_flush_tx(n, n->tx_vq);
}
+static void virtio_net_tx_bh(void *opaque)
+{
+ VirtIONet *n = opaque;
+ int32_t ret;
+
+ n->tx_waiting = 0;
+
+ /* Just in case the driver is not ready on more */
+ if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
+ return;
+
+ ret = virtio_net_flush_tx(n, n->tx_vq);
+ if (ret == -EBUSY) {
+ return; /* Notification re-enable handled by tx_complete */
+ }
+
+ /* If we flush a full burst of packets, assume there are
+ * more coming and immediately reschedule */
+ if (ret >= n->tx_burst) {
+ qemu_bh_schedule(n->tx_bh);
+ n->tx_waiting = 1;
+ return;
+ }
+
+ /* If less than a full burst, re-enable notification and flush
+ * anything that may have come in while we weren't looking. If
+ * we find something, assume the guest is still active and reschedule */
+ virtio_queue_set_notification(n->tx_vq, 1);
+ if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
+ virtio_queue_set_notification(n->tx_vq, 0);
+ qemu_bh_schedule(n->tx_bh);
+ n->tx_waiting = 1;
+ }
+}
+
static void virtio_net_save(QEMUFile *f, void *opaque)
{
VirtIONet *n = opaque;
@@ -735,7 +792,7 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
virtio_save(&n->vdev, f);
qemu_put_buffer(f, n->mac, ETH_ALEN);
- qemu_put_be32(f, n->tx_timer_active);
+ qemu_put_be32(f, n->tx_waiting);
qemu_put_be32(f, n->mergeable_rx_bufs);
qemu_put_be16(f, n->status);
qemu_put_byte(f, n->promisc);
@@ -764,7 +821,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
virtio_load(&n->vdev, f);
qemu_get_buffer(f, n->mac, ETH_ALEN);
- n->tx_timer_active = qemu_get_be32(f);
+ n->tx_waiting = qemu_get_be32(f);
n->mergeable_rx_bufs = qemu_get_be32(f);
if (version_id >= 3)
@@ -840,9 +897,13 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
}
n->mac_table.first_multi = i;
- if (n->tx_timer_active) {
- qemu_mod_timer(n->tx_timer,
- qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
+ if (n->tx_waiting) {
+ if (n->tx_timer) {
+ qemu_mod_timer(n->tx_timer,
+ qemu_get_clock(vm_clock) + n->tx_timeout);
+ } else {
+ qemu_bh_schedule(n->tx_bh);
+ }
}
return 0;
}
@@ -903,7 +964,8 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason)
virtio_net_set_status(&n->vdev, n->vdev.status & status);
}
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+ virtio_net_conf *net)
{
VirtIONet *n;
@@ -919,7 +981,22 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
n->vdev.reset = virtio_net_reset;
n->vdev.set_status = virtio_net_set_status;
n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
- n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
+
+ if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
+ fprintf(stderr, "virtio-net: "
+ "Unknown option tx=%s, valid options: \"timer\" \"bh\"\n",
+ net->tx);
+ fprintf(stderr, "Defaulting to \"bh\"\n");
+ }
+
+ if (net->tx && !strcmp(net->tx, "timer")) {
+ n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
+ n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
+ n->tx_timeout = net->txtimer;
+ } else {
+ n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
+ n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
+ }
n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
qemu_macaddr_default_if_unset(&conf->macaddr);
memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
@@ -929,8 +1006,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
- n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
- n->tx_timer_active = 0;
+ n->tx_waiting = 0;
+ n->tx_burst = net->txburst;
n->mergeable_rx_bufs = 0;
n->promisc = 1; /* for compatibility */
@@ -962,8 +1039,12 @@ void virtio_net_exit(VirtIODevice *vdev)
qemu_free(n->mac_table.macs);
qemu_free(n->vlans);
- qemu_del_timer(n->tx_timer);
- qemu_free_timer(n->tx_timer);
+ if (n->tx_timer) {
+ qemu_del_timer(n->tx_timer);
+ qemu_free_timer(n->tx_timer);
+ } else {
+ qemu_bh_delete(n->tx_bh);
+ }
virtio_cleanup(&n->vdev);
qemu_del_vlan_client(&n->nic->nc);
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 235f1a9fa8..8af9a1ce55 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -49,6 +49,20 @@
#define TX_TIMER_INTERVAL 150000 /* 150 us */
+/* Limit the number of packets that can be sent via a single flush
+ * of the TX queue. This gives us a guaranteed exit condition and
+ * ensures fairness in the io path. 256 conveniently matches the
+ * length of the TX queue and shows a good balance of performance
+ * and latency. */
+#define TX_BURST 256
+
+typedef struct virtio_net_conf
+{
+ uint32_t txtimer;
+ int32_t txburst;
+ char *tx;
+} virtio_net_conf;
+
/* Maximum packet size we can receive from tap device: header + 64k */
#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 6e8f88a141..86e6b0a561 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -107,6 +107,7 @@ typedef struct {
#endif
/* Max. number of ports we can have for a the virtio-serial device */
uint32_t max_virtserial_ports;
+ virtio_net_conf net;
} VirtIOPCIProxy;
/* virtio device */
@@ -613,7 +614,7 @@ static int virtio_net_init_pci(PCIDevice *pci_dev)
VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
VirtIODevice *vdev;
- vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic);
+ vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, &proxy->net);
vdev->nvectors = proxy->nvectors;
virtio_init_pci(proxy, vdev,
@@ -690,6 +691,11 @@ static PCIDeviceInfo virtio_info[] = {
DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
+ DEFINE_PROP_UINT32("x-txtimer", VirtIOPCIProxy,
+ net.txtimer, TX_TIMER_INTERVAL),
+ DEFINE_PROP_INT32("x-txburst", VirtIOPCIProxy,
+ net.txburst, TX_BURST),
+ DEFINE_PROP_STRING("tx", VirtIOPCIProxy, net.tx),
DEFINE_PROP_END_OF_LIST(),
},
.qdev.reset = virtio_pci_reset,
diff --git a/hw/virtio.h b/hw/virtio.h
index 5836ab61e7..a60d7355de 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -185,7 +185,9 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
/* Base devices. */
VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf);
+struct virtio_net_conf;
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+ struct virtio_net_conf *net);
VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports);
VirtIODevice *virtio_balloon_init(DeviceState *dev);
#ifdef CONFIG_LINUX
diff --git a/net/tap-aix.c b/net/tap-aix.c
index 4bc9f2f6d2..e19aaba110 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -46,6 +46,15 @@ int tap_probe_has_ufo(int fd)
return 0;
}
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+ return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
void tap_fd_set_offload(int fd, int csum, int tso4,
int tso6, int ecn, int ufo)
{
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 3773d5da5e..3513075337 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -116,6 +116,15 @@ int tap_probe_has_ufo(int fd)
return 0;
}
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+ return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
void tap_fd_set_offload(int fd, int csum, int tso4,
int tso6, int ecn, int ufo)
{
diff --git a/net/tap-linux.c b/net/tap-linux.c
index c92983cb53..f7aa9041b8 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -129,6 +129,35 @@ int tap_probe_has_ufo(int fd)
return 1;
}
+/* Verify that we can assign given length */
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+ int orig;
+ if (ioctl(fd, TUNGETVNETHDRSZ, &orig) == -1) {
+ return 0;
+ }
+ if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) {
+ return 0;
+ }
+ /* Restore original length: we can't handle failure. */
+ if (ioctl(fd, TUNSETVNETHDRSZ, &orig) == -1) {
+ fprintf(stderr, "TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n",
+ strerror(errno));
+ assert(0);
+ return -errno;
+ }
+ return 1;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+ if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) {
+ fprintf(stderr, "TUNSETVNETHDRSZ ioctl() failed: %s. Exiting.\n",
+ strerror(errno));
+ assert(0);
+ }
+}
+
void tap_fd_set_offload(int fd, int csum, int tso4,
int tso6, int ecn, int ufo)
{
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 9f943589bf..659e98122b 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -27,6 +27,8 @@
#define TUNSETOFFLOAD _IOW('T', 208, unsigned int)
#define TUNGETIFF _IOR('T', 210, unsigned int)
#define TUNSETSNDBUF _IOW('T', 212, int)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
#endif
@@ -52,4 +54,10 @@ struct virtio_net_hdr
uint16_t csum_offset;
};
+struct virtio_net_hdr_mrg_rxbuf
+{
+ struct virtio_net_hdr hdr;
+ uint16_t num_buffers; /* Number of merged rx buffers */
+};
+
#endif /* QEMU_TAP_H */
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 4ac61d2ad6..c216d28267 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -212,6 +212,15 @@ int tap_probe_has_ufo(int fd)
return 0;
}
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+ return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
void tap_fd_set_offload(int fd, int csum, int tso4,
int tso6, int ecn, int ufo)
{
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 74348daf5a..9fe4fcd5f4 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -728,6 +728,15 @@ int tap_has_vnet_hdr(VLANClientState *vc)
return 0;
}
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+ return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
{
}
diff --git a/net/tap.c b/net/tap.c
index 0147dabc15..4afb314fde 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -57,10 +57,10 @@ typedef struct TAPState {
uint8_t buf[TAP_BUFSIZE];
unsigned int read_poll : 1;
unsigned int write_poll : 1;
- unsigned int has_vnet_hdr : 1;
unsigned int using_vnet_hdr : 1;
unsigned int has_ufo: 1;
VHostNetState *vhost_net;
+ unsigned host_vnet_hdr_len;
} TAPState;
static int launch_script(const char *setup_script, const char *ifname, int fd);
@@ -121,11 +121,11 @@ static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
TAPState *s = DO_UPCAST(TAPState, nc, nc);
const struct iovec *iovp = iov;
struct iovec iov_copy[iovcnt + 1];
- struct virtio_net_hdr hdr = { 0, };
+ struct virtio_net_hdr_mrg_rxbuf hdr = { };
- if (s->has_vnet_hdr && !s->using_vnet_hdr) {
+ if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
iov_copy[0].iov_base = &hdr;
- iov_copy[0].iov_len = sizeof(hdr);
+ iov_copy[0].iov_len = s->host_vnet_hdr_len;
memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
iovp = iov_copy;
iovcnt++;
@@ -139,11 +139,11 @@ static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t s
TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[2];
int iovcnt = 0;
- struct virtio_net_hdr hdr = { 0, };
+ struct virtio_net_hdr_mrg_rxbuf hdr = { };
- if (s->has_vnet_hdr) {
+ if (s->host_vnet_hdr_len) {
iov[iovcnt].iov_base = &hdr;
- iov[iovcnt].iov_len = sizeof(hdr);
+ iov[iovcnt].iov_len = s->host_vnet_hdr_len;
iovcnt++;
}
@@ -159,7 +159,7 @@ static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[1];
- if (s->has_vnet_hdr && !s->using_vnet_hdr) {
+ if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
return tap_receive_raw(nc, buf, size);
}
@@ -202,9 +202,9 @@ static void tap_send(void *opaque)
break;
}
- if (s->has_vnet_hdr && !s->using_vnet_hdr) {
- buf += sizeof(struct virtio_net_hdr);
- size -= sizeof(struct virtio_net_hdr);
+ if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+ buf += s->host_vnet_hdr_len;
+ size -= s->host_vnet_hdr_len;
}
size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
@@ -229,7 +229,28 @@ int tap_has_vnet_hdr(VLANClientState *nc)
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- return s->has_vnet_hdr;
+ return !!s->host_vnet_hdr_len;
+}
+
+int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
+{
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+ assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+
+ return tap_probe_vnet_hdr_len(s->fd, len);
+}
+
+void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
+{
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+ assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+ assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
+ len == sizeof(struct virtio_net_hdr));
+
+ tap_fd_set_vnet_hdr_len(s->fd, len);
+ s->host_vnet_hdr_len = len;
}
void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
@@ -239,7 +260,7 @@ void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
using_vnet_hdr = using_vnet_hdr != 0;
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- assert(s->has_vnet_hdr == using_vnet_hdr);
+ assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
s->using_vnet_hdr = using_vnet_hdr;
}
@@ -310,7 +331,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan,
s = DO_UPCAST(TAPState, nc, nc);
s->fd = fd;
- s->has_vnet_hdr = vnet_hdr != 0;
+ s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
s->using_vnet_hdr = 0;
s->has_ufo = tap_probe_has_ufo(s->fd);
tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
diff --git a/net/tap.h b/net/tap.h
index b8cec8320c..e44bd2bc5f 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -40,13 +40,17 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
int tap_has_ufo(VLANClientState *vc);
int tap_has_vnet_hdr(VLANClientState *vc);
+int tap_has_vnet_hdr_len(VLANClientState *vc, int len);
void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr);
void tap_set_offload(VLANClientState *vc, int csum, int tso4, int tso6, int ecn, int ufo);
+void tap_set_vnet_hdr_len(VLANClientState *vc, int len);
int tap_set_sndbuf(int fd, QemuOpts *opts);
int tap_probe_vnet_hdr(int fd);
+int tap_probe_vnet_hdr_len(int fd, int len);
int tap_probe_has_ufo(int fd);
void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
+void tap_fd_set_vnet_hdr_len(int fd, int len);
int tap_get_fd(VLANClientState *vc);