diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2014-06-29 12:45:54 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2014-06-29 12:45:54 +0100 |
commit | 76fbbec9315f8712d921411fbb4dad1fbcf1b151 (patch) | |
tree | 30b76243b8a5c98b64840c40e65597f94c08bfa0 | |
parent | 2d80e0ab4b4326e340df7e0bcc687b2bc63c68d8 (diff) | |
parent | 5e80dd223ded254b1802bdd6417ef118456eadce (diff) |
Merge remote-tracking branch 'remotes/stefanha/tags/net-pull-request' into staging
Net patches
# gpg: Signature made Fri 27 Jun 2014 14:10:57 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>"
* remotes/stefanha/tags/net-pull-request:
hw/net/eepro100: Implement read-only bits in MDI registers
net: move queue number into NICPeers
net: L2TPv3 transport
qemu-bridge-helper: Fix fd leak in main()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | hw/core/qdev-properties-system.c | 3 | ||||
-rw-r--r-- | hw/net/eepro100.c | 4 | ||||
-rw-r--r-- | hw/net/virtio-net.c | 2 | ||||
-rw-r--r-- | include/net/net.h | 2 | ||||
-rw-r--r-- | net/Makefile.objs | 1 | ||||
-rw-r--r-- | net/clients.h | 2 | ||||
-rw-r--r-- | net/l2tpv3.c | 757 | ||||
-rw-r--r-- | net/net.c | 10 | ||||
-rw-r--r-- | qapi-schema.json | 60 | ||||
-rw-r--r-- | qemu-bridge-helper.c | 9 | ||||
-rw-r--r-- | qemu-options.hx | 82 |
11 files changed, 922 insertions, 10 deletions
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index de433b2e38..52c2f8afa5 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = { static int parse_netdev(DeviceState *dev, const char *str, void **ptr) { NICPeers *peers_ptr = (NICPeers *)ptr; - NICConf *conf = container_of(peers_ptr, NICConf, peers); NetClientState **ncs = peers_ptr->ncs; NetClientState *peers[MAX_QUEUE_NUM]; int queues, i = 0; @@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr) ncs[i]->queue_index = i; } - conf->queues = queues; + peers_ptr->queues = queues; return 0; diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index aaa3ff2360..3263e3fe90 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1217,7 +1217,6 @@ static void eepro100_write_mdi(EEPRO100State *s) break; case 1: /* Status Register */ missing("not writable"); - data = s->mdimem[reg]; break; case 2: /* PHY Identification Register (Word 1) */ case 3: /* PHY Identification Register (Word 2) */ @@ -1230,7 +1229,8 @@ static void eepro100_write_mdi(EEPRO100State *s) default: missing("not implemented"); } - s->mdimem[reg] = data; + s->mdimem[reg] &= eepro100_mdi_mask[reg]; + s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg]; } else if (opcode == 2) { /* MDI read */ switch (reg) { diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 00b5e07ddd..e51d753cee 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1542,7 +1542,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); - n->max_queues = MAX(n->nic_conf.queues, 1); + n->max_queues = MAX(n->nic_conf.peers.queues, 1); n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues); n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); n->curr_queues = 1; diff --git a/include/net/net.h b/include/net/net.h index 
8b189da5ee..ed594f9bdb 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -24,13 +24,13 @@ struct MACAddr { typedef struct NICPeers { NetClientState *ncs[MAX_QUEUE_NUM]; + int32_t queues; } NICPeers; typedef struct NICConf { MACAddr macaddr; NICPeers peers; int32_t bootindex; - int32_t queues; } NICConf; #define DEFINE_NIC_PROPERTIES(_state, _conf) \ diff --git a/net/Makefile.objs b/net/Makefile.objs index 301f6b6b51..a06ba59dad 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o common-obj-y += socket.o common-obj-y += dump.o common-obj-y += eth.o +common-obj-$(CONFIG_LINUX) += l2tpv3.o common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o common-obj-$(CONFIG_LINUX) += tap-linux.o common-obj-$(CONFIG_WIN32) += tap-win32.o diff --git a/net/clients.h b/net/clients.h index 7f3d4ae9f3..2e8fedad8d 100644 --- a/net/clients.h +++ b/net/clients.h @@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, int net_init_bridge(const NetClientOptions *opts, const char *name, NetClientState *peer); +int net_init_l2tpv3(const NetClientOptions *opts, const char *name, + NetClientState *peer); #ifdef CONFIG_VDE int net_init_vde(const NetClientOptions *opts, const char *name, NetClientState *peer); diff --git a/net/l2tpv3.c b/net/l2tpv3.c new file mode 100644 index 0000000000..528d95b641 --- /dev/null +++ b/net/l2tpv3.c @@ -0,0 +1,757 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2012-2014 Cisco Systems + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <linux/ip.h> +#include <netdb.h> +#include "config-host.h" +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" + + +/* The buffer size needs to be investigated for optimum numbers and + * optimum means of paging in on different systems. 
This size is + * chosen to be sufficient to accommodate one packet with some headers + */ + +#define BUFFER_ALIGN sysconf(_SC_PAGESIZE) +#define BUFFER_SIZE 2048 +#define IOVSIZE 2 +#define MAX_L2TPV3_MSGCNT 64 +#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE) + +/* Header set to 0x30000 signifies a data packet */ + +#define L2TPV3_DATA_PACKET 0x30000 + +/* IANA-assigned IP protocol ID for L2TPv3 */ + +#ifndef IPPROTO_L2TP +#define IPPROTO_L2TP 0x73 +#endif + +typedef struct NetL2TPV3State { + NetClientState nc; + int fd; + + /* + * these are used for xmit - that happens packet a time + * and for first sign of life packet (easier to parse that once) + */ + + uint8_t *header_buf; + struct iovec *vec; + + /* + * these are used for receive - try to "eat" up to 32 packets at a time + */ + + struct mmsghdr *msgvec; + + /* + * peer address + */ + + struct sockaddr_storage *dgram_dst; + uint32_t dst_size; + + /* + * L2TPv3 parameters + */ + + uint64_t rx_cookie; + uint64_t tx_cookie; + uint32_t rx_session; + uint32_t tx_session; + uint32_t header_size; + uint32_t counter; + + /* + * DOS avoidance in error handling + */ + + bool header_mismatch; + + /* + * Ring buffer handling + */ + + int queue_head; + int queue_tail; + int queue_depth; + + /* + * Precomputed offsets + */ + + uint32_t offset; + uint32_t cookie_offset; + uint32_t counter_offset; + uint32_t session_offset; + + /* Poll Control */ + + bool read_poll; + bool write_poll; + + /* Flags */ + + bool ipv6; + bool udp; + bool has_counter; + bool pin_counter; + bool cookie; + bool cookie_is_64; + +} NetL2TPV3State; + +static int l2tpv3_can_send(void *opaque); +static void net_l2tpv3_send(void *opaque); +static void l2tpv3_writable(void *opaque); + +static void l2tpv3_update_fd_handler(NetL2TPV3State *s) +{ + qemu_set_fd_handler2(s->fd, + s->read_poll ? l2tpv3_can_send : NULL, + s->read_poll ? net_l2tpv3_send : NULL, + s->write_poll ? 
l2tpv3_writable : NULL, + s); +} + +static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable) +{ + if (s->read_poll != enable) { + s->read_poll = enable; + l2tpv3_update_fd_handler(s); + } +} + +static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable) +{ + if (s->write_poll != enable) { + s->write_poll = enable; + l2tpv3_update_fd_handler(s); + } +} + +static void l2tpv3_writable(void *opaque) +{ + NetL2TPV3State *s = opaque; + l2tpv3_write_poll(s, false); + qemu_flush_queued_packets(&s->nc); +} + +static int l2tpv3_can_send(void *opaque) +{ + NetL2TPV3State *s = opaque; + + return qemu_can_send_packet(&s->nc); +} + +static void l2tpv3_send_completed(NetClientState *nc, ssize_t len) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + l2tpv3_read_poll(s, true); +} + +static void l2tpv3_poll(NetClientState *nc, bool enable) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + l2tpv3_write_poll(s, enable); + l2tpv3_read_poll(s, enable); +} + +static void l2tpv3_form_header(NetL2TPV3State *s) +{ + uint32_t *counter; + + if (s->udp) { + stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET); + } + stl_be_p( + (uint32_t *) (s->header_buf + s->session_offset), + s->tx_session + ); + if (s->cookie) { + if (s->cookie_is_64) { + stq_be_p( + (uint64_t *)(s->header_buf + s->cookie_offset), + s->tx_cookie + ); + } else { + stl_be_p( + (uint32_t *) (s->header_buf + s->cookie_offset), + s->tx_cookie + ); + } + } + if (s->has_counter) { + counter = (uint32_t *)(s->header_buf + s->counter_offset); + if (s->pin_counter) { + *counter = 0; + } else { + stl_be_p(counter, ++s->counter); + } + } +} + +static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc, + const struct iovec *iov, + int iovcnt) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + + struct msghdr message; + int ret; + + if (iovcnt > MAX_L2TPV3_IOVCNT - 1) { + error_report( + "iovec too long %d > %d, change l2tpv3.h", + iovcnt, MAX_L2TPV3_IOVCNT + ); + return -1; + } + 
l2tpv3_form_header(s); + memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec)); + s->vec->iov_base = s->header_buf; + s->vec->iov_len = s->offset; + message.msg_name = s->dgram_dst; + message.msg_namelen = s->dst_size; + message.msg_iov = s->vec; + message.msg_iovlen = iovcnt + 1; + message.msg_control = NULL; + message.msg_controllen = 0; + message.msg_flags = 0; + do { + ret = sendmsg(s->fd, &message, 0); + } while ((ret == -1) && (errno == EINTR)); + if (ret > 0) { + ret -= s->offset; + } else if (ret == 0) { + /* belt and braces - should not occur on DGRAM + * we should get an error and never a 0 send + */ + ret = iov_size(iov, iovcnt); + } else { + /* signal upper layer that socket buffer is full */ + ret = -errno; + if (ret == -EAGAIN || ret == -ENOBUFS) { + l2tpv3_write_poll(s, true); + ret = 0; + } + } + return ret; +} + +static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc, + const uint8_t *buf, + size_t size) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + + struct iovec *vec; + struct msghdr message; + ssize_t ret = 0; + + l2tpv3_form_header(s); + vec = s->vec; + vec->iov_base = s->header_buf; + vec->iov_len = s->offset; + vec++; + vec->iov_base = (void *) buf; + vec->iov_len = size; + message.msg_name = s->dgram_dst; + message.msg_namelen = s->dst_size; + message.msg_iov = s->vec; + message.msg_iovlen = 2; + message.msg_control = NULL; + message.msg_controllen = 0; + message.msg_flags = 0; + do { + ret = sendmsg(s->fd, &message, 0); + } while ((ret == -1) && (errno == EINTR)); + if (ret > 0) { + ret -= s->offset; + } else if (ret == 0) { + /* belt and braces - should not occur on DGRAM + * we should get an error and never a 0 send + */ + ret = size; + } else { + ret = -errno; + if (ret == -EAGAIN || ret == -ENOBUFS) { + /* signal upper layer that socket buffer is full */ + l2tpv3_write_poll(s, true); + ret = 0; + } + } + return ret; +} + +static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) +{ + + uint32_t *session; + 
uint64_t cookie; + + if ((!s->udp) && (!s->ipv6)) { + buf += sizeof(struct iphdr) /* fix for ipv4 raw */; + } + + /* we do not do a strict check for "data" packets as per + * the RFC spec because the pure IP spec does not have + * that anyway. + */ + + if (s->cookie) { + if (s->cookie_is_64) { + cookie = ldq_be_p(buf + s->cookie_offset); + } else { + cookie = ldl_be_p(buf + s->cookie_offset); + } + if (cookie != s->rx_cookie) { + if (!s->header_mismatch) { + error_report("unknown cookie id"); + } + return -1; + } + } + session = (uint32_t *) (buf + s->session_offset); + if (ldl_be_p(session) != s->rx_session) { + if (!s->header_mismatch) { + error_report("session mismatch"); + } + return -1; + } + return 0; +} + +static void net_l2tpv3_process_queue(NetL2TPV3State *s) +{ + int size = 0; + struct iovec *vec; + bool bad_read; + int data_size; + struct mmsghdr *msgvec; + + /* go into ring mode only if there is a "pending" tail */ + if (s->queue_depth > 0) { + do { + msgvec = s->msgvec + s->queue_tail; + if (msgvec->msg_len > 0) { + data_size = msgvec->msg_len - s->header_size; + vec = msgvec->msg_hdr.msg_iov; + if ((data_size > 0) && + (l2tpv3_verify_header(s, vec->iov_base) == 0)) { + vec++; + /* Use the legacy delivery for now, we will + * switch to using our own ring as a queueing mechanism + * at a later date + */ + size = qemu_send_packet_async( + &s->nc, + vec->iov_base, + data_size, + l2tpv3_send_completed + ); + if (size == 0) { + l2tpv3_read_poll(s, false); + } + bad_read = false; + } else { + bad_read = true; + if (!s->header_mismatch) { + /* report error only once */ + error_report("l2tpv3 header verification failed"); + s->header_mismatch = true; + } + } + } else { + bad_read = true; + } + s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT; + s->queue_depth--; + } while ( + (s->queue_depth > 0) && + qemu_can_send_packet(&s->nc) && + ((size > 0) || bad_read) + ); + } +} + +static void net_l2tpv3_send(void *opaque) +{ + NetL2TPV3State *s = opaque; + int 
target_count, count; + struct mmsghdr *msgvec; + + /* go into ring mode only if there is a "pending" tail */ + + if (s->queue_depth) { + + /* The ring buffer we use has variable intake + * count of how much we can read varies - adjust accordingly + */ + + target_count = MAX_L2TPV3_MSGCNT - s->queue_depth; + + /* Ensure we do not overrun the ring when we have + * a lot of enqueued packets + */ + + if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) { + target_count = MAX_L2TPV3_MSGCNT - s->queue_head; + } + } else { + + /* we do not have any pending packets - we can use + * the whole message vector linearly instead of using + * it as a ring + */ + + s->queue_head = 0; + s->queue_tail = 0; + target_count = MAX_L2TPV3_MSGCNT; + } + + msgvec = s->msgvec + s->queue_head; + if (target_count > 0) { + do { + count = recvmmsg( + s->fd, + msgvec, + target_count, MSG_DONTWAIT, NULL); + } while ((count == -1) && (errno == EINTR)); + if (count < 0) { + /* Recv error - we still need to flush packets here, + * (re)set queue head to current position + */ + count = 0; + } + s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT; + s->queue_depth += count; + } + net_l2tpv3_process_queue(s); +} + +static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount) +{ + int i, j; + struct iovec *iov; + struct mmsghdr *cleanup = msgvec; + if (cleanup) { + for (i = 0; i < count; i++) { + if (cleanup->msg_hdr.msg_iov) { + iov = cleanup->msg_hdr.msg_iov; + for (j = 0; j < iovcount; j++) { + g_free(iov->iov_base); + iov++; + } + g_free(cleanup->msg_hdr.msg_iov); + } + cleanup++; + } + g_free(msgvec); + } +} + +static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count) +{ + int i; + struct iovec *iov; + struct mmsghdr *msgvec, *result; + + msgvec = g_malloc(sizeof(struct mmsghdr) * count); + result = msgvec; + for (i = 0; i < count ; i++) { + msgvec->msg_hdr.msg_name = NULL; + msgvec->msg_hdr.msg_namelen = 0; + iov = g_malloc(sizeof(struct iovec) * IOVSIZE); + 
msgvec->msg_hdr.msg_iov = iov; + iov->iov_base = g_malloc(s->header_size); + iov->iov_len = s->header_size; + iov++ ; + iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE); + iov->iov_len = BUFFER_SIZE; + msgvec->msg_hdr.msg_iovlen = 2; + msgvec->msg_hdr.msg_control = NULL; + msgvec->msg_hdr.msg_controllen = 0; + msgvec->msg_hdr.msg_flags = 0; + msgvec++; + } + return result; +} + +static void net_l2tpv3_cleanup(NetClientState *nc) +{ + NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc); + qemu_purge_queued_packets(nc); + l2tpv3_read_poll(s, false); + l2tpv3_write_poll(s, false); + if (s->fd > 0) { + close(s->fd); + } + destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE); + g_free(s->vec); + g_free(s->header_buf); + g_free(s->dgram_dst); +} + +static NetClientInfo net_l2tpv3_info = { + .type = NET_CLIENT_OPTIONS_KIND_L2TPV3, + .size = sizeof(NetL2TPV3State), + .receive = net_l2tpv3_receive_dgram, + .receive_iov = net_l2tpv3_receive_dgram_iov, + .poll = l2tpv3_poll, + .cleanup = net_l2tpv3_cleanup, +}; + +int net_init_l2tpv3(const NetClientOptions *opts, + const char *name, + NetClientState *peer) +{ + + + const NetdevL2TPv3Options *l2tpv3; + NetL2TPV3State *s; + NetClientState *nc; + int fd = -1, gairet; + struct addrinfo hints; + struct addrinfo *result = NULL; + char *srcport, *dstport; + + nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name); + + s = DO_UPCAST(NetL2TPV3State, nc, nc); + + s->queue_head = 0; + s->queue_tail = 0; + s->header_mismatch = false; + + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); + l2tpv3 = opts->l2tpv3; + + if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { + s->ipv6 = l2tpv3->ipv6; + } else { + s->ipv6 = false; + } + + if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) { + error_report("l2tpv3_open : offset must be less than 256 bytes"); + goto outerr; + } + + if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) { + if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) { + s->cookie = true; + } else { + goto outerr; + } 
+ } else { + s->cookie = false; + } + + if (l2tpv3->has_cookie64 || l2tpv3->cookie64) { + s->cookie_is_64 = true; + } else { + s->cookie_is_64 = false; + } + + if (l2tpv3->has_udp && l2tpv3->udp) { + s->udp = true; + if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) { + error_report("l2tpv3_open : need both src and dst port for udp"); + goto outerr; + } else { + srcport = l2tpv3->srcport; + dstport = l2tpv3->dstport; + } + } else { + s->udp = false; + srcport = NULL; + dstport = NULL; + } + + + s->offset = 4; + s->session_offset = 0; + s->cookie_offset = 4; + s->counter_offset = 4; + + s->tx_session = l2tpv3->txsession; + if (l2tpv3->has_rxsession) { + s->rx_session = l2tpv3->rxsession; + } else { + s->rx_session = s->tx_session; + } + + if (s->cookie) { + s->rx_cookie = l2tpv3->rxcookie; + s->tx_cookie = l2tpv3->txcookie; + if (s->cookie_is_64 == true) { + /* 64 bit cookie */ + s->offset += 8; + s->counter_offset += 8; + } else { + /* 32 bit cookie */ + s->offset += 4; + s->counter_offset += 4; + } + } + + memset(&hints, 0, sizeof(hints)); + + if (s->ipv6) { + hints.ai_family = AF_INET6; + } else { + hints.ai_family = AF_INET; + } + if (s->udp) { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = 0; + s->offset += 4; + s->counter_offset += 4; + s->session_offset += 4; + s->cookie_offset += 4; + } else { + hints.ai_socktype = SOCK_RAW; + hints.ai_protocol = IPPROTO_L2TP; + } + + gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result); + + if ((gairet != 0) || (result == NULL)) { + error_report( + "l2tpv3_open : could not resolve src, errno = %s", + gai_strerror(gairet) + ); + goto outerr; + } + fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol); + if (fd == -1) { + fd = -errno; + error_report("l2tpv3_open : socket creation failed, errno = %d", -fd); + freeaddrinfo(result); + goto outerr; + } + if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) { + error_report("l2tpv3_open : could not bind socket err=%i", errno); 
+ goto outerr; + } + if (result) { + freeaddrinfo(result); + } + + memset(&hints, 0, sizeof(hints)); + + if (s->ipv6) { + hints.ai_family = AF_INET6; + } else { + hints.ai_family = AF_INET; + } + if (s->udp) { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = 0; + } else { + hints.ai_socktype = SOCK_RAW; + hints.ai_protocol = IPPROTO_L2TP; + } + + result = NULL; + gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result); + if ((gairet != 0) || (result == NULL)) { + error_report( + "l2tpv3_open : could not resolve dst, error = %s", + gai_strerror(gairet) + ); + goto outerr; + } + + s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage)); + memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage)); + memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); + s->dst_size = result->ai_addrlen; + + if (result) { + freeaddrinfo(result); + } + + if (l2tpv3->has_counter && l2tpv3->counter) { + s->has_counter = true; + s->offset += 4; + } else { + s->has_counter = false; + } + + if (l2tpv3->has_pincounter && l2tpv3->pincounter) { + s->has_counter = true; /* pin counter implies that there is counter */ + s->pin_counter = true; + } else { + s->pin_counter = false; + } + + if (l2tpv3->has_offset) { + /* extra offset */ + s->offset += l2tpv3->offset; + } + + if ((s->ipv6) || (s->udp)) { + s->header_size = s->offset; + } else { + s->header_size = s->offset + sizeof(struct iphdr); + } + + s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT); + s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT); + s->header_buf = g_malloc(s->header_size); + + qemu_set_nonblock(fd); + + s->fd = fd; + s->counter = 0; + + l2tpv3_read_poll(s, true); + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "l2tpv3: connected"); + return 0; +outerr: + qemu_del_net_client(nc); + if (fd > 0) { + close(fd); + } + if (result) { + freeaddrinfo(result); + } + return -1; +} + @@ -250,7 +250,7 @@ NICState *qemu_new_nic(NetClientInfo *info, { NetClientState **peers = conf->peers.ncs; 
NICState *nic; - int i, queues = MAX(1, conf->queues); + int i, queues = MAX(1, conf->peers.queues); assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC); assert(info->size >= sizeof(NICState)); @@ -363,7 +363,7 @@ void qemu_del_net_client(NetClientState *nc) void qemu_del_nic(NICState *nic) { - int i, queues = MAX(nic->conf->queues, 1); + int i, queues = MAX(nic->conf->peers.queues, 1); /* If this is a peer NIC and peer has already been deleted, free it now. */ if (nic->peer_deleted) { @@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( #ifdef CONFIG_VHOST_NET_USED [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user, #endif +#ifdef CONFIG_LINUX + [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3, +#endif }; @@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) #ifdef CONFIG_VHOST_NET_USED case NET_CLIENT_OPTIONS_KIND_VHOST_USER: #endif +#ifdef CONFIG_LINUX + case NET_CLIENT_OPTIONS_KIND_L2TPV3: +#endif break; default: diff --git a/qapi-schema.json b/qapi-schema.json index e7727a1153..0000372deb 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2040,6 +2040,62 @@ '*udp': 'str' } } ## +# @NetdevL2TPv3Options +# +# Connect the VLAN to Ethernet over L2TPv3 Static tunnel +# +# @src: source address +# +# @dst: destination address +# +# @srcport: #optional source port - mandatory for udp, optional for ip +# +# @dstport: #optional destination port - mandatory for udp, optional for ip +# +# @ipv6: #optional - force the use of ipv6 +# +# @udp: #optional - use the udp version of l2tpv3 encapsulation +# +# @cookie64: #optional - use 64 bit cookies +# +# @counter: #optional have sequence counter +# +# @pincounter: #optional pin sequence counter to zero - +# workaround for buggy implementations or +# networks with packet reorder +# +# @txcookie: #optional 32 or 64 bit transmit cookie +# +# @rxcookie: #optional 32 or 64 bit receive cookie +# +# @txsession: 32 bit transmit session +# +# 
@rxsession: #optional 32 bit receive session - if not specified +# set to the same value as transmit +# +# @offset: #optional additional offset - allows the insertion of +# additional application-specific data before the packet payload +# +# Since 2.1 +## +{ 'type': 'NetdevL2TPv3Options', + 'data': { + 'src': 'str', + 'dst': 'str', + '*srcport': 'str', + '*dstport': 'str', + '*ipv6': 'bool', + '*udp': 'bool', + '*cookie64': 'bool', + '*counter': 'bool', + '*pincounter': 'bool', + '*txcookie': 'uint64', + '*rxcookie': 'uint64', + 'txsession': 'uint32', + '*rxsession': 'uint32', + '*offset': 'uint32' } } + +## # @NetdevVdeOptions # # Connect the VLAN to a vde switch running on the host. @@ -2150,6 +2206,9 @@ # A discriminated record of network device traits. # # Since 1.2 +# +# 'l2tpv3' - since 2.1 +# ## { 'union': 'NetClientOptions', 'data': { @@ -2157,6 +2216,7 @@ 'nic': 'NetLegacyNicOptions', 'user': 'NetdevUserOptions', 'tap': 'NetdevTapOptions', + 'l2tpv3': 'NetdevL2TPv3Options', 'socket': 'NetdevSocketOptions', 'vde': 'NetdevVdeOptions', 'dump': 'NetdevDumpOptions', diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c index 6a0974eb48..36eb3bcfd6 100644 --- a/qemu-bridge-helper.c +++ b/qemu-bridge-helper.c @@ -229,7 +229,7 @@ int main(int argc, char **argv) unsigned long ifargs[4]; #endif int ifindex; - int fd, ctlfd, unixfd = -1; + int fd = -1, ctlfd = -1, unixfd = -1; int use_vnet = 0; int mtu; const char *bridge = NULL; @@ -436,7 +436,12 @@ int main(int argc, char **argv) /* profit! 
*/ cleanup: - + if (fd >= 0) { + close(fd); + } + if (ctlfd >= 0) { + close(ctlfd); + } while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&acl_list, entry); g_free(acl_rule); diff --git a/qemu-options.hx b/qemu-options.hx index ff76ad4830..9e5468678b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, " (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n" " (default=" DEFAULT_BRIDGE_HELPER ")\n" #endif +#ifdef __linux__ + "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n" + " connect the VLAN to an Ethernet over L2TPv3 pseudowire\n" + " Linux kernel 3.3+ as well as most routers can talk\n" + " L2TPv3. This transport allows to connect a VM to a VM,\n" + " VM to a router and even VM to Host. It is a nearly-universal\n" + " standard (RFC3391). 
Note - this implementation uses static\n" + " pre-configured tunnels (same as the Linux kernel).\n" + " use 'src=' to specify source address\n" + " use 'dst=' to specify destination address\n" + " use 'udp=on' to specify udp encapsulation\n" + " use 'dstport=' to specify destination udp port\n" + " use 'dstport=' to specify destination udp port\n" + " use 'ipv6=on' to force v6\n" + " L2TPv3 uses cookies to prevent misconfiguration as\n" + " well as a weak security measure\n" + " use 'rxcookie=0x012345678' to specify a rxcookie\n" + " use 'txcookie=0x012345678' to specify a txcookie\n" + " use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n" + " use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n" + " use 'pincounter=on' to work around broken counter handling in peer\n" + " use 'offset=X' to add an extra offset between header and data\n" +#endif "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n" " connect the vlan 'n' to another VLAN using a socket connection\n" "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n" @@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \ -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 @end example +@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] +@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] +Connect VLAN @var{n} to L2TPv3 pseudowire. L2TPv3 (RFC3931) is a popular +protocol to transport Ethernet (and other Layer 2) data frames between +two systems.
It is present in routers, firewalls and the Linux kernel +(from version 3.3 onwards). + +This transport allows a VM to communicate to another VM, router or firewall directly. + +@item src=@var{srcaddr} + source address (mandatory) +@item dst=@var{dstaddr} + destination address (mandatory) +@item udp + select udp encapsulation (default is ip). +@item srcport=@var{srcport} + source udp port. +@item dstport=@var{dstport} + destination udp port. +@item ipv6 + force v6, otherwise defaults to v4. +@item rxcookie=@var{rxcookie} +@item txcookie=@var{txcookie} + Cookies are a weak form of security in the l2tpv3 specification. +Their function is mostly to prevent misconfiguration. By default they are 32 +bit. +@item cookie64 + Set cookie size to 64 bit instead of the default 32 +@item counter=off + Force a 'cut-down' L2TPv3 with no counter as in +draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00 +@item pincounter=on + Work around broken counter handling in peer. This may also help on +networks which have packet reorder. 
+@item offset=@var{offset} + Add an extra offset between header and data + +For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan +on the remote Linux host 1.2.3.4: +@example +# Setup tunnel on linux host using udp as encapsulation +# on 1.2.3.4 +ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \ + encap udp udp_sport 16384 udp_dport 16384 +ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \ + 0xFFFFFFFF peer_session_id 0xFFFFFFFF +ifconfig vmtunnel0 mtu 1500 +ifconfig vmtunnel0 up +brctl addif br-lan vmtunnel0 + + +# on 4.3.2.1 +# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter + +qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.3.2.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter + + +@end example + @item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and |