diff options
Diffstat (limited to 'slirp/src')
57 files changed, 15468 insertions, 0 deletions
diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c new file mode 100644 index 0000000000..58eafdcfd8 --- /dev/null +++ b/slirp/src/arp_table.c @@ -0,0 +1,91 @@ +/* + * ARP table + * + * Copyright (c) 2011 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "slirp.h" + +#include <string.h> + +void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) +{ + const uint32_t broadcast_addr = + ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; + ArpTable *arptbl = &slirp->arp_table; + int i; + + DEBUG_CALL("arp_table_add"); + DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){.s_addr = ip_addr})); + DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", + ethaddr[0], ethaddr[1], ethaddr[2], + ethaddr[3], ethaddr[4], ethaddr[5]); + + if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { + /* Do not register broadcast addresses */ + return; + } + + /* Search for an entry */ + for (i = 0; i < ARP_TABLE_SIZE; i++) { + if (arptbl->table[i].ar_sip == ip_addr) { + /* Update the entry */ + memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); + return; + } + } + + /* No entry found, create a new one */ + arptbl->table[arptbl->next_victim].ar_sip = ip_addr; + memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); + arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; +} + +bool arp_table_search(Slirp *slirp, uint32_t ip_addr, + uint8_t out_ethaddr[ETH_ALEN]) +{ + const uint32_t broadcast_addr = + ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; + ArpTable *arptbl = &slirp->arp_table; + int i; + + DEBUG_CALL("arp_table_search"); + DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){.s_addr = ip_addr})); + + /* If broadcast address */ + if (ip_addr == 0xffffffff || ip_addr == broadcast_addr) { + /* return Ethernet broadcast address */ + memset(out_ethaddr, 0xff, ETH_ALEN); + return 1; + } + + for (i = 0; i < ARP_TABLE_SIZE; i++) { + if (arptbl->table[i].ar_sip == ip_addr) { + memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); + DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", + out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], + out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); + return 1; + } + } + + return 0; +} diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c new file mode 100644 index 0000000000..d396849a05 --- /dev/null +++ b/slirp/src/bootp.c @@ -0,0 +1,370 @@ +/* + * QEMU BOOTP/DHCP server + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "slirp.h" + +#if defined(_WIN32) +/* Windows ntohl() returns an u_long value. + * Add a type cast to match the format strings. */ +# define ntohl(n) ((uint32_t)ntohl(n)) +#endif + +/* XXX: only DHCP is supported */ + +#define LEASE_TIME (24 * 3600) + +static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; + +#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) + +static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, + const uint8_t *macaddr) +{ + BOOTPClient *bc; + int i; + + for(i = 0; i < NB_BOOTP_CLIENTS; i++) { + bc = &slirp->bootp_clients[i]; + if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) + goto found; + } + return NULL; + found: + bc = &slirp->bootp_clients[i]; + bc->allocated = 1; + paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); + return bc; +} + +static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, + const uint8_t *macaddr) +{ + uint32_t req_addr = ntohl(paddr->s_addr); + uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); + BOOTPClient *bc; + + if (req_addr >= dhcp_addr && + req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { + bc = &slirp->bootp_clients[req_addr - dhcp_addr]; + if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { + bc->allocated = 1; + return bc; + } + } + return NULL; +} + +static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, + const uint8_t *macaddr) +{ + BOOTPClient *bc; + int i; + + for(i = 0; i < NB_BOOTP_CLIENTS; i++) { + if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) + goto found; + } + return NULL; + found: + bc = &slirp->bootp_clients[i]; + bc->allocated = 1; + paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); + return bc; +} + +static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, + struct in_addr *preq_addr) +{ + const uint8_t *p, *p_end; + int len, tag; + + *pmsg_type = 0; + preq_addr->s_addr = htonl(0L); + + p = bp->bp_vend; + p_end = p + DHCP_OPT_LEN; + if (memcmp(p, rfc1533_cookie, 4) != 0) + return; + p += 4; + while (p < p_end) { + tag = p[0]; + if (tag == RFC1533_PAD) { + p++; + } else if (tag == RFC1533_END) { + break; + } else { + p++; + if (p >= p_end) + break; + len = *p++; + if (p + len > p_end) { + break; + } + DPRINTF("dhcp: tag=%d len=%d\n", tag, len); + + switch(tag) { + case RFC2132_MSG_TYPE: + if (len >= 1) + *pmsg_type = p[0]; + break; + case RFC2132_REQ_ADDR: + if (len >= 4) { + memcpy(&(preq_addr->s_addr), p, 4); + } + break; + default: + break; + } + p += len; + } + } + if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && + bp->bp_ciaddr.s_addr) { + memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); + } +} + +static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) +{ + BOOTPClient *bc = NULL; + struct mbuf *m; + struct bootp_t *rbp; + struct sockaddr_in saddr, daddr; + struct in_addr preq_addr; + int dhcp_msg_type, val; + uint8_t *q; + uint8_t *end; + uint8_t client_ethaddr[ETH_ALEN]; + + /* extract exact DHCP msg type */ + dhcp_decode(bp, &dhcp_msg_type, &preq_addr); + DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); + if (preq_addr.s_addr != htonl(0L)) + DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); + else { + DPRINTF("\n"); + } + + if (dhcp_msg_type == 0) + dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ + + if (dhcp_msg_type != DHCPDISCOVER && + dhcp_msg_type != DHCPREQUEST) + return; + + /* Get client's hardware address from bootp request */ + memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); + + m = m_get(slirp); + if (!m) { + return; + } + m->m_data += IF_MAXLINKHDR; + rbp = (struct bootp_t *)m->m_data; + m->m_data += sizeof(struct udpiphdr); + memset(rbp, 0, sizeof(struct bootp_t)); + + if (dhcp_msg_type == DHCPDISCOVER) { + if (preq_addr.s_addr != htonl(0L)) { + bc = request_addr(slirp, &preq_addr, client_ethaddr); + if (bc) { + daddr.sin_addr = preq_addr; + } + } + if (!bc) { + new_addr: + bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); + if (!bc) { + DPRINTF("no address left\n"); + return; + } + } + memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); + } else if (preq_addr.s_addr != htonl(0L)) { + bc = request_addr(slirp, &preq_addr, client_ethaddr); + if (bc) { + daddr.sin_addr = preq_addr; + memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); + } else { + /* DHCPNAKs should be sent to broadcast */ + daddr.sin_addr.s_addr = 0xffffffff; + } + } else { + bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); + if (!bc) { + /* if never assigned, behaves as if it was already + assigned (windows fix because it remembers its address) */ + goto new_addr; + } + } + + /* Update ARP table for this IP address */ + arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); + + saddr.sin_addr = slirp->vhost_addr; + saddr.sin_port = htons(BOOTP_SERVER); + + daddr.sin_port = htons(BOOTP_CLIENT); + + rbp->bp_op = BOOTP_REPLY; + rbp->bp_xid = bp->bp_xid; + rbp->bp_htype = 1; + rbp->bp_hlen = 6; + memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); + + rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ + rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ + + q = rbp->bp_vend; + end = (uint8_t *)&rbp[1]; + memcpy(q, rfc1533_cookie, 4); + q += 4; + + if (bc) { + DPRINTF("%s addr=%08" PRIx32 "\n", + (dhcp_msg_type == DHCPDISCOVER) ? "offered" : "ack'ed", + ntohl(daddr.sin_addr.s_addr)); + + if (dhcp_msg_type == DHCPDISCOVER) { + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPOFFER; + } else /* DHCPREQUEST */ { + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPACK; + } + + if (slirp->bootp_filename) + snprintf((char *)rbp->bp_file, sizeof(rbp->bp_file), "%s", + slirp->bootp_filename); + + *q++ = RFC2132_SRV_ID; + *q++ = 4; + memcpy(q, &saddr.sin_addr, 4); + q += 4; + + *q++ = RFC1533_NETMASK; + *q++ = 4; + memcpy(q, &slirp->vnetwork_mask, 4); + q += 4; + + if (!slirp->restricted) { + *q++ = RFC1533_GATEWAY; + *q++ = 4; + memcpy(q, &saddr.sin_addr, 4); + q += 4; + + *q++ = RFC1533_DNS; + *q++ = 4; + memcpy(q, &slirp->vnameserver_addr, 4); + q += 4; + } + + *q++ = RFC2132_LEASE_TIME; + *q++ = 4; + val = htonl(LEASE_TIME); + memcpy(q, &val, 4); + q += 4; + + if (*slirp->client_hostname) { + val = strlen(slirp->client_hostname); + if (q + val + 2 >= end) { + g_warning("DHCP packet size exceeded, " + "omitting host name option."); + } else { + *q++ = RFC1533_HOSTNAME; + *q++ = val; + memcpy(q, slirp->client_hostname, val); + q += val; + } + } + + if (slirp->vdomainname) { + val = strlen(slirp->vdomainname); + if (q + val + 2 >= end) { + g_warning("DHCP packet size exceeded, " + "omitting domain name option."); + } else { + *q++ = RFC1533_DOMAINNAME; + *q++ = val; + memcpy(q, slirp->vdomainname, val); + q += val; + } + } + + if (slirp->tftp_server_name) { + val = strlen(slirp->tftp_server_name); + if (q + val + 2 >= end) { + g_warning("DHCP packet size exceeded, " + "omitting tftp-server-name option."); + } else { + *q++ = RFC2132_TFTP_SERVER_NAME; + *q++ = val; + memcpy(q, slirp->tftp_server_name, val); + q += val; + } + } + + if (slirp->vdnssearch) { + val = slirp->vdnssearch_len; + if (q + val >= end) { + g_warning("DHCP packet size exceeded, " + "omitting domain-search option."); + } else { + memcpy(q, slirp->vdnssearch, val); + q += val; + } + } + } else { + static const char nak_msg[] = "requested address not available"; + + DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); + + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPNAK; + + *q++ = RFC2132_MESSAGE; + *q++ = sizeof(nak_msg) - 1; + memcpy(q, nak_msg, sizeof(nak_msg) - 1); + q += sizeof(nak_msg) - 1; + } + assert(q < end); + *q = RFC1533_END; + + daddr.sin_addr.s_addr = 0xffffffffu; + + m->m_len = sizeof(struct bootp_t) - + sizeof(struct ip) - sizeof(struct udphdr); + udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); +} + +void bootp_input(struct mbuf *m) +{ + struct bootp_t *bp = mtod(m, struct bootp_t *); + + if (bp->bp_op == BOOTP_REQUEST) { + bootp_reply(m->slirp, bp); + } +} diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h new file mode 100644 index 0000000000..4043489835 --- /dev/null +++ b/slirp/src/bootp.h @@ -0,0 +1,128 @@ +/* bootp/dhcp defines */ + +#ifndef SLIRP_BOOTP_H +#define SLIRP_BOOTP_H + +#define BOOTP_SERVER 67 +#define BOOTP_CLIENT 68 + +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +#define RFC1533_COOKIE 99, 130, 83, 99 +#define RFC1533_PAD 0 +#define RFC1533_NETMASK 1 +#define RFC1533_TIMEOFFSET 2 +#define RFC1533_GATEWAY 3 +#define RFC1533_TIMESERVER 4 +#define RFC1533_IEN116NS 5 +#define RFC1533_DNS 6 +#define RFC1533_LOGSERVER 7 +#define RFC1533_COOKIESERVER 8 +#define RFC1533_LPRSERVER 9 +#define RFC1533_IMPRESSSERVER 10 +#define RFC1533_RESOURCESERVER 11 +#define RFC1533_HOSTNAME 12 +#define RFC1533_BOOTFILESIZE 13 +#define RFC1533_MERITDUMPFILE 14 +#define RFC1533_DOMAINNAME 15 +#define RFC1533_SWAPSERVER 16 +#define RFC1533_ROOTPATH 17 +#define RFC1533_EXTENSIONPATH 18 +#define RFC1533_IPFORWARDING 19 +#define RFC1533_IPSOURCEROUTING 20 +#define RFC1533_IPPOLICYFILTER 21 +#define RFC1533_IPMAXREASSEMBLY 22 +#define RFC1533_IPTTL 23 +#define RFC1533_IPMTU 24 +#define RFC1533_IPMTUPLATEAU 25 +#define RFC1533_INTMTU 26 +#define RFC1533_INTLOCALSUBNETS 27 +#define RFC1533_INTBROADCAST 28 +#define RFC1533_INTICMPDISCOVER 29 +#define RFC1533_INTICMPRESPOND 30 +#define RFC1533_INTROUTEDISCOVER 31 +#define RFC1533_INTROUTESOLICIT 32 +#define RFC1533_INTSTATICROUTES 33 +#define RFC1533_LLTRAILERENCAP 34 +#define RFC1533_LLARPCACHETMO 35 +#define RFC1533_LLETHERNETENCAP 36 +#define RFC1533_TCPTTL 37 +#define RFC1533_TCPKEEPALIVETMO 38 +#define RFC1533_TCPKEEPALIVEGB 39 +#define RFC1533_NISDOMAIN 40 +#define RFC1533_NISSERVER 41 +#define RFC1533_NTPSERVER 42 +#define RFC1533_VENDOR 43 +#define RFC1533_NBNS 44 +#define RFC1533_NBDD 45 +#define RFC1533_NBNT 46 +#define RFC1533_NBSCOPE 47 +#define RFC1533_XFS 48 +#define RFC1533_XDM 49 + +#define RFC2132_REQ_ADDR 50 +#define RFC2132_LEASE_TIME 51 +#define RFC2132_MSG_TYPE 53 +#define RFC2132_SRV_ID 54 +#define RFC2132_PARAM_LIST 55 +#define RFC2132_MESSAGE 56 +#define RFC2132_MAX_SIZE 57 +#define RFC2132_RENEWAL_TIME 58 +#define RFC2132_REBIND_TIME 59 +#define RFC2132_TFTP_SERVER_NAME 66 + +#define DHCPDISCOVER 1 +#define DHCPOFFER 2 +#define DHCPREQUEST 3 +#define DHCPACK 5 +#define DHCPNAK 6 + +#define RFC1533_VENDOR_MAJOR 0 +#define RFC1533_VENDOR_MINOR 0 + +#define RFC1533_VENDOR_MAGIC 128 +#define RFC1533_VENDOR_ADDPARM 129 +#define RFC1533_VENDOR_ETHDEV 130 +#define RFC1533_VENDOR_HOWTO 132 +#define RFC1533_VENDOR_MNUOPTS 160 +#define RFC1533_VENDOR_SELECTION 176 +#define RFC1533_VENDOR_MOTD 184 +#define RFC1533_VENDOR_NUMOFMOTD 8 +#define RFC1533_VENDOR_IMG 192 +#define RFC1533_VENDOR_NUMOFIMG 16 + +#define RFC1533_END 255 +#define BOOTP_VENDOR_LEN 64 +#define DHCP_OPT_LEN 312 + +struct bootp_t { + struct ip ip; + struct udphdr udp; + uint8_t bp_op; + uint8_t bp_htype; + uint8_t bp_hlen; + uint8_t bp_hops; + uint32_t bp_xid; + uint16_t bp_secs; + uint16_t unused; + struct in_addr bp_ciaddr; + struct in_addr bp_yiaddr; + struct in_addr bp_siaddr; + struct in_addr bp_giaddr; + uint8_t bp_hwaddr[16]; + uint8_t bp_sname[64]; + uint8_t bp_file[128]; + uint8_t bp_vend[DHCP_OPT_LEN]; +}; + +typedef struct { + uint16_t allocated; + uint8_t macaddr[6]; +} BOOTPClient; + +#define NB_BOOTP_CLIENTS 16 + +void bootp_input(struct mbuf *m); + +#endif diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c new file mode 100644 index 0000000000..25bfa67348 --- /dev/null +++ b/slirp/src/cksum.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp + */ + +#include "slirp.h" + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + * + * XXX Since we will never span more than 1 mbuf, we can optimise this + */ + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; \ + (void)ADDCARRY(sum);} + +int cksum(struct mbuf *m, int len) +{ + register uint16_t *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union { + uint8_t c[2]; + uint16_t s; + } s_util; + union { + uint16_t s[2]; + uint32_t l; + } l_util; + + if (m->m_len == 0) + goto cont; + w = mtod(m, uint16_t *); + + mlen = m->m_len; + + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (uintptr_t)w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(uint8_t *)w; + w = (uint16_t *)((int8_t *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + goto cont; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + + if (byte_swapped) { + REDUCE; + sum <<= 8; + if (mlen == -1) { + s_util.c[1] = *(uint8_t *)w; + sum += s_util.s; + mlen = 0; + } else + + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(uint8_t *)w; + +cont: + if (len) { + DEBUG_ERROR("cksum: out of data"); + DEBUG_ERROR(" len = %d", len); + } + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} + +int ip6_cksum(struct mbuf *m) +{ + /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum + * separately from the mbuf */ + struct ip6 save_ip, *ip = mtod(m, struct ip6 *); + struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); + int sum; + + save_ip = *ip; + + ih->ih_src = save_ip.ip_src; + ih->ih_dst = save_ip.ip_dst; + ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); + ih->ih_zero_hi = 0; + ih->ih_zero_lo = 0; + ih->ih_nh = save_ip.ip_nh; + + sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + + ntohl(ih->ih_pl)); + + *ip = save_ip; + + return sum; +} diff --git a/slirp/src/debug.h b/slirp/src/debug.h new file mode 100644 index 0000000000..44d922df37 --- /dev/null +++ b/slirp/src/debug.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ + +#define DBG_CALL (1 << 0) +#define DBG_MISC (1 << 1) +#define DBG_ERROR (1 << 2) +#define DBG_TFTP (1 << 3) + +extern int slirp_debug; + +#define DEBUG_CALL(fmt, ...) do { \ + if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ + g_debug(fmt "...", ##__VA_ARGS__); \ + } \ +} while (0) + +#define DEBUG_ARG(fmt, ...) do { \ + if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ + g_debug(" " fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define DEBUG_MISC(fmt, ...) do { \ + if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ + g_debug(fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define DEBUG_ERROR(fmt, ...) do { \ + if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ + g_debug(fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define DEBUG_TFTP(fmt, ...) do { \ + if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ + g_debug(fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#endif /* DEBUG_H_ */ diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c new file mode 100644 index 0000000000..e655c7d5b1 --- /dev/null +++ b/slirp/src/dhcpv6.c @@ -0,0 +1,206 @@ +/* + * SLIRP stateless DHCPv6 + * + * We only support stateless DHCPv6, e.g. for network booting. + * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. + * + * Copyright 2016 Thomas Huth, Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "slirp.h" +#include "dhcpv6.h" + +/* DHCPv6 message types */ +#define MSGTYPE_REPLY 7 +#define MSGTYPE_INFO_REQUEST 11 + +/* DHCPv6 option types */ +#define OPTION_CLIENTID 1 +#define OPTION_IAADDR 5 +#define OPTION_ORO 6 +#define OPTION_DNS_SERVERS 23 +#define OPTION_BOOTFILE_URL 59 + +struct requested_infos { + uint8_t *client_id; + int client_id_len; + bool want_dns; + bool want_boot_url; +}; + +/** + * Analyze the info request message sent by the client to see what data it + * provided and what it wants to have. The information is gathered in the + * "requested_infos" struct. Note that client_id (if provided) points into + * the odata region, thus the caller must keep odata valid as long as it + * needs to access the requested_infos struct. + */ +static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, + struct requested_infos *ri) +{ + int i, req_opt; + + while (olen > 4) { + /* Parse one option */ + int option = odata[0] << 8 | odata[1]; + int len = odata[2] << 8 | odata[3]; + + if (len + 4 > olen) { + slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", slirp->opaque); + return -E2BIG; + } + + switch (option) { + case OPTION_IAADDR: + /* According to RFC3315, we must discard requests with IA option */ + return -EINVAL; + case OPTION_CLIENTID: + if (len > 256) { + /* Avoid very long IDs which could cause problems later */ + return -E2BIG; + } + ri->client_id = odata + 4; + ri->client_id_len = len; + break; + case OPTION_ORO: /* Option request option */ + if (len & 1) { + return -EINVAL; + } + /* Check which options the client wants to have */ + for (i = 0; i < len; i += 2) { + req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; + switch (req_opt) { + case OPTION_DNS_SERVERS: + ri->want_dns = true; + break; + case OPTION_BOOTFILE_URL: + ri->want_boot_url = true; + break; + default: + DEBUG_MISC("dhcpv6: Unsupported option request %d", + req_opt); + } + } + break; + default: + DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", + option, len); + } + + odata += len + 4; + olen -= len + 4; + } + + return 0; +} + + +/** + * Handle information request messages + */ +static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, + uint32_t xid, uint8_t *odata, int olen) +{ + struct requested_infos ri = { NULL }; + struct sockaddr_in6 sa6, da6; + struct mbuf *m; + uint8_t *resp; + + if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { + return; + } + + m = m_get(slirp); + if (!m) { + return; + } + memset(m->m_data, 0, m->m_size); + m->m_data += IF_MAXLINKHDR; + resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); + + /* Fill in response */ + *resp++ = MSGTYPE_REPLY; + *resp++ = (uint8_t)(xid >> 16); + *resp++ = (uint8_t)(xid >> 8); + *resp++ = (uint8_t)xid; + + if (ri.client_id) { + *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ + *resp++ = OPTION_CLIENTID; /* option-code low byte */ + *resp++ = ri.client_id_len >> 8; /* option-len high byte */ + *resp++ = ri.client_id_len; /* option-len low byte */ + memcpy(resp, ri.client_id, ri.client_id_len); + resp += ri.client_id_len; + } + if (ri.want_dns) { + *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ + *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ + *resp++ = 0; /* option-len high byte */ + *resp++ = 16; /* option-len low byte */ + memcpy(resp, &slirp->vnameserver_addr6, 16); + resp += 16; + } + if (ri.want_boot_url) { + uint8_t *sa = slirp->vhost_addr6.s6_addr; + int slen, smaxlen; + + *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ + *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ + smaxlen = (uint8_t *)m->m_data + IF_MTU - (resp + 2); + slen = snprintf((char *)resp + 2, smaxlen, + "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" + "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", + sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], + sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], + sa[15], slirp->bootp_filename); + slen = MIN(slen, smaxlen); + *resp++ = slen >> 8; /* option-len high byte */ + *resp++ = slen; /* option-len low byte */ + resp += slen; + } + + sa6.sin6_addr = slirp->vhost_addr6; + sa6.sin6_port = DHCPV6_SERVER_PORT; + da6.sin6_addr = srcsas->sin6_addr; + da6.sin6_port = srcsas->sin6_port; + m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); + m->m_len = resp - (uint8_t *)m->m_data; + udp6_output(NULL, m, &sa6, &da6); +} + +/** + * Handle DHCPv6 messages sent by the client + */ +void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) +{ + uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); + int data_len = m->m_len - sizeof(struct udphdr); + uint32_t xid; + + if (data_len < 4) { + return; + } + + xid = ntohl(*(uint32_t *)data) & 0xffffff; + + switch (data[0]) { + case MSGTYPE_INFO_REQUEST: + dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); + break; + default: + DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); + } +} diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h new file mode 100644 index 0000000000..3373f6cb89 --- /dev/null +++ b/slirp/src/dhcpv6.h @@ -0,0 +1,25 @@ +/* + * Definitions and prototypes for SLIRP stateless DHCPv6 + * + * Copyright 2016 Thomas Huth, Red Hat Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 + * or later. See the COPYING file in the top-level directory. + */ +#ifndef SLIRP_DHCPV6_H +#define SLIRP_DHCPV6_H + +#define DHCPV6_SERVER_PORT 547 + +#define ALLDHCP_MULTICAST { .s6_addr = \ + { 0xff, 0x02, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x01, 0x00, 0x02 } } + +#define in6_dhcp_multicast(a)\ + in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) + +void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); + +#endif diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c new file mode 100644 index 0000000000..c459cece8d --- /dev/null +++ b/slirp/src/dnssearch.c @@ -0,0 +1,310 @@ +/* + * Domain search option for DHCP (RFC 3397) + * + * Copyright (c) 2012 Klaus Stengel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "slirp.h" + +static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; +static const uint8_t MAX_OPT_LEN = 255; +static const uint8_t OPT_HEADER_LEN = 2; +static const uint8_t REFERENCE_LEN = 2; + +struct compact_domain; + +typedef struct compact_domain { + struct compact_domain *self; + struct compact_domain *refdom; + uint8_t *labels; + size_t len; + size_t common_octets; +} CompactDomain; + +static size_t +domain_suffix_diffoff(const CompactDomain *a, const CompactDomain *b) +{ + size_t la = a->len, lb = b->len; + uint8_t *da = a->labels + la, *db = b->labels + lb; + size_t i, lm = (la < lb) ? la : lb; + + for (i = 0; i < lm; i++) { + da--; db--; + if (*da != *db) { + break; + } + } + return i; +} + +static int domain_suffix_ord(const void *cva, const void *cvb) +{ + const CompactDomain *a = cva, *b = cvb; + size_t la = a->len, lb = b->len; + size_t doff = domain_suffix_diffoff(a, b); + uint8_t ca = a->labels[la - doff]; + uint8_t cb = b->labels[lb - doff]; + + if (ca < cb) { + return -1; + } + if (ca > cb) { + return 1; + } + if (la < lb) { + return -1; + } + if (la > lb) { + return 1; + } + return 0; +} + +static size_t domain_common_label(CompactDomain *a, CompactDomain *b) +{ + size_t res, doff = domain_suffix_diffoff(a, b); + uint8_t *first_eq_pos = a->labels + (a->len - doff); + uint8_t *label = a->labels; + + while (*label && label < first_eq_pos) { + label += *label + 1; + } + res = a->len - (label - a->labels); + /* only report if it can help to reduce the packet size */ + return (res > REFERENCE_LEN) ? res : 0; +} + +static void domain_fixup_order(CompactDomain *cd, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + CompactDomain *cur = cd + i, *next = cd[i].self; + + while (!cur->common_octets) { + CompactDomain *tmp = next->self; /* backup target value */ + + next->self = cur; + cur->common_octets++; + + cur = next; + next = tmp; + } + } +} + +static void domain_mklabels(CompactDomain *cd, const char *input) +{ + uint8_t *len_marker = cd->labels; + uint8_t *output = len_marker; /* pre-incremented */ + const char *in = input; + char cur_chr; + size_t len = 0; + + if (cd->len == 0) { + goto fail; + } + cd->len++; + + do { + cur_chr = *in++; + if (cur_chr == '.' || cur_chr == '\0') { + len = output - len_marker; + if ((len == 0 && cur_chr == '.') || len >= 64) { + goto fail; + } + *len_marker = len; + + output++; + len_marker = output; + } else { + output++; + *output = cur_chr; + } + } while (cur_chr != '\0'); + + /* ensure proper zero-termination */ + if (len != 0) { + *len_marker = 0; + cd->len++; + } + return; + +fail: + g_warning("failed to parse domain name '%s'\n", input); + cd->len = 0; +} + +static void +domain_mkxrefs(CompactDomain *doms, CompactDomain *last, size_t depth) +{ + CompactDomain *i = doms, *target = doms; + + do { + if (i->labels < target->labels) { + target = i; + } + } while (i++ != last); + + for (i = doms; i != last; i++) { + CompactDomain *group_last; + size_t next_depth; + + if (i->common_octets == depth) { + continue; + } + + next_depth = -1; + for (group_last = i; group_last != last; group_last++) { + size_t co = group_last->common_octets; + if (co <= depth) { + break; + } + if (co < next_depth) { + next_depth = co; + } + } + domain_mkxrefs(i, group_last, next_depth); + + i = group_last; + if (i == last) { + break; + } + } + + if (depth == 0) { + return; + } + + i = doms; + do { + if (i != target && i->refdom == NULL) { + i->refdom = target; + i->common_octets = depth; + } + } while (i++ != last); +} + +static size_t domain_compactify(CompactDomain *domains, size_t n) +{ + uint8_t *start = domains->self->labels, *outptr = start; + size_t i; + + for (i = 0; i < n; i++) { + CompactDomain *cd = domains[i].self; + CompactDomain *rd = cd->refdom; + + if (rd != NULL) { + size_t moff = (rd->labels - start) + + (rd->len - cd->common_octets); + if (moff < 0x3FFFu) { + cd->len -= cd->common_octets - 2; + cd->labels[cd->len - 1] = moff & 0xFFu; + cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); + } + } + + if (cd->labels != outptr) { + memmove(outptr, cd->labels, cd->len); + cd->labels = outptr; + } + outptr += cd->len; + } + return outptr - start; +} + +int translate_dnssearch(Slirp *s, const char **names) +{ + size_t blocks, bsrc_start, bsrc_end, bdst_start; + size_t i, num_domains, memreq = 0; + uint8_t *result = NULL, *outptr; + CompactDomain *domains = NULL; + const char **nameptr = names; + + while (*nameptr != NULL) { + nameptr++; + } + + num_domains = nameptr - names; + if (num_domains == 0) { + return -2; + } + + domains = g_malloc(num_domains * sizeof(*domains)); + + for (i = 0; i < num_domains; i++) { + size_t nlen = strlen(names[i]); + memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ + domains[i].self = domains + i; + domains[i].len = nlen; + domains[i].common_octets = 0; + domains[i].refdom = NULL; + } + + /* reserve extra 2 header bytes for each 255 bytes of output */ + memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; + result = g_malloc(memreq * sizeof(*result)); + + outptr = result; + for (i = 0; i < num_domains; i++) { + domains[i].labels = outptr; + domain_mklabels(domains + i, names[i]); + outptr += domains[i].len; + } + + if (outptr == result) { + g_free(domains); + g_free(result); + return -1; + } + + qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); + domain_fixup_order(domains, num_domains); + + for (i = 1; i < num_domains; i++) { + size_t cl = domain_common_label(domains + i - 1, domains + i); + domains[i - 1].common_octets = cl; + } + + domain_mkxrefs(domains, domains + num_domains - 1, 0); + memreq = domain_compactify(domains, num_domains); + + blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); + bsrc_end = memreq; + bsrc_start = (blocks - 1) * MAX_OPT_LEN; + bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; + memreq += blocks * OPT_HEADER_LEN; + + while (blocks--) { + size_t len = bsrc_end - bsrc_start; + memmove(result + bdst_start, result + bsrc_start, len); + result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; + result[bdst_start - 1] = len; + bsrc_end = bsrc_start; + bsrc_start -= MAX_OPT_LEN; + bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; + } + + g_free(domains); + s->vdnssearch = result; + s->vdnssearch_len = memreq; + return 0; +} diff --git a/slirp/src/if.c b/slirp/src/if.c new file mode 100644 index 0000000000..1830cc396c --- /dev/null +++ b/slirp/src/if.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" + +static void +ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) +{ + ifm->ifs_next = ifmhead->ifs_next; + ifmhead->ifs_next = ifm; + ifm->ifs_prev = ifmhead; + ifm->ifs_next->ifs_prev = ifm; +} + +static void +ifs_remque(struct mbuf *ifm) +{ + ifm->ifs_prev->ifs_next = ifm->ifs_next; + ifm->ifs_next->ifs_prev = ifm->ifs_prev; +} + +void +if_init(Slirp *slirp) +{ + slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; + slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; +} + +/* + * if_output: Queue packet into an output queue. + * There are 2 output queue's, if_fastq and if_batchq. + * Each output queue is a doubly linked list of double linked lists + * of mbufs, each list belonging to one "session" (socket). This + * way, we can output packets fairly by sending one packet from each + * session, instead of all the packets from one session, then all packets + * from the next session, etc. Packets on the if_fastq get absolute + * priority, but if one session hogs the link, it gets "downgraded" + * to the batchq until it runs out of packets, then it'll return + * to the fastq (eg. if the user does an ls -alR in a telnet session, + * it'll temporarily get downgraded to the batchq) + */ +void +if_output(struct socket *so, struct mbuf *ifm) +{ + Slirp *slirp = ifm->slirp; + struct mbuf *ifq; + int on_fastq = 1; + + DEBUG_CALL("if_output"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("ifm = %p", ifm); + + /* + * First remove the mbuf from m_usedlist, + * since we're gonna use m_next and m_prev ourselves + * XXX Shouldn't need this, gotta change dtom() etc. + */ + if (ifm->m_flags & M_USEDLIST) { + remque(ifm); + ifm->m_flags &= ~M_USEDLIST; + } + + /* + * See if there's already a batchq list for this session. + * This can include an interactive session, which should go on fastq, + * but gets too greedy... hence it'll be downgraded from fastq to batchq. + * We mustn't put this packet back on the fastq (or we'll send it out of order) + * XXX add cache here? + */ + if (so) { + for (ifq = (struct mbuf *) slirp->if_batchq.qh_rlink; + (struct quehead *) ifq != &slirp->if_batchq; + ifq = ifq->ifq_prev) { + if (so == ifq->ifq_so) { + /* A match! */ + ifm->ifq_so = so; + ifs_insque(ifm, ifq->ifs_prev); + goto diddit; + } + } + } + + /* No match, check which queue to put it on */ + if (so && (so->so_iptos & IPTOS_LOWDELAY)) { + ifq = (struct mbuf *) slirp->if_fastq.qh_rlink; + on_fastq = 1; + /* + * Check if this packet is a part of the last + * packet's session + */ + if (ifq->ifq_so == so) { + ifm->ifq_so = so; + ifs_insque(ifm, ifq->ifs_prev); + goto diddit; + } + } else { + ifq = (struct mbuf *) slirp->if_batchq.qh_rlink; + } + + /* Create a new doubly linked list for this session */ + ifm->ifq_so = so; + ifs_init(ifm); + insque(ifm, ifq); + +diddit: + if (so) { + /* Update *_queued */ + so->so_queued++; + so->so_nqueued++; + /* + * Check if the interactive session should be downgraded to + * the batchq. A session is downgraded if it has queued 6 + * packets without pausing, and at least 3 of those packets + * have been sent over the link + * (XXX These are arbitrary numbers, probably not optimal..) + */ + if (on_fastq && ((so->so_nqueued >= 6) && + (so->so_nqueued - so->so_queued) >= 3)) { + + /* Remove from current queue... */ + remque(ifm->ifs_next); + + /* ...And insert in the new. That'll teach ya! */ + insque(ifm->ifs_next, &slirp->if_batchq); + } + } + + /* + * This prevents us from malloc()ing too many mbufs + */ + if_start(ifm->slirp); +} + +/* + * Send one packet from each session. + * If there are packets on the fastq, they are sent FIFO, before + * everything else. Then we choose the first packet from each + * batchq session (socket) and send it. + * For example, if there are 3 ftp sessions fighting for bandwidth, + * one packet will be sent from the first session, then one packet + * from the second session, then one packet from the third. + */ +void if_start(Slirp *slirp) +{ + uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); + bool from_batchq = false; + struct mbuf *ifm, *ifm_next, *ifqt; + + DEBUG_CALL("if_start"); + + if (slirp->if_start_busy) { + return; + } + slirp->if_start_busy = true; + + struct mbuf *batch_head = NULL; + if (slirp->if_batchq.qh_link != &slirp->if_batchq) { + batch_head = (struct mbuf *) slirp->if_batchq.qh_link; + } + + if (slirp->if_fastq.qh_link != &slirp->if_fastq) { + ifm_next = (struct mbuf *) slirp->if_fastq.qh_link; + } else if (batch_head) { + /* Nothing on fastq, pick up from batchq */ + ifm_next = batch_head; + from_batchq = true; + } else { + ifm_next = NULL; + } + + while (ifm_next) { + ifm = ifm_next; + + ifm_next = ifm->ifq_next; + if ((struct quehead *) ifm_next == &slirp->if_fastq) { + /* No more packets in fastq, switch to batchq */ + ifm_next = batch_head; + from_batchq = true; + } + if ((struct quehead *) ifm_next == &slirp->if_batchq) { + /* end of batchq */ + ifm_next = NULL; + } + + /* Try to send packet unless it already expired */ + if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { + /* Packet is delayed due to pending ARP or NDP resolution */ + continue; + } + + /* Remove it from the queue */ + ifqt = ifm->ifq_prev; + remque(ifm); + + /* If there are more packets for this session, re-queue them */ + if (ifm->ifs_next != ifm) { + struct mbuf *next = ifm->ifs_next; + + insque(next, ifqt); + ifs_remque(ifm); + if (!from_batchq) { + ifm_next = next; + } + } + + /* Update so_queued */ + if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { + /* If there's no more queued, reset nqueued */ + ifm->ifq_so->so_nqueued = 0; + } + + m_free(ifm); + } + + slirp->if_start_busy = false; +} diff --git a/slirp/src/if.h b/slirp/src/if.h new file mode 100644 index 0000000000..69569c10df --- /dev/null +++ b/slirp/src/if.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef IF_H +#define IF_H + +#define IF_COMPRESS 0x01 /* We want compression */ +#define IF_NOCOMPRESS 0x02 /* Do not do compression */ +#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ +#define IF_NOCIDCOMP 0x08 /* CID compression */ + +#define IF_MTU 1500 +#define IF_MRU 1500 +#define IF_COMP IF_AUTOCOMP /* Flags for compression */ + +/* 2 for alignment, 14 for ethernet */ +#define IF_MAXLINKHDR (2 + ETH_HLEN) + +#endif diff --git a/slirp/src/ip.h b/slirp/src/ip.h new file mode 100644 index 0000000000..73a4d2a3d2 --- /dev/null +++ b/slirp/src/ip.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip.h 8.1 (Berkeley) 6/10/93 + * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp + */ + +#ifndef IP_H +#define IP_H + +#include <glib.h> + +#if G_BYTE_ORDER == G_BIG_ENDIAN +# undef NTOHL +# undef NTOHS +# undef HTONL +# undef HTONS +# define NTOHL(d) +# define NTOHS(d) +# define HTONL(d) +# define HTONS(d) +#else +# ifndef NTOHL +# define NTOHL(d) ((d) = ntohl((d))) +# endif +# ifndef NTOHS +# define NTOHS(d) ((d) = ntohs((uint16_t)(d))) +# endif +# ifndef HTONL +# define HTONL(d) ((d) = htonl((d))) +# endif +# ifndef HTONS +# define HTONS(d) ((d) = htons((uint16_t)(d))) +# endif +#endif + +typedef uint32_t n_long; /* long as received from the net */ + +/* + * Definitions for internet protocol version 4. + * Per RFC 791, September 1981. + */ +#define IPVERSION 4 + +/* + * Structure of an internet header, naked of options. + */ +struct ip { +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint8_t ip_v:4, /* version */ + ip_hl:4; /* header length */ +#else + uint8_t ip_hl:4, /* header length */ + ip_v:4; /* version */ +#endif + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset field */ +#define IP_DF 0x4000 /* don't fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + struct in_addr ip_src,ip_dst; /* source and dest address */ +} SLIRP_PACKED; + +#define IP_MAXPACKET 65535 /* maximum packet size */ + +/* + * Definitions for IP type of service (ip_tos) + */ +#define IPTOS_LOWDELAY 0x10 +#define IPTOS_THROUGHPUT 0x08 +#define IPTOS_RELIABILITY 0x04 + +/* + * Definitions for options. + */ +#define IPOPT_COPIED(o) ((o)&0x80) +#define IPOPT_CLASS(o) ((o)&0x60) +#define IPOPT_NUMBER(o) ((o)&0x1f) + +#define IPOPT_CONTROL 0x00 +#define IPOPT_RESERVED1 0x20 +#define IPOPT_DEBMEAS 0x40 +#define IPOPT_RESERVED2 0x60 + +#define IPOPT_EOL 0 /* end of option list */ +#define IPOPT_NOP 1 /* no operation */ + +#define IPOPT_RR 7 /* record packet route */ +#define IPOPT_TS 68 /* timestamp */ +#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ +#define IPOPT_LSRR 131 /* loose source route */ +#define IPOPT_SATID 136 /* satnet id */ +#define IPOPT_SSRR 137 /* strict source route */ + +/* + * Offsets to fields in options other than EOL and NOP. + */ +#define IPOPT_OPTVAL 0 /* option ID */ +#define IPOPT_OLEN 1 /* option length */ +#define IPOPT_OFFSET 2 /* offset within option */ +#define IPOPT_MINOFF 4 /* min value of above */ + +/* + * Time stamp option structure. + */ +struct ip_timestamp { + uint8_t ipt_code; /* IPOPT_TS */ + uint8_t ipt_len; /* size of structure (variable) */ + uint8_t ipt_ptr; /* index of current entry */ +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint8_t ipt_oflw:4, /* overflow counter */ + ipt_flg:4; /* flags, see below */ +#else + uint8_t ipt_flg:4, /* flags, see below */ + ipt_oflw:4; /* overflow counter */ +#endif + union ipt_timestamp { + n_long ipt_time[1]; + struct ipt_ta { + struct in_addr ipt_addr; + n_long ipt_time; + } ipt_ta[1]; + } ipt_timestamp; +} SLIRP_PACKED; + +/* flag bits for ipt_flg */ +#define IPOPT_TS_TSONLY 0 /* timestamps only */ +#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ +#define IPOPT_TS_PRESPEC 3 /* specified modules only */ + +/* bits for security (not byte swapped) */ +#define IPOPT_SECUR_UNCLASS 0x0000 +#define IPOPT_SECUR_CONFID 0xf135 +#define IPOPT_SECUR_EFTO 0x789a +#define IPOPT_SECUR_MMMM 0xbc4d +#define IPOPT_SECUR_RESTR 0xaf13 +#define IPOPT_SECUR_SECRET 0xd788 +#define IPOPT_SECUR_TOPSECRET 0x6bc5 + +/* + * Internet implementation parameters. + */ +#define MAXTTL 255 /* maximum time to live (seconds) */ +#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ +#define IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define IPTTLDEC 1 /* subtracted when forwarding */ + +#define IP_MSS 576 /* default maximum segment size */ + +#if GLIB_SIZEOF_VOID_P == 4 +struct mbuf_ptr { + struct mbuf *mptr; + uint32_t dummy; +} SLIRP_PACKED; +#else +struct mbuf_ptr { + struct mbuf *mptr; +} SLIRP_PACKED; +#endif +struct qlink { + void *next, *prev; +}; + +/* + * Overlay for ip header used by other protocols (tcp, udp). + */ +struct ipovly { + struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ + uint8_t ih_x1; /* (unused) */ + uint8_t ih_pr; /* protocol */ + uint16_t ih_len; /* protocol length */ + struct in_addr ih_src; /* source internet address */ + struct in_addr ih_dst; /* destination internet address */ +} SLIRP_PACKED; + +/* + * Ip reassembly queue structure. Each fragment + * being reassembled is attached to one of these structures. + * They are timed out after ipq_ttl drops to 0, and may also + * be reclaimed if memory becomes tight. + * size 28 bytes + */ +struct ipq { + struct qlink frag_link; /* to ip headers of fragments */ + struct qlink ip_link; /* to other reass headers */ + uint8_t ipq_ttl; /* time for reass q to live */ + uint8_t ipq_p; /* protocol of this fragment */ + uint16_t ipq_id; /* sequence id for reassembly */ + struct in_addr ipq_src,ipq_dst; +}; + +/* + * Ip header, when holding a fragment. + * + * Note: ipf_link must be at same offset as frag_link above + */ +struct ipasfrag { + struct qlink ipf_link; + struct ip ipf_ip; +}; + +G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == + offsetof(struct ipasfrag, ipf_link)); + +#define ipf_off ipf_ip.ip_off +#define ipf_tos ipf_ip.ip_tos +#define ipf_len ipf_ip.ip_len +#define ipf_next ipf_link.next +#define ipf_prev ipf_link.prev + +#endif diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h new file mode 100644 index 0000000000..1b3364f960 --- /dev/null +++ b/slirp/src/ip6.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. + */ + +#ifndef SLIRP_IP6_H +#define SLIRP_IP6_H + +#include <glib.h> +#include <string.h> + +#define ALLNODES_MULTICAST { .s6_addr = \ + { 0xff, 0x02, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x01 } } + +#define SOLICITED_NODE_PREFIX { .s6_addr = \ + { 0xff, 0x02, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x01,\ + 0xff, 0x00, 0x00, 0x00 } } + +#define LINKLOCAL_ADDR { .s6_addr = \ + { 0xfe, 0x80, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x02 } } + +#define ZERO_ADDR { .s6_addr = \ + { 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00,\ + 0x00, 0x00, 0x00, 0x00 } } + +static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + +static inline bool in6_equal_net(const struct in6_addr *a, + const struct in6_addr *b, + int prefix_len) +{ + if (memcmp(a, b, prefix_len / 8) != 0) { + return 0; + } + + if (prefix_len % 8 == 0) { + return 1; + } + + return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) + == b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); +} + +static inline bool in6_equal_mach(const struct in6_addr *a, + const struct in6_addr *b, + int prefix_len) +{ + if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), + &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), + 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { + return 0; + } + + if (prefix_len % 8 == 0) { + return 1; + } + + return (a->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)) + == (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); +} + + +#define in6_equal_router(a)\ + ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len)\ + && in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len))\ + || (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64)\ + && in6_equal_mach(a, &slirp->vhost_addr6, 64))) + +#define in6_equal_dns(a)\ + ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len)\ + && in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len))\ + || (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64)\ + && in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) + +#define in6_equal_host(a)\ + (in6_equal_router(a) || in6_equal_dns(a)) + +#define in6_solicitednode_multicast(a)\ + (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) + +#define in6_zero(a)\ + (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) + +/* Compute emulated host MAC address from its ipv6 address */ +static inline void in6_compute_ethaddr(struct in6_addr ip, + uint8_t eth[ETH_ALEN]) +{ + eth[0] = 0x52; + eth[1] = 0x56; + memcpy(ð[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); +} + +/* + * Definitions for internet protocol version 6. + * Per RFC 2460, December 1998. + */ +#define IP6VERSION 6 +#define IP6_HOP_LIMIT 255 + +/* + * Structure of an internet header, naked of options. + */ +struct ip6 { +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint32_t + ip_v:4, /* version */ + ip_tc_hi:4, /* traffic class */ + ip_tc_lo:4, + ip_fl_hi:4, /* flow label */ + ip_fl_lo:16; +#else + uint32_t + ip_tc_hi:4, + ip_v:4, + ip_fl_hi:4, + ip_tc_lo:4, + ip_fl_lo:16; +#endif + uint16_t ip_pl; /* payload length */ + uint8_t ip_nh; /* next header */ + uint8_t ip_hl; /* hop limit */ + struct in6_addr ip_src, ip_dst; /* source and dest address */ +}; + +/* + * IPv6 pseudo-header used by upper-layer protocols + */ +struct ip6_pseudohdr { + struct in6_addr ih_src; /* source internet address */ + struct in6_addr ih_dst; /* destination internet address */ + uint32_t ih_pl; /* upper-layer packet length */ + uint16_t ih_zero_hi; /* zero */ + uint8_t ih_zero_lo; /* zero */ + uint8_t ih_nh; /* next header */ +}; + +/* + * We don't want to mark these ip6 structs as packed as they are naturally + * correctly aligned; instead assert that there is no stray padding. + * If we marked the struct as packed then we would be unable to take + * the address of any of the fields in it. + */ +G_STATIC_ASSERT(sizeof(struct ip6) == 40); +G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); + +#endif diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c new file mode 100644 index 0000000000..c1e3d30470 --- /dev/null +++ b/slirp/src/ip6_icmp.c @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. + */ + +#include "slirp.h" +#include "ip6_icmp.h" + +#define NDP_Interval g_rand_int_range(slirp->grand, \ + NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) + +static void ra_timer_handler(void *opaque) +{ + Slirp *slirp = opaque; + + slirp->cb->timer_mod(slirp->ra_timer, + slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + NDP_Interval, + slirp->opaque); + ndp_send_ra(slirp); +} + +void icmp6_init(Slirp *slirp) +{ + if (!slirp->in6_enabled) { + return; + } + + slirp->ra_timer = slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); + slirp->cb->timer_mod(slirp->ra_timer, + slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + NDP_Interval, + slirp->opaque); +} + +void icmp6_cleanup(Slirp *slirp) +{ + if (!slirp->in6_enabled) { + return; + } + + slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); +} + +static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, + struct icmp6 *icmp) +{ + struct mbuf *t = m_get(slirp); + t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); + memcpy(t->m_data, m->m_data, t->m_len); + + /* IPv6 Packet */ + struct ip6 *rip = mtod(t, struct ip6 *); + rip->ip_dst = ip->ip_src; + rip->ip_src = ip->ip_dst; + + /* ICMPv6 packet */ + t->m_data += sizeof(struct ip6); + struct icmp6 *ricmp = mtod(t, struct icmp6 *); + ricmp->icmp6_type = ICMP6_ECHO_REPLY; + ricmp->icmp6_cksum = 0; + + /* Checksum */ + t->m_data -= sizeof(struct ip6); + ricmp->icmp6_cksum = ip6_cksum(t); + + ip6_output(NULL, t, 0); +} + +void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) +{ + Slirp *slirp = m->slirp; + struct mbuf *t; + struct ip6 *ip = mtod(m, struct ip6 *); + char addrstr[INET6_ADDRSTRLEN]; + + DEBUG_CALL("icmp6_send_error"); + DEBUG_ARG("type = %d, code = %d", type, code); + + if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || + in6_zero(&ip->ip_src)) { + /* TODO icmp error? */ + return; + } + + t = m_get(slirp); + + /* IPv6 packet */ + struct ip6 *rip = mtod(t, struct ip6 *); + rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; + rip->ip_dst = ip->ip_src; + inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); + DEBUG_ARG("target = %s", addrstr); + + rip->ip_nh = IPPROTO_ICMPV6; + const int error_data_len = MIN(m->m_len, + IF_MTU - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); + rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); + t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); + + /* ICMPv6 packet */ + t->m_data += sizeof(struct ip6); + struct icmp6 *ricmp = mtod(t, struct icmp6 *); + ricmp->icmp6_type = type; + ricmp->icmp6_code = code; + ricmp->icmp6_cksum = 0; + + switch (type) { + case ICMP6_UNREACH: + case ICMP6_TIMXCEED: + ricmp->icmp6_err.unused = 0; + break; + case ICMP6_TOOBIG: + ricmp->icmp6_err.mtu = htonl(IF_MTU); + break; + case ICMP6_PARAMPROB: + /* TODO: Handle this case */ + break; + default: + g_assert_not_reached(); + break; + } + t->m_data += ICMP6_ERROR_MINLEN; + memcpy(t->m_data, m->m_data, error_data_len); + + /* Checksum */ + t->m_data -= ICMP6_ERROR_MINLEN; + t->m_data -= sizeof(struct ip6); + ricmp->icmp6_cksum = ip6_cksum(t); + + ip6_output(NULL, t, 0); +} + +/* + * Send NDP Router Advertisement + */ +void ndp_send_ra(Slirp *slirp) +{ + DEBUG_CALL("ndp_send_ra"); + + /* Build IPv6 packet */ + struct mbuf *t = m_get(slirp); + struct ip6 *rip = mtod(t, struct ip6 *); + size_t pl_size = 0; + struct in6_addr addr; + uint32_t scope_id; + + rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; + rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; + rip->ip_nh = IPPROTO_ICMPV6; + + /* Build ICMPv6 packet */ + t->m_data += sizeof(struct ip6); + struct icmp6 *ricmp = mtod(t, struct icmp6 *); + ricmp->icmp6_type = ICMP6_NDP_RA; + ricmp->icmp6_code = 0; + ricmp->icmp6_cksum = 0; + + /* NDP */ + ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; + ricmp->icmp6_nra.M = NDP_AdvManagedFlag; + ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; + ricmp->icmp6_nra.reserved = 0; + ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); + ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); + ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); + t->m_data += ICMP6_NDP_RA_MINLEN; + pl_size += ICMP6_NDP_RA_MINLEN; + + /* Source link-layer address (NDP option) */ + struct ndpopt *opt = mtod(t, struct ndpopt *); + opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; + opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; + in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); + t->m_data += NDPOPT_LINKLAYER_LEN; + pl_size += NDPOPT_LINKLAYER_LEN; + + /* Prefix information (NDP option) */ + struct ndpopt *opt2 = mtod(t, struct ndpopt *); + opt2->ndpopt_type = NDPOPT_PREFIX_INFO; + opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; + opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; + opt2->ndpopt_prefixinfo.L = 1; + opt2->ndpopt_prefixinfo.A = 1; + opt2->ndpopt_prefixinfo.reserved1 = 0; + opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); + opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); + opt2->ndpopt_prefixinfo.reserved2 = 0; + opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; + t->m_data += NDPOPT_PREFIXINFO_LEN; + pl_size += NDPOPT_PREFIXINFO_LEN; + + /* Prefix information (NDP option) */ + if (get_dns6_addr(&addr, &scope_id) >= 0) { + /* Host system does have an IPv6 DNS server, announce our proxy. */ + struct ndpopt *opt3 = mtod(t, struct ndpopt *); + opt3->ndpopt_type = NDPOPT_RDNSS; + opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; + opt3->ndpopt_rdnss.reserved = 0; + opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); + opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; + t->m_data += NDPOPT_RDNSS_LEN; + pl_size += NDPOPT_RDNSS_LEN; + } + + rip->ip_pl = htons(pl_size); + t->m_data -= sizeof(struct ip6) + pl_size; + t->m_len = sizeof(struct ip6) + pl_size; + + /* ICMPv6 Checksum */ + ricmp->icmp6_cksum = ip6_cksum(t); + + ip6_output(NULL, t, 0); +} + +/* + * Send NDP Neighbor Solitication + */ +void ndp_send_ns(Slirp *slirp, struct in6_addr addr) +{ + char addrstr[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); + + DEBUG_CALL("ndp_send_ns"); + DEBUG_ARG("target = %s", addrstr); + + /* Build IPv6 packet */ + struct mbuf *t = m_get(slirp); + struct ip6 *rip = mtod(t, struct ip6 *); + rip->ip_src = slirp->vhost_addr6; + rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; + memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); + rip->ip_nh = IPPROTO_ICMPV6; + rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); + t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); + + /* Build ICMPv6 packet */ + t->m_data += sizeof(struct ip6); + struct icmp6 *ricmp = mtod(t, struct icmp6 *); + ricmp->icmp6_type = ICMP6_NDP_NS; + ricmp->icmp6_code = 0; + ricmp->icmp6_cksum = 0; + + /* NDP */ + ricmp->icmp6_nns.reserved = 0; + ricmp->icmp6_nns.target = addr; + + /* Build NDP option */ + t->m_data += ICMP6_NDP_NS_MINLEN; + struct ndpopt *opt = mtod(t, struct ndpopt *); + opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; + opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; + in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); + + /* ICMPv6 Checksum */ + t->m_data -= ICMP6_NDP_NA_MINLEN; + t->m_data -= sizeof(struct ip6); + ricmp->icmp6_cksum = ip6_cksum(t); + + ip6_output(NULL, t, 1); +} + +/* + * Send NDP Neighbor Advertisement + */ +static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) +{ + /* Build IPv6 packet */ + struct mbuf *t = m_get(slirp); + struct ip6 *rip = mtod(t, struct ip6 *); + rip->ip_src = icmp->icmp6_nns.target; + if (in6_zero(&ip->ip_src)) { + rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; + } else { + rip->ip_dst = ip->ip_src; + } + rip->ip_nh = IPPROTO_ICMPV6; + rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + + NDPOPT_LINKLAYER_LEN); + t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); + + /* Build ICMPv6 packet */ + t->m_data += sizeof(struct ip6); + struct icmp6 *ricmp = mtod(t, struct icmp6 *); + ricmp->icmp6_type = ICMP6_NDP_NA; + ricmp->icmp6_code = 0; + ricmp->icmp6_cksum = 0; + + /* NDP */ + ricmp->icmp6_nna.R = NDP_IsRouter; + ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); + ricmp->icmp6_nna.O = 1; + ricmp->icmp6_nna.reserved_hi = 0; + ricmp->icmp6_nna.reserved_lo = 0; + ricmp->icmp6_nna.target = icmp->icmp6_nns.target; + + /* Build NDP option */ + t->m_data += ICMP6_NDP_NA_MINLEN; + struct ndpopt *opt = mtod(t, struct ndpopt *); + opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; + opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; + in6_compute_ethaddr(ricmp->icmp6_nna.target, + opt->ndpopt_linklayer); + + /* ICMPv6 Checksum */ + t->m_data -= ICMP6_NDP_NA_MINLEN; + t->m_data -= sizeof(struct ip6); + ricmp->icmp6_cksum = ip6_cksum(t); + + ip6_output(NULL, t, 0); +} + +/* + * Process a NDP message + */ +static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, + struct icmp6 *icmp) +{ + m->m_len += ETH_HLEN; + m->m_data -= ETH_HLEN; + struct ethhdr *eth = mtod(m, struct ethhdr *); + m->m_len -= ETH_HLEN; + m->m_data += ETH_HLEN; + + switch (icmp->icmp6_type) { + case ICMP6_NDP_RS: + DEBUG_CALL(" type = Router Solicitation"); + if (ip->ip_hl == 255 + && icmp->icmp6_code == 0 + && ntohs(ip->ip_pl) >= ICMP6_NDP_RS_MINLEN) { + /* Gratuitous NDP */ + ndp_table_add(slirp, ip->ip_src, eth->h_source); + + ndp_send_ra(slirp); + } + break; + + case ICMP6_NDP_RA: + DEBUG_CALL(" type = Router Advertisement"); + slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", + slirp->opaque); + break; + + case ICMP6_NDP_NS: + DEBUG_CALL(" type = Neighbor Solicitation"); + if (ip->ip_hl == 255 + && icmp->icmp6_code == 0 + && !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) + && ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN + && (!in6_zero(&ip->ip_src) + || in6_solicitednode_multicast(&ip->ip_dst))) { + if (in6_equal_host(&icmp->icmp6_nns.target)) { + /* Gratuitous NDP */ + ndp_table_add(slirp, ip->ip_src, eth->h_source); + ndp_send_na(slirp, ip, icmp); + } + } + break; + + case ICMP6_NDP_NA: + DEBUG_CALL(" type = Neighbor Advertisement"); + if (ip->ip_hl == 255 + && icmp->icmp6_code == 0 + && ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN + && !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) + && (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) + || icmp->icmp6_nna.S == 0)) { + ndp_table_add(slirp, ip->ip_src, eth->h_source); + } + break; + + case ICMP6_NDP_REDIRECT: + DEBUG_CALL(" type = Redirect"); + slirp->cb->guest_error( + "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); + break; + } +} + +/* + * Process a received ICMPv6 message. + */ +void icmp6_input(struct mbuf *m) +{ + struct icmp6 *icmp; + struct ip6 *ip = mtod(m, struct ip6 *); + Slirp *slirp = m->slirp; + int hlen = sizeof(struct ip6); + + DEBUG_CALL("icmp6_input"); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("m_len = %d", m->m_len); + + if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { + goto end; + } + + if (ip6_cksum(m)) { + goto end; + } + + m->m_len -= hlen; + m->m_data += hlen; + icmp = mtod(m, struct icmp6 *); + m->m_len += hlen; + m->m_data -= hlen; + + DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); + switch (icmp->icmp6_type) { + case ICMP6_ECHO_REQUEST: + if (in6_equal_host(&ip->ip_dst)) { + icmp6_send_echoreply(m, slirp, ip, icmp); + } else { + /* TODO */ + g_critical("external icmpv6 not supported yet"); + } + break; + + case ICMP6_NDP_RS: + case ICMP6_NDP_RA: + case ICMP6_NDP_NS: + case ICMP6_NDP_NA: + case ICMP6_NDP_REDIRECT: + ndp_input(m, slirp, ip, icmp); + break; + + case ICMP6_UNREACH: + case ICMP6_TOOBIG: + case ICMP6_TIMXCEED: + case ICMP6_PARAMPROB: + /* XXX? report error? close socket? */ + default: + break; + } + +end: + m_free(m); +} diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h new file mode 100644 index 0000000000..e8ed753db5 --- /dev/null +++ b/slirp/src/ip6_icmp.h @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. + */ + +#ifndef SLIRP_IP6_ICMP_H +#define SLIRP_IP6_ICMP_H + +/* + * Interface Control Message Protocol version 6 Definitions. + * Per RFC 4443, March 2006. + * + * Network Discover Protocol Definitions. + * Per RFC 4861, September 2007. + */ + +struct icmp6_echo { /* Echo Messages */ + uint16_t id; + uint16_t seq_num; +}; + +union icmp6_error_body { + uint32_t unused; + uint32_t pointer; + uint32_t mtu; +}; + +/* + * NDP Messages + */ +struct ndp_rs { /* Router Solicitation Message */ + uint32_t reserved; +}; + +struct ndp_ra { /* Router Advertisement Message */ + uint8_t chl; /* Cur Hop Limit */ +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint8_t + M:1, + O:1, + reserved:6; +#else + uint8_t + reserved:6, + O:1, + M:1; +#endif + uint16_t lifetime; /* Router Lifetime */ + uint32_t reach_time; /* Reachable Time */ + uint32_t retrans_time; /* Retrans Timer */ +}; + +G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); + +struct ndp_ns { /* Neighbor Solicitation Message */ + uint32_t reserved; + struct in6_addr target; /* Target Address */ +}; + +G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); + +struct ndp_na { /* Neighbor Advertisement Message */ +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint32_t + R:1, /* Router Flag */ + S:1, /* Solicited Flag */ + O:1, /* Override Flag */ + reserved_hi:5, + reserved_lo:24; +#else + uint32_t + reserved_hi:5, + O:1, + S:1, + R:1, + reserved_lo:24; +#endif + struct in6_addr target; /* Target Address */ +}; + +G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); + +struct ndp_redirect { + uint32_t reserved; + struct in6_addr target; /* Target Address */ + struct in6_addr dest; /* Destination Address */ +}; + +G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); + +/* + * Structure of an icmpv6 header. + */ +struct icmp6 { + uint8_t icmp6_type; /* type of message, see below */ + uint8_t icmp6_code; /* type sub code */ + uint16_t icmp6_cksum; /* ones complement cksum of struct */ + union { + union icmp6_error_body error_body; + struct icmp6_echo echo; + struct ndp_rs ndp_rs; + struct ndp_ra ndp_ra; + struct ndp_ns ndp_ns; + struct ndp_na ndp_na; + struct ndp_redirect ndp_redirect; + } icmp6_body; +#define icmp6_err icmp6_body.error_body +#define icmp6_echo icmp6_body.echo +#define icmp6_nrs icmp6_body.ndp_rs +#define icmp6_nra icmp6_body.ndp_ra +#define icmp6_nns icmp6_body.ndp_ns +#define icmp6_nna icmp6_body.ndp_na +#define icmp6_redirect icmp6_body.ndp_redirect +}; + +G_STATIC_ASSERT(sizeof(struct icmp6) == 40); + +#define ICMP6_MINLEN 4 +#define ICMP6_ERROR_MINLEN 8 +#define ICMP6_ECHO_MINLEN 8 +#define ICMP6_NDP_RS_MINLEN 8 +#define ICMP6_NDP_RA_MINLEN 16 +#define ICMP6_NDP_NS_MINLEN 24 +#define ICMP6_NDP_NA_MINLEN 24 +#define ICMP6_NDP_REDIRECT_MINLEN 40 + +/* + * NDP Options + */ +struct ndpopt { + uint8_t ndpopt_type; /* Option type */ + uint8_t ndpopt_len; /* /!\ In units of 8 octets */ + union { + unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ +#define ndpopt_linklayer ndpopt_body.linklayer_addr + struct prefixinfo { /* Prefix Information */ + uint8_t prefix_length; +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint8_t L:1, A:1, reserved1:6; +#else + uint8_t reserved1:6, A:1, L:1; +#endif + uint32_t valid_lt; /* Valid Lifetime */ + uint32_t pref_lt; /* Preferred Lifetime */ + uint32_t reserved2; + struct in6_addr prefix; + } SLIRP_PACKED prefixinfo; +#define ndpopt_prefixinfo ndpopt_body.prefixinfo + struct rdnss { + uint16_t reserved; + uint32_t lifetime; + struct in6_addr addr; + } SLIRP_PACKED rdnss; +#define ndpopt_rdnss ndpopt_body.rdnss + } ndpopt_body; +} SLIRP_PACKED; + +/* NDP options type */ +#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ +#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ +#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ +#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ + +/* NDP options size, in octets. */ +#define NDPOPT_LINKLAYER_LEN 8 +#define NDPOPT_PREFIXINFO_LEN 32 +#define NDPOPT_RDNSS_LEN 24 + +/* + * Definition of type and code field values. + * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml + * Last Updated 2012-11-12 + */ + +/* Errors */ +#define ICMP6_UNREACH 1 /* Destination Unreachable */ +#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ +#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ +#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ +#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ +#define ICMP6_UNREACH_PORT 4 /* port unreachable */ +#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ +#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ +#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ +#define ICMP6_TOOBIG 2 /* Packet Too Big */ +#define ICMP6_TIMXCEED 3 /* Time Exceeded */ +#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ +#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ +#define ICMP6_PARAMPROB 4 /* Parameter Problem */ +#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ +#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ +#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ + +/* Informational Messages */ +#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ +#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ +#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ +#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ +#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ +#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ +#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ + +/* + * Router Configuration Variables (rfc4861#section-6) + */ +#define NDP_IsRouter 1 +#define NDP_AdvSendAdvertisements 1 +#define NDP_MaxRtrAdvInterval 600000 +#define NDP_MinRtrAdvInterval ((NDP_MaxRtrAdvInterval >= 9) ? \ + NDP_MaxRtrAdvInterval / 3 : \ + NDP_MaxRtrAdvInterval) +#define NDP_AdvManagedFlag 0 +#define NDP_AdvOtherConfigFlag 0 +#define NDP_AdvLinkMTU 0 +#define NDP_AdvReachableTime 0 +#define NDP_AdvRetransTime 0 +#define NDP_AdvCurHopLimit 64 +#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) +#define NDP_AdvValidLifetime 86400 +#define NDP_AdvOnLinkFlag 1 +#define NDP_AdvPrefLifetime 14400 +#define NDP_AdvAutonomousFlag 1 + +void icmp6_init(Slirp *slirp); +void icmp6_cleanup(Slirp *slirp); +void icmp6_input(struct mbuf *); +void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); +void ndp_send_ra(Slirp *slirp); +void ndp_send_ns(Slirp *slirp, struct in6_addr addr); + +#endif diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c new file mode 100644 index 0000000000..1b8c003c66 --- /dev/null +++ b/slirp/src/ip6_input.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. + */ + +#include "slirp.h" +#include "ip6_icmp.h" + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented in kernel go to raw IP protocol handler. + */ +void ip6_init(Slirp *slirp) +{ + icmp6_init(slirp); +} + +void ip6_cleanup(Slirp *slirp) +{ + icmp6_cleanup(slirp); +} + +void ip6_input(struct mbuf *m) +{ + struct ip6 *ip6; + Slirp *slirp = m->slirp; + + if (!slirp->in6_enabled) { + goto bad; + } + + DEBUG_CALL("ip6_input"); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("m_len = %d", m->m_len); + + if (m->m_len < sizeof(struct ip6)) { + goto bad; + } + + ip6 = mtod(m, struct ip6 *); + + if (ip6->ip_v != IP6VERSION) { + goto bad; + } + + if (ntohs(ip6->ip_pl) > IF_MTU) { + icmp6_send_error(m, ICMP6_TOOBIG, 0); + goto bad; + } + + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); + goto bad; + } + + /* + * Switch out to protocol's input routine. + */ + switch (ip6->ip_nh) { + case IPPROTO_TCP: + NTOHS(ip6->ip_pl); + tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); + break; + case IPPROTO_UDP: + udp6_input(m); + break; + case IPPROTO_ICMPV6: + icmp6_input(m); + break; + default: + m_free(m); + } + return; +bad: + m_free(m); +} diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c new file mode 100644 index 0000000000..19d1ae7748 --- /dev/null +++ b/slirp/src/ip6_output.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. + */ + +#include "slirp.h" + +/* Number of packets queued before we start sending + * (to prevent allocing too many mbufs) */ +#define IF6_THRESH 10 + +/* + * IPv6 output. The packet in mbuf chain m contains a IP header + */ +int ip6_output(struct socket *so, struct mbuf *m, int fast) +{ + struct ip6 *ip = mtod(m, struct ip6 *); + + DEBUG_CALL("ip6_output"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); + + /* Fill IPv6 header */ + ip->ip_v = IP6VERSION; + ip->ip_hl = IP6_HOP_LIMIT; + ip->ip_tc_hi = 0; + ip->ip_tc_lo = 0; + ip->ip_fl_hi = 0; + ip->ip_fl_lo = 0; + + if (fast) { + if_encap(m->slirp, m); + } else { + if_output(so, m); + } + + return 0; +} diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c new file mode 100644 index 0000000000..120108f582 --- /dev/null +++ b/slirp/src/ip_icmp.c @@ -0,0 +1,469 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp + */ + +#include "slirp.h" +#include "ip_icmp.h" + +#ifndef WITH_ICMP_ERROR_MSG +#define WITH_ICMP_ERROR_MSG 0 +#endif + +/* The message sent when emulating PING */ +/* Be nice and tell them it's just a pseudo-ping packet */ +static const char icmp_ping_msg[] = "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST packets.\n"; + +/* list of actions for icmp_send_error() on RX of an icmp message */ +static const int icmp_flush[19] = { +/* ECHO REPLY (0) */ 0, + 1, + 1, +/* DEST UNREACH (3) */ 1, +/* SOURCE QUENCH (4)*/ 1, +/* REDIRECT (5) */ 1, + 1, + 1, +/* ECHO (8) */ 0, +/* ROUTERADVERT (9) */ 1, +/* ROUTERSOLICIT (10) */ 1, +/* TIME EXCEEDED (11) */ 1, +/* PARAMETER PROBLEM (12) */ 1, +/* TIMESTAMP (13) */ 0, +/* TIMESTAMP REPLY (14) */ 0, +/* INFO (15) */ 0, +/* INFO REPLY (16) */ 0, +/* ADDR MASK (17) */ 0, +/* ADDR MASK REPLY (18) */ 0 +}; + +void icmp_init(Slirp *slirp) +{ + slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; + slirp->icmp_last_so = &slirp->icmp; +} + +void icmp_cleanup(Slirp *slirp) +{ + while (slirp->icmp.so_next != &slirp->icmp) { + icmp_detach(slirp->icmp.so_next); + } +} + +static int icmp_send(struct socket *so, struct mbuf *m, int hlen) +{ + struct ip *ip = mtod(m, struct ip *); + struct sockaddr_in addr; + + so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (so->s == -1) { + return -1; + } + + so->so_m = m; + so->so_faddr = ip->ip_dst; + so->so_laddr = ip->ip_src; + so->so_iptos = ip->ip_tos; + so->so_type = IPPROTO_ICMP; + so->so_state = SS_ISFCONNECTED; + so->so_expire = curtime + SO_EXPIRE; + + addr.sin_family = AF_INET; + addr.sin_addr = so->so_faddr; + + insque(so, &so->slirp->icmp); + + if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, + (struct sockaddr *)&addr, sizeof(addr)) == -1) { + DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", + errno, strerror(errno)); + icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); + icmp_detach(so); + } + + return 0; +} + +void icmp_detach(struct socket *so) +{ + so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); + closesocket(so->s); + sofree(so); +} + +/* + * Process a received ICMP message. + */ +void +icmp_input(struct mbuf *m, int hlen) +{ + register struct icmp *icp; + register struct ip *ip=mtod(m, struct ip *); + int icmplen=ip->ip_len; + Slirp *slirp = m->slirp; + + DEBUG_CALL("icmp_input"); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("m_len = %d", m->m_len); + + /* + * Locate icmp structure in mbuf, and check + * that its not corrupted and of at least minimum length. + */ + if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ + freeit: + m_free(m); + goto end_error; + } + + m->m_len -= hlen; + m->m_data += hlen; + icp = mtod(m, struct icmp *); + if (cksum(m, icmplen)) { + goto freeit; + } + m->m_len += hlen; + m->m_data -= hlen; + + DEBUG_ARG("icmp_type = %d", icp->icmp_type); + switch (icp->icmp_type) { + case ICMP_ECHO: + ip->ip_len += hlen; /* since ip_input subtracts this */ + if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || + ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { + icmp_reflect(m); + } else if (slirp->restricted) { + goto freeit; + } else { + struct socket *so; + struct sockaddr_storage addr; + so = socreate(slirp); + if (icmp_send(so, m, hlen) == 0) { + return; + } + if (udp_attach(so, AF_INET) == -1) { + DEBUG_MISC("icmp_input udp_attach errno = %d-%s", + errno,strerror(errno)); + sofree(so); + m_free(m); + goto end_error; + } + so->so_m = m; + so->so_ffamily = AF_INET; + so->so_faddr = ip->ip_dst; + so->so_fport = htons(7); + so->so_lfamily = AF_INET; + so->so_laddr = ip->ip_src; + so->so_lport = htons(9); + so->so_iptos = ip->ip_tos; + so->so_type = IPPROTO_ICMP; + so->so_state = SS_ISFCONNECTED; + + /* Send the packet */ + addr = so->fhost.ss; + sotranslate_out(so, &addr); + + if(sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, + (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { + DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", + errno,strerror(errno)); + icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); + udp_detach(so); + } + } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ + break; + case ICMP_UNREACH: + /* XXX? report error? close socket? */ + case ICMP_TIMXCEED: + case ICMP_PARAMPROB: + case ICMP_SOURCEQUENCH: + case ICMP_TSTAMP: + case ICMP_MASKREQ: + case ICMP_REDIRECT: + m_free(m); + break; + + default: + m_free(m); + } /* swith */ + +end_error: + /* m is m_free()'d xor put in a socket xor or given to ip_send */ + return; +} + + +/* + * Send an ICMP message in response to a situation + * + * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). + * MUST NOT change this header information. + * MUST NOT reply to a multicast/broadcast IP address. + * MUST NOT reply to a multicast/broadcast MAC address. + * MUST reply to only the first fragment. + */ +/* + * Send ICMP_UNREACH back to the source regarding msrc. + * mbuf *msrc is used as a template, but is NOT m_free()'d. + * It is reported as the bad ip packet. The header should + * be fully correct and in host byte order. + * ICMP fragmentation is illegal. All machines must accept 576 bytes in one + * packet. The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 + */ + +#define ICMP_MAXDATALEN (IP_MSS-28) +void +icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, + const char *message) +{ + unsigned hlen, shlen, s_ip_len; + register struct ip *ip; + register struct icmp *icp; + register struct mbuf *m; + + DEBUG_CALL("icmp_send_error"); + DEBUG_ARG("msrc = %p", msrc); + DEBUG_ARG("msrc_len = %d", msrc->m_len); + + if(type!=ICMP_UNREACH && type!=ICMP_TIMXCEED) goto end_error; + + /* check msrc */ + if(!msrc) goto end_error; + ip = mtod(msrc, struct ip *); + if (slirp_debug & DBG_MISC) { + char bufa[20], bufb[20]; + strcpy(bufa, inet_ntoa(ip->ip_src)); + strcpy(bufb, inet_ntoa(ip->ip_dst)); + DEBUG_MISC(" %.16s to %.16s", bufa, bufb); + } + if(ip->ip_off & IP_OFFMASK) goto end_error; /* Only reply to fragment 0 */ + + /* Do not reply to source-only IPs */ + if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { + goto end_error; + } + + shlen=ip->ip_hl << 2; + s_ip_len=ip->ip_len; + if(ip->ip_p == IPPROTO_ICMP) { + icp = (struct icmp *)((char *)ip + shlen); + /* + * Assume any unknown ICMP type is an error. This isn't + * specified by the RFC, but think about it.. + */ + if(icp->icmp_type>18 || icmp_flush[icp->icmp_type]) goto end_error; + } + + /* make a copy */ + m = m_get(msrc->slirp); + if (!m) { + goto end_error; + } + + { int new_m_size; + new_m_size=sizeof(struct ip )+ICMP_MINLEN+msrc->m_len+ICMP_MAXDATALEN; + if(new_m_size>m->m_size) m_inc(m, new_m_size); + } + memcpy(m->m_data, msrc->m_data, msrc->m_len); + m->m_len = msrc->m_len; /* copy msrc to m */ + + /* make the header of the reply packet */ + ip = mtod(m, struct ip *); + hlen= sizeof(struct ip ); /* no options in reply */ + + /* fill in icmp */ + m->m_data += hlen; + m->m_len -= hlen; + + icp = mtod(m, struct icmp *); + + if(minsize) s_ip_len=shlen+ICMP_MINLEN; /* return header+8b only */ + else if(s_ip_len>ICMP_MAXDATALEN) /* maximum size */ + s_ip_len=ICMP_MAXDATALEN; + + m->m_len=ICMP_MINLEN+s_ip_len; /* 8 bytes ICMP header */ + + /* min. size = 8+sizeof(struct ip)+8 */ + + icp->icmp_type = type; + icp->icmp_code = code; + icp->icmp_id = 0; + icp->icmp_seq = 0; + + memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ + HTONS(icp->icmp_ip.ip_len); + HTONS(icp->icmp_ip.ip_id); + HTONS(icp->icmp_ip.ip_off); + + if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ + int message_len; + char *cpnt; + message_len=strlen(message); + if(message_len>ICMP_MAXDATALEN) message_len=ICMP_MAXDATALEN; + cpnt=(char *)m->m_data+m->m_len; + memcpy(cpnt, message, message_len); + m->m_len+=message_len; + } + + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, m->m_len); + + m->m_data -= hlen; + m->m_len += hlen; + + /* fill in ip */ + ip->ip_hl = hlen >> 2; + ip->ip_len = m->m_len; + + ip->ip_tos=((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ + + ip->ip_ttl = MAXTTL; + ip->ip_p = IPPROTO_ICMP; + ip->ip_dst = ip->ip_src; /* ip addresses */ + ip->ip_src = m->slirp->vhost_addr; + + (void ) ip_output((struct socket *)NULL, m); + +end_error: + return; +} +#undef ICMP_MAXDATALEN + +/* + * Reflect the ip packet back to the source + */ +void +icmp_reflect(struct mbuf *m) +{ + register struct ip *ip = mtod(m, struct ip *); + int hlen = ip->ip_hl << 2; + int optlen = hlen - sizeof(struct ip ); + register struct icmp *icp; + + /* + * Send an icmp packet back to the ip level, + * after supplying a checksum. + */ + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + + icp->icmp_type = ICMP_ECHOREPLY; + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, ip->ip_len - hlen); + + m->m_data -= hlen; + m->m_len += hlen; + + /* fill in ip */ + if (optlen > 0) { + /* + * Strip out original options by copying rest of first + * mbuf's data back, and adjust the IP length. + */ + memmove((char *)(ip + 1), (char *)ip + hlen, + (unsigned )(m->m_len - hlen)); + hlen -= optlen; + ip->ip_hl = hlen >> 2; + ip->ip_len -= optlen; + m->m_len -= optlen; + } + + ip->ip_ttl = MAXTTL; + { /* swap */ + struct in_addr icmp_dst; + icmp_dst = ip->ip_dst; + ip->ip_dst = ip->ip_src; + ip->ip_src = icmp_dst; + } + + (void ) ip_output((struct socket *)NULL, m); +} + +void icmp_receive(struct socket *so) +{ + struct mbuf *m = so->so_m; + struct ip *ip = mtod(m, struct ip *); + int hlen = ip->ip_hl << 2; + uint8_t error_code; + struct icmp *icp; + int id, len; + + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + + id = icp->icmp_id; + len = recv(so->s, icp, M_ROOM(m), 0); + /* + * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent + * between host OSes. On Linux, only the ICMP header and payload is + * included. On macOS/Darwin, the socket acts like a raw socket and + * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP + * sockets aren't supported at all, so we treat them like raw sockets. It + * isn't possible to detect this difference at runtime, so we must use an + * #ifdef to determine if we need to remove the IP header. + */ +#ifdef CONFIG_BSD + if (len >= sizeof(struct ip)) { + struct ip *inner_ip = mtod(m, struct ip *); + int inner_hlen = inner_ip->ip_hl << 2; + if (inner_hlen > len) { + len = -1; + errno = -EINVAL; + } else { + len -= inner_hlen; + memmove(icp, (unsigned char *)icp + inner_hlen, len); + } + } else { + len = -1; + errno = -EINVAL; + } +#endif + icp->icmp_id = id; + + m->m_data -= hlen; + m->m_len += hlen; + + if (len == -1 || len == 0) { + if (errno == ENETUNREACH) { + error_code = ICMP_UNREACH_NET; + } else { + error_code = ICMP_UNREACH_HOST; + } + DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, + strerror(errno)); + icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); + } else { + icmp_reflect(so->so_m); + so->so_m = NULL; /* Don't m_free() it again! */ + } + icmp_detach(so); +} diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h new file mode 100644 index 0000000000..a4e5b8b265 --- /dev/null +++ b/slirp/src/ip_icmp.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 + * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp + */ + +#ifndef NETINET_IP_ICMP_H +#define NETINET_IP_ICMP_H + +/* + * Interface Control Message Protocol Definitions. + * Per RFC 792, September 1981. + */ + +typedef uint32_t n_time; + +/* + * Structure of an icmp header. + */ +struct icmp { + uint8_t icmp_type; /* type of message, see below */ + uint8_t icmp_code; /* type sub code */ + uint16_t icmp_cksum; /* ones complement cksum of struct */ + union { + uint8_t ih_pptr; /* ICMP_PARAMPROB */ + struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ + struct ih_idseq { + uint16_t icd_id; + uint16_t icd_seq; + } ih_idseq; + int ih_void; + + /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ + struct ih_pmtu { + uint16_t ipm_void; + uint16_t ipm_nextmtu; + } ih_pmtu; + } icmp_hun; +#define icmp_pptr icmp_hun.ih_pptr +#define icmp_gwaddr icmp_hun.ih_gwaddr +#define icmp_id icmp_hun.ih_idseq.icd_id +#define icmp_seq icmp_hun.ih_idseq.icd_seq +#define icmp_void icmp_hun.ih_void +#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void +#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu + union { + struct id_ts { + n_time its_otime; + n_time its_rtime; + n_time its_ttime; + } id_ts; + struct id_ip { + struct ip idi_ip; + /* options and then 64 bits of data */ + } id_ip; + uint32_t id_mask; + char id_data[1]; + } icmp_dun; +#define icmp_otime icmp_dun.id_ts.its_otime +#define icmp_rtime icmp_dun.id_ts.its_rtime +#define icmp_ttime icmp_dun.id_ts.its_ttime +#define icmp_ip icmp_dun.id_ip.idi_ip +#define icmp_mask icmp_dun.id_mask +#define icmp_data icmp_dun.id_data +}; + +/* + * Lower bounds on packet lengths for various types. + * For the error advice packets must first ensure that the + * packet is large enough to contain the returned ip header. + * Only then can we do the check to see if 64 bits of packet + * data have been returned, since we need to check the returned + * ip header length. + */ +#define ICMP_MINLEN 8 /* abs minimum */ +#define ICMP_TSLEN (8 + 3 * sizeof (n_time)) /* timestamp */ +#define ICMP_MASKLEN 12 /* address mask */ +#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */ +#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) + /* N.B.: must separately check that ip_hl >= 5 */ + +/* + * Definition of type and code field values. + */ +#define ICMP_ECHOREPLY 0 /* echo reply */ +#define ICMP_UNREACH 3 /* dest unreachable, codes: */ +#define ICMP_UNREACH_NET 0 /* bad net */ +#define ICMP_UNREACH_HOST 1 /* bad host */ +#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ +#define ICMP_UNREACH_PORT 3 /* bad port */ +#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ +#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ +#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ +#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ +#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ +#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ +#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ +#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ +#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ +#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ +#define ICMP_REDIRECT 5 /* shorter route, codes: */ +#define ICMP_REDIRECT_NET 0 /* for network */ +#define ICMP_REDIRECT_HOST 1 /* for host */ +#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ +#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ +#define ICMP_ECHO 8 /* echo service */ +#define ICMP_ROUTERADVERT 9 /* router advertisement */ +#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ +#define ICMP_TIMXCEED 11 /* time exceeded, code: */ +#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ +#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ +#define ICMP_PARAMPROB 12 /* ip header bad */ +#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */ +#define ICMP_TSTAMP 13 /* timestamp request */ +#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ +#define ICMP_IREQ 15 /* information request */ +#define ICMP_IREQREPLY 16 /* information reply */ +#define ICMP_MASKREQ 17 /* address mask request */ +#define ICMP_MASKREPLY 18 /* address mask reply */ + +#define ICMP_MAXTYPE 18 + +#define ICMP_INFOTYPE(type) \ + ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ + (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ + (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ + (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ + (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) + +void icmp_init(Slirp *slirp); +void icmp_cleanup(Slirp *slirp); +void icmp_input(struct mbuf *, int); +void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, + const char *message); +void icmp_reflect(struct mbuf *); +void icmp_receive(struct socket *so); +void icmp_detach(struct socket *so); + +#endif diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c new file mode 100644 index 0000000000..e0b94b0e42 --- /dev/null +++ b/slirp/src/ip_input.c @@ -0,0 +1,471 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" +#include "ip_icmp.h" + +static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); +static void ip_freef(Slirp *slirp, struct ipq *fp); +static void ip_enq(register struct ipasfrag *p, + register struct ipasfrag *prev); +static void ip_deq(register struct ipasfrag *p); + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented in kernel go to raw IP protocol handler. + */ +void +ip_init(Slirp *slirp) +{ + slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; + udp_init(slirp); + tcp_init(slirp); + icmp_init(slirp); +} + +void ip_cleanup(Slirp *slirp) +{ + udp_cleanup(slirp); + tcp_cleanup(slirp); + icmp_cleanup(slirp); +} + +/* + * Ip input routine. Checksum and byte swap header. If fragmented + * try to reassemble. Process options. Pass to next level. + */ +void +ip_input(struct mbuf *m) +{ + Slirp *slirp = m->slirp; + register struct ip *ip; + int hlen; + + if (!slirp->in_enabled) { + goto bad; + } + + DEBUG_CALL("ip_input"); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("m_len = %d", m->m_len); + + if (m->m_len < sizeof (struct ip)) { + goto bad; + } + + ip = mtod(m, struct ip *); + + if (ip->ip_v != IPVERSION) { + goto bad; + } + + hlen = ip->ip_hl << 2; + if (hlen<sizeof(struct ip ) || hlen>m->m_len) {/* min header length */ + goto bad; /* or packet too short */ + } + + /* keep ip header intact for ICMP reply + * ip->ip_sum = cksum(m, hlen); + * if (ip->ip_sum) { + */ + if(cksum(m,hlen)) { + goto bad; + } + + /* + * Convert fields to host representation. + */ + NTOHS(ip->ip_len); + if (ip->ip_len < hlen) { + goto bad; + } + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + /* + * Check that the amount of data in the buffers + * is as at least much as the IP header would have us expect. + * Trim mbufs if longer than we expect. + * Drop packet if shorter than we expect. + */ + if (m->m_len < ip->ip_len) { + goto bad; + } + + /* Should drop packet if mbuf too long? hmmm... */ + if (m->m_len > ip->ip_len) + m_adj(m, ip->ip_len - m->m_len); + + /* check ip_ttl for a correct ICMP reply */ + if (ip->ip_ttl == 0) { + icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); + goto bad; + } + + /* + * If offset or IP_MF are set, must reassemble. + * Otherwise, nothing need be done. + * (We could look in the reassembly queue to see + * if the packet was previously fragmented, + * but it's not worth the time; just let them time out.) + * + * XXX This should fail, don't fragment yet + */ + if (ip->ip_off &~ IP_DF) { + register struct ipq *fp; + struct qlink *l; + /* + * Look for queue of fragments + * of this datagram. + */ + for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; + l = l->next) { + fp = container_of(l, struct ipq, ip_link); + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) + goto found; + } + fp = NULL; + found: + + /* + * Adjust ip_len to not reflect header, + * set ip_mff if more fragments are expected, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + if (ip->ip_off & IP_MF) + ip->ip_tos |= 1; + else + ip->ip_tos &= ~1; + + ip->ip_off <<= 3; + + /* + * If datagram marked as having more fragments + * or if this is not the first fragment, + * attempt reassembly; if it succeeds, proceed. + */ + if (ip->ip_tos & 1 || ip->ip_off) { + ip = ip_reass(slirp, ip, fp); + if (ip == NULL) + return; + m = dtom(slirp, ip); + } else + if (fp) + ip_freef(slirp, fp); + + } else + ip->ip_len -= hlen; + + /* + * Switch out to protocol's input routine. + */ + switch (ip->ip_p) { + case IPPROTO_TCP: + tcp_input(m, hlen, (struct socket *)NULL, AF_INET); + break; + case IPPROTO_UDP: + udp_input(m, hlen); + break; + case IPPROTO_ICMP: + icmp_input(m, hlen); + break; + default: + m_free(m); + } + return; +bad: + m_free(m); +} + +#define iptofrag(P) ((struct ipasfrag *)(((char*)(P)) - sizeof(struct qlink))) +#define fragtoip(P) ((struct ip*)(((char*)(P)) + sizeof(struct qlink))) +/* + * Take incoming datagram fragment and try to + * reassemble it into whole datagram. If a chain for + * reassembly of this datagram already exists, then it + * is given as fp; otherwise have to make a chain. + */ +static struct ip * +ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) +{ + register struct mbuf *m = dtom(slirp, ip); + register struct ipasfrag *q; + int hlen = ip->ip_hl << 2; + int i, next; + + DEBUG_CALL("ip_reass"); + DEBUG_ARG("ip = %p", ip); + DEBUG_ARG("fp = %p", fp); + DEBUG_ARG("m = %p", m); + + /* + * Presence of header sizes in mbufs + * would confuse code below. + * Fragment m_data is concatenated. + */ + m->m_data += hlen; + m->m_len -= hlen; + + /* + * If first fragment to arrive, create a reassembly queue. + */ + if (fp == NULL) { + struct mbuf *t = m_get(slirp); + + if (t == NULL) { + goto dropfrag; + } + fp = mtod(t, struct ipq *); + insque(&fp->ip_link, &slirp->ipq.ip_link); + fp->ipq_ttl = IPFRAGTTL; + fp->ipq_p = ip->ip_p; + fp->ipq_id = ip->ip_id; + fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; + fp->ipq_src = ip->ip_src; + fp->ipq_dst = ip->ip_dst; + q = (struct ipasfrag *)fp; + goto insert; + } + + /* + * Find a segment which begins after this one does. + */ + for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; + q = q->ipf_next) + if (q->ipf_off > ip->ip_off) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (q->ipf_prev != &fp->frag_link) { + struct ipasfrag *pq = q->ipf_prev; + i = pq->ipf_off + pq->ipf_len - ip->ip_off; + if (i > 0) { + if (i >= ip->ip_len) + goto dropfrag; + m_adj(dtom(slirp, ip), i); + ip->ip_off += i; + ip->ip_len -= i; + } + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (q != (struct ipasfrag*)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; + q->ipf_off += i; + m_adj(dtom(slirp, q), i); + break; + } + q = q->ipf_next; + m_free(dtom(slirp, q->ipf_prev)); + ip_deq(q->ipf_prev); + } + +insert: + /* + * Stick new segment in its place; + * check for complete reassembly. + */ + ip_enq(iptofrag(ip), q->ipf_prev); + next = 0; + for (q = fp->frag_link.next; q != (struct ipasfrag*)&fp->frag_link; + q = q->ipf_next) { + if (q->ipf_off != next) + return NULL; + next += q->ipf_len; + } + if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) + return NULL; + + /* + * Reassembly is complete; concatenate fragments. + */ + q = fp->frag_link.next; + m = dtom(slirp, q); + + q = (struct ipasfrag *) q->ipf_next; + while (q != (struct ipasfrag*)&fp->frag_link) { + struct mbuf *t = dtom(slirp, q); + q = (struct ipasfrag *) q->ipf_next; + m_cat(m, t); + } + + /* + * Create header for new ip packet by + * modifying header of first packet; + * dequeue and discard fragment reassembly header. + * Make header visible. + */ + q = fp->frag_link.next; + + /* + * If the fragments concatenated to an mbuf that's + * bigger than the total size of the fragment, then and + * m_ext buffer was alloced. But fp->ipq_next points to + * the old buffer (in the mbuf), so we must point ip + * into the new buffer. + */ + if (m->m_flags & M_EXT) { + int delta = (char *)q - m->m_dat; + q = (struct ipasfrag *)(m->m_ext + delta); + } + + ip = fragtoip(q); + ip->ip_len = next; + ip->ip_tos &= ~1; + ip->ip_src = fp->ipq_src; + ip->ip_dst = fp->ipq_dst; + remque(&fp->ip_link); + (void) m_free(dtom(slirp, fp)); + m->m_len += (ip->ip_hl << 2); + m->m_data -= (ip->ip_hl << 2); + + return ip; + +dropfrag: + m_free(m); + return NULL; +} + +/* + * Free a fragment reassembly header and all + * associated datagrams. + */ +static void +ip_freef(Slirp *slirp, struct ipq *fp) +{ + register struct ipasfrag *q, *p; + + for (q = fp->frag_link.next; q != (struct ipasfrag*)&fp->frag_link; q = p) { + p = q->ipf_next; + ip_deq(q); + m_free(dtom(slirp, q)); + } + remque(&fp->ip_link); + (void) m_free(dtom(slirp, fp)); +} + +/* + * Put an ip fragment on a reassembly chain. + * Like insque, but pointers in middle of structure. + */ +static void +ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) +{ + DEBUG_CALL("ip_enq"); + DEBUG_ARG("prev = %p", prev); + p->ipf_prev = prev; + p->ipf_next = prev->ipf_next; + ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; + prev->ipf_next = p; +} + +/* + * To ip_enq as remque is to insque. + */ +static void +ip_deq(register struct ipasfrag *p) +{ + ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; + ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; +} + +/* + * IP timer processing; + * if a timer expires on a reassembly + * queue, discard it. + */ +void +ip_slowtimo(Slirp *slirp) +{ + struct qlink *l; + + DEBUG_CALL("ip_slowtimo"); + + l = slirp->ipq.ip_link.next; + + if (l == NULL) + return; + + while (l != &slirp->ipq.ip_link) { + struct ipq *fp = container_of(l, struct ipq, ip_link); + l = l->next; + if (--fp->ipq_ttl == 0) { + ip_freef(slirp, fp); + } + } +} + +/* + * Strip out IP options, at higher + * level protocol in the kernel. + * Second argument is buffer to which options + * will be moved, and return value is their length. + * (XXX) should be deleted; last arg currently ignored. + */ +void +ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) +{ + register int i; + struct ip *ip = mtod(m, struct ip *); + register char *opts; + int olen; + + olen = (ip->ip_hl<<2) - sizeof (struct ip); + opts = (char *)(ip + 1); + i = m->m_len - (sizeof (struct ip) + olen); + memcpy(opts, opts + olen, (unsigned)i); + m->m_len -= olen; + + ip->ip_hl = sizeof(struct ip) >> 2; +} diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c new file mode 100644 index 0000000000..f6ec141df5 --- /dev/null +++ b/slirp/src/ip_output.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" + +/* Number of packets queued before we start sending + * (to prevent allocing too many mbufs) */ +#define IF_THRESH 10 + +/* + * IP output. The packet in mbuf chain m contains a skeletal IP + * header (with len, off, ttl, proto, tos, src, dst). + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + */ +int +ip_output(struct socket *so, struct mbuf *m0) +{ + Slirp *slirp = m0->slirp; + register struct ip *ip; + register struct mbuf *m = m0; + register int hlen = sizeof(struct ip ); + int len, off, error = 0; + + DEBUG_CALL("ip_output"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m0 = %p", m0); + + ip = mtod(m, struct ip *); + /* + * Fill in IP header. + */ + ip->ip_v = IPVERSION; + ip->ip_off &= IP_DF; + ip->ip_id = htons(slirp->ip_id++); + ip->ip_hl = hlen >> 2; + + /* + * If small enough for interface, can just send directly. + */ + if ((uint16_t)ip->ip_len <= IF_MTU) { + ip->ip_len = htons((uint16_t)ip->ip_len); + ip->ip_off = htons((uint16_t)ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); + + if_output(so, m); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ + if (ip->ip_off & IP_DF) { + error = -1; + goto bad; + } + + len = (IF_MTU - hlen) &~ 7; /* ip databytes per packet */ + if (len < 8) { + error = -1; + goto bad; + } + + { + int mhlen, firstlen = len; + struct mbuf **mnext = &m->m_nextpkt; + + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto chain. + */ + m0 = m; + mhlen = sizeof (struct ip); + for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { + register struct ip *mhip; + m = m_get(slirp); + if (m == NULL) { + error = -1; + goto sendorfree; + } + m->m_data += IF_MAXLINKHDR; + mhip = mtod(m, struct ip *); + *mhip = *ip; + + m->m_len = mhlen; + mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); + if (ip->ip_off & IP_MF) + mhip->ip_off |= IP_MF; + if (off + len >= (uint16_t)ip->ip_len) + len = (uint16_t)ip->ip_len - off; + else + mhip->ip_off |= IP_MF; + mhip->ip_len = htons((uint16_t)(len + mhlen)); + + if (m_copy(m, m0, off, len) < 0) { + error = -1; + goto sendorfree; + } + + mhip->ip_off = htons((uint16_t)mhip->ip_off); + mhip->ip_sum = 0; + mhip->ip_sum = cksum(m, mhlen); + *mnext = m; + mnext = &m->m_nextpkt; + } + /* + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). + */ + m = m0; + m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); + ip->ip_len = htons((uint16_t)m->m_len); + ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); +sendorfree: + for (m = m0; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = NULL; + if (error == 0) + if_output(so, m); + else + m_free(m); + } + } + +done: + return (error); + +bad: + m_free(m0); + goto done; +} diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h new file mode 100644 index 0000000000..2d13950065 --- /dev/null +++ b/slirp/src/libslirp.h @@ -0,0 +1,117 @@ +#ifndef LIBSLIRP_H +#define LIBSLIRP_H + +#include <stdint.h> +#include <stdbool.h> +#include <sys/types.h> + +#ifdef _WIN32 +#include <winsock2.h> +#include <in6addr.h> +#else +#include <netinet/in.h> +#include <arpa/inet.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct Slirp Slirp; + +enum { + SLIRP_POLL_IN = 1 << 0, + SLIRP_POLL_OUT = 1 << 1, + SLIRP_POLL_PRI = 1 << 2, + SLIRP_POLL_ERR = 1 << 3, + SLIRP_POLL_HUP = 1 << 4, +}; + +typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); +typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); +typedef void (*SlirpTimerCb)(void *opaque); +typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); +typedef int (*SlirpGetREventsCb)(int idx, void *opaque); + +/* + * Callbacks from slirp + */ +typedef struct SlirpCb { + /* + * Send an ethernet frame to the guest network. The opaque + * parameter is the one given to slirp_init(). The function + * doesn't need to send all the data and may return <len (no + * buffering is done on libslirp side, so the data will be dropped + * in this case). <0 reports an IO error. + */ + SlirpWriteCb send_packet; + /* Print a message for an error due to guest misbehavior. */ + void (*guest_error)(const char *msg, void *opaque); + /* Return the virtual clock value in nanoseconds */ + int64_t (*clock_get_ns)(void *opaque); + /* Create a new timer with the given callback and opaque data */ + void *(*timer_new)(SlirpTimerCb cb, void *cb_opaque, void *opaque); + /* Remove and free a timer */ + void (*timer_free)(void *timer, void *opaque); + /* Modify a timer to expire at @expire_time */ + void (*timer_mod)(void *timer, int64_t expire_time, void *opaque); + /* Register a fd for future polling */ + void (*register_poll_fd)(int fd, void *opaque); + /* Unregister a fd */ + void (*unregister_poll_fd)(int fd, void *opaque); + /* Kick the io-thread, to signal that new events may be processed */ + void (*notify)(void *opaque); +} SlirpCb; + + +Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, + struct in_addr vnetmask, struct in_addr vhost, + bool in6_enabled, + struct in6_addr vprefix_addr6, uint8_t vprefix_len, + struct in6_addr vhost6, const char *vhostname, + const char *tftp_server_name, + const char *tftp_path, const char *bootfile, + struct in_addr vdhcp_start, struct in_addr vnameserver, + struct in6_addr vnameserver6, const char **vdnssearch, + const char *vdomainname, + const SlirpCb *callbacks, + void *opaque); +void slirp_cleanup(Slirp *slirp); + +void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, + SlirpAddPollCb add_poll, void *opaque); + +void slirp_pollfds_poll(Slirp *slirp, int select_error, + SlirpGetREventsCb get_revents, void *opaque); + +void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); + +int slirp_add_hostfwd(Slirp *slirp, int is_udp, + struct in_addr host_addr, int host_port, + struct in_addr guest_addr, int guest_port); +int slirp_remove_hostfwd(Slirp *slirp, int is_udp, + struct in_addr host_addr, int host_port); +int slirp_add_exec(Slirp *slirp, const char *cmdline, + struct in_addr *guest_addr, int guest_port); +int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, + struct in_addr *guest_addr, int guest_port); + +char *slirp_connection_info(Slirp *slirp); + +void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port, const uint8_t *buf, int size); +size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port); + +void slirp_state_save(Slirp *s, SlirpWriteCb write_cb, void *opaque); + +int slirp_state_load(Slirp *s, int version_id, + SlirpReadCb read_cb, void *opaque); + +int slirp_state_version(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* LIBSLIRP_H */ diff --git a/slirp/src/main.h b/slirp/src/main.h new file mode 100644 index 0000000000..f11d4572b7 --- /dev/null +++ b/slirp/src/main.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef SLIRP_MAIN_H +#define SLIRP_MAIN_H + +extern unsigned curtime; +extern struct in_addr loopback_addr; +extern unsigned long loopback_mask; + +int if_encap(Slirp *slirp, struct mbuf *ifm); +ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags); + +#endif diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c new file mode 100644 index 0000000000..521c02c967 --- /dev/null +++ b/slirp/src/mbuf.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 1995 Danny Gasparovski + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +/* + * mbuf's in SLiRP are much simpler than the real mbufs in + * FreeBSD. They are fixed size, determined by the MTU, + * so that one whole packet can fit. Mbuf's cannot be + * chained together. If there's more data than the mbuf + * could hold, an external g_malloced buffer is pointed to + * by m_ext (and the data pointers) and M_EXT is set in + * the flags + */ + +#include "slirp.h" + +#define MBUF_THRESH 30 + +/* + * Find a nice value for msize + */ +#define SLIRP_MSIZE\ + (offsetof(struct mbuf, m_dat) + IF_MAXLINKHDR + TCPIPHDR_DELTA + IF_MTU) + +void +m_init(Slirp *slirp) +{ + slirp->m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; + slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; +} + +void m_cleanup(Slirp *slirp) +{ + struct mbuf *m, *next; + + m = (struct mbuf *) slirp->m_usedlist.qh_link; + while ((struct quehead *) m != &slirp->m_usedlist) { + next = m->m_next; + if (m->m_flags & M_EXT) { + g_free(m->m_ext); + } + g_free(m); + m = next; + } + m = (struct mbuf *) slirp->m_freelist.qh_link; + while ((struct quehead *) m != &slirp->m_freelist) { + next = m->m_next; + g_free(m); + m = next; + } +} + +/* + * Get an mbuf from the free list, if there are none + * allocate one + * + * Because fragmentation can occur if we alloc new mbufs and + * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, + * which tells m_free to actually g_free() it + */ +struct mbuf * +m_get(Slirp *slirp) +{ + register struct mbuf *m; + int flags = 0; + + DEBUG_CALL("m_get"); + + if (slirp->m_freelist.qh_link == &slirp->m_freelist) { + m = g_malloc(SLIRP_MSIZE); + slirp->mbuf_alloced++; + if (slirp->mbuf_alloced > MBUF_THRESH) + flags = M_DOFREE; + m->slirp = slirp; + } else { + m = (struct mbuf *) slirp->m_freelist.qh_link; + remque(m); + } + + /* Insert it in the used list */ + insque(m,&slirp->m_usedlist); + m->m_flags = (flags | M_USEDLIST); + + /* Initialise it */ + m->m_size = SLIRP_MSIZE - offsetof(struct mbuf, m_dat); + m->m_data = m->m_dat; + m->m_len = 0; + m->m_nextpkt = NULL; + m->m_prevpkt = NULL; + m->resolution_requested = false; + m->expiration_date = (uint64_t)-1; + DEBUG_ARG("m = %p", m); + return m; +} + +void +m_free(struct mbuf *m) +{ + + DEBUG_CALL("m_free"); + DEBUG_ARG("m = %p", m); + + if(m) { + /* Remove from m_usedlist */ + if (m->m_flags & M_USEDLIST) + remque(m); + + /* If it's M_EXT, free() it */ + if (m->m_flags & M_EXT) { + g_free(m->m_ext); + } + /* + * Either free() it or put it on the free list + */ + if (m->m_flags & M_DOFREE) { + m->slirp->mbuf_alloced--; + g_free(m); + } else if ((m->m_flags & M_FREELIST) == 0) { + insque(m,&m->slirp->m_freelist); + m->m_flags = M_FREELIST; /* Clobber other flags */ + } + } /* if(m) */ +} + +/* + * Copy data from one mbuf to the end of + * the other.. if result is too big for one mbuf, allocate + * an M_EXT data segment + */ +void +m_cat(struct mbuf *m, struct mbuf *n) +{ + /* + * If there's no room, realloc + */ + if (M_FREEROOM(m) < n->m_len) + m_inc(m, m->m_len + n->m_len); + + memcpy(m->m_data+m->m_len, n->m_data, n->m_len); + m->m_len += n->m_len; + + m_free(n); +} + + +/* make m 'size' bytes large from m_data */ +void +m_inc(struct mbuf *m, int size) +{ + int gapsize; + + /* some compilers throw up on gotos. This one we can fake. */ + if (M_ROOM(m) > size) { + return; + } + + if (m->m_flags & M_EXT) { + gapsize = m->m_data - m->m_ext; + m->m_ext = g_realloc(m->m_ext, size + gapsize); + } else { + gapsize = m->m_data - m->m_dat; + m->m_ext = g_malloc(size + gapsize); + memcpy(m->m_ext, m->m_dat, m->m_size); + m->m_flags |= M_EXT; + } + + m->m_data = m->m_ext + gapsize; + m->m_size = size + gapsize; +} + + + +void +m_adj(struct mbuf *m, int len) +{ + if (m == NULL) + return; + if (len >= 0) { + /* Trim from head */ + m->m_data += len; + m->m_len -= len; + } else { + /* Trim from tail */ + len = -len; + m->m_len -= len; + } +} + + +/* + * Copy len bytes from m, starting off bytes into n + */ +int +m_copy(struct mbuf *n, struct mbuf *m, int off, int len) +{ + if (len > M_FREEROOM(n)) + return -1; + + memcpy((n->m_data + n->m_len), (m->m_data + off), len); + n->m_len += len; + return 0; +} + + +/* + * Given a pointer into an mbuf, return the mbuf + * XXX This is a kludge, I should eliminate the need for it + * Fortunately, it's not used often + */ +struct mbuf * +dtom(Slirp *slirp, void *dat) +{ + struct mbuf *m; + + DEBUG_CALL("dtom"); + DEBUG_ARG("dat = %p", dat); + + /* bug corrected for M_EXT buffers */ + for (m = (struct mbuf *) slirp->m_usedlist.qh_link; + (struct quehead *) m != &slirp->m_usedlist; + m = m->m_next) { + if (m->m_flags & M_EXT) { + if( (char *)dat>=m->m_ext && (char *)dat<(m->m_ext + m->m_size) ) + return m; + } else { + if( (char *)dat >= m->m_dat && (char *)dat<(m->m_dat + m->m_size) ) + return m; + } + } + + DEBUG_ERROR("dtom failed"); + + return (struct mbuf *)0; +} diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h new file mode 100644 index 0000000000..e2d443418a --- /dev/null +++ b/slirp/src/mbuf.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 + * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp + */ + +#ifndef MBUF_H +#define MBUF_H + +/* + * Macros for type conversion + * mtod(m,t) - convert mbuf pointer to data pointer of correct type + */ +#define mtod(m,t) ((t)(m)->m_data) + +/* XXX About mbufs for slirp: + * Only one mbuf is ever used in a chain, for each "cell" of data. + * m_nextpkt points to the next packet, if fragmented. + * If the data is too large, the M_EXT is used, and a larger block + * is alloced. Therefore, m_free[m] must check for M_EXT and if set + * free the m_ext. This is inefficient memory-wise, but who cares. + */ + +/* + * mbufs allow to have a gap between the start of the allocated buffer (m_ext if + * M_EXT is set, m_dat otherwise) and the in-use data: + * + * |--gapsize----->|---m_len-------> + * |----------m_size------------------------------> + * |----M_ROOM--------------------> + * |-M_FREEROOM--> + * + * ^ ^ ^ + * m_dat/m_ext m_data end of buffer + */ + +/* + * How much room is in the mbuf, from m_data to the end of the mbuf + */ +#define M_ROOM(m) ((m->m_flags & M_EXT)? \ + (((m)->m_ext + (m)->m_size) - (m)->m_data) \ + : \ + (((m)->m_dat + (m)->m_size) - (m)->m_data)) + +/* + * How much free room there is + */ +#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) + +struct mbuf { + /* XXX should union some of these! */ + /* header at beginning of each mbuf: */ + struct mbuf *m_next; /* Linked list of mbufs */ + struct mbuf *m_prev; + struct mbuf *m_nextpkt; /* Next packet in queue/record */ + struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ + int m_flags; /* Misc flags */ + + int m_size; /* Size of mbuf, from m_dat or m_ext */ + struct socket *m_so; + + char *m_data; /* Current location of data */ + int m_len; /* Amount of data in this mbuf, from m_data */ + + Slirp *slirp; + bool resolution_requested; + uint64_t expiration_date; + char *m_ext; + /* start of dynamic buffer area, must be last element */ + char m_dat[]; +}; + +#define ifq_prev m_prev +#define ifq_next m_next +#define ifs_prev m_prevpkt +#define ifs_next m_nextpkt +#define ifq_so m_so + +#define M_EXT 0x01 /* m_ext points to more (malloced) data */ +#define M_FREELIST 0x02 /* mbuf is on free list */ +#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ +#define M_DOFREE 0x08 /* when m_free is called on the mbuf, free() + * it rather than putting it on the free list */ + +void m_init(Slirp *); +void m_cleanup(Slirp *slirp); +struct mbuf * m_get(Slirp *); +void m_free(struct mbuf *); +void m_cat(register struct mbuf *, register struct mbuf *); +void m_inc(struct mbuf *, int); +void m_adj(struct mbuf *, int); +int m_copy(struct mbuf *, struct mbuf *, int, int); +struct mbuf * dtom(Slirp *, void *); + +static inline void ifs_init(struct mbuf *ifm) +{ + ifm->ifs_next = ifm->ifs_prev = ifm; +} + +#endif diff --git a/slirp/src/misc.c b/slirp/src/misc.c new file mode 100644 index 0000000000..937a418d4e --- /dev/null +++ b/slirp/src/misc.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" + +inline void +insque(void *a, void *b) +{ + register struct quehead *element = (struct quehead *) a; + register struct quehead *head = (struct quehead *) b; + element->qh_link = head->qh_link; + head->qh_link = (struct quehead *)element; + element->qh_rlink = (struct quehead *)head; + ((struct quehead *)(element->qh_link))->qh_rlink + = (struct quehead *)element; +} + +inline void +remque(void *a) +{ + register struct quehead *element = (struct quehead *) a; + ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; + ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; + element->qh_rlink = NULL; +} + +/* TODO: IPv6 */ +struct gfwd_list * +add_guestfwd(struct gfwd_list **ex_ptr, + SlirpWriteCb write_cb, void *opaque, + struct in_addr addr, int port) +{ + struct gfwd_list *f = g_new0(struct gfwd_list, 1); + + f->write_cb = write_cb; + f->opaque = opaque; + f->ex_fport = port; + f->ex_addr = addr; + f->ex_next = *ex_ptr; + *ex_ptr = f; + + return f; +} + +struct gfwd_list * +add_exec(struct gfwd_list **ex_ptr, const char *cmdline, + struct in_addr addr, int port) +{ + struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); + + f->ex_exec = g_strdup(cmdline); + + return f; +} + +static int +slirp_socketpair_with_oob(int sv[2]) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr.s_addr = INADDR_ANY, + }; + socklen_t addrlen = sizeof(addr); + int ret, s; + + sv[1] = -1; + s = slirp_socket(AF_INET, SOCK_STREAM, 0); + if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || + listen(s, 1) < 0 || + getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { + goto err; + } + + sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); + if (sv[1] < 0) { + goto err; + } + /* + * This connect won't block because we've already listen()ed on + * the server end (even though we won't accept() the connection + * until later on). + */ + do { + ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); + } while (ret < 0 && errno == EINTR); + if (ret < 0) { + goto err; + } + + do { + sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); + } while (sv[0] < 0 && errno == EINTR); + if (sv[0] < 0) { + goto err; + } + + closesocket(s); + return 0; + +err: + g_critical("slirp_socketpair(): %s", strerror(errno)); + if (s >= 0) { + closesocket(s); + } + if (sv[1] >= 0) { + closesocket(sv[1]); + } + return -1; +} + +static void +fork_exec_child_setup(gpointer data) +{ +#ifndef _WIN32 + setsid(); +#endif +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +#if !GLIB_CHECK_VERSION(2, 58, 0) +typedef struct SlirpGSpawnFds { + GSpawnChildSetupFunc child_setup; + gpointer user_data; + gint stdin_fd; + gint stdout_fd; + gint stderr_fd; +} SlirpGSpawnFds; + +static inline void +slirp_gspawn_fds_setup(gpointer user_data) +{ + SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; + + dup2(q->stdin_fd, 0); + dup2(q->stdout_fd, 1); + dup2(q->stderr_fd, 2); + q->child_setup(q->user_data); +} +#endif + +static inline gboolean +g_spawn_async_with_fds_slirp(const gchar *working_directory, + gchar **argv, + gchar **envp, + GSpawnFlags flags, + GSpawnChildSetupFunc child_setup, + gpointer user_data, + GPid *child_pid, + gint stdin_fd, + gint stdout_fd, + gint stderr_fd, + GError **error) +{ +#if GLIB_CHECK_VERSION(2, 58, 0) + return g_spawn_async_with_fds(working_directory, argv, envp, flags, + child_setup, user_data, + child_pid, stdin_fd, stdout_fd, stderr_fd, + error); +#else + SlirpGSpawnFds setup = { + .child_setup = child_setup, + .user_data = user_data, + .stdin_fd = stdin_fd, + .stdout_fd = stdout_fd, + .stderr_fd = stderr_fd, + }; + + return g_spawn_async(working_directory, argv, envp, flags, + slirp_gspawn_fds_setup, &setup, + child_pid, error); +#endif +} + +#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ + g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) + +#pragma GCC diagnostic pop + +int +fork_exec(struct socket *so, const char *ex) +{ + GError *err = NULL; + char **argv; + int opt, sp[2]; + + DEBUG_CALL("fork_exec"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("ex = %p", ex); + + if (slirp_socketpair_with_oob(sp) < 0) { + return 0; + } + + argv = g_strsplit(ex, " ", -1); + g_spawn_async_with_fds(NULL /* cwd */, + argv, + NULL /* env */, + G_SPAWN_SEARCH_PATH, + fork_exec_child_setup, NULL /* data */, + NULL /* child_pid */, + sp[1], sp[1], sp[1], + &err); + g_strfreev(argv); + + if (err) { + g_critical("fork_exec: %s", err->message); + g_error_free(err); + closesocket(sp[0]); + closesocket(sp[1]); + return 0; + } + + so->s = sp[0]; + closesocket(sp[1]); + slirp_socket_set_fast_reuse(so->s); + opt = 1; + setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); + slirp_set_nonblock(so->s); + so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); + return 1; +} + +char *slirp_connection_info(Slirp *slirp) +{ + GString *str = g_string_new(NULL); + const char * const tcpstates[] = { + [TCPS_CLOSED] = "CLOSED", + [TCPS_LISTEN] = "LISTEN", + [TCPS_SYN_SENT] = "SYN_SENT", + [TCPS_SYN_RECEIVED] = "SYN_RCVD", + [TCPS_ESTABLISHED] = "ESTABLISHED", + [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", + [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", + [TCPS_CLOSING] = "CLOSING", + [TCPS_LAST_ACK] = "LAST_ACK", + [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", + [TCPS_TIME_WAIT] = "TIME_WAIT", + }; + struct in_addr dst_addr; + struct sockaddr_in src; + socklen_t src_len; + uint16_t dst_port; + struct socket *so; + const char *state; + char buf[20]; + + g_string_append_printf(str, + " Protocol[State] FD Source Address Port " + "Dest. Address Port RecvQ SendQ\n"); + + /* TODO: IPv6 */ + + for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { + if (so->so_state & SS_HOSTFWD) { + state = "HOST_FORWARD"; + } else if (so->so_tcpcb) { + state = tcpstates[so->so_tcpcb->t_state]; + } else { + state = "NONE"; + } + if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { + src_len = sizeof(src); + getsockname(so->s, (struct sockaddr *)&src, &src_len); + dst_addr = so->so_laddr; + dst_port = so->so_lport; + } else { + src.sin_addr = so->so_laddr; + src.sin_port = so->so_lport; + dst_addr = so->so_faddr; + dst_port = so->so_fport; + } + snprintf(buf, sizeof(buf), " TCP[%s]", state); + g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, + src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*", + ntohs(src.sin_port)); + g_string_append_printf(str, "%15s %5d %5d %5d\n", + inet_ntoa(dst_addr), ntohs(dst_port), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } + + for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { + if (so->so_state & SS_HOSTFWD) { + snprintf(buf, sizeof(buf), " UDP[HOST_FORWARD]"); + src_len = sizeof(src); + getsockname(so->s, (struct sockaddr *)&src, &src_len); + dst_addr = so->so_laddr; + dst_port = so->so_lport; + } else { + snprintf(buf, sizeof(buf), " UDP[%d sec]", + (so->so_expire - curtime) / 1000); + src.sin_addr = so->so_laddr; + src.sin_port = so->so_lport; + dst_addr = so->so_faddr; + dst_port = so->so_fport; + } + g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, + src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*", + ntohs(src.sin_port)); + g_string_append_printf(str, "%15s %5d %5d %5d\n", + inet_ntoa(dst_addr), ntohs(dst_port), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } + + for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { + snprintf(buf, sizeof(buf), " ICMP[%d sec]", + (so->so_expire - curtime) / 1000); + src.sin_addr = so->so_laddr; + dst_addr = so->so_faddr; + g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, + src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*"); + g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } + + return g_string_free(str, FALSE); +} diff --git a/slirp/src/misc.h b/slirp/src/misc.h new file mode 100644 index 0000000000..c2ceadb591 --- /dev/null +++ b/slirp/src/misc.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef MISC_H +#define MISC_H + +#include "libslirp.h" + +struct gfwd_list { + SlirpWriteCb write_cb; + void *opaque; + struct in_addr ex_addr; /* Server address */ + int ex_fport; /* Port to telnet to */ + char *ex_exec; /* Command line of what to exec */ + struct gfwd_list *ex_next; +}; + +#define EMU_NONE 0x0 + +/* TCP emulations */ +#define EMU_CTL 0x1 +#define EMU_FTP 0x2 +#define EMU_KSH 0x3 +#define EMU_IRC 0x4 +#define EMU_REALAUDIO 0x5 +#define EMU_RLOGIN 0x6 +#define EMU_IDENT 0x7 + +#define EMU_NOCONNECT 0x10 /* Don't connect */ + +struct tos_t { + uint16_t lport; + uint16_t fport; + uint8_t tos; + uint8_t emu; +}; + +struct emu_t { + uint16_t lport; + uint16_t fport; + uint8_t tos; + uint8_t emu; + struct emu_t *next; +}; + +struct slirp_quehead { + struct slirp_quehead *qh_link; + struct slirp_quehead *qh_rlink; +}; + +void slirp_insque(void *, void *); +void slirp_remque(void *); +int fork_exec(struct socket *so, const char *ex); + +struct gfwd_list * +add_guestfwd(struct gfwd_list **ex_ptr, + SlirpWriteCb write_cb, void *opaque, + struct in_addr addr, int port); + +struct gfwd_list * +add_exec(struct gfwd_list **ex_ptr, const char *cmdline, + struct in_addr addr, int port); + +#endif diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h new file mode 100644 index 0000000000..ea07d1cd0f --- /dev/null +++ b/slirp/src/ncsi-pkt.h @@ -0,0 +1,419 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef NCSI_PKT_H +#define NCSI_PKT_H + +/* from linux/net/ncsi/ncsi-pkt.h */ +#define __be32 uint32_t +#define __be16 uint16_t + +struct ncsi_pkt_hdr { + unsigned char mc_id; /* Management controller ID */ + unsigned char revision; /* NCSI version - 0x01 */ + unsigned char reserved; /* Reserved */ + unsigned char id; /* Packet sequence number */ + unsigned char type; /* Packet type */ + unsigned char channel; /* Network controller ID */ + __be16 length; /* Payload length */ + __be32 reserved1[2]; /* Reserved */ +}; + +struct ncsi_cmd_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ +}; + +struct ncsi_rsp_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ + __be16 code; /* Response code */ + __be16 reason; /* Response reason */ +}; + +struct ncsi_aen_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ + unsigned char reserved2[3]; /* Reserved */ + unsigned char type; /* AEN packet type */ +}; + +/* NCSI common command packet */ +struct ncsi_cmd_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 checksum; /* Checksum */ + unsigned char pad[26]; +}; + +struct ncsi_rsp_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Select Package */ +struct ncsi_cmd_sp_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char hw_arbitration; /* HW arbitration */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Disable Channel */ +struct ncsi_cmd_dc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char ald; /* Allow link down */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Reset Channel */ +struct ncsi_cmd_rc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 reserved; /* Reserved */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* AEN Enable */ +struct ncsi_cmd_ae_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mc_id; /* MC ID */ + __be32 mode; /* AEN working mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Set Link */ +struct ncsi_cmd_sl_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Link working mode */ + __be32 oem_mode; /* OEM link mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Set VLAN Filter */ +struct ncsi_cmd_svf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be16 reserved; /* Reserved */ + __be16 vlan; /* VLAN ID */ + __be16 reserved1; /* Reserved */ + unsigned char index; /* VLAN table index */ + unsigned char enable; /* Enable or disable */ + __be32 checksum; /* Checksum */ + unsigned char pad[14]; +}; + +/* Enable VLAN */ +struct ncsi_cmd_ev_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mode; /* VLAN filter mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Set MAC Address */ +struct ncsi_cmd_sma_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char mac[6]; /* MAC address */ + unsigned char index; /* MAC table index */ + unsigned char at_e; /* Addr type and operation */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Enable Broadcast Filter */ +struct ncsi_cmd_ebf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Filter mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Enable Global Multicast Filter */ +struct ncsi_cmd_egmf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Global MC mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Set NCSI Flow Control */ +struct ncsi_cmd_snfc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mode; /* Flow control mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Get Link Status */ +struct ncsi_rsp_gls_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 status; /* Link status */ + __be32 other; /* Other indications */ + __be32 oem_status; /* OEM link status */ + __be32 checksum; + unsigned char pad[10]; +}; + +/* Get Version ID */ +struct ncsi_rsp_gvi_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 ncsi_version; /* NCSI version */ + unsigned char reserved[3]; /* Reserved */ + unsigned char alpha2; /* NCSI version */ + unsigned char fw_name[12]; /* f/w name string */ + __be32 fw_version; /* f/w version */ + __be16 pci_ids[4]; /* PCI IDs */ + __be32 mf_id; /* Manufacture ID */ + __be32 checksum; +}; + +/* Get Capabilities */ +struct ncsi_rsp_gc_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 cap; /* Capabilities */ + __be32 bc_cap; /* Broadcast cap */ + __be32 mc_cap; /* Multicast cap */ + __be32 buf_cap; /* Buffering cap */ + __be32 aen_cap; /* AEN cap */ + unsigned char vlan_cnt; /* VLAN filter count */ + unsigned char mixed_cnt; /* Mix filter count */ + unsigned char mc_cnt; /* MC filter count */ + unsigned char uc_cnt; /* UC filter count */ + unsigned char reserved[2]; /* Reserved */ + unsigned char vlan_mode; /* VLAN mode */ + unsigned char channel_cnt; /* Channel count */ + __be32 checksum; /* Checksum */ +}; + +/* Get Parameters */ +struct ncsi_rsp_gp_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + unsigned char mac_cnt; /* Number of MAC addr */ + unsigned char reserved[2]; /* Reserved */ + unsigned char mac_enable; /* MAC addr enable flags */ + unsigned char vlan_cnt; /* VLAN tag count */ + unsigned char reserved1; /* Reserved */ + __be16 vlan_enable; /* VLAN tag enable flags */ + __be32 link_mode; /* Link setting */ + __be32 bc_mode; /* BC filter mode */ + __be32 valid_modes; /* Valid mode parameters */ + unsigned char vlan_mode; /* VLAN mode */ + unsigned char fc_mode; /* Flow control mode */ + unsigned char reserved2[2]; /* Reserved */ + __be32 aen_mode; /* AEN mode */ + unsigned char mac[6]; /* Supported MAC addr */ + __be16 vlan; /* Supported VLAN tags */ + __be32 checksum; /* Checksum */ +}; + +/* Get Controller Packet Statistics */ +struct ncsi_rsp_gcps_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 cnt_hi; /* Counter cleared */ + __be32 cnt_lo; /* Counter cleared */ + __be32 rx_bytes; /* Rx bytes */ + __be32 tx_bytes; /* Tx bytes */ + __be32 rx_uc_pkts; /* Rx UC packets */ + __be32 rx_mc_pkts; /* Rx MC packets */ + __be32 rx_bc_pkts; /* Rx BC packets */ + __be32 tx_uc_pkts; /* Tx UC packets */ + __be32 tx_mc_pkts; /* Tx MC packets */ + __be32 tx_bc_pkts; /* Tx BC packets */ + __be32 fcs_err; /* FCS errors */ + __be32 align_err; /* Alignment errors */ + __be32 false_carrier; /* False carrier detection */ + __be32 runt_pkts; /* Rx runt packets */ + __be32 jabber_pkts; /* Rx jabber packets */ + __be32 rx_pause_xon; /* Rx pause XON frames */ + __be32 rx_pause_xoff; /* Rx XOFF frames */ + __be32 tx_pause_xon; /* Tx XON frames */ + __be32 tx_pause_xoff; /* Tx XOFF frames */ + __be32 tx_s_collision; /* Single collision frames */ + __be32 tx_m_collision; /* Multiple collision frames */ + __be32 l_collision; /* Late collision frames */ + __be32 e_collision; /* Excessive collision frames */ + __be32 rx_ctl_frames; /* Rx control frames */ + __be32 rx_64_frames; /* Rx 64-bytes frames */ + __be32 rx_127_frames; /* Rx 65-127 bytes frames */ + __be32 rx_255_frames; /* Rx 128-255 bytes frames */ + __be32 rx_511_frames; /* Rx 256-511 bytes frames */ + __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ + __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */ + __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ + __be32 tx_64_frames; /* Tx 64-bytes frames */ + __be32 tx_127_frames; /* Tx 65-127 bytes frames */ + __be32 tx_255_frames; /* Tx 128-255 bytes frames */ + __be32 tx_511_frames; /* Tx 256-511 bytes frames */ + __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ + __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ + __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ + __be32 rx_valid_bytes; /* Rx valid bytes */ + __be32 rx_runt_pkts; /* Rx error runt packets */ + __be32 rx_jabber_pkts; /* Rx error jabber packets */ + __be32 checksum; /* Checksum */ +}; + +/* Get NCSI Statistics */ +struct ncsi_rsp_gns_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 rx_cmds; /* Rx NCSI commands */ + __be32 dropped_cmds; /* Dropped commands */ + __be32 cmd_type_errs; /* Command type errors */ + __be32 cmd_csum_errs; /* Command checksum errors */ + __be32 rx_pkts; /* Rx NCSI packets */ + __be32 tx_pkts; /* Tx NCSI packets */ + __be32 tx_aen_pkts; /* Tx AEN packets */ + __be32 checksum; /* Checksum */ +}; + +/* Get NCSI Pass-through Statistics */ +struct ncsi_rsp_gnpts_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 tx_pkts; /* Tx packets */ + __be32 tx_dropped; /* Tx dropped packets */ + __be32 tx_channel_err; /* Tx channel errors */ + __be32 tx_us_err; /* Tx undersize errors */ + __be32 rx_pkts; /* Rx packets */ + __be32 rx_dropped; /* Rx dropped packets */ + __be32 rx_channel_err; /* Rx channel errors */ + __be32 rx_us_err; /* Rx undersize errors */ + __be32 rx_os_err; /* Rx oversize errors */ + __be32 checksum; /* Checksum */ +}; + +/* Get package status */ +struct ncsi_rsp_gps_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 status; /* Hardware arbitration status */ + __be32 checksum; +}; + +/* Get package UUID */ +struct ncsi_rsp_gpuuid_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + unsigned char uuid[16]; /* UUID */ + __be32 checksum; +}; + +/* AEN: Link State Change */ +struct ncsi_aen_lsc_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 status; /* Link status */ + __be32 oem_status; /* OEM link status */ + __be32 checksum; /* Checksum */ + unsigned char pad[14]; +}; + +/* AEN: Configuration Required */ +struct ncsi_aen_cr_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* AEN: Host Network Controller Driver Status Change */ +struct ncsi_aen_hncdsc_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 status; /* Status */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* NCSI packet revision */ +#define NCSI_PKT_REVISION 0x01 + +/* NCSI packet commands */ +#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ +#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ +#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ +#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ +#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ +#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ +#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ +#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ +#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ +#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ +#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ +#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ +#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ +#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ +#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ +#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ +#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ +#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ +#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ +#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */ +#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ +#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ +#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ +#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ +#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ +#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ +#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ +#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ +#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ +#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ + +/* NCSI packet responses */ +#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) +#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) +#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) +#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) +#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) +#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) +#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) +#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) +#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) +#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) +#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) +#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) +#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) +#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) +#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) +#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) +#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) +#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) +#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) +#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) +#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) +#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) +#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) +#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) +#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) +#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80) +#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) +#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) +#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) +#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) + +/* NCSI response code/reason */ +#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ +#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ +#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ +#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ +#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ +#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ +#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ +#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ +#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ +#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ +#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ + +/* NCSI AEN packet type */ +#define NCSI_PKT_AEN 0xFF /* AEN Packet */ +#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ +#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ +#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ + +#endif /* NCSI_PKT_H */ diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c new file mode 100644 index 0000000000..359f52c284 --- /dev/null +++ b/slirp/src/ncsi.c @@ -0,0 +1,166 @@ +/* + * NC-SI (Network Controller Sideband Interface) "echo" model + * + * Copyright (C) 2016-2018 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#include "slirp.h" + +#include "ncsi-pkt.h" + +static uint32_t ncsi_calculate_checksum(uint16_t *data, int len) +{ + uint32_t checksum = 0; + int i; + + /* + * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet + * payload interpreted as a series of 16-bit unsigned integer values. + */ + for (i = 0; i < len; i++) { + checksum += htons(data[i]); + } + + checksum = (~checksum + 1); + return checksum; +} + +/* Get Capabilities */ +static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) +{ + struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *) rnh; + + rsp->cap = htonl(~0); + rsp->bc_cap = htonl(~0); + rsp->mc_cap = htonl(~0); + rsp->buf_cap = htonl(~0); + rsp->aen_cap = htonl(~0); + rsp->vlan_mode = 0xff; + rsp->uc_cnt = 2; + return 0; +} + +/* Get Link status */ +static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) +{ + struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *) rnh; + + rsp->status = htonl(0x1); + return 0; +} + +/* Get Parameters */ +static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) +{ + struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *) rnh; + + /* no MAC address filters or VLAN filters on the channel */ + rsp->mac_cnt = 0; + rsp->mac_enable = 0; + rsp->vlan_cnt = 0; + rsp->vlan_enable = 0; + + return 0; +} + +static const struct ncsi_rsp_handler { + unsigned char type; + int payload; + int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); +} ncsi_rsp_handlers[] = { + { NCSI_PKT_RSP_CIS, 4, NULL }, + { NCSI_PKT_RSP_SP, 4, NULL }, + { NCSI_PKT_RSP_DP, 4, NULL }, + { NCSI_PKT_RSP_EC, 4, NULL }, + { NCSI_PKT_RSP_DC, 4, NULL }, + { NCSI_PKT_RSP_RC, 4, NULL }, + { NCSI_PKT_RSP_ECNT, 4, NULL }, + { NCSI_PKT_RSP_DCNT, 4, NULL }, + { NCSI_PKT_RSP_AE, 4, NULL }, + { NCSI_PKT_RSP_SL, 4, NULL }, + { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, + { NCSI_PKT_RSP_SVF, 4, NULL }, + { NCSI_PKT_RSP_EV, 4, NULL }, + { NCSI_PKT_RSP_DV, 4, NULL }, + { NCSI_PKT_RSP_SMA, 4, NULL }, + { NCSI_PKT_RSP_EBF, 4, NULL }, + { NCSI_PKT_RSP_DBF, 4, NULL }, + { NCSI_PKT_RSP_EGMF, 4, NULL }, + { NCSI_PKT_RSP_DGMF, 4, NULL }, + { NCSI_PKT_RSP_SNFC, 4, NULL }, + { NCSI_PKT_RSP_GVI, 40, NULL }, + { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, + { NCSI_PKT_RSP_GP, 40, ncsi_rsp_handler_gp }, + { NCSI_PKT_RSP_GCPS, 172, NULL }, + { NCSI_PKT_RSP_GNS, 172, NULL }, + { NCSI_PKT_RSP_GNPTS, 172, NULL }, + { NCSI_PKT_RSP_GPS, 8, NULL }, + { NCSI_PKT_RSP_OEM, 0, NULL }, + { NCSI_PKT_RSP_PLDM, 0, NULL }, + { NCSI_PKT_RSP_GPUUID, 20, NULL } +}; + +/* + * packet format : ncsi header + payload + checksum + */ +#define NCSI_MAX_PAYLOAD 172 +#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) + +void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ + struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); + uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; + struct ethhdr *reh = (struct ethhdr *)ncsi_reply; + struct ncsi_rsp_pkt_hdr *rnh = (struct ncsi_rsp_pkt_hdr *) + (ncsi_reply + ETH_HLEN); + const struct ncsi_rsp_handler *handler = NULL; + int i; + int ncsi_rsp_len = sizeof(*nh); + uint32_t checksum; + uint32_t *pchecksum; + + memset(ncsi_reply, 0, sizeof(ncsi_reply)); + + memset(reh->h_dest, 0xff, ETH_ALEN); + memset(reh->h_source, 0xff, ETH_ALEN); + reh->h_proto = htons(ETH_P_NCSI); + + for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { + if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { + handler = &ncsi_rsp_handlers[i]; + break; + } + } + + rnh->common.mc_id = nh->mc_id; + rnh->common.revision = NCSI_PKT_REVISION; + rnh->common.id = nh->id; + rnh->common.type = nh->type + 0x80; + rnh->common.channel = nh->channel; + + if (handler) { + rnh->common.length = htons(handler->payload); + rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); + rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); + + if (handler->handler) { + /* TODO: handle errors */ + handler->handler(rnh); + } + ncsi_rsp_len += handler->payload; + } else { + rnh->common.length = 0; + rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); + rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); + } + + /* Add the optional checksum at the end of the frame. */ + checksum = ncsi_calculate_checksum((uint16_t *) rnh, ncsi_rsp_len); + pchecksum = (uint32_t *)((void *) rnh + ncsi_rsp_len); + *pchecksum = htonl(checksum); + ncsi_rsp_len += 4; + + slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); +} diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c new file mode 100644 index 0000000000..34ea4fdf1f --- /dev/null +++ b/slirp/src/ndp_table.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. + */ + +#include "slirp.h" + +void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, + uint8_t ethaddr[ETH_ALEN]) +{ + char addrstr[INET6_ADDRSTRLEN]; + NdpTable *ndp_table = &slirp->ndp_table; + int i; + + inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); + + DEBUG_CALL("ndp_table_add"); + DEBUG_ARG("ip = %s", addrstr); + DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", + ethaddr[0], ethaddr[1], ethaddr[2], + ethaddr[3], ethaddr[4], ethaddr[5]); + + if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { + /* Do not register multicast or unspecified addresses */ + DEBUG_CALL(" abort: do not register multicast or unspecified address"); + return; + } + + /* Search for an entry */ + for (i = 0; i < NDP_TABLE_SIZE; i++) { + if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { + DEBUG_CALL(" already in table: update the entry"); + /* Update the entry */ + memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); + return; + } + } + + /* No entry found, create a new one */ + DEBUG_CALL(" create new entry"); + ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; + memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, + ethaddr, ETH_ALEN); + ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; +} + +bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, + uint8_t out_ethaddr[ETH_ALEN]) +{ + char addrstr[INET6_ADDRSTRLEN]; + NdpTable *ndp_table = &slirp->ndp_table; + int i; + + inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); + + DEBUG_CALL("ndp_table_search"); + DEBUG_ARG("ip = %s", addrstr); + + assert(!in6_zero(&ip_addr)); + + /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ + if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { + out_ethaddr[0] = 0x33; out_ethaddr[1] = 0x33; + out_ethaddr[2] = ip_addr.s6_addr[12]; + out_ethaddr[3] = ip_addr.s6_addr[13]; + out_ethaddr[4] = ip_addr.s6_addr[14]; + out_ethaddr[5] = ip_addr.s6_addr[15]; + DEBUG_ARG("multicast addr = %02x:%02x:%02x:%02x:%02x:%02x", + out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], + out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); + return 1; + } + + for (i = 0; i < NDP_TABLE_SIZE; i++) { + if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { + memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); + DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", + out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], + out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); + return 1; + } + } + + DEBUG_CALL(" ip not found in table"); + return 0; +} diff --git a/slirp/src/qtailq.h b/slirp/src/qtailq.h new file mode 100644 index 0000000000..a89b0c439a --- /dev/null +++ b/slirp/src/qtailq.h @@ -0,0 +1,193 @@ +/* $NetBSD: queue.h,v 1.52 2009/04/20 09:56:08 mschuett Exp $ */ + +/* + * slirp version: Copy from QEMU, removed all but tail queues. + */ + +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + */ + +#ifndef QTAILQ_H +#define QTAILQ_H + +/* + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + */ +typedef struct QTailQLink { + void *tql_next; + struct QTailQLink *tql_prev; +} QTailQLink; + +/* + * Tail queue definitions. The union acts as a poor man template, as if + * it were QTailQLink<type>. + */ +#define QTAILQ_HEAD(name, type) \ + union name { \ + struct type *tqh_first; /* first element */ \ + QTailQLink tqh_circ; /* link for circular backwards list */ \ + } + +#define QTAILQ_HEAD_INITIALIZER(head) \ + { .tqh_circ = { NULL, &(head).tqh_circ } } + +#define QTAILQ_ENTRY(type) \ + union { \ + struct type *tqe_next; /* next element */ \ + QTailQLink tqe_circ; /* link for circular backwards list */ \ + } + +#define QTAILQ_INIT(head) do { \ + (head)->tqh_first = NULL; \ + (head)->tqh_circ.tql_prev = &(head)->tqh_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_HEAD(head, elm, field) do { \ + if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ + (head)->tqh_first->field.tqe_circ.tql_prev = \ + &(elm)->field.tqe_circ; \ + else \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ + (head)->tqh_first = (elm); \ + (elm)->field.tqe_circ.tql_prev = &(head)->tqh_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_TAIL(head, elm, field) do { \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_circ.tql_prev = (head)->tqh_circ.tql_prev; \ + (head)->tqh_circ.tql_prev->tql_next = (elm); \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ + (elm)->field.tqe_next->field.tqe_circ.tql_prev = \ + &(elm)->field.tqe_circ; \ + else \ + (head)->tqh_circ.tql_prev = &(elm)->field.tqe_circ; \ + (listelm)->field.tqe_next = (elm); \ + (elm)->field.tqe_circ.tql_prev = &(listelm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_circ.tql_prev = (listelm)->field.tqe_circ.tql_prev; \ + (elm)->field.tqe_next = (listelm); \ + (listelm)->field.tqe_circ.tql_prev->tql_next = (elm); \ + (listelm)->field.tqe_circ.tql_prev = &(elm)->field.tqe_circ; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_REMOVE(head, elm, field) do { \ + if (((elm)->field.tqe_next) != NULL) \ + (elm)->field.tqe_next->field.tqe_circ.tql_prev = \ + (elm)->field.tqe_circ.tql_prev; \ + else \ + (head)->tqh_circ.tql_prev = (elm)->field.tqe_circ.tql_prev; \ + (elm)->field.tqe_circ.tql_prev->tql_next = (elm)->field.tqe_next; \ + (elm)->field.tqe_circ.tql_prev = NULL; \ +} while (/*CONSTCOND*/0) + +#define QTAILQ_FOREACH(var, head, field) \ + for ((var) = ((head)->tqh_first); \ + (var); \ + (var) = ((var)->field.tqe_next)) + +#define QTAILQ_FOREACH_SAFE(var, head, field, next_var) \ + for ((var) = ((head)->tqh_first); \ + (var) && ((next_var) = ((var)->field.tqe_next), 1); \ + (var) = (next_var)) + +#define QTAILQ_FOREACH_REVERSE(var, head, field) \ + for ((var) = QTAILQ_LAST(head); \ + (var); \ + (var) = QTAILQ_PREV(var, field)) + +#define QTAILQ_FOREACH_REVERSE_SAFE(var, head, field, prev_var) \ + for ((var) = QTAILQ_LAST(head); \ + (var) && ((prev_var) = QTAILQ_PREV(var, field)); \ + (var) = (prev_var)) + +/* + * Tail queue access methods. + */ +#define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL) +#define QTAILQ_FIRST(head) ((head)->tqh_first) +#define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define QTAILQ_IN_USE(elm, field) ((elm)->field.tqe_circ.tql_prev != NULL) + +#define QTAILQ_LINK_PREV(link) \ + ((link).tql_prev->tql_prev->tql_next) +#define QTAILQ_LAST(head) \ + ((typeof((head)->tqh_first)) QTAILQ_LINK_PREV((head)->tqh_circ)) +#define QTAILQ_PREV(elm, field) \ + ((typeof((elm)->field.tqe_next)) QTAILQ_LINK_PREV((elm)->field.tqe_circ)) + +#define field_at_offset(base, offset, type) \ + ((type *) (((char *) (base)) + (offset))) + +/* + * Raw access of elements of a tail queue head. Offsets are all zero + * because it's a union. + */ +#define QTAILQ_RAW_FIRST(head) \ + field_at_offset(head, 0, void *) +#define QTAILQ_RAW_TQH_CIRC(head) \ + field_at_offset(head, 0, QTailQLink) + +/* + * Raw access of elements of a tail entry + */ +#define QTAILQ_RAW_NEXT(elm, entry) \ + field_at_offset(elm, entry, void *) +#define QTAILQ_RAW_TQE_CIRC(elm, entry) \ + field_at_offset(elm, entry, QTailQLink) +/* + * Tail queue traversal using pointer arithmetic. + */ +#define QTAILQ_RAW_FOREACH(elm, head, entry) \ + for ((elm) = *QTAILQ_RAW_FIRST(head); \ + (elm); \ + (elm) = *QTAILQ_RAW_NEXT(elm, entry)) +/* + * Tail queue insertion using pointer arithmetic. + */ +#define QTAILQ_RAW_INSERT_TAIL(head, elm, entry) do { \ + *QTAILQ_RAW_NEXT(elm, entry) = NULL; \ + QTAILQ_RAW_TQE_CIRC(elm, entry)->tql_prev = QTAILQ_RAW_TQH_CIRC(head)->tql_prev; \ + QTAILQ_RAW_TQH_CIRC(head)->tql_prev->tql_next = (elm); \ + QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry); \ +} while (/*CONSTCOND*/0) + +#endif /* QTAILQ_H */ diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c new file mode 100644 index 0000000000..51a9f0cc7d --- /dev/null +++ b/slirp/src/sbuf.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" + +static void sbappendsb(struct sbuf *sb, struct mbuf *m); + +void +sbfree(struct sbuf *sb) +{ + free(sb->sb_data); +} + +bool +sbdrop(struct sbuf *sb, int num) +{ + int limit = sb->sb_datalen / 2; + + /* + * We can only drop how much we have + * This should never succeed + */ + if(num > sb->sb_cc) + num = sb->sb_cc; + sb->sb_cc -= num; + sb->sb_rptr += num; + if(sb->sb_rptr >= sb->sb_data + sb->sb_datalen) + sb->sb_rptr -= sb->sb_datalen; + + if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { + return true; + } + + return false; +} + +void +sbreserve(struct sbuf *sb, int size) +{ + if (sb->sb_data) { + /* Already alloced, realloc if necessary */ + if (sb->sb_datalen != size) { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)realloc(sb->sb_data, size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } + } else { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } +} + +/* + * Try and write() to the socket, whatever doesn't get written + * append to the buffer... for a host with a fast net connection, + * this prevents an unnecessary copy of the data + * (the socket is non-blocking, so we won't hang) + */ +void +sbappend(struct socket *so, struct mbuf *m) +{ + int ret = 0; + + DEBUG_CALL("sbappend"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("m->m_len = %d", m->m_len); + + /* Shouldn't happen, but... e.g. foreign host closes connection */ + if (m->m_len <= 0) { + m_free(m); + return; + } + + /* + * If there is urgent data, call sosendoob + * if not all was sent, sowrite will take care of the rest + * (The rest of this function is just an optimisation) + */ + if (so->so_urgc) { + sbappendsb(&so->so_rcv, m); + m_free(m); + (void)sosendoob(so); + return; + } + + /* + * We only write if there's nothing in the buffer, + * ottherwise it'll arrive out of order, and hence corrupt + */ + if (!so->so_rcv.sb_cc) + ret = slirp_send(so, m->m_data, m->m_len, 0); + + if (ret <= 0) { + /* + * Nothing was written + * It's possible that the socket has closed, but + * we don't need to check because if it has closed, + * it will be detected in the normal way by soread() + */ + sbappendsb(&so->so_rcv, m); + } else if (ret != m->m_len) { + /* + * Something was written, but not everything.. + * sbappendsb the rest + */ + m->m_len -= ret; + m->m_data += ret; + sbappendsb(&so->so_rcv, m); + } /* else */ + /* Whatever happened, we free the mbuf */ + m_free(m); +} + +/* + * Copy the data from m into sb + * The caller is responsible to make sure there's enough room + */ +static void +sbappendsb(struct sbuf *sb, struct mbuf *m) +{ + int len, n, nn; + + len = m->m_len; + + if (sb->sb_wptr < sb->sb_rptr) { + n = sb->sb_rptr - sb->sb_wptr; + if (n > len) n = len; + memcpy(sb->sb_wptr, m->m_data, n); + } else { + /* Do the right edge first */ + n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; + if (n > len) n = len; + memcpy(sb->sb_wptr, m->m_data, n); + len -= n; + if (len) { + /* Now the left edge */ + nn = sb->sb_rptr - sb->sb_data; + if (nn > len) nn = len; + memcpy(sb->sb_data,m->m_data+n,nn); + n += nn; + } + } + + sb->sb_cc += n; + sb->sb_wptr += n; + if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) + sb->sb_wptr -= sb->sb_datalen; +} + +/* + * Copy data from sbuf to a normal, straight buffer + * Don't update the sbuf rptr, this will be + * done in sbdrop when the data is acked + */ +void +sbcopy(struct sbuf *sb, int off, int len, char *to) +{ + char *from; + + from = sb->sb_rptr + off; + if (from >= sb->sb_data + sb->sb_datalen) + from -= sb->sb_datalen; + + if (from < sb->sb_wptr) { + if (len > sb->sb_cc) len = sb->sb_cc; + memcpy(to,from,len); + } else { + /* re-use off */ + off = (sb->sb_data + sb->sb_datalen) - from; + if (off > len) off = len; + memcpy(to,from,off); + len -= off; + if (len) + memcpy(to+off,sb->sb_data,len); + } +} diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h new file mode 100644 index 0000000000..1cb9a42834 --- /dev/null +++ b/slirp/src/sbuf.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef SBUF_H +#define SBUF_H + +#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) + +struct sbuf { + uint32_t sb_cc; /* actual chars in buffer */ + uint32_t sb_datalen; /* Length of data */ + char *sb_wptr; /* write pointer. points to where the next + * bytes should be written in the sbuf */ + char *sb_rptr; /* read pointer. points to where the next + * byte should be read from the sbuf */ + char *sb_data; /* Actual data */ +}; + +void sbfree(struct sbuf *); +bool sbdrop(struct sbuf *, int); +void sbreserve(struct sbuf *, int); +void sbappend(struct socket *, struct mbuf *); +void sbcopy(struct sbuf *, int, int, char *); + +#endif diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c new file mode 100644 index 0000000000..18af670a0a --- /dev/null +++ b/slirp/src/slirp.c @@ -0,0 +1,1117 @@ +/* + * libslirp glue + * + * Copyright (c) 2004-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "slirp.h" + + +#ifndef _WIN32 +#include <net/if.h> +#endif + +int slirp_debug; + +/* Define to 1 if you want KEEPALIVE timers */ +bool slirp_do_keepalive; + +/* host loopback address */ +struct in_addr loopback_addr; +/* host loopback network mask */ +unsigned long loopback_mask; + +/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ +static const uint8_t special_ethaddr[ETH_ALEN] = { + 0x52, 0x55, 0x00, 0x00, 0x00, 0x00 +}; + +unsigned curtime; + +static struct in_addr dns_addr; +#ifndef _WIN32 +static struct in6_addr dns6_addr; +#endif +static unsigned dns_addr_time; +#ifndef _WIN32 +static unsigned dns6_addr_time; +#endif + +#define TIMEOUT_FAST 2 /* milliseconds */ +#define TIMEOUT_SLOW 499 /* milliseconds */ +/* for the aging of certain requests like DNS */ +#define TIMEOUT_DEFAULT 1000 /* milliseconds */ + +#ifdef _WIN32 + +int get_dns_addr(struct in_addr *pdns_addr) +{ + FIXED_INFO *FixedInfo=NULL; + ULONG BufLen; + DWORD ret; + IP_ADDR_STRING *pIPAddr; + struct in_addr tmp_addr; + + if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { + *pdns_addr = dns_addr; + return 0; + } + + FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); + BufLen = sizeof(FIXED_INFO); + + if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { + if (FixedInfo) { + GlobalFree(FixedInfo); + FixedInfo = NULL; + } + FixedInfo = GlobalAlloc(GPTR, BufLen); + } + + if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { + printf("GetNetworkParams failed. ret = %08x\n", (unsigned)ret ); + if (FixedInfo) { + GlobalFree(FixedInfo); + FixedInfo = NULL; + } + return -1; + } + + pIPAddr = &(FixedInfo->DnsServerList); + inet_aton(pIPAddr->IpAddress.String, &tmp_addr); + *pdns_addr = tmp_addr; + dns_addr = tmp_addr; + dns_addr_time = curtime; + if (FixedInfo) { + GlobalFree(FixedInfo); + FixedInfo = NULL; + } + return 0; +} + +int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) +{ + return -1; +} + +static void winsock_cleanup(void) +{ + WSACleanup(); +} + +#else + +static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, + socklen_t addrlen, + struct stat *cached_stat, unsigned *cached_time) +{ + struct stat old_stat; + if (curtime - *cached_time < TIMEOUT_DEFAULT) { + memcpy(pdns_addr, cached_addr, addrlen); + return 0; + } + old_stat = *cached_stat; + if (stat("/etc/resolv.conf", cached_stat) != 0) { + return -1; + } + if (cached_stat->st_dev == old_stat.st_dev + && cached_stat->st_ino == old_stat.st_ino + && cached_stat->st_size == old_stat.st_size + && cached_stat->st_mtime == old_stat.st_mtime) { + memcpy(pdns_addr, cached_addr, addrlen); + return 0; + } + return 1; +} + +static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, + socklen_t addrlen, uint32_t *scope_id, + unsigned *cached_time) +{ + char buff[512]; + char buff2[257]; + FILE *f; + int found = 0; + void *tmp_addr = alloca(addrlen); + unsigned if_index; + + f = fopen("/etc/resolv.conf", "r"); + if (!f) + return -1; + + DEBUG_MISC("IP address of your DNS(s):"); + while (fgets(buff, 512, f) != NULL) { + if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { + char *c = strchr(buff2, '%'); + if (c) { + if_index = if_nametoindex(c + 1); + *c = '\0'; + } else { + if_index = 0; + } + + if (!inet_pton(af, buff2, tmp_addr)) { + continue; + } + /* If it's the first one, set it to dns_addr */ + if (!found) { + memcpy(pdns_addr, tmp_addr, addrlen); + memcpy(cached_addr, tmp_addr, addrlen); + if (scope_id) { + *scope_id = if_index; + } + *cached_time = curtime; + } + + if (++found > 3) { + DEBUG_MISC(" (more)"); + break; + } else if (slirp_debug & DBG_MISC) { + char s[INET6_ADDRSTRLEN]; + const char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); + if (!res) { + res = " (string conversion error)"; + } + DEBUG_MISC(" %s", res); + } + } + } + fclose(f); + if (!found) + return -1; + return 0; +} + +int get_dns_addr(struct in_addr *pdns_addr) +{ + static struct stat dns_addr_stat; + + if (dns_addr.s_addr != 0) { + int ret; + ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), + &dns_addr_stat, &dns_addr_time); + if (ret <= 0) { + return ret; + } + } + return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, + sizeof(dns_addr), NULL, &dns_addr_time); +} + +int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) +{ + static struct stat dns6_addr_stat; + + if (!in6_zero(&dns6_addr)) { + int ret; + ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), + &dns6_addr_stat, &dns6_addr_time); + if (ret <= 0) { + return ret; + } + } + return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, + sizeof(dns6_addr), + scope_id, &dns6_addr_time); +} + +#endif + +static void slirp_init_once(void) +{ + static int initialized; + const char *debug; +#ifdef _WIN32 + WSADATA Data; +#endif + + if (initialized) { + return; + } + initialized = 1; + +#ifdef _WIN32 + WSAStartup(MAKEWORD(2,0), &Data); + atexit(winsock_cleanup); +#endif + + loopback_addr.s_addr = htonl(INADDR_LOOPBACK); + loopback_mask = htonl(IN_CLASSA_NET); + + debug = g_getenv("SLIRP_DEBUG"); + if (debug) { + const GDebugKey keys[] = { + { "call", DBG_CALL }, + { "misc", DBG_MISC }, + { "error", DBG_ERROR }, + { "tftp", DBG_TFTP }, + }; + slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); + } + + +} + +Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, + struct in_addr vnetmask, struct in_addr vhost, + bool in6_enabled, + struct in6_addr vprefix_addr6, uint8_t vprefix_len, + struct in6_addr vhost6, const char *vhostname, + const char *tftp_server_name, + const char *tftp_path, const char *bootfile, + struct in_addr vdhcp_start, struct in_addr vnameserver, + struct in6_addr vnameserver6, const char **vdnssearch, + const char *vdomainname, + const SlirpCb *callbacks, + void *opaque) +{ + Slirp *slirp = g_malloc0(sizeof(Slirp)); + + slirp_init_once(); + + slirp->opaque = opaque; + slirp->cb = callbacks; + slirp->grand = g_rand_new(); + slirp->restricted = restricted; + + slirp->in_enabled = in_enabled; + slirp->in6_enabled = in6_enabled; + + if_init(slirp); + ip_init(slirp); + ip6_init(slirp); + + /* Initialise mbufs *after* setting the MTU */ + m_init(slirp); + + slirp->vnetwork_addr = vnetwork; + slirp->vnetwork_mask = vnetmask; + slirp->vhost_addr = vhost; + slirp->vprefix_addr6 = vprefix_addr6; + slirp->vprefix_len = vprefix_len; + slirp->vhost_addr6 = vhost6; + if (vhostname) { + slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), + vhostname); + } + slirp->tftp_prefix = g_strdup(tftp_path); + slirp->bootp_filename = g_strdup(bootfile); + slirp->vdomainname = g_strdup(vdomainname); + slirp->vdhcp_startaddr = vdhcp_start; + slirp->vnameserver_addr = vnameserver; + slirp->vnameserver_addr6 = vnameserver6; + slirp->tftp_server_name = g_strdup(tftp_server_name); + + if (vdnssearch) { + translate_dnssearch(slirp, vdnssearch); + } + + return slirp; +} + +void slirp_cleanup(Slirp *slirp) +{ + struct gfwd_list *e, *next; + + for (e = slirp->guestfwd_list; e; e = next) { + next = e->ex_next; + g_free(e->ex_exec); + g_free(e); + } + + ip_cleanup(slirp); + ip6_cleanup(slirp); + m_cleanup(slirp); + + g_rand_free(slirp->grand); + + g_free(slirp->vdnssearch); + g_free(slirp->tftp_prefix); + g_free(slirp->bootp_filename); + g_free(slirp->vdomainname); + g_free(slirp); +} + +#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) +#define CONN_CANFRCV(so) (((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) + +static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) +{ + uint32_t t; + + if (*timeout <= TIMEOUT_FAST) { + return; + } + + t = MIN(1000, *timeout); + + /* If we have tcp timeout with slirp, then we will fill @timeout with + * more precise value. + */ + if (slirp->time_fasttimo) { + *timeout = TIMEOUT_FAST; + return; + } + if (slirp->do_slowtimo) { + t = MIN(TIMEOUT_SLOW, t); + } + *timeout = t; +} + +void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, + SlirpAddPollCb add_poll, void *opaque) +{ + struct socket *so, *so_next; + + /* + * First, TCP sockets + */ + + /* + * *_slowtimo needs calling if there are IP fragments + * in the fragment queue, or there are TCP connections active + */ + slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || + (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); + + for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { + int events = 0; + + so_next = so->so_next; + + so->pollfds_idx = -1; + + /* + * See if we need a tcp_fasttimo + */ + if (slirp->time_fasttimo == 0 && + so->so_tcpcb->t_flags & TF_DELACK) { + slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ + } + + /* + * NOFDREF can include still connecting to local-host, + * newly socreated() sockets etc. Don't want to select these. + */ + if (so->so_state & SS_NOFDREF || so->s == -1) { + continue; + } + + /* + * Set for reading sockets which are accepting + */ + if (so->so_state & SS_FACCEPTCONN) { + so->pollfds_idx = add_poll(so->s, + SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); + continue; + } + + /* + * Set for writing sockets which are connecting + */ + if (so->so_state & SS_ISFCONNECTING) { + so->pollfds_idx = add_poll(so->s, + SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); + continue; + } + + /* + * Set for writing if we are connected, can send more, and + * we have something to send + */ + if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { + events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; + } + + /* + * Set for reading (and urgent data) if we are connected, can + * receive more, and we have room for it XXX /2 ? + */ + if (CONN_CANFRCV(so) && + (so->so_snd.sb_cc < (so->so_snd.sb_datalen/2))) { + events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | + SLIRP_POLL_ERR | SLIRP_POLL_PRI; + } + + if (events) { + so->pollfds_idx = add_poll(so->s, events, opaque); + } + } + + /* + * UDP sockets + */ + for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { + so_next = so->so_next; + + so->pollfds_idx = -1; + + /* + * See if it's timed out + */ + if (so->so_expire) { + if (so->so_expire <= curtime) { + udp_detach(so); + continue; + } else { + slirp->do_slowtimo = true; /* Let socket expire */ + } + } + + /* + * When UDP packets are received from over the + * link, they're sendto()'d straight away, so + * no need for setting for writing + * Limit the number of packets queued by this session + * to 4. Note that even though we try and limit this + * to 4 packets, the session could have more queued + * if the packets needed to be fragmented + * (XXX <= 4 ?) + */ + if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { + so->pollfds_idx = add_poll(so->s, + SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); + } + } + + /* + * ICMP sockets + */ + for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { + so_next = so->so_next; + + so->pollfds_idx = -1; + + /* + * See if it's timed out + */ + if (so->so_expire) { + if (so->so_expire <= curtime) { + icmp_detach(so); + continue; + } else { + slirp->do_slowtimo = true; /* Let socket expire */ + } + } + + if (so->so_state & SS_ISFCONNECTED) { + so->pollfds_idx = add_poll(so->s, + SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); + } + } + + slirp_update_timeout(slirp, timeout); +} + +void slirp_pollfds_poll(Slirp *slirp, int select_error, + SlirpGetREventsCb get_revents, void *opaque) +{ + struct socket *so, *so_next; + int ret; + + curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; + + /* + * See if anything has timed out + */ + if (slirp->time_fasttimo && + ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { + tcp_fasttimo(slirp); + slirp->time_fasttimo = 0; + } + if (slirp->do_slowtimo && + ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { + ip_slowtimo(slirp); + tcp_slowtimo(slirp); + slirp->last_slowtimo = curtime; + } + + /* + * Check sockets + */ + if (!select_error) { + /* + * Check TCP sockets + */ + for (so = slirp->tcb.so_next; so != &slirp->tcb; + so = so_next) { + int revents; + + so_next = so->so_next; + + revents = 0; + if (so->pollfds_idx != -1) { + revents = get_revents(so->pollfds_idx, opaque); + } + + if (so->so_state & SS_NOFDREF || so->s == -1) { + continue; + } + + /* + * Check for URG data + * This will soread as well, so no need to + * test for SLIRP_POLL_IN below if this succeeds + */ + if (revents & SLIRP_POLL_PRI) { + ret = sorecvoob(so); + if (ret < 0) { + /* Socket error might have resulted in the socket being + * removed, do not try to do anything more with it. */ + continue; + } + } + /* + * Check sockets for reading + */ + else if (revents & + (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR)) { + /* + * Check for incoming connections + */ + if (so->so_state & SS_FACCEPTCONN) { + tcp_connect(so); + continue; + } /* else */ + ret = soread(so); + + /* Output it if we read something */ + if (ret > 0) { + tcp_output(sototcpcb(so)); + } + if (ret < 0) { + /* Socket error might have resulted in the socket being + * removed, do not try to do anything more with it. */ + continue; + } + } + + /* + * Check sockets for writing + */ + if (!(so->so_state & SS_NOFDREF) && + (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { + /* + * Check for non-blocking, still-connecting sockets + */ + if (so->so_state & SS_ISFCONNECTING) { + /* Connected */ + so->so_state &= ~SS_ISFCONNECTING; + + ret = send(so->s, (const void *) &ret, 0, 0); + if (ret < 0) { + /* XXXXX Must fix, zero bytes is a NOP */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) { + continue; + } + + /* else failed */ + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; + } + /* else so->so_state &= ~SS_ISFCONNECTING; */ + + /* + * Continue tcp_input + */ + tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, + so->so_ffamily); + /* continue; */ + } else { + ret = sowrite(so); + if (ret > 0) { + /* Call tcp_output in case we need to send a window + * update to the guest, otherwise it will be stuck + * until it sends a window probe. */ + tcp_output(sototcpcb(so)); + } + } + } + } + + /* + * Now UDP sockets. + * Incoming packets are sent straight away, they're not buffered. + * Incoming UDP data isn't buffered either. + */ + for (so = slirp->udb.so_next; so != &slirp->udb; + so = so_next) { + int revents; + + so_next = so->so_next; + + revents = 0; + if (so->pollfds_idx != -1) { + revents = get_revents(so->pollfds_idx, opaque); + } + + if (so->s != -1 && + (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { + sorecvfrom(so); + } + } + + /* + * Check incoming ICMP relies. + */ + for (so = slirp->icmp.so_next; so != &slirp->icmp; + so = so_next) { + int revents; + + so_next = so->so_next; + + revents = 0; + if (so->pollfds_idx != -1) { + revents = get_revents(so->pollfds_idx, opaque); + } + + if (so->s != -1 && + (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { + icmp_receive(so); + } + } + } + + if_start(slirp); +} + +static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ + struct slirp_arphdr *ah = (struct slirp_arphdr *)(pkt + ETH_HLEN); + uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; + struct ethhdr *reh = (struct ethhdr *)arp_reply; + struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); + int ar_op; + struct gfwd_list *ex_ptr; + + if (!slirp->in_enabled) { + return; + } + + ar_op = ntohs(ah->ar_op); + switch(ar_op) { + case ARPOP_REQUEST: + if (ah->ar_tip == ah->ar_sip) { + /* Gratuitous ARP */ + arp_table_add(slirp, ah->ar_sip, ah->ar_sha); + return; + } + + if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + if (ah->ar_tip == slirp->vnameserver_addr.s_addr || + ah->ar_tip == slirp->vhost_addr.s_addr) + goto arp_ok; + /* TODO: IPv6 */ + for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_addr.s_addr == ah->ar_tip) + goto arp_ok; + } + return; + arp_ok: + memset(arp_reply, 0, sizeof(arp_reply)); + + arp_table_add(slirp, ah->ar_sip, ah->ar_sha); + + /* ARP request for alias/dns mac address */ + memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); + memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); + memcpy(&reh->h_source[2], &ah->ar_tip, 4); + reh->h_proto = htons(ETH_P_ARP); + + rah->ar_hrd = htons(1); + rah->ar_pro = htons(ETH_P_IP); + rah->ar_hln = ETH_ALEN; + rah->ar_pln = 4; + rah->ar_op = htons(ARPOP_REPLY); + memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); + rah->ar_sip = ah->ar_tip; + memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); + rah->ar_tip = ah->ar_sip; + slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); + } + break; + case ARPOP_REPLY: + arp_table_add(slirp, ah->ar_sip, ah->ar_sha); + break; + default: + break; + } +} + +void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ + struct mbuf *m; + int proto; + + if (pkt_len < ETH_HLEN) + return; + + proto = (((uint16_t) pkt[12]) << 8) + pkt[13]; + switch(proto) { + case ETH_P_ARP: + arp_input(slirp, pkt, pkt_len); + break; + case ETH_P_IP: + case ETH_P_IPV6: + m = m_get(slirp); + if (!m) + return; + /* Note: we add 2 to align the IP header on 4 bytes, + * and add the margin for the tcpiphdr overhead */ + if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { + m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); + } + m->m_len = pkt_len + TCPIPHDR_DELTA + 2; + memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); + + m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; + m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; + + if (proto == ETH_P_IP) { + ip_input(m); + } else if (proto == ETH_P_IPV6) { + ip6_input(m); + } + break; + + case ETH_P_NCSI: + ncsi_input(slirp, pkt, pkt_len); + break; + + default: + break; + } +} + +/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no + * packet should be sent, 0 if the packet must be re-queued, 2 if the packet + * is ready to go. + */ +static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, + uint8_t ethaddr[ETH_ALEN]) +{ + const struct ip *iph = (const struct ip *)ifm->m_data; + + if (iph->ip_dst.s_addr == 0) { + /* 0.0.0.0 can not be a destination address, something went wrong, + * avoid making it worse */ + return 1; + } + if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { + uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; + struct ethhdr *reh = (struct ethhdr *)arp_req; + struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); + + if (!ifm->resolution_requested) { + /* If the client addr is not known, send an ARP request */ + memset(reh->h_dest, 0xff, ETH_ALEN); + memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); + memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); + reh->h_proto = htons(ETH_P_ARP); + rah->ar_hrd = htons(1); + rah->ar_pro = htons(ETH_P_IP); + rah->ar_hln = ETH_ALEN; + rah->ar_pln = 4; + rah->ar_op = htons(ARPOP_REQUEST); + + /* source hw addr */ + memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); + memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); + + /* source IP */ + rah->ar_sip = slirp->vhost_addr.s_addr; + + /* target hw addr (none) */ + memset(rah->ar_tha, 0, ETH_ALEN); + + /* target IP */ + rah->ar_tip = iph->ip_dst.s_addr; + slirp->client_ipaddr = iph->ip_dst; + slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); + ifm->resolution_requested = true; + + /* Expire request and drop outgoing packet after 1 second */ + ifm->expiration_date = + slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; + } + return 0; + } else { + memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); + /* XXX: not correct */ + memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); + eh->h_proto = htons(ETH_P_IP); + + /* Send this */ + return 2; + } +} + +/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no + * packet should be sent, 0 if the packet must be re-queued, 2 if the packet + * is ready to go. + */ +static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, + uint8_t ethaddr[ETH_ALEN]) +{ + const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); + if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { + if (!ifm->resolution_requested) { + ndp_send_ns(slirp, ip6h->ip_dst); + ifm->resolution_requested = true; + ifm->expiration_date = slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; + } + return 0; + } else { + eh->h_proto = htons(ETH_P_IPV6); + in6_compute_ethaddr(ip6h->ip_src, eh->h_source); + + /* Send this */ + return 2; + } +} + +/* Output the IP packet to the ethernet device. Returns 0 if the packet must be + * re-queued. + */ +int if_encap(Slirp *slirp, struct mbuf *ifm) +{ + uint8_t buf[1600]; + struct ethhdr *eh = (struct ethhdr *)buf; + uint8_t ethaddr[ETH_ALEN]; + const struct ip *iph = (const struct ip *)ifm->m_data; + int ret; + + if (ifm->m_len + ETH_HLEN > sizeof(buf)) { + return 1; + } + + switch (iph->ip_v) { + case IPVERSION: + ret = if_encap4(slirp, ifm, eh, ethaddr); + if (ret < 2) { + return ret; + } + break; + + case IP6VERSION: + ret = if_encap6(slirp, ifm, eh, ethaddr); + if (ret < 2) { + return ret; + } + break; + + default: + g_assert_not_reached(); + break; + } + + memcpy(eh->h_dest, ethaddr, ETH_ALEN); + DEBUG_ARG("src = %02x:%02x:%02x:%02x:%02x:%02x", + eh->h_source[0], eh->h_source[1], eh->h_source[2], + eh->h_source[3], eh->h_source[4], eh->h_source[5]); + DEBUG_ARG("dst = %02x:%02x:%02x:%02x:%02x:%02x", + eh->h_dest[0], eh->h_dest[1], eh->h_dest[2], + eh->h_dest[3], eh->h_dest[4], eh->h_dest[5]); + memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); + slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); + return 1; +} + +/* Drop host forwarding rule, return 0 if found. */ +/* TODO: IPv6 */ +int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, + int host_port) +{ + struct socket *so; + struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); + struct sockaddr_in addr; + int port = htons(host_port); + socklen_t addr_len; + + for (so = head->so_next; so != head; so = so->so_next) { + addr_len = sizeof(addr); + if ((so->so_state & SS_HOSTFWD) && + getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && + addr.sin_addr.s_addr == host_addr.s_addr && + addr.sin_port == port) { + so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); + closesocket(so->s); + sofree(so); + return 0; + } + } + + return -1; +} + +/* TODO: IPv6 */ +int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, + int host_port, struct in_addr guest_addr, int guest_port) +{ + if (!guest_addr.s_addr) { + guest_addr = slirp->vdhcp_startaddr; + } + if (is_udp) { + if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), + guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) + return -1; + } else { + if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), + guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) + return -1; + } + return 0; +} + +/* TODO: IPv6 */ +static bool +check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, int guest_port) +{ + struct gfwd_list *tmp_ptr; + + if (!guest_addr->s_addr) { + guest_addr->s_addr = slirp->vnetwork_addr.s_addr | + (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); + } + if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != + slirp->vnetwork_addr.s_addr || + guest_addr->s_addr == slirp->vhost_addr.s_addr || + guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { + return false; + } + + /* check if the port is "bound" */ + for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { + if (guest_port == tmp_ptr->ex_fport && + guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) + return false; + } + + return true; +} + +int slirp_add_exec(Slirp *slirp, const char *cmdline, + struct in_addr *guest_addr, int guest_port) +{ + if (!check_guestfwd(slirp, guest_addr, guest_port)) { + return -1; + } + + add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); + return 0; +} + +int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, + struct in_addr *guest_addr, int guest_port) +{ + if (!check_guestfwd(slirp, guest_addr, guest_port)) { + return -1; + } + + add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, + *guest_addr, htons(guest_port)); + return 0; +} + +ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) +{ + if (so->s == -1 && so->guestfwd) { + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); + return len; + } + + if (so->s == -1) { + /* + * This should in theory not happen but it is hard to be + * sure because some code paths will end up with so->s == -1 + * on a failure but don't dispose of the struct socket. + * Check specifically, so we don't pass -1 to send(). + */ + errno = EBADF; + return -1; + } + + return send(so->s, buf, len, flags); +} + +struct socket * +slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, int guest_port) +{ + struct socket *so; + + /* TODO: IPv6 */ + for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { + if (so->so_faddr.s_addr == guest_addr.s_addr && + htons(so->so_fport) == guest_port) { + return so; + } + } + return NULL; +} + +size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port) +{ + struct iovec iov[2]; + struct socket *so; + + so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); + + if (!so || so->so_state & SS_NOFDREF) { + return 0; + } + + if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen/2)) { + return 0; + } + + return sopreprbuf(so, iov, NULL); +} + +void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, + const uint8_t *buf, int size) +{ + int ret; + struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); + + if (!so) + return; + + ret = soreadbuf(so, (const char *)buf, size); + + if (ret > 0) + tcp_output(sototcpcb(so)); +} + +void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) +{ + ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); + + if (ret < 0) { + g_critical("Failed to send packet, ret: %ld", (long) ret); + } else if (ret < len) { + DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", + (long) ret, (unsigned long) len); + } +} diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h new file mode 100644 index 0000000000..8068ba1d1e --- /dev/null +++ b/slirp/src/slirp.h @@ -0,0 +1,274 @@ +#ifndef SLIRP_H +#define SLIRP_H + +#ifdef _WIN32 + +/* as defined in sdkddkver.h */ +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 /* Vista */ +#endif +/* reduces the number of implicitly included headers */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +# include <winsock2.h> +# include <windows.h> +# include <ws2tcpip.h> +# include <sys/timeb.h> +# include <iphlpapi.h> + +#else +# if !defined(__HAIKU__) +# define O_BINARY 0 +# endif +#endif + +#ifndef _WIN32 +#include <sys/uio.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#endif + +#ifdef __APPLE__ +# include <sys/filio.h> +#endif + +/* Avoid conflicting with the libc insque() and remque(), which + have different prototypes. */ +#define insque slirp_insque +#define remque slirp_remque +#define quehead slirp_quehead + +#include "debug.h" +#include "util.h" +#include "qtailq.h" + +#include "libslirp.h" +#include "ip.h" +#include "ip6.h" +#include "tcp.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "udp.h" +#include "ip_icmp.h" +#include "ip6_icmp.h" +#include "mbuf.h" +#include "sbuf.h" +#include "socket.h" +#include "if.h" +#include "main.h" +#include "misc.h" + +#include "bootp.h" +#include "tftp.h" + +#define ARPOP_REQUEST 1 /* ARP request */ +#define ARPOP_REPLY 2 /* ARP reply */ + +struct ethhdr { + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + unsigned short h_proto; /* packet type ID field */ +}; + +struct slirp_arphdr { + unsigned short ar_hrd; /* format of hardware address */ + unsigned short ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + unsigned short ar_op; /* ARP opcode (command) */ + + /* + * Ethernet looks like this : This bit is variable sized however... + */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + uint32_t ar_sip; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + uint32_t ar_tip; /* target IP address */ +} SLIRP_PACKED; + +#define ARP_TABLE_SIZE 16 + +typedef struct ArpTable { + struct slirp_arphdr table[ARP_TABLE_SIZE]; + int next_victim; +} ArpTable; + +void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); + +bool arp_table_search(Slirp *slirp, uint32_t ip_addr, + uint8_t out_ethaddr[ETH_ALEN]); + +struct ndpentry { + unsigned char eth_addr[ETH_ALEN]; /* sender hardware address */ + struct in6_addr ip_addr; /* sender IP address */ +}; + +#define NDP_TABLE_SIZE 16 + +typedef struct NdpTable { + struct ndpentry table[NDP_TABLE_SIZE]; + int next_victim; +} NdpTable; + +void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, + uint8_t ethaddr[ETH_ALEN]); +bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, + uint8_t out_ethaddr[ETH_ALEN]); + +struct Slirp { + QTAILQ_ENTRY(Slirp) entry; + unsigned time_fasttimo; + unsigned last_slowtimo; + bool do_slowtimo; + + bool in_enabled, in6_enabled; + + /* virtual network configuration */ + struct in_addr vnetwork_addr; + struct in_addr vnetwork_mask; + struct in_addr vhost_addr; + struct in6_addr vprefix_addr6; + uint8_t vprefix_len; + struct in6_addr vhost_addr6; + struct in_addr vdhcp_startaddr; + struct in_addr vnameserver_addr; + struct in6_addr vnameserver_addr6; + + struct in_addr client_ipaddr; + char client_hostname[33]; + + int restricted; + struct gfwd_list *guestfwd_list; + + /* mbuf states */ + struct quehead m_freelist; + struct quehead m_usedlist; + int mbuf_alloced; + + /* if states */ + struct quehead if_fastq; /* fast queue (for interactive data) */ + struct quehead if_batchq; /* queue for non-interactive data */ + bool if_start_busy; /* avoid if_start recursion */ + + /* ip states */ + struct ipq ipq; /* ip reass. queue */ + uint16_t ip_id; /* ip packet ctr, for ids */ + + /* bootp/dhcp states */ + BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; + char *bootp_filename; + size_t vdnssearch_len; + uint8_t *vdnssearch; + char *vdomainname; + + /* tcp states */ + struct socket tcb; + struct socket *tcp_last_so; + tcp_seq tcp_iss; /* tcp initial send seq # */ + uint32_t tcp_now; /* for RFC 1323 timestamps */ + + /* udp states */ + struct socket udb; + struct socket *udp_last_so; + + /* icmp states */ + struct socket icmp; + struct socket *icmp_last_so; + + /* tftp states */ + char *tftp_prefix; + struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; + char *tftp_server_name; + + ArpTable arp_table; + NdpTable ndp_table; + + GRand *grand; + void *ra_timer; + + const SlirpCb *cb; + void *opaque; +}; + +void if_start(Slirp *); + +int get_dns_addr(struct in_addr *pdns_addr); +int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); + +/* ncsi.c */ +void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); + +#ifndef _WIN32 +#include <netdb.h> +#endif + + +extern bool slirp_do_keepalive; + +#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) + +/* dnssearch.c */ +int translate_dnssearch(Slirp *s, const char ** names); + +/* cksum.c */ +int cksum(struct mbuf *m, int len); +int ip6_cksum(struct mbuf *m); + +/* if.c */ +void if_init(Slirp *); +void if_output(struct socket *, struct mbuf *); + +/* ip_input.c */ +void ip_init(Slirp *); +void ip_cleanup(Slirp *); +void ip_input(struct mbuf *); +void ip_slowtimo(Slirp *); +void ip_stripoptions(register struct mbuf *, struct mbuf *); + +/* ip_output.c */ +int ip_output(struct socket *, struct mbuf *); + +/* ip6_input.c */ +void ip6_init(Slirp *); +void ip6_cleanup(Slirp *); +void ip6_input(struct mbuf *); + +/* ip6_output */ +int ip6_output(struct socket *, struct mbuf *, int fast); + +/* tcp_input.c */ +void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); +int tcp_mss(register struct tcpcb *, unsigned); + +/* tcp_output.c */ +int tcp_output(register struct tcpcb *); +void tcp_setpersist(register struct tcpcb *); + +/* tcp_subr.c */ +void tcp_init(Slirp *); +void tcp_cleanup(Slirp *); +void tcp_template(struct tcpcb *); +void tcp_respond(struct tcpcb *, register struct tcpiphdr *, + register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); +struct tcpcb * tcp_newtcpcb(struct socket *); +struct tcpcb * tcp_close(register struct tcpcb *); +void tcp_sockclosed(struct tcpcb *); +int tcp_fconnect(struct socket *, unsigned short af); +void tcp_connect(struct socket *); +int tcp_attach(struct socket *); +uint8_t tcp_tos(struct socket *); +int tcp_emu(struct socket *, struct mbuf *); +int tcp_ctl(struct socket *); +struct tcpcb *tcp_drop(struct tcpcb *tp, int err); + +struct socket * +slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, int guest_port); + +void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); + +#endif diff --git a/slirp/src/socket.c b/slirp/src/socket.c new file mode 100644 index 0000000000..f2428a3ae8 --- /dev/null +++ b/slirp/src/socket.c @@ -0,0 +1,944 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" +#include "ip_icmp.h" +#ifdef __sun__ +#include <sys/filio.h> +#endif + +static void sofcantrcvmore(struct socket *so); +static void sofcantsendmore(struct socket *so); + +struct socket *solookup(struct socket **last, struct socket *head, + struct sockaddr_storage *lhost, struct sockaddr_storage *fhost) +{ + struct socket *so = *last; + + /* Optimisation */ + if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) + && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { + return so; + } + + for (so = head->so_next; so != head; so = so->so_next) { + if (sockaddr_equal(&(so->lhost.ss), lhost) + && (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { + *last = so; + return so; + } + } + + return (struct socket *)NULL; +} + +/* + * Create a new socket, initialise the fields + * It is the responsibility of the caller to + * insque() it into the correct linked-list + */ +struct socket * +socreate(Slirp *slirp) +{ + struct socket *so = g_new(struct socket, 1); + + memset(so, 0, sizeof(struct socket)); + so->so_state = SS_NOFDREF; + so->s = -1; + so->slirp = slirp; + so->pollfds_idx = -1; + + return so; +} + +/* + * Remove references to so from the given message queue. + */ +static void +soqfree(struct socket *so, struct quehead *qh) +{ + struct mbuf *ifq; + + for (ifq = (struct mbuf *) qh->qh_link; + (struct quehead *) ifq != qh; + ifq = ifq->ifq_next) { + if (ifq->ifq_so == so) { + struct mbuf *ifm; + ifq->ifq_so = NULL; + for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { + ifm->ifq_so = NULL; + } + } + } +} + +/* + * remque and free a socket, clobber cache + */ +void +sofree(struct socket *so) +{ + Slirp *slirp = so->slirp; + + soqfree(so, &slirp->if_fastq); + soqfree(so, &slirp->if_batchq); + + if (so == slirp->tcp_last_so) { + slirp->tcp_last_so = &slirp->tcb; + } else if (so == slirp->udp_last_so) { + slirp->udp_last_so = &slirp->udb; + } else if (so == slirp->icmp_last_so) { + slirp->icmp_last_so = &slirp->icmp; + } + m_free(so->so_m); + + if(so->so_next && so->so_prev) + remque(so); /* crashes if so is not in a queue */ + + if (so->so_tcpcb) { + free(so->so_tcpcb); + } + g_free(so); +} + +size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) +{ + int n, lss, total; + struct sbuf *sb = &so->so_snd; + int len = sb->sb_datalen - sb->sb_cc; + int mss = so->so_tcpcb->t_maxseg; + + DEBUG_CALL("sopreprbuf"); + DEBUG_ARG("so = %p", so); + + if (len <= 0) + return 0; + + iov[0].iov_base = sb->sb_wptr; + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + if (sb->sb_wptr < sb->sb_rptr) { + iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) + iov[0].iov_len = len; + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } else { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_rptr - sb->sb_data; + if(iov[1].iov_len > len) + iov[1].iov_len = len; + total = iov[0].iov_len + iov[1].iov_len; + if (total > mss) { + lss = total%mss; + if (iov[1].iov_len > lss) { + iov[1].iov_len -= lss; + n = 2; + } else { + lss -= iov[1].iov_len; + iov[0].iov_len -= lss; + n = 1; + } + } else + n = 2; + } else { + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } + } + if (np) + *np = n; + + return iov[0].iov_len + (n - 1) * iov[1].iov_len; +} + +/* + * Read from so's socket into sb_snd, updating all relevant sbuf fields + * NOTE: This will only be called if it is select()ed for reading, so + * a read() of 0 (or less) means it's disconnected + */ +int +soread(struct socket *so) +{ + int n, nn; + struct sbuf *sb = &so->so_snd; + struct iovec iov[2]; + + DEBUG_CALL("soread"); + DEBUG_ARG("so = %p", so); + + /* + * No need to check if there's enough room to read. + * soread wouldn't have been called if there weren't + */ + sopreprbuf(so, iov, &n); + + nn = recv(so->s, iov[0].iov_base, iov[0].iov_len,0); + if (nn <= 0) { + if (nn < 0 && (errno == EINTR || errno == EAGAIN)) + return 0; + else { + int err; + socklen_t elen = sizeof err; + struct sockaddr_storage addr; + struct sockaddr *paddr = (struct sockaddr *) &addr; + socklen_t alen = sizeof addr; + + err = errno; + if (nn == 0) { + if (getpeername(so->s, paddr, &alen) < 0) { + err = errno; + } else { + getsockopt(so->s, SOL_SOCKET, SO_ERROR, + &err, &elen); + } + } + + DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", + nn, errno,strerror(errno)); + sofcantrcvmore(so); + + if (err == ECONNRESET || err == ECONNREFUSED + || err == ENOTCONN || err == EPIPE) { + tcp_drop(sototcpcb(so), err); + } else { + tcp_sockclosed(sototcpcb(so)); + } + return -1; + } + } + + /* + * If there was no error, try and read the second time round + * We read again if n = 2 (ie, there's another part of the buffer) + * and we read as much as we could in the first read + * We don't test for <= 0 this time, because there legitimately + * might not be any more data (since the socket is non-blocking), + * a close will be detected on next iteration. + * A return of -1 won't (shouldn't) happen, since it didn't happen above + */ + if (n == 2 && nn == iov[0].iov_len) { + int ret; + ret = recv(so->s, iov[1].iov_base, iov[1].iov_len,0); + if (ret > 0) + nn += ret; + } + + DEBUG_MISC(" ... read nn = %d bytes", nn); + + /* Update fields */ + sb->sb_cc += nn; + sb->sb_wptr += nn; + if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_wptr -= sb->sb_datalen; + return nn; +} + +int soreadbuf(struct socket *so, const char *buf, int size) +{ + int n, nn, copy = size; + struct sbuf *sb = &so->so_snd; + struct iovec iov[2]; + + DEBUG_CALL("soreadbuf"); + DEBUG_ARG("so = %p", so); + + /* + * No need to check if there's enough room to read. + * soread wouldn't have been called if there weren't + */ + if (sopreprbuf(so, iov, &n) < size) + goto err; + + nn = MIN(iov[0].iov_len, copy); + memcpy(iov[0].iov_base, buf, nn); + + copy -= nn; + buf += nn; + + if (copy == 0) + goto done; + + memcpy(iov[1].iov_base, buf, copy); + +done: + /* Update fields */ + sb->sb_cc += size; + sb->sb_wptr += size; + if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_wptr -= sb->sb_datalen; + return size; +err: + + sofcantrcvmore(so); + tcp_sockclosed(sototcpcb(so)); + g_critical("soreadbuf buffer too small"); + return -1; +} + +/* + * Get urgent data + * + * When the socket is created, we set it SO_OOBINLINE, + * so when OOB data arrives, we soread() it and everything + * in the send buffer is sent as urgent data + */ +int +sorecvoob(struct socket *so) +{ + struct tcpcb *tp = sototcpcb(so); + int ret; + + DEBUG_CALL("sorecvoob"); + DEBUG_ARG("so = %p", so); + + /* + * We take a guess at how much urgent data has arrived. + * In most situations, when urgent data arrives, the next + * read() should get all the urgent data. This guess will + * be wrong however if more data arrives just after the + * urgent data, or the read() doesn't return all the + * urgent data. + */ + ret = soread(so); + if (ret > 0) { + tp->snd_up = tp->snd_una + so->so_snd.sb_cc; + tp->t_force = 1; + tcp_output(tp); + tp->t_force = 0; + } + + return ret; +} + +/* + * Send urgent data + * There's a lot duplicated code here, but... + */ +int +sosendoob(struct socket *so) +{ + struct sbuf *sb = &so->so_rcv; + char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ + + int n; + + DEBUG_CALL("sosendoob"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); + + if (so->so_urgc > 2048) + so->so_urgc = 2048; /* XXXX */ + + if (sb->sb_rptr < sb->sb_wptr) { + /* We can send it directly */ + n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */ + } else { + /* + * Since there's no sendv or sendtov like writev, + * we must copy all data to a linear buffer then + * send it all + */ + uint32_t urgc = so->so_urgc; + int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (len > urgc) { + len = urgc; + } + memcpy(buff, sb->sb_rptr, len); + urgc -= len; + if (urgc) { + n = sb->sb_wptr - sb->sb_data; + if (n > urgc) { + n = urgc; + } + memcpy((buff + len), sb->sb_data, n); + len += n; + } + n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ +#ifdef DEBUG + if (n != len) { + DEBUG_ERROR("Didn't send all data urgently XXXXX"); + } +#endif + } + + if (n < 0) { + return n; + } + so->so_urgc -= n; + DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, so->so_urgc); + + sb->sb_cc -= n; + sb->sb_rptr += n; + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_rptr -= sb->sb_datalen; + + return n; +} + +/* + * Write data from so_rcv to so's socket, + * updating all sbuf field as necessary + */ +int +sowrite(struct socket *so) +{ + int n,nn; + struct sbuf *sb = &so->so_rcv; + int len = sb->sb_cc; + struct iovec iov[2]; + + DEBUG_CALL("sowrite"); + DEBUG_ARG("so = %p", so); + + if (so->so_urgc) { + uint32_t expected = so->so_urgc; + if (sosendoob(so) < expected) { + /* Treat a short write as a fatal error too, + * rather than continuing on and sending the urgent + * data as if it were non-urgent and leaving the + * so_urgc count wrong. + */ + goto err_disconnected; + } + if (sb->sb_cc == 0) + return 0; + } + + /* + * No need to check if there's something to write, + * sowrite wouldn't have been called otherwise + */ + + iov[0].iov_base = sb->sb_rptr; + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + if (sb->sb_rptr < sb->sb_wptr) { + iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) iov[0].iov_len = len; + n = 1; + } else { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (iov[0].iov_len > len) iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_wptr - sb->sb_data; + if (iov[1].iov_len > len) iov[1].iov_len = len; + n = 2; + } else + n = 1; + } + /* Check if there's urgent data to send, and if so, send it */ + + nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0); + /* This should never happen, but people tell me it does *shrug* */ + if (nn < 0 && (errno == EAGAIN || errno == EINTR)) + return 0; + + if (nn <= 0) { + goto err_disconnected; + } + + if (n == 2 && nn == iov[0].iov_len) { + int ret; + ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0); + if (ret > 0) + nn += ret; + } + DEBUG_MISC(" ... wrote nn = %d bytes", nn); + + /* Update sbuf */ + sb->sb_cc -= nn; + sb->sb_rptr += nn; + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_rptr -= sb->sb_datalen; + + /* + * If in DRAIN mode, and there's no more data, set + * it CANTSENDMORE + */ + if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) + sofcantsendmore(so); + + return nn; + +err_disconnected: + DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", + so->so_state, errno); + sofcantsendmore(so); + tcp_sockclosed(sototcpcb(so)); + return -1; +} + +/* + * recvfrom() a UDP socket + */ +void +sorecvfrom(struct socket *so) +{ + struct sockaddr_storage addr; + struct sockaddr_storage saddr, daddr; + socklen_t addrlen = sizeof(struct sockaddr_storage); + + DEBUG_CALL("sorecvfrom"); + DEBUG_ARG("so = %p", so); + + if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ + char buff[256]; + int len; + + len = recvfrom(so->s, buff, 256, 0, + (struct sockaddr *)&addr, &addrlen); + /* XXX Check if reply is "correct"? */ + + if(len == -1 || len == 0) { + uint8_t code=ICMP_UNREACH_PORT; + + if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; + else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET; + + DEBUG_MISC(" udp icmp rx errno = %d-%s", + errno,strerror(errno)); + icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); + } else { + icmp_reflect(so->so_m); + so->so_m = NULL; /* Don't m_free() it again! */ + } + /* No need for this socket anymore, udp_detach it */ + udp_detach(so); + } else { /* A "normal" UDP packet */ + struct mbuf *m; + int len; +#ifdef _WIN32 + unsigned long n; +#else + int n; +#endif + + if (ioctlsocket(so->s, FIONREAD, &n) != 0) { + DEBUG_MISC(" ioctlsocket errno = %d-%s\n", + errno,strerror(errno)); + return; + } + if (n == 0) { + return; + } + + m = m_get(so->slirp); + if (!m) { + return; + } + switch (so->so_ffamily) { + case AF_INET: + m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); + break; + case AF_INET6: + m->m_data += IF_MAXLINKHDR + sizeof(struct ip6) + + sizeof(struct udphdr); + break; + default: + g_assert_not_reached(); + break; + } + + /* + * XXX Shouldn't FIONREAD packets destined for port 53, + * but I don't know the max packet size for DNS lookups + */ + len = M_FREEROOM(m); + /* if (so->so_fport != htons(53)) { */ + + if (n > len) { + n = (m->m_data - m->m_dat) + m->m_len + n + 1; + m_inc(m, n); + len = M_FREEROOM(m); + } + /* } */ + + m->m_len = recvfrom(so->s, m->m_data, len, 0, + (struct sockaddr *)&addr, &addrlen); + DEBUG_MISC(" did recvfrom %d, errno = %d-%s", + m->m_len, errno,strerror(errno)); + if(m->m_len<0) { + /* Report error as ICMP */ + switch (so->so_lfamily) { + uint8_t code; + case AF_INET: + code = ICMP_UNREACH_PORT; + + if (errno == EHOSTUNREACH) { + code = ICMP_UNREACH_HOST; + } else if (errno == ENETUNREACH) { + code = ICMP_UNREACH_NET; + } + + DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); + icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); + break; + case AF_INET6: + code = ICMP6_UNREACH_PORT; + + if (errno == EHOSTUNREACH) { + code = ICMP6_UNREACH_ADDRESS; + } else if (errno == ENETUNREACH) { + code = ICMP6_UNREACH_NO_ROUTE; + } + + DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); + icmp6_send_error(so->so_m, ICMP6_UNREACH, code); + break; + default: + g_assert_not_reached(); + break; + } + m_free(m); + } else { + /* + * Hack: domain name lookup will be used the most for UDP, + * and since they'll only be used once there's no need + * for the 4 minute (or whatever) timeout... So we time them + * out much quicker (10 seconds for now...) + */ + if (so->so_expire) { + if (so->so_fport == htons(53)) + so->so_expire = curtime + SO_EXPIREFAST; + else + so->so_expire = curtime + SO_EXPIRE; + } + + /* + * If this packet was destined for CTL_ADDR, + * make it look like that's where it came from + */ + saddr = addr; + sotranslate_in(so, &saddr); + daddr = so->lhost.ss; + + switch (so->so_ffamily) { + case AF_INET: + udp_output(so, m, (struct sockaddr_in *) &saddr, + (struct sockaddr_in *) &daddr, + so->so_iptos); + break; + case AF_INET6: + udp6_output(so, m, (struct sockaddr_in6 *) &saddr, + (struct sockaddr_in6 *) &daddr); + break; + default: + g_assert_not_reached(); + break; + } + } /* rx error */ + } /* if ping packet */ +} + +/* + * sendto() a socket + */ +int +sosendto(struct socket *so, struct mbuf *m) +{ + int ret; + struct sockaddr_storage addr; + + DEBUG_CALL("sosendto"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); + + addr = so->fhost.ss; + DEBUG_CALL(" sendto()ing)"); + sotranslate_out(so, &addr); + + /* Don't care what port we get */ + ret = sendto(so->s, m->m_data, m->m_len, 0, + (struct sockaddr *)&addr, sockaddr_size(&addr)); + if (ret < 0) + return -1; + + /* + * Kill the socket if there's no reply in 4 minutes, + * but only if it's an expirable socket + */ + if (so->so_expire) + so->so_expire = curtime + SO_EXPIRE; + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ + return 0; +} + +/* + * Listen for incoming TCP connections + */ +struct socket * +tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, uint32_t laddr, + unsigned lport, int flags) +{ + /* TODO: IPv6 */ + struct sockaddr_in addr; + struct socket *so; + int s, opt = 1; + socklen_t addrlen = sizeof(addr); + memset(&addr, 0, addrlen); + + DEBUG_CALL("tcp_listen"); + DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){.s_addr = haddr})); + DEBUG_ARG("hport = %d", ntohs(hport)); + DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){.s_addr = laddr})); + DEBUG_ARG("lport = %d", ntohs(lport)); + DEBUG_ARG("flags = %x", flags); + + so = socreate(slirp); + + /* Don't tcp_attach... we don't need so_snd nor so_rcv */ + if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { + g_free(so); + return NULL; + } + insque(so, &slirp->tcb); + + /* + * SS_FACCEPTONCE sockets must time out. + */ + if (flags & SS_FACCEPTONCE) + so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2; + + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= (SS_FACCEPTCONN | flags); + so->so_lfamily = AF_INET; + so->so_lport = lport; /* Kept in network format */ + so->so_laddr.s_addr = laddr; /* Ditto */ + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = haddr; + addr.sin_port = hport; + + if (((s = slirp_socket(AF_INET,SOCK_STREAM,0)) < 0) || + (slirp_socket_set_fast_reuse(s) < 0) || + (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) || + (listen(s,1) < 0)) { + int tmperrno = errno; /* Don't clobber the real reason we failed */ + + if (s >= 0) { + closesocket(s); + } + sofree(so); + /* Restore the real errno */ +#ifdef _WIN32 + WSASetLastError(tmperrno); +#else + errno = tmperrno; +#endif + return NULL; + } + setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); + opt = 1; + setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int)); + + getsockname(s,(struct sockaddr *)&addr,&addrlen); + so->so_ffamily = AF_INET; + so->so_fport = addr.sin_port; + if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr) + so->so_faddr = slirp->vhost_addr; + else + so->so_faddr = addr.sin_addr; + + so->s = s; + return so; +} + +/* + * Various session state calls + * XXX Should be #define's + * The socket state stuff needs work, these often get call 2 or 3 + * times each when only 1 was needed + */ +void +soisfconnecting(struct socket *so) +{ + so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE| + SS_FCANTSENDMORE|SS_FWDRAIN); + so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ +} + +void +soisfconnected(struct socket *so) +{ + so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF); + so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ +} + +static void +sofcantrcvmore(struct socket *so) +{ + if ((so->so_state & SS_NOFDREF) == 0) { + shutdown(so->s,0); + } + so->so_state &= ~(SS_ISFCONNECTING); + if (so->so_state & SS_FCANTSENDMORE) { + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; /* Don't select it */ + } else { + so->so_state |= SS_FCANTRCVMORE; + } +} + +static void +sofcantsendmore(struct socket *so) +{ + if ((so->so_state & SS_NOFDREF) == 0) { + shutdown(so->s,1); /* send FIN to fhost */ + } + so->so_state &= ~(SS_ISFCONNECTING); + if (so->so_state & SS_FCANTRCVMORE) { + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; /* as above */ + } else { + so->so_state |= SS_FCANTSENDMORE; + } +} + +/* + * Set write drain mode + * Set CANTSENDMORE once all data has been write()n + */ +void +sofwdrain(struct socket *so) +{ + if (so->so_rcv.sb_cc) + so->so_state |= SS_FWDRAIN; + else + sofcantsendmore(so); +} + +/* + * Translate addr in host addr when it is a virtual address + */ +void sotranslate_out(struct socket *so, struct sockaddr_storage *addr) +{ + Slirp *slirp = so->slirp; + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + + switch (addr->ss_family) { + case AF_INET: + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + /* It's an alias */ + if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { + if (get_dns_addr(&sin->sin_addr) < 0) { + sin->sin_addr = loopback_addr; + } + } else { + sin->sin_addr = loopback_addr; + } + } + + DEBUG_MISC(" addr.sin_port=%d, addr.sin_addr.s_addr=%.16s", + ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); + break; + + case AF_INET6: + if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, + slirp->vprefix_len)) { + if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) { + uint32_t scope_id; + if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) { + sin6->sin6_scope_id = scope_id; + } else { + sin6->sin6_addr = in6addr_loopback; + } + } else { + sin6->sin6_addr = in6addr_loopback; + } + } + break; + + default: + break; + } +} + +void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) +{ + Slirp *slirp = so->slirp; + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + + switch (addr->ss_family) { + case AF_INET: + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; + + if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { + sin->sin_addr = slirp->vhost_addr; + } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || + so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { + sin->sin_addr = so->so_faddr; + } + } + break; + + case AF_INET6: + if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, + slirp->vprefix_len)) { + if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) + || !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { + sin6->sin6_addr = so->so_faddr6; + } + } + break; + + default: + break; + } +} + +/* + * Translate connections from localhost to the real hostname + */ +void sotranslate_accept(struct socket *so) +{ + Slirp *slirp = so->slirp; + + switch (so->so_ffamily) { + case AF_INET: + if (so->so_faddr.s_addr == INADDR_ANY || + (so->so_faddr.s_addr & loopback_mask) == + (loopback_addr.s_addr & loopback_mask)) { + so->so_faddr = slirp->vhost_addr; + } + break; + + case AF_INET6: + if (in6_equal(&so->so_faddr6, &in6addr_any) || + in6_equal(&so->so_faddr6, &in6addr_loopback)) { + so->so_faddr6 = slirp->vhost_addr6; + } + break; + + default: + break; + } +} + +void sodrop(struct socket *s, int num) +{ + if (sbdrop(&s->so_snd, num)) { + s->slirp->cb->notify(s->slirp->opaque); + } +} diff --git a/slirp/src/socket.h b/slirp/src/socket.h new file mode 100644 index 0000000000..e4d12cd591 --- /dev/null +++ b/slirp/src/socket.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef SLIRP_SOCKET_H +#define SLIRP_SOCKET_H + +#include "misc.h" + +#define SO_EXPIRE 240000 +#define SO_EXPIREFAST 10000 + +/* + * Our socket structure + */ + +union slirp_sockaddr { + struct sockaddr_storage ss; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + +struct socket { + struct socket *so_next,*so_prev; /* For a linked list of sockets */ + + int s; /* The actual socket */ + struct gfwd_list *guestfwd; + + int pollfds_idx; /* GPollFD GArray index */ + + Slirp *slirp; /* managing slirp instance */ + + /* XXX union these with not-yet-used sbuf params */ + struct mbuf *so_m; /* Pointer to the original SYN packet, + * for non-blocking connect()'s, and + * PING reply's */ + struct tcpiphdr *so_ti; /* Pointer to the original ti within + * so_mconn, for non-blocking connections */ + uint32_t so_urgc; + union slirp_sockaddr fhost; /* Foreign host */ +#define so_faddr fhost.sin.sin_addr +#define so_fport fhost.sin.sin_port +#define so_faddr6 fhost.sin6.sin6_addr +#define so_fport6 fhost.sin6.sin6_port +#define so_ffamily fhost.ss.ss_family + + union slirp_sockaddr lhost; /* Local host */ +#define so_laddr lhost.sin.sin_addr +#define so_lport lhost.sin.sin_port +#define so_laddr6 lhost.sin6.sin6_addr +#define so_lport6 lhost.sin6.sin6_port +#define so_lfamily lhost.ss.ss_family + + uint8_t so_iptos; /* Type of service */ + uint8_t so_emu; /* Is the socket emulated? */ + + uint8_t so_type; /* Type of socket, UDP or TCP */ + int32_t so_state; /* internal state flags SS_*, below */ + + struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ + unsigned so_expire; /* When the socket will expire */ + + int so_queued; /* Number of packets queued from this socket */ + int so_nqueued; /* Number of packets queued in a row + * Used to determine when to "downgrade" a session + * from fastq to batchq */ + + struct sbuf so_rcv; /* Receive buffer */ + struct sbuf so_snd; /* Send buffer */ +}; + + +/* + * Socket state bits. (peer means the host on the Internet, + * local host means the host on the other end of the modem) + */ +#define SS_NOFDREF 0x001 /* No fd reference */ + +#define SS_ISFCONNECTING 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ +#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ +#define SS_FCANTRCVMORE 0x008 /* Socket can't receive more from peer (for half-closes) */ +#define SS_FCANTSENDMORE 0x010 /* Socket can't send more to peer (for half-closes) */ +#define SS_FWDRAIN 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ + +#define SS_CTL 0x080 +#define SS_FACCEPTCONN 0x100 /* Socket is accepting connections from a host on the internet */ +#define SS_FACCEPTONCE 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ + +#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ +#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ +#define SS_INCOMING 0x2000 /* Connection was initiated by a host on the internet */ + +static inline int sockaddr_equal(struct sockaddr_storage *a, + struct sockaddr_storage *b) +{ + if (a->ss_family != b->ss_family) { + return 0; + } + + switch (a->ss_family) { + case AF_INET: + { + struct sockaddr_in *a4 = (struct sockaddr_in *) a; + struct sockaddr_in *b4 = (struct sockaddr_in *) b; + return a4->sin_addr.s_addr == b4->sin_addr.s_addr + && a4->sin_port == b4->sin_port; + } + case AF_INET6: + { + struct sockaddr_in6 *a6 = (struct sockaddr_in6 *) a; + struct sockaddr_in6 *b6 = (struct sockaddr_in6 *) b; + return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) + && a6->sin6_port == b6->sin6_port); + } + default: + g_assert_not_reached(); + } + + return 0; +} + +static inline socklen_t sockaddr_size(struct sockaddr_storage *a) +{ + switch (a->ss_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + default: + g_assert_not_reached(); + } +} + +struct socket *solookup(struct socket **, struct socket *, + struct sockaddr_storage *, struct sockaddr_storage *); +struct socket *socreate(Slirp *); +void sofree(struct socket *); +int soread(struct socket *); +int sorecvoob(struct socket *); +int sosendoob(struct socket *); +int sowrite(struct socket *); +void sorecvfrom(struct socket *); +int sosendto(struct socket *, struct mbuf *); +struct socket * tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, + int); +void soisfconnecting(register struct socket *); +void soisfconnected(register struct socket *); +void sofwdrain(struct socket *); +struct iovec; /* For win32 */ +size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); +int soreadbuf(struct socket *so, const char *buf, int size); + +void sotranslate_out(struct socket *, struct sockaddr_storage *); +void sotranslate_in(struct socket *, struct sockaddr_storage *); +void sotranslate_accept(struct socket *); +void sodrop(struct socket *, int num); + + +#endif /* SLIRP_SOCKET_H */ diff --git a/slirp/src/state.c b/slirp/src/state.c new file mode 100644 index 0000000000..f5dd80cdc8 --- /dev/null +++ b/slirp/src/state.c @@ -0,0 +1,388 @@ +/* + * libslirp + * + * Copyright (c) 2004-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "slirp.h" +#include "vmstate.h" +#include "state.h" +#include "stream.h" + +static int slirp_tcp_post_load(void *opaque, int version) +{ + tcp_template((struct tcpcb *)opaque); + + return 0; +} + +static const VMStateDescription vmstate_slirp_tcp = { + .name = "slirp-tcp", + .version_id = 0, + .post_load = slirp_tcp_post_load, + .fields = (VMStateField[]) { + VMSTATE_INT16(t_state, struct tcpcb), + VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, TCPT_NTIMERS), + VMSTATE_INT16(t_rxtshift, struct tcpcb), + VMSTATE_INT16(t_rxtcur, struct tcpcb), + VMSTATE_INT16(t_dupacks, struct tcpcb), + VMSTATE_UINT16(t_maxseg, struct tcpcb), + VMSTATE_UINT8(t_force, struct tcpcb), + VMSTATE_UINT16(t_flags, struct tcpcb), + VMSTATE_UINT32(snd_una, struct tcpcb), + VMSTATE_UINT32(snd_nxt, struct tcpcb), + VMSTATE_UINT32(snd_up, struct tcpcb), + VMSTATE_UINT32(snd_wl1, struct tcpcb), + VMSTATE_UINT32(snd_wl2, struct tcpcb), + VMSTATE_UINT32(iss, struct tcpcb), + VMSTATE_UINT32(snd_wnd, struct tcpcb), + VMSTATE_UINT32(rcv_wnd, struct tcpcb), + VMSTATE_UINT32(rcv_nxt, struct tcpcb), + VMSTATE_UINT32(rcv_up, struct tcpcb), + VMSTATE_UINT32(irs, struct tcpcb), + VMSTATE_UINT32(rcv_adv, struct tcpcb), + VMSTATE_UINT32(snd_max, struct tcpcb), + VMSTATE_UINT32(snd_cwnd, struct tcpcb), + VMSTATE_UINT32(snd_ssthresh, struct tcpcb), + VMSTATE_INT16(t_idle, struct tcpcb), + VMSTATE_INT16(t_rtt, struct tcpcb), + VMSTATE_UINT32(t_rtseq, struct tcpcb), + VMSTATE_INT16(t_srtt, struct tcpcb), + VMSTATE_INT16(t_rttvar, struct tcpcb), + VMSTATE_UINT16(t_rttmin, struct tcpcb), + VMSTATE_UINT32(max_sndwnd, struct tcpcb), + VMSTATE_UINT8(t_oobflags, struct tcpcb), + VMSTATE_UINT8(t_iobc, struct tcpcb), + VMSTATE_INT16(t_softerror, struct tcpcb), + VMSTATE_UINT8(snd_scale, struct tcpcb), + VMSTATE_UINT8(rcv_scale, struct tcpcb), + VMSTATE_UINT8(request_r_scale, struct tcpcb), + VMSTATE_UINT8(requested_s_scale, struct tcpcb), + VMSTATE_UINT32(ts_recent, struct tcpcb), + VMSTATE_UINT32(ts_recent_age, struct tcpcb), + VMSTATE_UINT32(last_ack_sent, struct tcpcb), + VMSTATE_END_OF_LIST() + } +}; + +/* The sbuf has a pair of pointers that are migrated as offsets; + * we calculate the offsets and restore the pointers using + * pre_save/post_load on a tmp structure. + */ +struct sbuf_tmp { + struct sbuf *parent; + uint32_t roff, woff; +}; + +static int sbuf_tmp_pre_save(void *opaque) +{ + struct sbuf_tmp *tmp = opaque; + tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; + tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; + + return 0; +} + +static int sbuf_tmp_post_load(void *opaque, int version) +{ + struct sbuf_tmp *tmp = opaque; + uint32_t requested_len = tmp->parent->sb_datalen; + + /* Allocate the buffer space used by the field after the tmp */ + sbreserve(tmp->parent, tmp->parent->sb_datalen); + + if (tmp->parent->sb_datalen != requested_len) { + return -ENOMEM; + } + if (tmp->woff >= requested_len || + tmp->roff >= requested_len) { + g_critical("invalid sbuf offsets r/w=%u/%u len=%u", + tmp->roff, tmp->woff, requested_len); + return -EINVAL; + } + + tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; + tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; + + return 0; +} + + +static const VMStateDescription vmstate_slirp_sbuf_tmp = { + .name = "slirp-sbuf-tmp", + .post_load = sbuf_tmp_post_load, + .pre_save = sbuf_tmp_pre_save, + .version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(woff, struct sbuf_tmp), + VMSTATE_UINT32(roff, struct sbuf_tmp), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_slirp_sbuf = { + .name = "slirp-sbuf", + .version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(sb_cc, struct sbuf), + VMSTATE_UINT32(sb_datalen, struct sbuf), + VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, vmstate_slirp_sbuf_tmp), + VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, NULL, sb_datalen), + VMSTATE_END_OF_LIST() + } +}; + +static bool slirp_older_than_v4(void *opaque, int version_id) +{ + return version_id < 4; +} + +static bool slirp_family_inet(void *opaque, int version_id) +{ + union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; + return ssa->ss.ss_family == AF_INET; +} + +static int slirp_socket_pre_load(void *opaque) +{ + struct socket *so = opaque; + if (tcp_attach(so) < 0) { + return -ENOMEM; + } + /* Older versions don't load these fields */ + so->so_ffamily = AF_INET; + so->so_lfamily = AF_INET; + return 0; +} + +#ifndef _WIN32 +#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) +#else +/* Win uses u_long rather than uint32_t - but it's still 32bits long */ +#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_SINGLE_TEST(f, s, t, 0, \ + slirp_vmstate_info_uint32, u_long) +#endif + +/* The OS provided ss_family field isn't that portable; it's size + * and type varies (16/8 bit, signed, unsigned) + * and the values it contains aren't fully portable. + */ +typedef struct SS_FamilyTmpStruct { + union slirp_sockaddr *parent; + uint16_t portable_family; +} SS_FamilyTmpStruct; + +#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... */ +#define SS_FAMILY_MIG_IPV6 10 /* Linux */ +#define SS_FAMILY_MIG_OTHER 0xffff + +static int ss_family_pre_save(void *opaque) +{ + SS_FamilyTmpStruct *tss = opaque; + + tss->portable_family = SS_FAMILY_MIG_OTHER; + + if (tss->parent->ss.ss_family == AF_INET) { + tss->portable_family = SS_FAMILY_MIG_IPV4; + } else if (tss->parent->ss.ss_family == AF_INET6) { + tss->portable_family = SS_FAMILY_MIG_IPV6; + } + + return 0; +} + +static int ss_family_post_load(void *opaque, int version_id) +{ + SS_FamilyTmpStruct *tss = opaque; + + switch (tss->portable_family) { + case SS_FAMILY_MIG_IPV4: + tss->parent->ss.ss_family = AF_INET; + break; + case SS_FAMILY_MIG_IPV6: + case 23: /* compatibility: AF_INET6 from mingw */ + case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ + tss->parent->ss.ss_family = AF_INET6; + break; + default: + g_critical("invalid ss_family type %x", tss->portable_family); + return -EINVAL; + } + + return 0; +} + +static const VMStateDescription vmstate_slirp_ss_family = { + .name = "slirp-socket-addr/ss_family", + .pre_save = ss_family_pre_save, + .post_load = ss_family_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_slirp_socket_addr = { + .name = "slirp-socket-addr", + .version_id = 4, + .fields = (VMStateField[]) { + VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, + vmstate_slirp_ss_family), + VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, + slirp_family_inet), + VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, + slirp_family_inet), + +#if 0 + /* Untested: Needs checking by someone with IPv6 test */ + VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, + slirp_family_inet6), + VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, + slirp_family_inet6), + VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union slirp_sockaddr, + slirp_family_inet6), + VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, + slirp_family_inet6), +#endif + + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_slirp_socket = { + .name = "slirp-socket", + .version_id = 4, + .pre_load = slirp_socket_pre_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(so_urgc, struct socket), + /* Pre-v4 versions */ + VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, + slirp_older_than_v4), + VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, + slirp_older_than_v4), + VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), + VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), + /* v4 and newer */ + VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, + union slirp_sockaddr), + VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, + union slirp_sockaddr), + + VMSTATE_UINT8(so_iptos, struct socket), + VMSTATE_UINT8(so_emu, struct socket), + VMSTATE_UINT8(so_type, struct socket), + VMSTATE_INT32(so_state, struct socket), + VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, + struct sbuf), + VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, + struct sbuf), + VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, + struct tcpcb), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_slirp_bootp_client = { + .name = "slirp_bootpclient", + .fields = (VMStateField[]) { + VMSTATE_UINT16(allocated, BOOTPClient), + VMSTATE_BUFFER(macaddr, BOOTPClient), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_slirp = { + .name = "slirp", + .version_id = 4, + .fields = (VMStateField[]) { + VMSTATE_UINT16_V(ip_id, Slirp, 2), + VMSTATE_STRUCT_ARRAY(bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, + vmstate_slirp_bootp_client, BOOTPClient), + VMSTATE_END_OF_LIST() + } +}; + +void slirp_state_save(Slirp *slirp, SlirpWriteCb write_cb, void *opaque) +{ + struct gfwd_list *ex_ptr; + SlirpOStream f = { + .write_cb = write_cb, + .opaque = opaque, + }; + + for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) + if (ex_ptr->write_cb) { + struct socket *so; + so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, + ntohs(ex_ptr->ex_fport)); + if (!so) { + continue; + } + + slirp_ostream_write_u8(&f, 42); + slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); + } + slirp_ostream_write_u8(&f, 0); + + slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); +} + + +int slirp_state_load(Slirp *slirp, int version_id, + SlirpReadCb read_cb, void *opaque) +{ + struct gfwd_list *ex_ptr; + SlirpIStream f = { + .read_cb = read_cb, + .opaque = opaque, + }; + + while (slirp_istream_read_u8(&f)) { + int ret; + struct socket *so = socreate(slirp); + + ret = slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); + if (ret < 0) { + return ret; + } + + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != + slirp->vnetwork_addr.s_addr) { + return -EINVAL; + } + for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->write_cb && + so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && + so->so_fport == ex_ptr->ex_fport) { + break; + } + } + if (!ex_ptr) { + return -EINVAL; + } + } + + return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); +} + +int slirp_state_version(void) +{ + return 4; +} diff --git a/slirp/src/state.h b/slirp/src/state.h new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/slirp/src/state.h diff --git a/slirp/src/stream.c b/slirp/src/stream.c new file mode 100644 index 0000000000..d114dde334 --- /dev/null +++ b/slirp/src/stream.c @@ -0,0 +1,119 @@ +/* + * libslirp io streams + * + * Copyright (c) 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "stream.h" +#include <glib.h> + +bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) +{ + return f->read_cb(buf, size, f->opaque) == size; +} + +bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) +{ + return f->write_cb(buf, size, f->opaque) == size; +} + +uint8_t slirp_istream_read_u8(SlirpIStream *f) +{ + uint8_t b; + + if (slirp_istream_read(f, &b, sizeof(b))) { + return b; + } + + return 0; +} + +bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) +{ + return slirp_ostream_write(f, &b, sizeof(b)); +} + +uint16_t slirp_istream_read_u16(SlirpIStream *f) +{ + uint16_t b; + + if (slirp_istream_read(f, &b, sizeof(b))) { + return GUINT16_FROM_BE(b); + } + + return 0; +} + +bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) +{ + b = GUINT16_TO_BE(b); + return slirp_ostream_write(f, &b, sizeof(b)); +} + +uint32_t slirp_istream_read_u32(SlirpIStream *f) +{ + uint32_t b; + + if (slirp_istream_read(f, &b, sizeof(b))) { + return GUINT32_FROM_BE(b); + } + + return 0; +} + +bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) +{ + b = GUINT32_TO_BE(b); + return slirp_ostream_write(f, &b, sizeof(b)); +} + +int16_t slirp_istream_read_i16(SlirpIStream *f) +{ + int16_t b; + + if (slirp_istream_read(f, &b, sizeof(b))) { + return GINT16_FROM_BE(b); + } + + return 0; +} + +bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) +{ + b = GINT16_TO_BE(b); + return slirp_ostream_write(f, &b, sizeof(b)); +} + +int32_t slirp_istream_read_i32(SlirpIStream *f) +{ + int32_t b; + + if (slirp_istream_read(f, &b, sizeof(b))) { + return GINT32_FROM_BE(b); + } + + return 0; +} + +bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) +{ + b = GINT32_TO_BE(b); + return slirp_ostream_write(f, &b, sizeof(b)); +} diff --git a/slirp/src/stream.h b/slirp/src/stream.h new file mode 100644 index 0000000000..985334c043 --- /dev/null +++ b/slirp/src/stream.h @@ -0,0 +1,34 @@ +#ifndef STREAM_H_ +#define STREAM_H_ + +#include "libslirp.h" + +typedef struct SlirpIStream { + SlirpReadCb read_cb; + void *opaque; +} SlirpIStream; + +typedef struct SlirpOStream { + SlirpWriteCb write_cb; + void *opaque; +} SlirpOStream; + +bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); +bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); + +uint8_t slirp_istream_read_u8(SlirpIStream *f); +bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); + +uint16_t slirp_istream_read_u16(SlirpIStream *f); +bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); + +uint32_t slirp_istream_read_u32(SlirpIStream *f); +bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); + +int16_t slirp_istream_read_i16(SlirpIStream *f); +bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); + +int32_t slirp_istream_read_i32(SlirpIStream *f); +bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); + +#endif /* STREAM_H_ */ diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h new file mode 100644 index 0000000000..47aaea6c5b --- /dev/null +++ b/slirp/src/tcp.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp + */ + +#ifndef TCP_H +#define TCP_H + +#include <glib.h> + +typedef uint32_t tcp_seq; + +#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ +#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ + +#define TCP_SNDSPACE 8192 +#define TCP_RCVSPACE 8192 + +/* + * TCP header. + * Per RFC 793, September, 1981. + */ +#define tcphdr slirp_tcphdr +struct tcphdr { + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#if G_BYTE_ORDER == G_BIG_ENDIAN + uint8_t th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#else + uint8_t th_x2:4, /* (unused) */ + th_off:4; /* data offset */ +#endif + uint8_t th_flags; + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +}; + +#include "tcp_var.h" + +#ifndef TH_FIN +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#endif + +#ifndef TCPOPT_EOL +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ +#define TCPOPT_SACK 5 /* Experimental */ +#define TCPOPT_TIMESTAMP 8 + +#define TCPOPT_TSTAMP_HDR \ + (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP) +#endif + +#ifndef TCPOLEN_MAXSEG +#define TCPOLEN_MAXSEG 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ +#endif + +/* + * Default maximum segment size for TCP. + * With an IP MSS of 576, this is 536, + * but 512 is probably more convenient. + * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). + * + * We make this 1460 because we only care about Ethernet in the qemu context. + */ +#undef TCP_MSS +#define TCP_MSS 1460 +#undef TCP6_MSS +#define TCP6_MSS 1440 + +#undef TCP_MAXWIN +#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ + +#undef TCP_MAX_WINSHIFT +#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +/* + * User-settable options (used with setsockopt). + * + * We don't use the system headers on unix because we have conflicting + * local structures. We can't avoid the system definitions on Windows, + * so we undefine them. + */ +#undef TCP_NODELAY +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#undef TCP_MAXSEG + +/* + * TCP FSM state definitions. + * Per RFC793, September, 1981. + */ + +#define TCP_NSTATES 11 + +#define TCPS_CLOSED 0 /* closed */ +#define TCPS_LISTEN 1 /* listening for connection */ +#define TCPS_SYN_SENT 2 /* active, have sent syn */ +#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ +/* states < TCPS_ESTABLISHED are those where connections not established */ +#define TCPS_ESTABLISHED 4 /* established */ +#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +/* states > TCPS_CLOSE_WAIT are those where user has closed */ +#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ +/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ +#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ + +#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) +#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) +#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) + +/* + * TCP sequence numbers are 32 bit integers operated + * on with modular arithmetic. These macros can be + * used to compare such integers. + */ +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers. + */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss + +#define TCP_ISSINCR (125*1024) /* increment for tcp_iss each second */ + +#endif diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c new file mode 100644 index 0000000000..b10477fc57 --- /dev/null +++ b/slirp/src/tcp_input.c @@ -0,0 +1,1556 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 + * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" +#include "ip_icmp.h" + +#define TCPREXMTTHRESH 3 + +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) + +/* for modulo comparisons of timestamps */ +#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) +#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Insert segment ti into reassembly queue of tcp with + * control block tp. Return TH_FIN if reassembly now includes + * a segment with FIN. The macro form does the common case inline + * (segment is the next to be received on an established connection, + * and the queue is empty), avoiding linkage into and removal + * from the queue and repetition of various conversions. + * Set DELACK for segments received in order, but ack immediately + * when segments are out of order (so fast retransmit can work). + */ +#define TCP_REASS(tp, ti, m, so, flags) { \ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + tcpfrag_list_empty(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) { \ + tp->t_flags |= TF_DELACK; \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + if (so->so_emu) { \ + if (tcp_emu((so),(m))) sbappend(so, (m)); \ + } else \ + sbappend((so), (m)); \ + } else { \ + (flags) = tcp_reass((tp), (ti), (m)); \ + tp->t_flags |= TF_ACKNOW; \ + } \ +} + +static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, + struct tcpiphdr *ti); +static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); + +static int +tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, + struct mbuf *m) +{ + register struct tcpiphdr *q; + struct socket *so = tp->t_socket; + int flags; + + /* + * Call with ti==NULL after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (ti == NULL) + goto present; + + /* + * Find a segment which begins after this one does. + */ + for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); + q = tcpiphdr_next(q)) + if (SEQ_GT(q->ti_seq, ti->ti_seq)) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { + register int i; + q = tcpiphdr_prev(q); + /* conversion to int (in i) handles seq wraparound */ + i = q->ti_seq + q->ti_len - ti->ti_seq; + if (i > 0) { + if (i >= ti->ti_len) { + m_free(m); + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ + } + m_adj(m, i); + ti->ti_len -= i; + ti->ti_seq += i; + } + q = tcpiphdr_next(q); + } + ti->ti_mbuf = m; + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (!tcpfrag_list_end(q, tp)) { + register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; + if (i <= 0) + break; + if (i < q->ti_len) { + q->ti_seq += i; + q->ti_len -= i; + m_adj(q->ti_mbuf, i); + break; + } + q = tcpiphdr_next(q); + m = tcpiphdr_prev(q)->ti_mbuf; + remque(tcpiphdr2qlink(tcpiphdr_prev(q))); + m_free(m); + } + + /* + * Stick new segment in its place. + */ + insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + if (!TCPS_HAVEESTABLISHED(tp->t_state)) + return (0); + ti = tcpfrag_list_first(tp); + if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) + return (0); + if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) + return (0); + do { + tp->rcv_nxt += ti->ti_len; + flags = ti->ti_flags & TH_FIN; + remque(tcpiphdr2qlink(ti)); + m = ti->ti_mbuf; + ti = tcpiphdr_next(ti); + if (so->so_state & SS_FCANTSENDMORE) + m_free(m); + else { + if (so->so_emu) { + if (tcp_emu(so,m)) sbappend(so, m); + } else + sbappend(so, m); + } + } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); + return (flags); +} + +/* + * TCP input routine, follows pages 65-76 of the + * protocol specification dated September, 1981 very closely. + */ +void +tcp_input(struct mbuf *m, int iphlen, struct socket *inso, unsigned short af) +{ + struct ip save_ip, *ip; + struct ip6 save_ip6, *ip6; + register struct tcpiphdr *ti; + char *optp = NULL; + int optlen = 0; + int len, tlen, off; + register struct tcpcb *tp = NULL; + register int tiflags; + struct socket *so = NULL; + int todrop, acked, ourfinisacked, needoutput = 0; + int iss = 0; + uint32_t tiwin; + int ret; + struct sockaddr_storage lhost, fhost; + struct sockaddr_in *lhost4, *fhost4; + struct sockaddr_in6 *lhost6, *fhost6; + struct gfwd_list *ex_ptr; + Slirp *slirp; + + DEBUG_CALL("tcp_input"); + DEBUG_ARG("m = %p iphlen = %2d inso = %p", + m, iphlen, inso); + + /* + * If called with m == 0, then we're continuing the connect + */ + if (m == NULL) { + so = inso; + slirp = so->slirp; + + /* Re-set a few variables */ + tp = sototcpcb(so); + m = so->so_m; + so->so_m = NULL; + ti = so->so_ti; + tiwin = ti->ti_win; + tiflags = ti->ti_flags; + + goto cont_conn; + } + slirp = m->slirp; + + ip = mtod(m, struct ip *); + ip6 = mtod(m, struct ip6 *); + + switch (af) { + case AF_INET: + if (iphlen > sizeof(struct ip)) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + /* XXX Check if too short */ + + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip = *ip; + save_ip.ip_len += iphlen; + + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + m->m_data -= sizeof(struct tcpiphdr) - sizeof(struct ip) + - sizeof(struct tcphdr); + m->m_len += sizeof(struct tcpiphdr) - sizeof(struct ip) + - sizeof(struct tcphdr); + ti = mtod(m, struct tcpiphdr *); + + /* + * Checksum extended TCP header and data. + */ + tlen = ip->ip_len; + tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; + memset(&ti->ih_mbuf, 0 , sizeof(struct mbuf_ptr)); + memset(&ti->ti, 0, sizeof(ti->ti)); + ti->ti_x0 = 0; + ti->ti_src = save_ip.ip_src; + ti->ti_dst = save_ip.ip_dst; + ti->ti_pr = save_ip.ip_p; + ti->ti_len = htons((uint16_t)tlen); + break; + + case AF_INET6: + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip6 = *ip6; + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + m->m_data -= sizeof(struct tcpiphdr) - (sizeof(struct ip6) + + sizeof(struct tcphdr)); + m->m_len += sizeof(struct tcpiphdr) - (sizeof(struct ip6) + + sizeof(struct tcphdr)); + ti = mtod(m, struct tcpiphdr *); + + tlen = ip6->ip_pl; + tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; + memset(&ti->ih_mbuf, 0 , sizeof(struct mbuf_ptr)); + memset(&ti->ti, 0, sizeof(ti->ti)); + ti->ti_x0 = 0; + ti->ti_src6 = save_ip6.ip_src; + ti->ti_dst6 = save_ip6.ip_dst; + ti->ti_nh6 = save_ip6.ip_nh; + ti->ti_len = htons((uint16_t)tlen); + break; + + default: + g_assert_not_reached(); + } + + len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); + if (cksum(m, len)) { + goto drop; + } + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. XXX + */ + off = ti->ti_off << 2; + if (off < sizeof (struct tcphdr) || off > tlen) { + goto drop; + } + tlen -= off; + ti->ti_len = tlen; + if (off > sizeof (struct tcphdr)) { + optlen = off - sizeof (struct tcphdr); + optp = mtod(m, char *) + sizeof (struct tcpiphdr); + } + tiflags = ti->ti_flags; + + /* + * Convert TCP protocol specific fields to host format. + */ + NTOHL(ti->ti_seq); + NTOHL(ti->ti_ack); + NTOHS(ti->ti_win); + NTOHS(ti->ti_urp); + + /* + * Drop TCP, IP headers and TCP options. + */ + m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + + /* + * Locate pcb for segment. + */ +findso: + lhost.ss_family = af; + fhost.ss_family = af; + switch (af) { + case AF_INET: + lhost4 = (struct sockaddr_in *) &lhost; + lhost4->sin_addr = ti->ti_src; + lhost4->sin_port = ti->ti_sport; + fhost4 = (struct sockaddr_in *) &fhost; + fhost4->sin_addr = ti->ti_dst; + fhost4->sin_port = ti->ti_dport; + break; + case AF_INET6: + lhost6 = (struct sockaddr_in6 *) &lhost; + lhost6->sin6_addr = ti->ti_src6; + lhost6->sin6_port = ti->ti_sport; + fhost6 = (struct sockaddr_in6 *) &fhost; + fhost6->sin6_addr = ti->ti_dst6; + fhost6->sin6_port = ti->ti_dport; + break; + default: + g_assert_not_reached(); + } + + so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); + + /* + * If the state is CLOSED (i.e., TCB does not exist) then + * all data in the incoming segment is discarded. + * If the TCB exists but is in CLOSED state, it is embryonic, + * but should either do a listen or a connect soon. + * + * state == CLOSED means we've done socreate() but haven't + * attached it to a protocol yet... + * + * XXX If a TCB does not exist, and the TH_SYN flag is + * the only flag set, then create a session, mark it + * as if it was LISTENING, and continue... + */ + if (so == NULL) { + /* TODO: IPv6 */ + if (slirp->restricted) { + /* Any hostfwds will have an existing socket, so we only get here + * for non-hostfwd connections. These should be dropped, unless it + * happens to be a guestfwd. + */ + for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_fport == ti->ti_dport && + ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { + break; + } + } + if (!ex_ptr) { + goto dropwithreset; + } + } + + if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN) + goto dropwithreset; + + so = socreate(slirp); + if (tcp_attach(so) < 0) { + g_free(so); /* Not sofree (if it failed, it's not insqued) */ + goto dropwithreset; + } + + sbreserve(&so->so_snd, TCP_SNDSPACE); + sbreserve(&so->so_rcv, TCP_RCVSPACE); + + so->lhost.ss = lhost; + so->fhost.ss = fhost; + + so->so_iptos = tcp_tos(so); + if (so->so_iptos == 0) { + switch (af) { + case AF_INET: + so->so_iptos = ((struct ip *)ti)->ip_tos; + break; + case AF_INET6: + break; + default: + g_assert_not_reached(); + } + } + + tp = sototcpcb(so); + tp->t_state = TCPS_LISTEN; + } + + /* + * If this is a still-connecting socket, this probably + * a retransmit of the SYN. Whether it's a retransmit SYN + * or something else, we nuke it. + */ + if (so->so_state & SS_ISFCONNECTING) + goto drop; + + tp = sototcpcb(so); + + /* XXX Should never fail */ + if (tp == NULL) + goto dropwithreset; + if (tp->t_state == TCPS_CLOSED) + goto drop; + + tiwin = ti->ti_win; + + /* + * Segment received on connection. + * Reset idle time and keep-alive timer. + */ + tp->t_idle = 0; + if (slirp_do_keepalive) + tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; + else + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; + + /* + * Process options if not in LISTEN state, + * else do it below (after getting remote address). + */ + if (optp && tp->t_state != TCPS_LISTEN) + tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); + + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + * + * XXX Some of these tests are not needed + * eg: the tiwin == tp->snd_wnd prevents many more + * predictions.. with no *real* advantage.. + */ + if (tp->t_state == TCPS_ESTABLISHED && + (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && + ti->ti_seq == tp->rcv_nxt && + tiwin && tiwin == tp->snd_wnd && + tp->snd_nxt == tp->snd_max) { + if (ti->ti_len == 0) { + if (SEQ_GT(ti->ti_ack, tp->snd_una) && + SEQ_LEQ(ti->ti_ack, tp->snd_max) && + tp->snd_cwnd >= tp->snd_wnd) { + /* + * this is a pure ack for outstanding data. + */ + if (tp->t_rtt && + SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(tp, tp->t_rtt); + acked = ti->ti_ack - tp->snd_una; + sodrop(so, acked); + tp->snd_una = ti->ti_ack; + m_free(m); + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let tcp_output + * decide between more output or persist. + */ + if (tp->snd_una == tp->snd_max) + tp->t_timer[TCPT_REXMT] = 0; + else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + /* + * This is called because sowwakeup might have + * put data into so_snd. Since we don't so sowwakeup, + * we don't need this.. XXX??? + */ + if (so->so_snd.sb_cc) + (void) tcp_output(tp); + + return; + } + } else if (ti->ti_ack == tp->snd_una && + tcpfrag_list_empty(tp) && + ti->ti_len <= sbspace(&so->so_rcv)) { + /* + * this is a pure, in-sequence data packet + * with nothing on the reassembly queue and + * we have enough buffer space to take it. + */ + tp->rcv_nxt += ti->ti_len; + /* + * Add data to socket buffer. + */ + if (so->so_emu) { + if (tcp_emu(so,m)) sbappend(so, m); + } else + sbappend(so, m); + + /* + * If this is a short packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + * + * It is better to not delay acks at all to maximize + * TCP throughput. See RFC 2581. + */ + tp->t_flags |= TF_ACKNOW; + tcp_output(tp); + return; + } + } /* header prediction */ + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. + */ + { int win; + win = sbspace(&so->so_rcv); + if (win < 0) + win = 0; + tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + } + + switch (tp->t_state) { + + /* + * If the state is LISTEN then ignore segment if it contains an RST. + * If the segment contains an ACK then it is bad and send a RST. + * If it does not contain a SYN then it is not interesting; drop it. + * Don't bother responding if the destination was a broadcast. + * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial + * tp->iss, and send a segment: + * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> + * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. + * Fill in remote peer address fields if not previously specified. + * Enter SYN_RECEIVED state, and process any other fields of this + * segment in this state. + */ + case TCPS_LISTEN: { + + if (tiflags & TH_RST) + goto drop; + if (tiflags & TH_ACK) + goto dropwithreset; + if ((tiflags & TH_SYN) == 0) + goto drop; + + /* + * This has way too many gotos... + * But a bit of spaghetti code never hurt anybody :) + */ + + /* + * If this is destined for the control address, then flag to + * tcp_ctl once connected, otherwise connect + */ + /* TODO: IPv6 */ + if (af == AF_INET && + (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && + so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { + /* May be an add exec */ + for (ex_ptr = slirp->guestfwd_list; ex_ptr; + ex_ptr = ex_ptr->ex_next) { + if(ex_ptr->ex_fport == so->so_fport && + so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { + so->so_state |= SS_CTL; + break; + } + } + if (so->so_state & SS_CTL) { + goto cont_input; + } + } + /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ + } + + if (so->so_emu & EMU_NOCONNECT) { + so->so_emu &= ~EMU_NOCONNECT; + goto cont_input; + } + + if ((tcp_fconnect(so, so->so_ffamily) == -1) && + (errno != EAGAIN) && + (errno != EINPROGRESS) && (errno != EWOULDBLOCK) + ) { + uint8_t code; + DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); + if(errno == ECONNREFUSED) { + /* ACK the SYN, send RST to refuse the connection */ + tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq) 0, + TH_RST | TH_ACK, af); + } else { + switch (af) { + case AF_INET: + code = ICMP_UNREACH_NET; + if (errno == EHOSTUNREACH) { + code = ICMP_UNREACH_HOST; + } + break; + case AF_INET6: + code = ICMP6_UNREACH_NO_ROUTE; + if (errno == EHOSTUNREACH) { + code = ICMP6_UNREACH_ADDRESS; + } + break; + default: + g_assert_not_reached(); + } + HTONL(ti->ti_seq); /* restore tcp header */ + HTONL(ti->ti_ack); + HTONS(ti->ti_win); + HTONS(ti->ti_urp); + m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + switch (af) { + case AF_INET: + m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) + - sizeof(struct tcphdr); + m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) + - sizeof(struct tcphdr); + *ip = save_ip; + icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); + break; + case AF_INET6: + m->m_data += sizeof(struct tcpiphdr) - (sizeof(struct ip6) + + sizeof(struct tcphdr)); + m->m_len -= sizeof(struct tcpiphdr) - (sizeof(struct ip6) + + sizeof(struct tcphdr)); + *ip6 = save_ip6; + icmp6_send_error(m, ICMP6_UNREACH, code); + break; + default: + g_assert_not_reached(); + } + } + tcp_close(tp); + m_free(m); + } else { + /* + * Haven't connected yet, save the current mbuf + * and ti, and return + * XXX Some OS's don't tell us whether the connect() + * succeeded or not. So we must time it out. + */ + so->so_m = m; + so->so_ti = ti; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->t_state = TCPS_SYN_RECEIVED; + /* + * Initialize receive sequence numbers now so that we can send a + * valid RST if the remote end rejects our connection. + */ + tp->irs = ti->ti_seq; + tcp_rcvseqinit(tp); + tcp_template(tp); + } + return; + + cont_conn: + /* m==NULL + * Check if the connect succeeded + */ + if (so->so_state & SS_NOFDREF) { + tp = tcp_close(tp); + goto dropwithreset; + } + cont_input: + tcp_template(tp); + + if (optp) + tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); + + if (iss) + tp->iss = iss; + else + tp->iss = slirp->tcp_iss; + slirp->tcp_iss += TCP_ISSINCR/2; + tp->irs = ti->ti_seq; + tcp_sendseqinit(tp); + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + tp->t_state = TCPS_SYN_RECEIVED; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + goto trimthenstep6; + } /* case TCPS_LISTEN */ + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ((tiflags & TH_ACK) && + (SEQ_LEQ(ti->ti_ack, tp->iss) || + SEQ_GT(ti->ti_ack, tp->snd_max))) + goto dropwithreset; + + if (tiflags & TH_RST) { + if (tiflags & TH_ACK) { + tcp_drop(tp, 0); /* XXX Check t_softerror! */ + } + goto drop; + } + + if ((tiflags & TH_SYN) == 0) + goto drop; + if (tiflags & TH_ACK) { + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + } + + tp->t_timer[TCPT_REXMT] = 0; + tp->irs = ti->ti_seq; + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { + soisfconnected(so); + tp->t_state = TCPS_ESTABLISHED; + + (void) tcp_reass(tp, (struct tcpiphdr *)0, + (struct mbuf *)0); + /* + * if we didn't have to retransmit the SYN, + * use its rtt as our initial srtt & rtt var. + */ + if (tp->t_rtt) + tcp_xmit_timer(tp, tp->t_rtt); + } else + tp->t_state = TCPS_SYN_RECEIVED; + +trimthenstep6: + /* + * Advance ti->ti_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + ti->ti_seq++; + if (ti->ti_len > tp->rcv_wnd) { + todrop = ti->ti_len - tp->rcv_wnd; + m_adj(m, -todrop); + ti->ti_len = tp->rcv_wnd; + tiflags &= ~TH_FIN; + } + tp->snd_wl1 = ti->ti_seq - 1; + tp->rcv_up = ti->ti_seq; + goto step6; + } /* switch tp->t_state */ + /* + * States other than LISTEN or SYN_SENT. + * Check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. + */ + todrop = tp->rcv_nxt - ti->ti_seq; + if (todrop > 0) { + if (tiflags & TH_SYN) { + tiflags &= ~TH_SYN; + ti->ti_seq++; + if (ti->ti_urp > 1) + ti->ti_urp--; + else + tiflags &= ~TH_URG; + todrop--; + } + /* + * Following if statement from Stevens, vol. 2, p. 960. + */ + if (todrop > ti->ti_len + || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { + /* + * Any valid FIN must be to the left of the window. + * At this point the FIN must be a duplicate or out + * of sequence; drop it. + */ + tiflags &= ~TH_FIN; + + /* + * Send an ACK to resynchronize and drop any data. + * But keep on processing for RST or ACK. + */ + tp->t_flags |= TF_ACKNOW; + todrop = ti->ti_len; + } + m_adj(m, todrop); + ti->ti_seq += todrop; + ti->ti_len -= todrop; + if (ti->ti_urp > todrop) + ti->ti_urp -= todrop; + else { + tiflags &= ~TH_URG; + ti->ti_urp = 0; + } + } + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. + */ + if ((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { + tp = tcp_close(tp); + goto dropwithreset; + } + + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); + if (todrop > 0) { + if (todrop >= ti->ti_len) { + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. + */ + if (tiflags & TH_SYN && + tp->t_state == TCPS_TIME_WAIT && + SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { + iss = tp->rcv_nxt + TCP_ISSINCR; + tp = tcp_close(tp); + goto findso; + } + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. + */ + if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { + tp->t_flags |= TF_ACKNOW; + } else { + goto dropafterack; + } + } + m_adj(m, -todrop); + ti->ti_len -= todrop; + tiflags &= ~(TH_PUSH|TH_FIN); + } + + /* + * If the RST bit is set examine the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close tcb. + * CLOSING, LAST_ACK, TIME_WAIT STATES + * Close the tcb. + */ + if (tiflags&TH_RST) switch (tp->t_state) { + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_CLOSED; + tcp_close(tp); + goto drop; + + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + tcp_close(tp); + goto drop; + } + + /* + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. + */ + if (tiflags & TH_SYN) { + tp = tcp_drop(tp,0); + goto dropwithreset; + } + + /* + * If the ACK bit is off we drop the segment and return. + */ + if ((tiflags & TH_ACK) == 0) goto drop; + + /* + * Ack processing. + */ + switch (tp->t_state) { + /* + * In SYN_RECEIVED state if the ack ACKs our SYN then enter + * ESTABLISHED state and continue processing, otherwise + * send an RST. una<=ack<=max + */ + case TCPS_SYN_RECEIVED: + + if (SEQ_GT(tp->snd_una, ti->ti_ack) || + SEQ_GT(ti->ti_ack, tp->snd_max)) + goto dropwithreset; + tp->t_state = TCPS_ESTABLISHED; + /* + * The sent SYN is ack'ed with our sequence number +1 + * The first data byte already in the buffer will get + * lost if no correction is made. This is only needed for + * SS_CTL since the buffer is empty otherwise. + * tp->snd_una++; or: + */ + tp->snd_una=ti->ti_ack; + if (so->so_state & SS_CTL) { + /* So tcp_ctl reports the right state */ + ret = tcp_ctl(so); + if (ret == 1) { + soisfconnected(so); + so->so_state &= ~SS_CTL; /* success XXX */ + } else if (ret == 2) { + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; /* CTL_CMD */ + } else { + needoutput = 1; + tp->t_state = TCPS_FIN_WAIT_1; + } + } else { + soisfconnected(so); + } + + (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); + tp->snd_wl1 = ti->ti_seq - 1; + /* Avoid ack processing; snd_una==ti_ack => dup ack */ + goto synrx_to_est; + /* fall into ... */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < ti->ti_ack <= tp->snd_max + * then advance tp->snd_una to ti->ti_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. + */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + + if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { + if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { + DEBUG_MISC(" dup ack m = %p so = %p", m, so); + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change), the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. + */ + if (tp->t_timer[TCPT_REXMT] == 0 || + ti->ti_ack != tp->snd_una) + tp->t_dupacks = 0; + else if (++tp->t_dupacks == TCPREXMTTHRESH) { + tcp_seq onxt = tp->snd_nxt; + unsigned win = + MIN(tp->snd_wnd, tp->snd_cwnd) / + 2 / tp->t_maxseg; + + if (win < 2) + win = 2; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tp->snd_nxt = ti->ti_ack; + tp->snd_cwnd = tp->t_maxseg; + (void) tcp_output(tp); + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tp->t_dupacks; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (tp->t_dupacks > TCPREXMTTHRESH) { + tp->snd_cwnd += tp->t_maxseg; + (void) tcp_output(tp); + goto drop; + } + } else + tp->t_dupacks = 0; + break; + } + synrx_to_est: + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ + if (tp->t_dupacks > TCPREXMTTHRESH && + tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_dupacks = 0; + if (SEQ_GT(ti->ti_ack, tp->snd_max)) { + goto dropafterack; + } + acked = ti->ti_ack - tp->snd_una; + + /* + * If transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). + * Recompute the initial retransmit timer. + */ + if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(tp,tp->t_rtt); + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. + */ + if (ti->ti_ack == tp->snd_max) { + tp->t_timer[TCPT_REXMT] = 0; + needoutput = 1; + } else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * When new data is acked, open the congestion window. + * If the window gives us less than ssthresh packets + * in flight, open exponentially (maxseg per packet). + * Otherwise open linearly: maxseg per window + * (maxseg^2 / cwnd per packet). + */ + { + register unsigned cw = tp->snd_cwnd; + register unsigned incr = tp->t_maxseg; + + if (cw > tp->snd_ssthresh) + incr = incr * incr / cw; + tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); + } + if (acked > so->so_snd.sb_cc) { + tp->snd_wnd -= so->so_snd.sb_cc; + sodrop(so, (int)so->so_snd.sb_cc); + ourfinisacked = 1; + } else { + sodrop(so, acked); + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) { + + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + */ + if (so->so_state & SS_FCANTRCVMORE) { + tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; + } + tp->t_state = TCPS_FIN_WAIT_2; + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + if (ourfinisacked) { + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) { + tcp_close(tp); + goto drop; + } + break; + + /* + * In TIME_WAIT state the only thing that should arrive + * is a retransmission of the remote FIN. Acknowledge + * it and restart the finack timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + goto dropafterack; + } + } /* switch(tp->t_state) */ + +step6: + /* + * Update window information. + * Don't look at window if no ACK: TAC's send garbage on first SYN. + */ + if ((tiflags & TH_ACK) && + (SEQ_LT(tp->snd_wl1, ti->ti_seq) || + (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) || + (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { + tp->snd_wnd = tiwin; + tp->snd_wl1 = ti->ti_seq; + tp->snd_wl2 = ti->ti_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. + */ + if ((tiflags & TH_URG) && ti->ti_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { + ti->ti_urp = 0; + tiflags &= ~TH_URG; + goto dodata; + } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). + */ + if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { + tp->rcv_up = ti->ti_seq + ti->ti_urp; + so->so_urgc = so->so_rcv.sb_cc + + (tp->rcv_up - tp->rcv_nxt); /* -1; */ + tp->rcv_up = ti->ti_seq + ti->ti_urp; + + } + } else + /* + * If no out of band data is expected, + * pull receive urgent pointer along + * with the receive window. + */ + if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) + tp->rcv_up = tp->rcv_nxt; +dodata: + + /* + * If this is a small packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + */ + if (ti->ti_len && (unsigned)ti->ti_len <= 5 && + ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { + tp->t_flags |= TF_ACKNOW; + } + + /* + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. + */ + if ((ti->ti_len || (tiflags&TH_FIN)) && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + TCP_REASS(tp, ti, m, so, tiflags); + } else { + m_free(m); + tiflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. + */ + if (tiflags & TH_FIN) { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * If we receive a FIN we can't send more data, + * set it SS_FDRAIN + * Shutdown the socket if there is no rx data in the + * buffer. + * soread() is called on completion of shutdown() and + * will got to TCPS_LAST_ACK, and use tcp_output() + * to send the FIN. + */ + sofwdrain(so); + + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + switch (tp->t_state) { + + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + if(so->so_emu == EMU_CTL) /* no shutdown on socket */ + tp->t_state = TCPS_LAST_ACK; + else + tp->t_state = TCPS_CLOSE_WAIT; + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + tp->t_state = TCPS_CLOSING; + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + + /* + * In TIME_WAIT state restart the 2 MSL time_wait timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + } + } + + /* + * Return any desired output. + */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) { + (void) tcp_output(tp); + } + return; + +dropafterack: + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. + */ + if (tiflags & TH_RST) + goto drop; + m_free(m); + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + return; + +dropwithreset: + /* reuses m if m!=NULL, m_free() unnecessary */ + if (tiflags & TH_ACK) + tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); + else { + if (tiflags & TH_SYN) ti->ti_len++; + tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq) 0, + TH_RST | TH_ACK, af); + } + + return; + +drop: + /* + * Drop space held by incoming segment and return. + */ + m_free(m); +} + +static void +tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, struct tcpiphdr *ti) +{ + uint16_t mss; + int opt, optlen; + + DEBUG_CALL("tcp_dooptions"); + DEBUG_ARG("tp = %p cnt=%i", tp, cnt); + + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; + } + switch (opt) { + + default: + continue; + + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + memcpy((char *) &mss, (char *) cp + 2, sizeof(mss)); + NTOHS(mss); + (void) tcp_mss(tp, mss); /* sets t_maxseg */ + break; + } + } +} + +/* + * Collect new round-trip time estimate + * and update averages and current timeout. + */ + +static void +tcp_xmit_timer(register struct tcpcb *tp, int rtt) +{ + register short delta; + + DEBUG_CALL("tcp_xmit_timer"); + DEBUG_ARG("tp = %p", tp); + DEBUG_ARG("rtt = %d", rtt); + + if (tp->t_srtt != 0) { + /* + * srtt is stored as fixed point with 3 bits after the + * binary point (i.e., scaled by 8). The following magic + * is equivalent to the smoothing algorithm in rfc793 with + * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed + * point). Adjust rtt to origin 0. + */ + delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); + if ((tp->t_srtt += delta) <= 0) + tp->t_srtt = 1; + /* + * We accumulate a smoothed rtt variance (actually, a + * smoothed mean difference), then set the retransmit + * timer to smoothed rtt + 4 times the smoothed variance. + * rttvar is stored as fixed point with 2 bits after the + * binary point (scaled by 4). The following is + * equivalent to rfc793 smoothing with an alpha of .75 + * (rttvar = rttvar*3/4 + |delta| / 4). This replaces + * rfc793's wired-in beta. + */ + if (delta < 0) + delta = -delta; + delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + if ((tp->t_rttvar += delta) <= 0) + tp->t_rttvar = 1; + } else { + /* + * No rtt measurement yet - use the unsmoothed rtt. + * Set the variance to half the rtt (so our first + * retransmit happens at 3*rtt). + */ + tp->t_srtt = rtt << TCP_RTT_SHIFT; + tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); + } + tp->t_rtt = 0; + tp->t_rxtshift = 0; + + /* + * the retransmit should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + */ + TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + + /* + * We received an ack for a packet that wasn't retransmitted; + * it is probably safe to discard any error indications we've + * received recently. This isn't quite right, but close enough + * for now (a route might have failed after we sent a segment, + * and the return path might not be symmetrical). + */ + tp->t_softerror = 0; +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing + * interface without forcing IP to fragment; if bigger than + * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES + * to utilize large mbufs. If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. + * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. + */ + +int +tcp_mss(struct tcpcb *tp, unsigned offer) +{ + struct socket *so = tp->t_socket; + int mss; + + DEBUG_CALL("tcp_mss"); + DEBUG_ARG("tp = %p", tp); + DEBUG_ARG("offer = %d", offer); + + switch (so->so_ffamily) { + case AF_INET: + mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) + - sizeof(struct ip); + break; + case AF_INET6: + mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) + - sizeof(struct ip6); + break; + default: + g_assert_not_reached(); + } + + if (offer) + mss = MIN(mss, offer); + mss = MAX(mss, 32); + if (mss < tp->t_maxseg || offer != 0) + tp->t_maxseg = mss; + + tp->snd_cwnd = mss; + + sbreserve(&so->so_snd, TCP_SNDSPACE + ((TCP_SNDSPACE % mss) ? + (mss - (TCP_SNDSPACE % mss)) : + 0)); + sbreserve(&so->so_rcv, TCP_RCVSPACE + ((TCP_RCVSPACE % mss) ? + (mss - (TCP_RCVSPACE % mss)) : + 0)); + + DEBUG_MISC(" returning mss = %d", mss); + + return mss; +} diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c new file mode 100644 index 0000000000..e9674df121 --- /dev/null +++ b/slirp/src/tcp_output.c @@ -0,0 +1,524 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 + * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" + +static const uint8_t tcp_outflags[TCP_NSTATES] = { + TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK, + TH_ACK, TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK, + TH_FIN|TH_ACK, TH_ACK, TH_ACK, +}; + + +#undef MAX_TCPOPTLEN +#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ + +/* + * Tcp output routine: figure out what should be sent and send it. + */ +int +tcp_output(struct tcpcb *tp) +{ + register struct socket *so = tp->t_socket; + register long len, win; + int off, flags, error; + register struct mbuf *m; + register struct tcpiphdr *ti, tcpiph_save; + struct ip *ip; + struct ip6 *ip6; + uint8_t opt[MAX_TCPOPTLEN]; + unsigned optlen, hdrlen; + int idle, sendalot; + + DEBUG_CALL("tcp_output"); + DEBUG_ARG("tp = %p", tp); + + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->snd_max == tp->snd_una); + if (idle && tp->t_idle >= tp->t_rxtcur) + /* + * We have been idle for "a while" and no acks are + * expected to clock out any data we send -- + * slow start to get ack "clock" running again. + */ + tp->snd_cwnd = tp->t_maxseg; +again: + sendalot = 0; + off = tp->snd_nxt - tp->snd_una; + win = MIN(tp->snd_wnd, tp->snd_cwnd); + + flags = tcp_outflags[tp->t_state]; + + DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); + + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. + */ + if (tp->t_force) { + if (win == 0) { + /* + * If we still have some data to send, then + * clear the FIN bit. Usually this would + * happen below when it realizes that we + * aren't sending all the data. However, + * if we have exactly 1 byte of unset data, + * then it won't clear the FIN bit below, + * and if we are in persist state, we wind + * up sending the packet without recording + * that we sent the FIN bit. + * + * We can't just blindly clear the FIN bit, + * because if we don't have any more data + * to send then the probe will be the FIN + * itself. + */ + if (off < so->so_snd.sb_cc) + flags &= ~TH_FIN; + win = 1; + } else { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + + len = MIN(so->so_snd.sb_cc, win) - off; + + if (len < 0) { + /* + * If FIN has been sent but not acked, + * but we haven't been called to retransmit, + * len will be -1. Otherwise, window shrank + * after we sent into it. If window shrank to 0, + * cancel pending retransmit and pull snd_nxt + * back to (closed) window. We will enter persist + * state below. If the window didn't close completely, + * just wait for an ACK. + */ + len = 0; + if (win == 0) { + tp->t_timer[TCPT_REXMT] = 0; + tp->snd_nxt = tp->snd_una; + } + } + + if (len > tp->t_maxseg) { + len = tp->t_maxseg; + sendalot = 1; + } + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~TH_FIN; + + win = sbspace(&so->so_rcv); + + /* + * Sender silly window avoidance. If connection is idle + * and can send all data, a maximum segment, + * at least a maximum default-size segment do it, + * or are forced, do it; otherwise don't bother. + * If peer's buffer is tiny, then send + * when window is at least half open. + * If retransmitting (possibly after persist timer forced us + * to send into a small window), then must resend. + */ + if (len) { + if (len == tp->t_maxseg) + goto send; + if ((1 || idle || tp->t_flags & TF_NODELAY) && + len + off >= so->so_snd.sb_cc) + goto send; + if (tp->t_force) + goto send; + if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) + goto send; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + goto send; + } + + /* + * Compare available window to amount of window + * known to peer (as advertised window less + * next expected input). If the difference is at least two + * max size segments, or at least 50% of the maximum possible + * window, then want to send a window update to peer. + */ + if (win > 0) { + /* + * "adv" is the amount we can increase the window, + * taking into account that we are limited by + * TCP_MAXWIN << tp->rcv_scale. + */ + long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - + (tp->rcv_adv - tp->rcv_nxt); + + if (adv >= (long) (2 * tp->t_maxseg)) + goto send; + if (2 * adv >= (long) so->so_rcv.sb_datalen) + goto send; + } + + /* + * Send if we owe peer an ACK. + */ + if (tp->t_flags & TF_ACKNOW) + goto send; + if (flags & (TH_SYN|TH_RST)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + /* + * If our state indicates that FIN should be sent + * and we have not yet done so, or we're retransmitting the FIN, + * then we need to send. + */ + if (flags & TH_FIN && + ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) + goto send; + + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to insure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a small or zero window + * (re)transmitting and thereby not persisting + * + * tp->t_timer[TCPT_PERSIST] + * is set when we are in persist state. + * tp->t_force + * is set when we are called to send a persist packet. + * tp->t_timer[TCPT_REXMT] + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is too small, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state. + * If nothing happens soon, send when timer expires: + * if window is nonzero, transmit what we can, + * otherwise force out a byte. + */ + if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && + tp->t_timer[TCPT_PERSIST] == 0) { + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + + /* + * No reason to send a segment, just return. + */ + return (0); + +send: + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set not to do any options. + * NOTE: we assume that the IP/TCP header plus TCP options + * always fit in a single mbuf, leaving room for a maximum + * link header, i.e. + * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN + */ + optlen = 0; + hdrlen = sizeof (struct tcpiphdr); + if (flags & TH_SYN) { + tp->snd_nxt = tp->iss; + if ((tp->t_flags & TF_NOOPT) == 0) { + uint16_t mss; + + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + mss = htons((uint16_t) tcp_mss(tp, 0)); + memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); + optlen = 4; + } + } + + hdrlen += optlen; + + /* + * Adjust data length if insertion of options will + * bump the packet length beyond the t_maxseg length. + */ + if (len > tp->t_maxseg - optlen) { + len = tp->t_maxseg - optlen; + sendalot = 1; + } + + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + if (len) { + m = m_get(so->slirp); + if (m == NULL) { + error = 1; + goto out; + } + m->m_data += IF_MAXLINKHDR; + m->m_len = hdrlen; + + sbcopy(&so->so_snd, off, (int) len, mtod(m, char *) + hdrlen); + m->m_len += len; + + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) + */ + if (off + len == so->so_snd.sb_cc) + flags |= TH_PUSH; + } else { + m = m_get(so->slirp); + if (m == NULL) { + error = 1; + goto out; + } + m->m_data += IF_MAXLINKHDR; + m->m_len = hdrlen; + } + + ti = mtod(m, struct tcpiphdr *); + + memcpy((char *)ti, &tp->t_template, sizeof (struct tcpiphdr)); + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && + tp->snd_nxt == tp->snd_max) + tp->snd_nxt--; + /* + * If we are doing retransmissions, then snd_nxt will + * not reflect the first unsent octet. For ACK only + * packets, we do not want the sequence number of the + * retransmitted packet, we want the sequence number + * of the next unsent octet. So, if there is no data + * (and no SYN or FIN), use snd_max instead of snd_nxt + * when filling in ti_seq. But if we are in persist + * state, snd_max might reflect one byte beyond the + * right edge of the window, so use snd_nxt in that + * case, since we know we aren't doing a retransmission. + * (retransmit and persist are mutually exclusive...) + */ + if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) + ti->ti_seq = htonl(tp->snd_nxt); + else + ti->ti_seq = htonl(tp->snd_max); + ti->ti_ack = htonl(tp->rcv_nxt); + if (optlen) { + memcpy((char *)(ti + 1), (char *)opt, optlen); + ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; + } + ti->ti_flags = flags; + /* + * Calculate receive window. Don't shrink window, + * but avoid silly window syndrome. + */ + if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) + win = 0; + if (win > (long)TCP_MAXWIN << tp->rcv_scale) + win = (long)TCP_MAXWIN << tp->rcv_scale; + if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) + win = (long)(tp->rcv_adv - tp->rcv_nxt); + ti->ti_win = htons((uint16_t) (win>>tp->rcv_scale)); + + if (SEQ_GT(tp->snd_up, tp->snd_una)) { + ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); + ti->ti_flags |= TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ + + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + if (len + optlen) + ti->ti_len = htons((uint16_t)(sizeof (struct tcphdr) + + optlen + len)); + ti->ti_sum = cksum(m, (int)(hdrlen + len)); + + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. + */ + if (flags & (TH_SYN|TH_FIN)) { + if (flags & TH_SYN) + tp->snd_nxt++; + if (flags & TH_FIN) { + tp->snd_nxt++; + tp->t_flags |= TF_SENTFIN; + } + } + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtt == 0) { + tp->t_rtt = 1; + tp->t_rtseq = startseq; + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing an ack or a keep-alive probe. + * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. + */ + if (tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_una) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + } else + if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + + /* + * Fill in IP length and desired time to live and + * send to IP level. There should be a better way + * to handle ttl and tos; we could keep them in + * the template, but need a way to checksum without them. + */ + m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ + tcpiph_save = *mtod(m, struct tcpiphdr *); + + switch (so->so_ffamily) { + case AF_INET: + m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip); + m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip); + ip = mtod(m, struct ip *); + + ip->ip_len = m->m_len; + ip->ip_dst = tcpiph_save.ti_dst; + ip->ip_src = tcpiph_save.ti_src; + ip->ip_p = tcpiph_save.ti_pr; + + ip->ip_ttl = IPDEFTTL; + ip->ip_tos = so->so_iptos; + error = ip_output(so, m); + break; + + case AF_INET6: + m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip6); + m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip6); + ip6 = mtod(m, struct ip6 *); + + ip6->ip_pl = tcpiph_save.ti_len; + ip6->ip_dst = tcpiph_save.ti_dst6; + ip6->ip_src = tcpiph_save.ti_src6; + ip6->ip_nh = tcpiph_save.ti_nh6; + + error = ip6_output(so, m, 0); + break; + + default: + g_assert_not_reached(); + } + + if (error) { +out: + return (error); + } + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. + */ + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->last_ack_sent = tp->rcv_nxt; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + if (sendalot) + goto again; + + return (0); +} + +void +tcp_setpersist(struct tcpcb *tp) +{ + int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; + + /* + * Start/restart persistence timer. + */ + TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], + t * tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; +} diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c new file mode 100644 index 0000000000..1db59caa89 --- /dev/null +++ b/slirp/src/tcp_subr.c @@ -0,0 +1,989 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 + * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" + +/* patchable/settable parameters for tcp */ +/* Don't do rfc1323 performance enhancements */ +#define TCP_DO_RFC1323 0 + +/* + * Tcp initialization + */ +void +tcp_init(Slirp *slirp) +{ + slirp->tcp_iss = 1; /* wrong */ + slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; + slirp->tcp_last_so = &slirp->tcb; +} + +void tcp_cleanup(Slirp *slirp) +{ + while (slirp->tcb.so_next != &slirp->tcb) { + tcp_close(sototcpcb(slirp->tcb.so_next)); + } +} + +/* + * Create template to be used to send tcp packets on a connection. + * Call after host entry created, fills + * in a skeletal tcp/ip header, minimizing the amount of work + * necessary when the connection is used. + */ +void +tcp_template(struct tcpcb *tp) +{ + struct socket *so = tp->t_socket; + register struct tcpiphdr *n = &tp->t_template; + + n->ti_mbuf = NULL; + memset(&n->ti, 0, sizeof(n->ti)); + n->ti_x0 = 0; + switch (so->so_ffamily) { + case AF_INET: + n->ti_pr = IPPROTO_TCP; + n->ti_len = htons(sizeof(struct tcphdr)); + n->ti_src = so->so_faddr; + n->ti_dst = so->so_laddr; + n->ti_sport = so->so_fport; + n->ti_dport = so->so_lport; + break; + + case AF_INET6: + n->ti_nh6 = IPPROTO_TCP; + n->ti_len = htons(sizeof(struct tcphdr)); + n->ti_src6 = so->so_faddr6; + n->ti_dst6 = so->so_laddr6; + n->ti_sport = so->so_fport6; + n->ti_dport = so->so_lport6; + break; + + default: + g_assert_not_reached(); + } + + n->ti_seq = 0; + n->ti_ack = 0; + n->ti_x2 = 0; + n->ti_off = 5; + n->ti_flags = 0; + n->ti_win = 0; + n->ti_sum = 0; + n->ti_urp = 0; +} + +/* + * Send a single message to the TCP at address specified by + * the given TCP/IP header. If m == 0, then we make a copy + * of the tcpiphdr at ti and send directly to the addressed host. + * This is used to force keep alive messages out using the TCP + * template for a connection tp->t_template. If flags are given + * then we send a message back to the TCP which originated the + * segment ti, and discard the mbuf containing it and any other + * attached mbufs. + * + * In any case the ack and sequence number of the transmitted + * segment are as specified by the parameters. + */ +void +tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, + tcp_seq ack, tcp_seq seq, int flags, unsigned short af) +{ + register int tlen; + int win = 0; + + DEBUG_CALL("tcp_respond"); + DEBUG_ARG("tp = %p", tp); + DEBUG_ARG("ti = %p", ti); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("ack = %u", ack); + DEBUG_ARG("seq = %u", seq); + DEBUG_ARG("flags = %x", flags); + + if (tp) + win = sbspace(&tp->t_socket->so_rcv); + if (m == NULL) { + if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) + return; + tlen = 0; + m->m_data += IF_MAXLINKHDR; + *mtod(m, struct tcpiphdr *) = *ti; + ti = mtod(m, struct tcpiphdr *); + switch (af) { + case AF_INET: + ti->ti.ti_i4.ih_x1 = 0; + break; + case AF_INET6: + ti->ti.ti_i6.ih_x1 = 0; + break; + default: + g_assert_not_reached(); + } + flags = TH_ACK; + } else { + /* + * ti points into m so the next line is just making + * the mbuf point to ti + */ + m->m_data = (char *)ti; + + m->m_len = sizeof (struct tcpiphdr); + tlen = 0; +#define xchg(a,b,type) { type t; t=a; a=b; b=t; } + switch (af) { + case AF_INET: + xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); + xchg(ti->ti_dport, ti->ti_sport, uint16_t); + break; + case AF_INET6: + xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); + xchg(ti->ti_dport, ti->ti_sport, uint16_t); + break; + default: + g_assert_not_reached(); + } +#undef xchg + } + ti->ti_len = htons((uint16_t)(sizeof (struct tcphdr) + tlen)); + tlen += sizeof (struct tcpiphdr); + m->m_len = tlen; + + ti->ti_mbuf = NULL; + ti->ti_x0 = 0; + ti->ti_seq = htonl(seq); + ti->ti_ack = htonl(ack); + ti->ti_x2 = 0; + ti->ti_off = sizeof (struct tcphdr) >> 2; + ti->ti_flags = flags; + if (tp) + ti->ti_win = htons((uint16_t) (win >> tp->rcv_scale)); + else + ti->ti_win = htons((uint16_t)win); + ti->ti_urp = 0; + ti->ti_sum = 0; + ti->ti_sum = cksum(m, tlen); + + struct tcpiphdr tcpiph_save = *(mtod(m, struct tcpiphdr *)); + struct ip *ip; + struct ip6 *ip6; + + switch (af) { + case AF_INET: + m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip); + m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip); + ip = mtod(m, struct ip *); + ip->ip_len = m->m_len; + ip->ip_dst = tcpiph_save.ti_dst; + ip->ip_src = tcpiph_save.ti_src; + ip->ip_p = tcpiph_save.ti_pr; + + if (flags & TH_RST) { + ip->ip_ttl = MAXTTL; + } else { + ip->ip_ttl = IPDEFTTL; + } + + ip_output(NULL, m); + break; + + case AF_INET6: + m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip6); + m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) + - sizeof(struct ip6); + ip6 = mtod(m, struct ip6 *); + ip6->ip_pl = tcpiph_save.ti_len; + ip6->ip_dst = tcpiph_save.ti_dst6; + ip6->ip_src = tcpiph_save.ti_src6; + ip6->ip_nh = tcpiph_save.ti_nh6; + + ip6_output(NULL, m, 0); + break; + + default: + g_assert_not_reached(); + } +} + +/* + * Create a new TCP control block, making an + * empty reassembly queue and hooking it to the argument + * protocol control block. + */ +struct tcpcb * +tcp_newtcpcb(struct socket *so) +{ + register struct tcpcb *tp; + + tp = (struct tcpcb *)malloc(sizeof(*tp)); + if (tp == NULL) + return ((struct tcpcb *)0); + + memset((char *) tp, 0, sizeof(struct tcpcb)); + tp->seg_next = tp->seg_prev = (struct tcpiphdr*)tp; + tp->t_maxseg = (so->so_ffamily == AF_INET) ? TCP_MSS : TCP6_MSS; + + tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; + tp->t_socket = so; + + /* + * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no + * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives + * reasonable initial retransmit time. + */ + tp->t_srtt = TCPTV_SRTTBASE; + tp->t_rttvar = TCPTV_SRTTDFLT << 2; + tp->t_rttmin = TCPTV_MIN; + + TCPT_RANGESET(tp->t_rxtcur, + ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, + TCPTV_MIN, TCPTV_REXMTMAX); + + tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->t_state = TCPS_CLOSED; + + so->so_tcpcb = tp; + + return (tp); +} + +/* + * Drop a TCP connection, reporting + * the specified error. If connection is synchronized, + * then send a RST to peer. + */ +struct tcpcb *tcp_drop(struct tcpcb *tp, int err) +{ + DEBUG_CALL("tcp_drop"); + DEBUG_ARG("tp = %p", tp); + DEBUG_ARG("errno = %d", errno); + + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_state = TCPS_CLOSED; + (void) tcp_output(tp); + } + return (tcp_close(tp)); +} + +/* + * Close a TCP control block: + * discard all space held by the tcp + * discard internet protocol block + * wake up any sleepers + */ +struct tcpcb * +tcp_close(struct tcpcb *tp) +{ + register struct tcpiphdr *t; + struct socket *so = tp->t_socket; + Slirp *slirp = so->slirp; + register struct mbuf *m; + + DEBUG_CALL("tcp_close"); + DEBUG_ARG("tp = %p", tp); + + /* free the reassembly queue, if any */ + t = tcpfrag_list_first(tp); + while (!tcpfrag_list_end(t, tp)) { + t = tcpiphdr_next(t); + m = tcpiphdr_prev(t)->ti_mbuf; + remque(tcpiphdr2qlink(tcpiphdr_prev(t))); + m_free(m); + } + free(tp); + so->so_tcpcb = NULL; + /* clobber input socket cache if we're closing the cached connection */ + if (so == slirp->tcp_last_so) + slirp->tcp_last_so = &slirp->tcb; + so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); + closesocket(so->s); + sbfree(&so->so_rcv); + sbfree(&so->so_snd); + sofree(so); + return ((struct tcpcb *)0); +} + +/* + * TCP protocol interface to socket abstraction. + */ + +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. + * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +void +tcp_sockclosed(struct tcpcb *tp) +{ + + DEBUG_CALL("tcp_sockclosed"); + DEBUG_ARG("tp = %p", tp); + + if (!tp) { + return; + } + + switch (tp->t_state) { + + case TCPS_CLOSED: + case TCPS_LISTEN: + case TCPS_SYN_SENT: + tp->t_state = TCPS_CLOSED; + tp = tcp_close(tp); + break; + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + tp->t_state = TCPS_FIN_WAIT_1; + break; + + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_LAST_ACK; + break; + } + tcp_output(tp); +} + +/* + * Connect to a host on the Internet + * Called by tcp_input + * Only do a connect, the tcp fields will be set in tcp_input + * return 0 if there's a result of the connect, + * else return -1 means we're still connecting + * The return value is almost always -1 since the socket is + * nonblocking. Connect returns after the SYN is sent, and does + * not wait for ACK+SYN. + */ +int tcp_fconnect(struct socket *so, unsigned short af) +{ + int ret=0; + + DEBUG_CALL("tcp_fconnect"); + DEBUG_ARG("so = %p", so); + + ret = so->s = slirp_socket(af, SOCK_STREAM, 0); + if (ret >= 0) { + int opt, s=so->s; + struct sockaddr_storage addr; + + slirp_set_nonblock(s); + so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); + slirp_socket_set_fast_reuse(s); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); + opt = 1; + setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); + + addr = so->fhost.ss; + DEBUG_CALL(" connect()ing"); + sotranslate_out(so, &addr); + + /* We don't care what port we get */ + ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); + + /* + * If it's not in progress, it failed, so we just return 0, + * without clearing SS_NOFDREF + */ + soisfconnecting(so); + } + + return(ret); +} + +/* + * Accept the socket and connect to the local-host + * + * We have a problem. The correct thing to do would be + * to first connect to the local-host, and only if the + * connection is accepted, then do an accept() here. + * But, a) we need to know who's trying to connect + * to the socket to be able to SYN the local-host, and + * b) we are already connected to the foreign host by + * the time it gets to accept(), so... We simply accept + * here and SYN the local-host. + */ +void tcp_connect(struct socket *inso) +{ + Slirp *slirp = inso->slirp; + struct socket *so; + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct tcpcb *tp; + int s, opt; + + DEBUG_CALL("tcp_connect"); + DEBUG_ARG("inso = %p", inso); + + /* + * If it's an SS_ACCEPTONCE socket, no need to socreate() + * another socket, just use the accept() socket. + */ + if (inso->so_state & SS_FACCEPTONCE) { + /* FACCEPTONCE already have a tcpcb */ + so = inso; + } else { + so = socreate(slirp); + if (tcp_attach(so) < 0) { + g_free(so); /* NOT sofree */ + return; + } + so->lhost = inso->lhost; + so->so_ffamily = inso->so_ffamily; + } + + tcp_mss(sototcpcb(so), 0); + + s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); + if (s < 0) { + tcp_close(sototcpcb(so)); /* This will sofree() as well */ + return; + } + slirp_set_nonblock(s); + so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); + slirp_socket_set_fast_reuse(s); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); + slirp_socket_set_nodelay(s); + + so->fhost.ss = addr; + sotranslate_accept(so); + + /* Close the accept() socket, set right state */ + if (inso->so_state & SS_FACCEPTONCE) { + /* If we only accept once, close the accept() socket */ + so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); + closesocket(so->s); + + /* Don't select it yet, even though we have an FD */ + /* if it's not FACCEPTONCE, it's already NOFDREF */ + so->so_state = SS_NOFDREF; + } + so->s = s; + so->so_state |= SS_INCOMING; + + so->so_iptos = tcp_tos(so); + tp = sototcpcb(so); + + tcp_template(tp); + + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->iss = slirp->tcp_iss; + slirp->tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + tcp_output(tp); +} + +/* + * Attach a TCPCB to a socket. + */ +int +tcp_attach(struct socket *so) +{ + if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) + return -1; + + insque(so, &so->slirp->tcb); + + return 0; +} + +/* + * Set the socket's type of service field + */ +static const struct tos_t tcptos[] = { + {0, 20, IPTOS_THROUGHPUT, 0}, /* ftp data */ + {21, 21, IPTOS_LOWDELAY, EMU_FTP}, /* ftp control */ + {0, 23, IPTOS_LOWDELAY, 0}, /* telnet */ + {0, 80, IPTOS_THROUGHPUT, 0}, /* WWW */ + {0, 513, IPTOS_LOWDELAY, EMU_RLOGIN|EMU_NOCONNECT}, /* rlogin */ + {0, 544, IPTOS_LOWDELAY, EMU_KSH}, /* kshell */ + {0, 543, IPTOS_LOWDELAY, 0}, /* klogin */ + {0, 6667, IPTOS_THROUGHPUT, EMU_IRC}, /* IRC */ + {0, 6668, IPTOS_THROUGHPUT, EMU_IRC}, /* IRC undernet */ + {0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ + {0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ + {0, 0, 0, 0} +}; + +static struct emu_t *tcpemu = NULL; + +/* + * Return TOS according to the above table + */ +uint8_t +tcp_tos(struct socket *so) +{ + int i = 0; + struct emu_t *emup; + + while(tcptos[i].tos) { + if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || + (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { + so->so_emu = tcptos[i].emu; + return tcptos[i].tos; + } + i++; + } + + /* Nope, lets see if there's a user-added one */ + for (emup = tcpemu; emup; emup = emup->next) { + if ((emup->fport && (ntohs(so->so_fport) == emup->fport)) || + (emup->lport && (ntohs(so->so_lport) == emup->lport))) { + so->so_emu = emup->emu; + return emup->tos; + } + } + + return 0; +} + +/* + * Emulate programs that try and connect to us + * This includes ftp (the data connection is + * initiated by the server) and IRC (DCC CHAT and + * DCC SEND) for now + * + * NOTE: It's possible to crash SLiRP by sending it + * unstandard strings to emulate... if this is a problem, + * more checks are needed here + * + * XXX Assumes the whole command came in one packet + * + * XXX Some ftp clients will have their TOS set to + * LOWDELAY and so Nagel will kick in. Because of this, + * we'll get the first letter, followed by the rest, so + * we simply scan for ORT instead of PORT... + * DCC doesn't have this problem because there's other stuff + * in the packet before the DCC command. + * + * Return 1 if the mbuf m is still valid and should be + * sbappend()ed + * + * NOTE: if you return 0 you MUST m_free() the mbuf! + */ +int +tcp_emu(struct socket *so, struct mbuf *m) +{ + Slirp *slirp = so->slirp; + unsigned n1, n2, n3, n4, n5, n6; + char buff[257]; + uint32_t laddr; + unsigned lport; + char *bptr; + + DEBUG_CALL("tcp_emu"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); + + switch(so->so_emu) { + int x, i; + + /* TODO: IPv6 */ + case EMU_IDENT: + /* + * Identification protocol as per rfc-1413 + */ + + { + struct socket *tmpso; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct sbuf *so_rcv = &so->so_rcv; + + if (m->m_len > so_rcv->sb_datalen + - (so_rcv->sb_wptr - so_rcv->sb_data)) { + return 1; + } + + memcpy(so_rcv->sb_wptr, m->m_data, m->m_len); + so_rcv->sb_wptr += m->m_len; + so_rcv->sb_rptr += m->m_len; + m->m_data[m->m_len] = 0; /* NULL terminate */ + if (strchr(m->m_data, '\r') || strchr(m->m_data, '\n')) { + if (sscanf(so_rcv->sb_data, "%u%*[ ,]%u", &n1, &n2) == 2) { + HTONS(n1); + HTONS(n2); + /* n2 is the one on our host */ + for (tmpso = slirp->tcb.so_next; + tmpso != &slirp->tcb; + tmpso = tmpso->so_next) { + if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && + tmpso->so_lport == n2 && + tmpso->so_faddr.s_addr == so->so_faddr.s_addr && + tmpso->so_fport == n1) { + if (getsockname(tmpso->s, + (struct sockaddr *)&addr, &addrlen) == 0) + n2 = addr.sin_port; + break; + } + } + NTOHS(n1); + NTOHS(n2); + so_rcv->sb_cc = snprintf(so_rcv->sb_data, + so_rcv->sb_datalen, + "%d,%d\r\n", n1, n2); + so_rcv->sb_rptr = so_rcv->sb_data; + so_rcv->sb_wptr = so_rcv->sb_data + so_rcv->sb_cc; + } + } + m_free(m); + return 0; + } + + case EMU_FTP: /* ftp */ + *(m->m_data+m->m_len) = 0; /* NUL terminate for strstr */ + if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { + /* + * Need to emulate the PORT command + */ + x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", + &n1, &n2, &n3, &n4, &n5, &n6, buff); + if (x < 6) + return 1; + + laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); + lport = htons((n5 << 8) | (n6)); + + if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, + lport, SS_FACCEPTONCE)) == NULL) { + return 1; + } + n6 = ntohs(so->so_fport); + + n5 = (n6 >> 8) & 0xff; + n6 &= 0xff; + + laddr = ntohl(so->so_faddr.s_addr); + + n1 = ((laddr >> 24) & 0xff); + n2 = ((laddr >> 16) & 0xff); + n3 = ((laddr >> 8) & 0xff); + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size - m->m_len, + "ORT %d,%d,%d,%d,%d,%d\r\n%s", + n1, n2, n3, n4, n5, n6, x==7?buff:""); + return 1; + } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { + /* + * Need to emulate the PASV response + */ + x = sscanf(bptr, "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", + &n1, &n2, &n3, &n4, &n5, &n6, buff); + if (x < 6) + return 1; + + laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); + lport = htons((n5 << 8) | (n6)); + + if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, + lport, SS_FACCEPTONCE)) == NULL) { + return 1; + } + n6 = ntohs(so->so_fport); + + n5 = (n6 >> 8) & 0xff; + n6 &= 0xff; + + laddr = ntohl(so->so_faddr.s_addr); + + n1 = ((laddr >> 24) & 0xff); + n2 = ((laddr >> 16) & 0xff); + n3 = ((laddr >> 8) & 0xff); + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size - m->m_len, + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x==7?buff:""); + + return 1; + } + + return 1; + + case EMU_KSH: + /* + * The kshell (Kerberos rsh) and shell services both pass + * a local port port number to carry signals to the server + * and stderr to the client. It is passed at the beginning + * of the connection as a NUL-terminated decimal ASCII string. + */ + so->so_emu = 0; + for (lport = 0, i = 0; i < m->m_len-1; ++i) { + if (m->m_data[i] < '0' || m->m_data[i] > '9') + return 1; /* invalid number */ + lport *= 10; + lport += m->m_data[i] - '0'; + } + if (m->m_data[m->m_len-1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) + m->m_len = snprintf(m->m_data, m->m_size, "%d", + ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: + /* + * Need to emulate DCC CHAT, DCC SEND and DCC MOVE + */ + *(m->m_data+m->m_len) = 0; /* NULL terminate the string for strstr */ + if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) + return 1; + + /* The %256s is for the broken mIRC */ + if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, + htonl(laddr), htons(lport), + SS_FACCEPTONCE)) == NULL) { + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size, + "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, + htonl(laddr), htons(lport), + SS_FACCEPTONCE)) == NULL) { + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size, + "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, + htonl(laddr), htons(lport), + SS_FACCEPTONCE)) == NULL) { + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size, + "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } + return 1; + + case EMU_REALAUDIO: + /* + * RealAudio emulation - JP. We must try to parse the incoming + * data and try to find the two characters that contain the + * port number. Then we redirect an udp port and replace the + * number with the real port we got. + * + * The 1.0 beta versions of the player are not supported + * any more. + * + * A typical packet for player version 1.0 (release version): + * + * 0000:50 4E 41 00 05 + * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P + * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH + * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v + * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB + * + * Now the port number 0x1BD7 is found at offset 0x04 of the + * Now the port number 0x1BD7 is found at offset 0x04 of the + * second packet. This time we received five bytes first and + * then the rest. You never know how many bytes you get. + * + * A typical packet for player version 2.0 (beta): + * + * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. + * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 + * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ + * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas + * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B + * + * Port number 0x1BC1 is found at offset 0x0d. + * + * This is just a horrible switch statement. Variable ra tells + * us where we're going. + */ + + bptr = m->m_data; + while (bptr < m->m_data + m->m_len) { + uint16_t p; + static int ra = 0; + char ra_tbl[4]; + + ra_tbl[0] = 0x50; + ra_tbl[1] = 0x4e; + ra_tbl[2] = 0x41; + ra_tbl[3] = 0; + + switch (ra) { + case 0: + case 2: + case 3: + if (*bptr++ != ra_tbl[ra]) { + ra = 0; + continue; + } + break; + + case 1: + /* + * We may get 0x50 several times, ignore them + */ + if (*bptr == 0x50) { + ra = 1; + bptr++; + continue; + } else if (*bptr++ != ra_tbl[ra]) { + ra = 0; + continue; + } + break; + + case 4: + /* + * skip version number + */ + bptr++; + break; + + case 5: + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of + * the player this may need to be modified. + */ + if (*(bptr + 1) == 0x02) + bptr += 8; + else + bptr += 4; + break; + + case 6: + /* This is the field containing the port + * number that RA-player is listening to. + */ + lport = (((uint8_t*)bptr)[0] << 8) + + ((uint8_t *)bptr)[1]; + if (lport < 6970) + lport += 256; /* don't know why */ + if (lport < 6970 || lport > 7170) + return 1; /* failed */ + + /* try to get udp port between 6970 - 7170 */ + for (p = 6970; p < 7071; p++) { + if (udp_listen(slirp, INADDR_ANY, + htons(p), + so->so_laddr.s_addr, + htons(lport), + SS_FACCEPTONCE)) { + break; + } + } + if (p == 7071) + p = 0; + *(uint8_t *)bptr++ = (p >> 8) & 0xff; + *(uint8_t *)bptr = p & 0xff; + ra = 0; + return 1; /* port redirected, we're done */ + break; + + default: + ra = 0; + } + ra++; + } + return 1; + + default: + /* Ooops, not emulated, won't call tcp_emu again */ + so->so_emu = 0; + return 1; + } +} + +/* + * Do misc. config of SLiRP while its running. + * Return 0 if this connections is to be closed, 1 otherwise, + * return 2 if this is a command-line connection + */ +int tcp_ctl(struct socket *so) +{ + Slirp *slirp = so->slirp; + struct sbuf *sb = &so->so_snd; + struct gfwd_list *ex_ptr; + + DEBUG_CALL("tcp_ctl"); + DEBUG_ARG("so = %p", so); + + /* TODO: IPv6 */ + if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { + /* Check if it's pty_exec */ + for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_fport == so->so_fport && + so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { + if (ex_ptr->write_cb) { + so->s = -1; + so->guestfwd = ex_ptr; + return 1; + } + DEBUG_MISC(" executing %s", ex_ptr->ex_exec); + return fork_exec(so, ex_ptr->ex_exec); + } + } + } + sb->sb_cc = + snprintf(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), + "Error: No application configured.\r\n"); + sb->sb_wptr += sb->sb_cc; + return 0; +} diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c new file mode 100644 index 0000000000..7be54570af --- /dev/null +++ b/slirp/src/tcp_timer.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 + * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp + */ + +#include "slirp.h" + +static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); + +/* + * Fast timeout routine for processing delayed acks + */ +void +tcp_fasttimo(Slirp *slirp) +{ + register struct socket *so; + register struct tcpcb *tp; + + DEBUG_CALL("tcp_fasttimo"); + + so = slirp->tcb.so_next; + if (so) + for (; so != &slirp->tcb; so = so->so_next) + if ((tp = (struct tcpcb *)so->so_tcpcb) && + (tp->t_flags & TF_DELACK)) { + tp->t_flags &= ~TF_DELACK; + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + } +} + +/* + * Tcp protocol timeout routine called every 500 ms. + * Updates the timers in all active tcb's and + * causes finite state machine actions if timers expire. + */ +void +tcp_slowtimo(Slirp *slirp) +{ + register struct socket *ip, *ipnxt; + register struct tcpcb *tp; + register int i; + + DEBUG_CALL("tcp_slowtimo"); + + /* + * Search through tcb's and update active timers. + */ + ip = slirp->tcb.so_next; + if (ip == NULL) { + return; + } + for (; ip != &slirp->tcb; ip = ipnxt) { + ipnxt = ip->so_next; + tp = sototcpcb(ip); + if (tp == NULL) { + continue; + } + for (i = 0; i < TCPT_NTIMERS; i++) { + if (tp->t_timer[i] && --tp->t_timer[i] == 0) { + tcp_timers(tp,i); + if (ipnxt->so_prev != ip) + goto tpgone; + } + } + tp->t_idle++; + if (tp->t_rtt) + tp->t_rtt++; +tpgone: + ; + } + slirp->tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ + slirp->tcp_now++; /* for timestamps */ +} + +/* + * Cancel all timers for TCP tp. + */ +void +tcp_canceltimers(struct tcpcb *tp) +{ + register int i; + + for (i = 0; i < TCPT_NTIMERS; i++) + tp->t_timer[i] = 0; +} + +const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; + +/* + * TCP timer processing. + */ +static struct tcpcb * +tcp_timers(register struct tcpcb *tp, int timer) +{ + register int rexmt; + + DEBUG_CALL("tcp_timers"); + + switch (timer) { + + /* + * 2 MSL timeout in shutdown went off. If we're closed but + * still waiting for peer to close and connection has been idle + * too long, or if 2MSL time is up from TIME_WAIT, delete connection + * control block. Otherwise, check again in a bit. + */ + case TCPT_2MSL: + if (tp->t_state != TCPS_TIME_WAIT && + tp->t_idle <= TCP_MAXIDLE) + tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; + else + tp = tcp_close(tp); + break; + + /* + * Retransmission timer went off. Message has not + * been acked within retransmit interval. Back off + * to a longer retransmit interval and retransmit one segment. + */ + case TCPT_REXMT: + + /* + * XXXXX If a packet has timed out, then remove all the queued + * packets for that session. + */ + + if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { + /* + * This is a hack to suit our terminal server here at the uni of canberra + * since they have trouble with zeroes... It usually lets them through + * unharmed, but under some conditions, it'll eat the zeros. If we + * keep retransmitting it, it'll keep eating the zeroes, so we keep + * retransmitting, and eventually the connection dies... + * (this only happens on incoming data) + * + * So, if we were gonna drop the connection from too many retransmits, + * don't... instead halve the t_maxseg, which might break up the NULLs and + * let them through + * + * *sigh* + */ + + tp->t_maxseg >>= 1; + if (tp->t_maxseg < 32) { + /* + * We tried our best, now the connection must die! + */ + tp->t_rxtshift = TCP_MAXRXTSHIFT; + tp = tcp_drop(tp, tp->t_softerror); + /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ + return (tp); /* XXX */ + } + + /* + * Set rxtshift to 6, which is still at the maximum + * backoff time + */ + tp->t_rxtshift = 6; + } + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * If losing, let the lower level know and try for + * a better route. Also, if we backed off this far, + * our srtt estimate is probably bogus. Clobber it + * so we'll take the next rtt measurement as our srtt; + * move the current srtt into rttvar to keep the current + * retransmit times until then. + */ + if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { + tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); + tp->t_srtt = 0; + } + tp->snd_nxt = tp->snd_una; + /* + * If timing a segment in this window, stop the timer. + */ + tp->t_rtt = 0; + /* + * Close the congestion window down to one segment + * (we'll open it by one segment for each ack we get). + * Since we probably have a window's worth of unacked + * data accumulated, this "slow start" keeps us from + * dumping all that data as back-to-back packets (which + * might overwhelm an intermediate gateway). + * + * There are two phases to the opening: Initially we + * open by one mss on each ack. This makes the window + * size increase exponentially with time. If the + * window is larger than the path can handle, this + * exponential growth results in dropped packet(s) + * almost immediately. To get more time between + * drops but still "push" the network to take advantage + * of improving conditions, we switch from exponential + * to linear window opening at some threshold size. + * For a threshold, we use half the current window + * size, truncated to a multiple of the mss. + * + * (the minimum cwnd that will give us exponential + * growth is 2 mss. We don't allow the threshold + * to go below this.) + */ + { + unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; + if (win < 2) + win = 2; + tp->snd_cwnd = tp->t_maxseg; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_dupacks = 0; + } + (void) tcp_output(tp); + break; + + /* + * Persistence timer into zero window. + * Force a byte to be output, if possible. + */ + case TCPT_PERSIST: + tcp_setpersist(tp); + tp->t_force = 1; + (void) tcp_output(tp); + tp->t_force = 0; + break; + + /* + * Keep-alive timer went off; send something + * or drop connection if idle for too long. + */ + case TCPT_KEEP: + if (tp->t_state < TCPS_ESTABLISHED) + goto dropit; + + if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { + if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) + goto dropit; + /* + * Send a packet designed to force a response + * if the peer is up and reachable: + * either an ACK if the connection is still alive, + * or an RST if the peer has closed the connection + * due to timeout or reboot. + * Using sequence number tp->snd_una-1 + * causes the transmitted zero-length segment + * to lie outside the receive window; + * by the protocol spec, this requires the + * correspondent TCP to respond. + */ + tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0, + tp->t_socket->so_ffamily); + tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; + } else + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; + break; + + dropit: + tp = tcp_drop(tp, 0); + break; + } + + return (tp); +} diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h new file mode 100644 index 0000000000..b25b3911d7 --- /dev/null +++ b/slirp/src/tcp_timer.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 + * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp + */ + +#ifndef TCP_TIMER_H +#define TCP_TIMER_H + +/* + * Definitions of the TCP timers. These timers are counted + * down PR_SLOWHZ times a second. + */ +#define TCPT_NTIMERS 4 + +#define TCPT_REXMT 0 /* retransmit */ +#define TCPT_PERSIST 1 /* retransmit persistence */ +#define TCPT_KEEP 2 /* keep alive */ +#define TCPT_2MSL 3 /* 2*msl quiet time timer */ + +/* + * The TCPT_REXMT timer is used to force retransmissions. + * The TCP has the TCPT_REXMT timer set whenever segments + * have been sent for which ACKs are expected but not yet + * received. If an ACK is received which advances tp->snd_una, + * then the retransmit timer is cleared (if there are no more + * outstanding segments) or reset to the base value (if there + * are more ACKs expected). Whenever the retransmit timer goes off, + * we retransmit one unacknowledged segment, and do a backoff + * on the retransmit timer. + * + * The TCPT_PERSIST timer is used to keep window size information + * flowing even if the window goes shut. If all previous transmissions + * have been acknowledged (so that there are no retransmissions in progress), + * and the window is too small to bother sending anything, then we start + * the TCPT_PERSIST timer. When it expires, if the window is nonzero, + * we go to transmit state. Otherwise, at intervals send a single byte + * into the peer's window to force him to update our window information. + * We do this at most as often as TCPT_PERSMIN time intervals, + * but no more frequently than the current estimate of round-trip + * packet time. The TCPT_PERSIST timer is cleared whenever we receive + * a window update from the peer. + * + * The TCPT_KEEP timer is used to keep connections alive. If an + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, + * but not yet established, then we drop the connection. Once the connection + * is established, if the connection is idle for TCPTV_KEEP_IDLE time + * (and keepalives have been enabled on the socket), we begin to probe + * the connection. We force the peer to send us a segment by sending: + * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> + * This segment is (deliberately) outside the window, and should elicit + * an ack segment in response from the peer. If, despite the TCPT_KEEP + * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE + * amount of time probing, then we drop the connection. + */ + +/* + * Time constants. + */ +#define TCPTV_MSL ( 5*PR_SLOWHZ) /* max seg lifetime (hah!) */ + +#define TCPTV_SRTTBASE 0 /* base roundtrip time; + if 0, no idea yet */ +#define TCPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */ + +#define TCPTV_PERSMIN ( 5*PR_SLOWHZ) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */ + +#define TCPTV_KEEP_INIT ( 75*PR_SLOWHZ) /* initial connect keep alive */ +#define TCPTV_KEEP_IDLE (120*60*PR_SLOWHZ) /* dflt time before probing */ +#define TCPTV_KEEPINTVL ( 75*PR_SLOWHZ) /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ + +#define TCPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */ +#define TCPTV_REXMTMAX ( 12*PR_SLOWHZ) /* max allowable REXMT value */ + +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ + +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ + + +/* + * Force a time value to be in a certain range. + */ +#define TCPT_RANGESET(tv, value, tvmin, tvmax) { \ + (tv) = (value); \ + if ((tv) < (tvmin)) \ + (tv) = (tvmin); \ + else if ((tv) > (tvmax)) \ + (tv) = (tvmax); \ +} + +extern const int tcp_backoff[]; + +struct tcpcb; + +void tcp_fasttimo(Slirp *); +void tcp_slowtimo(Slirp *); +void tcp_canceltimers(struct tcpcb *); + +#endif diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h new file mode 100644 index 0000000000..27ef1a51cb --- /dev/null +++ b/slirp/src/tcp_var.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 1982, 1986, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 + * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp + */ + +#ifndef TCP_VAR_H +#define TCP_VAR_H + +#include "tcpip.h" +#include "tcp_timer.h" + +/* + * Tcp control block, one per tcp; fields: + */ +struct tcpcb { + struct tcpiphdr *seg_next; /* sequencing queue */ + struct tcpiphdr *seg_prev; + short t_state; /* state of this connection */ + short t_timer[TCPT_NTIMERS]; /* tcp timers */ + short t_rxtshift; /* log(2) of rexmt exp. backoff */ + short t_rxtcur; /* current retransmit value */ + short t_dupacks; /* consecutive dup acks recd */ + uint16_t t_maxseg; /* maximum segment size */ + uint8_t t_force; /* 1 if forcing out a byte */ + uint16_t t_flags; +#define TF_ACKNOW 0x0001 /* ack peer immediately */ +#define TF_DELACK 0x0002 /* ack, but try to delay it */ +#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x0008 /* don't use tcp options */ +#define TF_SENTFIN 0x0010 /* have sent FIN */ +#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ + + struct tcpiphdr t_template; /* static skeletal packet for xmit */ + + struct socket *t_socket; /* back pointer to socket */ +/* + * The following fields are used as in the protocol specification. + * See RFC783, Dec. 1981, page 21. + */ +/* send sequence variables */ + tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + uint32_t snd_wnd; /* send window */ +/* receive sequence variables */ + uint32_t rcv_wnd; /* receive window */ + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_up; /* receive urgent pointer */ + tcp_seq irs; /* initial receive sequence number */ +/* + * Additional variables for this implementation. + */ +/* receive variables */ + tcp_seq rcv_adv; /* advertised window */ +/* retransmit variables */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ +/* congestion control (for slow start, source quench, retransmit after loss) */ + uint32_t snd_cwnd; /* congestion-controlled window */ + uint32_t snd_ssthresh; /* snd_cwnd size threshold for + * for slow start exponential to + * linear switch + */ +/* + * transmit timing stuff. See below for scale of srtt and rttvar. + * "Variance" is actually smoothed difference. + */ + short t_idle; /* inactivity time */ + short t_rtt; /* round trip time */ + tcp_seq t_rtseq; /* sequence number being timed */ + short t_srtt; /* smoothed round-trip time */ + short t_rttvar; /* variance in round-trip time */ + uint16_t t_rttmin; /* minimum rtt allowed */ + uint32_t max_sndwnd; /* largest window peer has offered */ + +/* out-of-band data */ + uint8_t t_oobflags; /* have some */ + uint8_t t_iobc; /* input character */ +#define TCPOOB_HAVEDATA 0x01 +#define TCPOOB_HADDATA 0x02 + short t_softerror; /* possible error not yet reported */ + +/* RFC 1323 variables */ + uint8_t snd_scale; /* window scaling for send window */ + uint8_t rcv_scale; /* window scaling for recv window */ + uint8_t request_r_scale; /* pending window scaling */ + uint8_t requested_s_scale; + uint32_t ts_recent; /* timestamp echo data */ + uint32_t ts_recent_age; /* when last updated */ + tcp_seq last_ack_sent; + +}; + +#define sototcpcb(so) ((so)->so_tcpcb) + +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ +#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + * This macro assumes that the value of TCP_RTTVAR_SCALE + * is the same as the multiplier for rttvar. + */ +#define TCP_REXMTVAL(tp) \ + (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) + +#endif diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h new file mode 100644 index 0000000000..07dbf2c432 --- /dev/null +++ b/slirp/src/tcpip.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 + * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp + */ + +#ifndef TCPIP_H +#define TCPIP_H + +/* + * Tcp+ip header, after ip options removed. + */ +struct tcpiphdr { + struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ + union { + struct { + struct in_addr ih_src; /* source internet address */ + struct in_addr ih_dst; /* destination internet address */ + uint8_t ih_x1; /* (unused) */ + uint8_t ih_pr; /* protocol */ + } ti_i4; + struct { + struct in6_addr ih_src; + struct in6_addr ih_dst; + uint8_t ih_x1; + uint8_t ih_nh; + } ti_i6; + } ti; + uint16_t ti_x0; + uint16_t ti_len; /* protocol length */ + struct tcphdr ti_t; /* tcp header */ +}; +#define ti_mbuf ih_mbuf.mptr +#define ti_pr ti.ti_i4.ih_pr +#define ti_src ti.ti_i4.ih_src +#define ti_dst ti.ti_i4.ih_dst +#define ti_src6 ti.ti_i6.ih_src +#define ti_dst6 ti.ti_i6.ih_dst +#define ti_nh6 ti.ti_i6.ih_nh +#define ti_sport ti_t.th_sport +#define ti_dport ti_t.th_dport +#define ti_seq ti_t.th_seq +#define ti_ack ti_t.th_ack +#define ti_x2 ti_t.th_x2 +#define ti_off ti_t.th_off +#define ti_flags ti_t.th_flags +#define ti_win ti_t.th_win +#define ti_sum ti_t.th_sum +#define ti_urp ti_t.th_urp + +#define tcpiphdr2qlink(T) ((struct qlink*)(((char*)(T)) - sizeof(struct qlink))) +#define qlink2tcpiphdr(Q) ((struct tcpiphdr*)(((char*)(Q)) + sizeof(struct qlink))) +#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) +#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) +#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) +#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink*)(T)) +#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr*)(T)) + +/* This is the difference between the size of a tcpiphdr structure, and the + * size of actual ip+tcp headers, rounded up since we need to align data. */ +#define TCPIPHDR_DELTA\ + (MAX(0,\ + (sizeof(struct tcpiphdr)\ + - sizeof(struct ip) - sizeof(struct tcphdr) + 3) & ~3)) + +/* + * Just a clean way to get to the first byte + * of the packet + */ +struct tcpiphdr_2 { + struct tcpiphdr dummy; + char first_char; +}; + +#endif diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c new file mode 100644 index 0000000000..2d8f978786 --- /dev/null +++ b/slirp/src/tftp.c @@ -0,0 +1,462 @@ +/* + * tftp.c - a simple, read-only tftp server for qemu + * + * Copyright (c) 2004 Magnus Damm <damm@opensource.se> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "slirp.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +static inline int tftp_session_in_use(struct tftp_session *spt) +{ + return (spt->slirp != NULL); +} + +static inline void tftp_session_update(struct tftp_session *spt) +{ + spt->timestamp = curtime; +} + +static void tftp_session_terminate(struct tftp_session *spt) +{ + if (spt->fd >= 0) { + close(spt->fd); + spt->fd = -1; + } + g_free(spt->filename); + spt->slirp = NULL; +} + +static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, + struct tftp_t *tp) +{ + struct tftp_session *spt; + int k; + + for (k = 0; k < TFTP_SESSIONS_MAX; k++) { + spt = &slirp->tftp_sessions[k]; + + if (!tftp_session_in_use(spt)) + goto found; + + /* sessions time out after 5 inactive seconds */ + if ((int)(curtime - spt->timestamp) > 5000) { + tftp_session_terminate(spt); + goto found; + } + } + + return -1; + + found: + memset(spt, 0, sizeof(*spt)); + memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); + spt->fd = -1; + spt->block_size = 512; + spt->client_port = tp->udp.uh_sport; + spt->slirp = slirp; + + tftp_session_update(spt); + + return k; +} + +static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, + struct tftp_t *tp) +{ + struct tftp_session *spt; + int k; + + for (k = 0; k < TFTP_SESSIONS_MAX; k++) { + spt = &slirp->tftp_sessions[k]; + + if (tftp_session_in_use(spt)) { + if (sockaddr_equal(&spt->client_addr, srcsas)) { + if (spt->client_port == tp->udp.uh_sport) { + return k; + } + } + } + } + + return -1; +} + +static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, + uint8_t *buf, int len) +{ + int bytes_read = 0; + + if (spt->fd < 0) { + spt->fd = open(spt->filename, O_RDONLY | O_BINARY); + } + + if (spt->fd < 0) { + return -1; + } + + if (len) { + lseek(spt->fd, block_nr * spt->block_size, SEEK_SET); + + bytes_read = read(spt->fd, buf, len); + } + + return bytes_read; +} + +static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, + struct mbuf *m) +{ + struct tftp_t *tp; + + memset(m->m_data, 0, m->m_size); + + m->m_data += IF_MAXLINKHDR; + if (spt->client_addr.ss_family == AF_INET6) { + m->m_data += sizeof(struct ip6); + } else { + m->m_data += sizeof(struct ip); + } + tp = (void *)m->m_data; + m->m_data += sizeof(struct udphdr); + + return tp; +} + +static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, + struct tftp_t *recv_tp) +{ + if (spt->client_addr.ss_family == AF_INET6) { + struct sockaddr_in6 sa6, da6; + + sa6.sin6_addr = spt->slirp->vhost_addr6; + sa6.sin6_port = recv_tp->udp.uh_dport; + da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; + da6.sin6_port = spt->client_port; + + udp6_output(NULL, m, &sa6, &da6); + } else { + struct sockaddr_in sa4, da4; + + sa4.sin_addr = spt->slirp->vhost_addr; + sa4.sin_port = recv_tp->udp.uh_dport; + da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; + da4.sin_port = spt->client_port; + + udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); + } +} + +static int tftp_send_oack(struct tftp_session *spt, + const char *keys[], uint32_t values[], int nb, + struct tftp_t *recv_tp) +{ + struct mbuf *m; + struct tftp_t *tp; + int i, n = 0; + + m = m_get(spt->slirp); + + if (!m) + return -1; + + tp = tftp_prep_mbuf_data(spt, m); + + tp->tp_op = htons(TFTP_OACK); + for (i = 0; i < nb; i++) { + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", + keys[i]) + 1; + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", + values[i]) + 1; + } + + m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + n + - sizeof(struct udphdr); + tftp_udp_output(spt, m, recv_tp); + + return 0; +} + +static void tftp_send_error(struct tftp_session *spt, + uint16_t errorcode, const char *msg, + struct tftp_t *recv_tp) +{ + struct mbuf *m; + struct tftp_t *tp; + + DEBUG_TFTP("tftp error msg: %s", msg); + + m = m_get(spt->slirp); + + if (!m) { + goto out; + } + + tp = tftp_prep_mbuf_data(spt, m); + + tp->tp_op = htons(TFTP_ERROR); + tp->x.tp_error.tp_error_code = htons(errorcode); + slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), msg); + + m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + strlen(msg) + - sizeof(struct udphdr); + tftp_udp_output(spt, m, recv_tp); + +out: + tftp_session_terminate(spt); +} + +static void tftp_send_next_block(struct tftp_session *spt, + struct tftp_t *recv_tp) +{ + struct mbuf *m; + struct tftp_t *tp; + int nobytes; + + m = m_get(spt->slirp); + + if (!m) { + return; + } + + tp = tftp_prep_mbuf_data(spt, m); + + tp->tp_op = htons(TFTP_DATA); + tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); + + nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, + spt->block_size); + + if (nobytes < 0) { + m_free(m); + + /* send "file not found" error back */ + + tftp_send_error(spt, 1, "File not found", tp); + + return; + } + + m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) + - sizeof(struct udphdr); + tftp_udp_output(spt, m, recv_tp); + + if (nobytes == spt->block_size) { + tftp_session_update(spt); + } + else { + tftp_session_terminate(spt); + } + + spt->block_nr++; +} + +static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, + struct tftp_t *tp, int pktlen) +{ + struct tftp_session *spt; + int s, k; + size_t prefix_len; + char *req_fname; + const char *option_name[2]; + uint32_t option_value[2]; + int nb_options = 0; + + /* check if a session already exists and if so terminate it */ + s = tftp_session_find(slirp, srcsas, tp); + if (s >= 0) { + tftp_session_terminate(&slirp->tftp_sessions[s]); + } + + s = tftp_session_allocate(slirp, srcsas, tp); + + if (s < 0) { + return; + } + + spt = &slirp->tftp_sessions[s]; + + /* unspecified prefix means service disabled */ + if (!slirp->tftp_prefix) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + /* skip header fields */ + k = 0; + pktlen -= offsetof(struct tftp_t, x.tp_buf); + + /* prepend tftp_prefix */ + prefix_len = strlen(slirp->tftp_prefix); + spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); + memcpy(spt->filename, slirp->tftp_prefix, prefix_len); + spt->filename[prefix_len] = '/'; + + /* get name */ + req_fname = spt->filename + prefix_len + 1; + + while (1) { + if (k >= TFTP_FILENAME_MAX || k >= pktlen) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + req_fname[k] = tp->x.tp_buf[k]; + if (req_fname[k++] == '\0') { + break; + } + } + + DEBUG_TFTP("tftp rrq file: %s", req_fname); + + /* check mode */ + if ((pktlen - k) < 6) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { + tftp_send_error(spt, 4, "Unsupported transfer mode", tp); + return; + } + + k += 6; /* skipping octet */ + + /* do sanity checks on the filename */ + if (!strncmp(req_fname, "../", 3) || + req_fname[strlen(req_fname) - 1] == '/' || + strstr(req_fname, "/../")) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + /* check if the file exists */ + if (tftp_read_data(spt, 0, NULL, 0) < 0) { + tftp_send_error(spt, 1, "File not found", tp); + return; + } + + if (tp->x.tp_buf[pktlen - 1] != 0) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { + const char *key, *value; + + key = &tp->x.tp_buf[k]; + k += strlen(key) + 1; + + if (k >= pktlen) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + value = &tp->x.tp_buf[k]; + k += strlen(value) + 1; + + if (strcasecmp(key, "tsize") == 0) { + int tsize = atoi(value); + struct stat stat_p; + + if (tsize == 0) { + if (stat(spt->filename, &stat_p) == 0) + tsize = stat_p.st_size; + else { + tftp_send_error(spt, 1, "File not found", tp); + return; + } + } + + option_name[nb_options] = "tsize"; + option_value[nb_options] = tsize; + nb_options++; + } else if (strcasecmp(key, "blksize") == 0) { + int blksize = atoi(value); + + /* Accept blksize up to our maximum size */ + if (blksize > 0) { + spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); + option_name[nb_options] = "blksize"; + option_value[nb_options] = spt->block_size; + nb_options++; + } + } + } + + if (nb_options > 0) { + assert(nb_options <= G_N_ELEMENTS(option_name)); + tftp_send_oack(spt, option_name, option_value, nb_options, tp); + return; + } + + spt->block_nr = 0; + tftp_send_next_block(spt, tp); +} + +static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, + struct tftp_t *tp, int pktlen) +{ + int s; + + s = tftp_session_find(slirp, srcsas, tp); + + if (s < 0) { + return; + } + + tftp_send_next_block(&slirp->tftp_sessions[s], tp); +} + +static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, + struct tftp_t *tp, int pktlen) +{ + int s; + + s = tftp_session_find(slirp, srcsas, tp); + + if (s < 0) { + return; + } + + tftp_session_terminate(&slirp->tftp_sessions[s]); +} + +void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) +{ + struct tftp_t *tp = (struct tftp_t *)m->m_data; + + switch(ntohs(tp->tp_op)) { + case TFTP_RRQ: + tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); + break; + + case TFTP_ACK: + tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); + break; + + case TFTP_ERROR: + tftp_handle_error(m->slirp, srcsas, tp, m->m_len); + break; + } +} diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h new file mode 100644 index 0000000000..a4c4a64e64 --- /dev/null +++ b/slirp/src/tftp.h @@ -0,0 +1,51 @@ +/* tftp defines */ + +#ifndef SLIRP_TFTP_H +#define SLIRP_TFTP_H + +#define TFTP_SESSIONS_MAX 20 + +#define TFTP_SERVER 69 + +#define TFTP_RRQ 1 +#define TFTP_WRQ 2 +#define TFTP_DATA 3 +#define TFTP_ACK 4 +#define TFTP_ERROR 5 +#define TFTP_OACK 6 + +#define TFTP_FILENAME_MAX 512 +#define TFTP_BLOCKSIZE_MAX 1428 + +struct tftp_t { + struct udphdr udp; + uint16_t tp_op; + union { + struct { + uint16_t tp_block_nr; + uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; + } tp_data; + struct { + uint16_t tp_error_code; + uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; + } tp_error; + char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; + } x; +} __attribute__((packed)); + +struct tftp_session { + Slirp *slirp; + char *filename; + int fd; + uint16_t block_size; + + struct sockaddr_storage client_addr; + uint16_t client_port; + uint32_t block_nr; + + int timestamp; +}; + +void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); + +#endif diff --git a/slirp/src/udp.c b/slirp/src/udp.c new file mode 100644 index 0000000000..fa9f4a08bd --- /dev/null +++ b/slirp/src/udp.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 + * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "slirp.h" +#include "ip_icmp.h" + +static uint8_t udp_tos(struct socket *so); + +void +udp_init(Slirp *slirp) +{ + slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; + slirp->udp_last_so = &slirp->udb; +} + +void udp_cleanup(Slirp *slirp) +{ + while (slirp->udb.so_next != &slirp->udb) { + udp_detach(slirp->udb.so_next); + } +} + +/* m->m_data points at ip packet header + * m->m_len length ip packet + * ip->ip_len length data (IPDU) + */ +void +udp_input(register struct mbuf *m, int iphlen) +{ + Slirp *slirp = m->slirp; + register struct ip *ip; + register struct udphdr *uh; + int len; + struct ip save_ip; + struct socket *so; + struct sockaddr_storage lhost; + struct sockaddr_in *lhost4; + + DEBUG_CALL("udp_input"); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("iphlen = %d", iphlen); + + /* + * Strip IP options, if any; should skip this, + * make available to user, and use on returned packets, + * but we don't yet have a way to check the checksum + * with options still present. + */ + if(iphlen > sizeof(struct ip)) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + + /* + * Get IP and UDP header together in first mbuf. + */ + ip = mtod(m, struct ip *); + uh = (struct udphdr *)((char *)ip + iphlen); + + /* + * Make mbuf data length reflect UDP length. + * If not enough data to reflect UDP length, drop. + */ + len = ntohs((uint16_t)uh->uh_ulen); + + if (ip->ip_len != len) { + if (len > ip->ip_len) { + goto bad; + } + m_adj(m, len - ip->ip_len); + ip->ip_len = len; + } + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip = *ip; + save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ + + /* + * Checksum extended UDP header and data. + */ + if (uh->uh_sum) { + memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); + ((struct ipovly *)ip)->ih_x1 = 0; + ((struct ipovly *)ip)->ih_len = uh->uh_ulen; + if(cksum(m, len + sizeof(struct ip))) { + goto bad; + } + } + + lhost.ss_family = AF_INET; + lhost4 = (struct sockaddr_in *) &lhost; + lhost4->sin_addr = ip->ip_src; + lhost4->sin_port = uh->uh_sport; + + /* + * handle DHCP/BOOTP + */ + if (ntohs(uh->uh_dport) == BOOTP_SERVER && + (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || + ip->ip_dst.s_addr == 0xffffffff)) { + bootp_input(m); + goto bad; + } + + /* + * handle TFTP + */ + if (ntohs(uh->uh_dport) == TFTP_SERVER && + ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { + m->m_data += iphlen; + m->m_len -= iphlen; + tftp_input(&lhost, m); + m->m_data -= iphlen; + m->m_len += iphlen; + goto bad; + } + + if (slirp->restricted) { + goto bad; + } + + /* + * Locate pcb for datagram. + */ + so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); + + if (so == NULL) { + /* + * If there's no socket for this packet, + * create one + */ + so = socreate(slirp); + if (udp_attach(so, AF_INET) == -1) { + DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); + sofree(so); + goto bad; + } + + /* + * Setup fields + */ + so->so_lfamily = AF_INET; + so->so_laddr = ip->ip_src; + so->so_lport = uh->uh_sport; + + if ((so->so_iptos = udp_tos(so)) == 0) + so->so_iptos = ip->ip_tos; + + /* + * XXXXX Here, check if it's in udpexec_list, + * and if it is, do the fork_exec() etc. + */ + } + + so->so_ffamily = AF_INET; + so->so_faddr = ip->ip_dst; /* XXX */ + so->so_fport = uh->uh_dport; /* XXX */ + + iphlen += sizeof(struct udphdr); + m->m_len -= iphlen; + m->m_data += iphlen; + + /* + * Now we sendto() the packet. + */ + if(sosendto(so,m) == -1) { + m->m_len += iphlen; + m->m_data -= iphlen; + *ip=save_ip; + DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); + icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, + strerror(errno)); + goto bad; + } + + m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ + + /* restore the orig mbuf packet */ + m->m_len += iphlen; + m->m_data -= iphlen; + *ip=save_ip; + so->so_m=m; /* ICMP backup */ + + return; +bad: + m_free(m); +} + +int udp_output(struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos) +{ + register struct udpiphdr *ui; + int error = 0; + + DEBUG_CALL("udp_output"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); + DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); + + /* + * Adjust for header + */ + m->m_data -= sizeof(struct udpiphdr); + m->m_len += sizeof(struct udpiphdr); + + /* + * Fill in mbuf with extended UDP header + * and addresses and length put into network format. + */ + ui = mtod(m, struct udpiphdr *); + memset(&ui->ui_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr)); + ui->ui_x1 = 0; + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = htons(m->m_len - sizeof(struct ip)); + /* XXXXX Check for from-one-location sockets, or from-any-location sockets */ + ui->ui_src = saddr->sin_addr; + ui->ui_dst = daddr->sin_addr; + ui->ui_sport = saddr->sin_port; + ui->ui_dport = daddr->sin_port; + ui->ui_ulen = ui->ui_len; + + /* + * Stuff checksum and output datagram. + */ + ui->ui_sum = 0; + if ((ui->ui_sum = cksum(m, m->m_len)) == 0) + ui->ui_sum = 0xffff; + ((struct ip *)ui)->ip_len = m->m_len; + + ((struct ip *)ui)->ip_ttl = IPDEFTTL; + ((struct ip *)ui)->ip_tos = iptos; + + error = ip_output(so, m); + + return (error); +} + +int +udp_attach(struct socket *so, unsigned short af) +{ + so->s = slirp_socket(af, SOCK_DGRAM, 0); + if (so->s != -1) { + so->so_expire = curtime + SO_EXPIRE; + insque(so, &so->slirp->udb); + } + return(so->s); +} + +void +udp_detach(struct socket *so) +{ + so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); + closesocket(so->s); + sofree(so); +} + +static const struct tos_t udptos[] = { + {0, 53, IPTOS_LOWDELAY, 0}, /* DNS */ + {0, 0, 0, 0} +}; + +static uint8_t +udp_tos(struct socket *so) +{ + int i = 0; + + while(udptos[i].tos) { + if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || + (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { + so->so_emu = udptos[i].emu; + return udptos[i].tos; + } + i++; + } + + return 0; +} + +struct socket * +udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, uint32_t laddr, + unsigned lport, int flags) +{ + /* TODO: IPv6 */ + struct sockaddr_in addr; + struct socket *so; + socklen_t addrlen = sizeof(struct sockaddr_in); + + so = socreate(slirp); + so->s = slirp_socket(AF_INET,SOCK_DGRAM,0); + if (so->s < 0) { + sofree(so); + return NULL; + } + so->so_expire = curtime + SO_EXPIRE; + insque(so, &slirp->udb); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = haddr; + addr.sin_port = hport; + + if (bind(so->s,(struct sockaddr *)&addr, addrlen) < 0) { + udp_detach(so); + return NULL; + } + slirp_socket_set_fast_reuse(so->s); + + getsockname(so->s,(struct sockaddr *)&addr,&addrlen); + so->fhost.sin = addr; + sotranslate_accept(so); + so->so_lfamily = AF_INET; + so->so_lport = lport; + so->so_laddr.s_addr = laddr; + if (flags != SS_FACCEPTONCE) + so->so_expire = 0; + + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_ISFCONNECTED | flags; + + return so; +} diff --git a/slirp/src/udp.h b/slirp/src/udp.h new file mode 100644 index 0000000000..3d29504caa --- /dev/null +++ b/slirp/src/udp.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp.h 8.1 (Berkeley) 6/10/93 + * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp + */ + +#ifndef UDP_H +#define UDP_H + +#define UDP_TTL 0x60 +#define UDP_UDPDATALEN 16192 + +/* + * Udp protocol header. + * Per RFC 768, September, 1981. + */ +struct udphdr { + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + int16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +}; + +/* + * UDP kernel structures and variables. + */ +struct udpiphdr { + struct ipovly ui_i; /* overlaid ip structure */ + struct udphdr ui_u; /* udp header */ +}; +#define ui_mbuf ui_i.ih_mbuf.mptr +#define ui_x1 ui_i.ih_x1 +#define ui_pr ui_i.ih_pr +#define ui_len ui_i.ih_len +#define ui_src ui_i.ih_src +#define ui_dst ui_i.ih_dst +#define ui_sport ui_u.uh_sport +#define ui_dport ui_u.uh_dport +#define ui_ulen ui_u.uh_ulen +#define ui_sum ui_u.uh_sum + +/* + * Names for UDP sysctl objects + */ +#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ +#define UDPCTL_MAXID 2 + +struct mbuf; + +void udp_init(Slirp *); +void udp_cleanup(Slirp *); +void udp_input(register struct mbuf *, int); +int udp_attach(struct socket *, unsigned short af); +void udp_detach(struct socket *); +struct socket * udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, + int); +int udp_output(struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos); + +void udp6_input(register struct mbuf *); +int udp6_output(struct socket *so, struct mbuf *m, + struct sockaddr_in6 *saddr, struct sockaddr_in6 *daddr); + +#endif diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c new file mode 100644 index 0000000000..be5cba1f54 --- /dev/null +++ b/slirp/src/udp6.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2013 + * Guillaume Subiron + */ + +#include "slirp.h" +#include "udp.h" +#include "dhcpv6.h" + +void udp6_input(struct mbuf *m) +{ + Slirp *slirp = m->slirp; + struct ip6 *ip, save_ip; + struct udphdr *uh; + int iphlen = sizeof(struct ip6); + int len; + struct socket *so; + struct sockaddr_in6 lhost; + + DEBUG_CALL("udp6_input"); + DEBUG_ARG("m = %p", m); + + if (slirp->restricted) { + goto bad; + } + + ip = mtod(m, struct ip6 *); + m->m_len -= iphlen; + m->m_data += iphlen; + uh = mtod(m, struct udphdr *); + m->m_len += iphlen; + m->m_data -= iphlen; + + if (ip6_cksum(m)) { + goto bad; + } + + len = ntohs((uint16_t)uh->uh_ulen); + + /* + * Make mbuf data length reflect UDP length. + * If not enough data to reflect UDP length, drop. + */ + if (ntohs(ip->ip_pl) != len) { + if (len > ntohs(ip->ip_pl)) { + goto bad; + } + m_adj(m, len - ntohs(ip->ip_pl)); + ip->ip_pl = htons(len); + } + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip = *ip; + + /* Locate pcb for datagram. */ + lhost.sin6_family = AF_INET6; + lhost.sin6_addr = ip->ip_src; + lhost.sin6_port = uh->uh_sport; + + /* handle DHCPv6 */ + if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && + (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || + in6_dhcp_multicast(&ip->ip_dst))) { + m->m_data += iphlen; + m->m_len -= iphlen; + dhcpv6_input(&lhost, m); + m->m_data -= iphlen; + m->m_len += iphlen; + goto bad; + } + + /* handle TFTP */ + if (ntohs(uh->uh_dport) == TFTP_SERVER && + !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { + m->m_data += iphlen; + m->m_len -= iphlen; + tftp_input((struct sockaddr_storage *)&lhost, m); + m->m_data -= iphlen; + m->m_len += iphlen; + goto bad; + } + + so = solookup(&slirp->udp_last_so, &slirp->udb, + (struct sockaddr_storage *) &lhost, NULL); + + if (so == NULL) { + /* If there's no socket for this packet, create one. */ + so = socreate(slirp); + if (udp_attach(so, AF_INET6) == -1) { + DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); + sofree(so); + goto bad; + } + + /* Setup fields */ + so->so_lfamily = AF_INET6; + so->so_laddr6 = ip->ip_src; + so->so_lport6 = uh->uh_sport; + } + + so->so_ffamily = AF_INET6; + so->so_faddr6 = ip->ip_dst; /* XXX */ + so->so_fport6 = uh->uh_dport; /* XXX */ + + iphlen += sizeof(struct udphdr); + m->m_len -= iphlen; + m->m_data += iphlen; + + /* + * Now we sendto() the packet. + */ + if (sosendto(so, m) == -1) { + m->m_len += iphlen; + m->m_data -= iphlen; + *ip = save_ip; + DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); + icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); + goto bad; + } + + m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ + + /* restore the orig mbuf packet */ + m->m_len += iphlen; + m->m_data -= iphlen; + *ip = save_ip; + so->so_m = m; + + return; +bad: + m_free(m); +} + +int udp6_output(struct socket *so, struct mbuf *m, + struct sockaddr_in6 *saddr, struct sockaddr_in6 *daddr) +{ + struct ip6 *ip; + struct udphdr *uh; + + DEBUG_CALL("udp6_output"); + DEBUG_ARG("so = %p", so); + DEBUG_ARG("m = %p", m); + + /* adjust for header */ + m->m_data -= sizeof(struct udphdr); + m->m_len += sizeof(struct udphdr); + uh = mtod(m, struct udphdr *); + m->m_data -= sizeof(struct ip6); + m->m_len += sizeof(struct ip6); + ip = mtod(m, struct ip6 *); + + /* Build IP header */ + ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); + ip->ip_nh = IPPROTO_UDP; + ip->ip_src = saddr->sin6_addr; + ip->ip_dst = daddr->sin6_addr; + + /* Build UDP header */ + uh->uh_sport = saddr->sin6_port; + uh->uh_dport = daddr->sin6_port; + uh->uh_ulen = ip->ip_pl; + uh->uh_sum = 0; + uh->uh_sum = ip6_cksum(m); + if (uh->uh_sum == 0) { + uh->uh_sum = 0xffff; + } + + return ip6_output(so, m, 0); +} diff --git a/slirp/src/util.c b/slirp/src/util.c new file mode 100644 index 0000000000..5ec2fa87ab --- /dev/null +++ b/slirp/src/util.c @@ -0,0 +1,367 @@ +/* + * util.c (mostly based on QEMU os-win32.c) + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010-2016 Red Hat, Inc. + * + * QEMU library functions for win32 which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "util.h" + +#include <glib.h> +#include <fcntl.h> +#include <stdint.h> + +#if defined(_WIN32) +int slirp_inet_aton(const char *cp, struct in_addr *ia) +{ + uint32_t addr = inet_addr(cp); + if (addr == 0xffffffff) { + return 0; + } + ia->s_addr = addr; + return 1; +} +#endif + +void slirp_set_nonblock(int fd) +{ +#ifndef _WIN32 + int f; + f = fcntl(fd, F_GETFL); + assert(f != -1); + f = fcntl(fd, F_SETFL, f | O_NONBLOCK); + assert(f != -1); +#else + unsigned long opt = 1; + ioctlsocket(fd, FIONBIO, &opt); +#endif +} + +static void slirp_set_cloexec(int fd) +{ +#ifndef _WIN32 + int f; + f = fcntl(fd, F_GETFD); + assert(f != -1); + f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); + assert(f != -1); +#endif +} + +/* + * Opens a socket with FD_CLOEXEC set + */ +int slirp_socket(int domain, int type, int protocol) +{ + int ret; + +#ifdef SOCK_CLOEXEC + ret = socket(domain, type | SOCK_CLOEXEC, protocol); + if (ret != -1 || errno != EINVAL) { + return ret; + } +#endif + ret = socket(domain, type, protocol); + if (ret >= 0) { + slirp_set_cloexec(ret); + } + + return ret; +} + +#ifdef _WIN32 +static int socket_error(void) +{ + switch (WSAGetLastError()) { + case 0: + return 0; + case WSAEINTR: + return EINTR; + case WSAEINVAL: + return EINVAL; + case WSA_INVALID_HANDLE: + return EBADF; + case WSA_NOT_ENOUGH_MEMORY: + return ENOMEM; + case WSA_INVALID_PARAMETER: + return EINVAL; + case WSAENAMETOOLONG: + return ENAMETOOLONG; + case WSAENOTEMPTY: + return ENOTEMPTY; + case WSAEWOULDBLOCK: + /* not using EWOULDBLOCK as we don't want code to have + * to check both EWOULDBLOCK and EAGAIN */ + return EAGAIN; + case WSAEINPROGRESS: + return EINPROGRESS; + case WSAEALREADY: + return EALREADY; + case WSAENOTSOCK: + return ENOTSOCK; + case WSAEDESTADDRREQ: + return EDESTADDRREQ; + case WSAEMSGSIZE: + return EMSGSIZE; + case WSAEPROTOTYPE: + return EPROTOTYPE; + case WSAENOPROTOOPT: + return ENOPROTOOPT; + case WSAEPROTONOSUPPORT: + return EPROTONOSUPPORT; + case WSAEOPNOTSUPP: + return EOPNOTSUPP; + case WSAEAFNOSUPPORT: + return EAFNOSUPPORT; + case WSAEADDRINUSE: + return EADDRINUSE; + case WSAEADDRNOTAVAIL: + return EADDRNOTAVAIL; + case WSAENETDOWN: + return ENETDOWN; + case WSAENETUNREACH: + return ENETUNREACH; + case WSAENETRESET: + return ENETRESET; + case WSAECONNABORTED: + return ECONNABORTED; + case WSAECONNRESET: + return ECONNRESET; + case WSAENOBUFS: + return ENOBUFS; + case WSAEISCONN: + return EISCONN; + case WSAENOTCONN: + return ENOTCONN; + case WSAETIMEDOUT: + return ETIMEDOUT; + case WSAECONNREFUSED: + return ECONNREFUSED; + case WSAELOOP: + return ELOOP; + case WSAEHOSTUNREACH: + return EHOSTUNREACH; + default: + return EIO; + } +} + +#undef ioctlsocket +int slirp_ioctlsocket_wrap(int fd, int req, void *val) +{ + int ret; + ret = ioctlsocket(fd, req, val); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef closesocket +int slirp_closesocket_wrap(int fd) +{ + int ret; + ret = closesocket(fd); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef connect +int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) +{ + int ret; + ret = connect(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef listen +int slirp_listen_wrap(int sockfd, int backlog) +{ + int ret; + ret = listen(sockfd, backlog); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef bind +int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) +{ + int ret; + ret = bind(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef socket +int slirp_socket_wrap(int domain, int type, int protocol) +{ + int ret; + ret = socket(domain, type, protocol); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef accept +int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) +{ + int ret; + ret = accept(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef shutdown +int slirp_shutdown_wrap(int sockfd, int how) +{ + int ret; + ret = shutdown(sockfd, how); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef getsockopt +int slirp_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, int *optlen) +{ + int ret; + ret = getsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef setsockopt +int slirp_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, int optlen) +{ + int ret; + ret = setsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef getpeername +int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, + int *addrlen) +{ + int ret; + ret = getpeername(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef getsockname +int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, + int *addrlen) +{ + int ret; + ret = getsockname(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef send +ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) +{ + int ret; + ret = send(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef sendto +ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, int addrlen) +{ + int ret; + ret = sendto(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef recv +ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int flags) +{ + int ret; + ret = recv(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + +#undef recvfrom +ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, int *addrlen) +{ + int ret; + ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} +#endif /* WIN32 */ + +void slirp_pstrcpy(char *buf, int buf_size, const char *str) +{ + int c; + char *q = buf; + + if (buf_size <= 0) + return; + + for(;;) { + c = *str++; + if (c == 0 || q >= buf + buf_size - 1) + break; + *q++ = c; + } + *q = '\0'; +} diff --git a/slirp/src/util.h b/slirp/src/util.h new file mode 100644 index 0000000000..e94ee4e7f1 --- /dev/null +++ b/slirp/src/util.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010-2019 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef UTIL_H_ +#define UTIL_H_ + +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <inttypes.h> + +#ifdef _WIN32 +#include <winsock2.h> +#include <windows.h> +#else +#include <sys/socket.h> +#include <netinet/tcp.h> +#include <netinet/in.h> +#endif + +#if defined(_WIN32) +# define SLIRP_PACKED __attribute__((gcc_struct, packed)) +#else +# define SLIRP_PACKED __attribute__((packed)) +#endif + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) __extension__ ({ \ + void *__mptr = (void *)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#if defined(_WIN32) /* CONFIG_IOVEC */ +# if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ +struct iovec { + void *iov_base; + size_t iov_len; +}; +# endif +#else +#include <sys/uio.h> +#endif + +#define SCALE_MS 1000000 + +#define ETH_ALEN 6 +#define ETH_HLEN 14 +#define ETH_P_IP (0x0800) /* Internet Protocol packet */ +#define ETH_P_ARP (0x0806) /* Address Resolution packet */ +#define ETH_P_IPV6 (0x86dd) +#define ETH_P_VLAN (0x8100) +#define ETH_P_DVLAN (0x88a8) +#define ETH_P_NCSI (0x88f8) +#define ETH_P_UNKNOWN (0xffff) + +/* FIXME: remove me when made standalone */ +#ifdef _WIN32 +#undef accept +#undef bind +#undef closesocket +#undef connect +#undef getpeername +#undef getsockname +#undef getsockopt +#undef ioctlsocket +#undef listen +#undef recv +#undef recvfrom +#undef send +#undef sendto +#undef setsockopt +#undef shutdown +#undef socket +#endif + +#ifdef _WIN32 +#define connect slirp_connect_wrap +int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); +#define listen slirp_listen_wrap +int slirp_listen_wrap(int fd, int backlog); +#define bind slirp_bind_wrap +int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); +#define socket slirp_socket_wrap +int slirp_socket_wrap(int domain, int type, int protocol); +#define accept slirp_accept_wrap +int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); +#define shutdown slirp_shutdown_wrap +int slirp_shutdown_wrap(int fd, int how); +#define getpeername slirp_getpeername_wrap +int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); +#define getsockname slirp_getsockname_wrap +int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); +#define send slirp_send_wrap +ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); +#define sendto slirp_sendto_wrap +ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, + const struct sockaddr *dest_addr, int addrlen); +#define recv slirp_recv_wrap +ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); +#define recvfrom slirp_recvfrom_wrap +ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, + struct sockaddr *src_addr, int *addrlen); +#define closesocket slirp_closesocket_wrap +int slirp_closesocket_wrap(int fd); +#define ioctlsocket slirp_ioctlsocket_wrap +int slirp_ioctlsocket_wrap(int fd, int req, void *val); +#define getsockopt slirp_getsockopt_wrap +int slirp_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, int *optlen); +#define setsockopt slirp_setsockopt_wrap +int slirp_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, int optlen); +#define inet_aton slirp_inet_aton +int slirp_inet_aton(const char *cp, struct in_addr *ia); +#else +#define closesocket(s) close(s) +#define ioctlsocket(s, r, v) ioctl(s, r, v) +#endif + +int slirp_socket(int domain, int type, int protocol); +void slirp_set_nonblock(int fd); + +static inline int slirp_socket_set_nodelay(int fd) +{ + int v = 1; + return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); +} + +static inline int slirp_socket_set_fast_reuse(int fd) +{ +#ifndef _WIN32 + int v = 1; + return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); +#else + /* Enabling the reuse of an endpoint that was used by a socket still in + * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows + * fast reuse is the default and SO_REUSEADDR does strange things. So we + * don't have to do anything here. More info can be found at: + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ + return 0; +#endif +} + +void slirp_pstrcpy(char *buf, int buf_size, const char *str); + +#endif diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c new file mode 100644 index 0000000000..4d08b47c61 --- /dev/null +++ b/slirp/src/vmstate.c @@ -0,0 +1,413 @@ +/* + * VMState interpreter + * + * Copyright (c) 2009-2018 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <glib.h> + +#include "stream.h" +#include "vmstate.h" + +static int get_nullptr(SlirpIStream *f, void *pv, size_t size, + const VMStateField *field) +{ + if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { + return 0; + } + g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); + return -EINVAL; +} + +static int put_nullptr(SlirpOStream *f, void *pv, size_t size, + const VMStateField *field) + +{ + if (pv == NULL) { + slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); + return 0; + } + g_warning("vmstate: put_nullptr must be called with pv == NULL"); + return -EINVAL; +} + +const VMStateInfo slirp_vmstate_info_nullptr = { + .name = "uint64", + .get = get_nullptr, + .put = put_nullptr, +}; + +/* 8 bit unsigned int */ + +static int get_uint8(SlirpIStream *f, void *pv, size_t size, const VMStateField *field) +{ + uint8_t *v = pv; + *v = slirp_istream_read_u8(f); + return 0; +} + +static int put_uint8(SlirpOStream *f, void *pv, size_t size, const VMStateField *field) +{ + uint8_t *v = pv; + slirp_ostream_write_u8(f, *v); + return 0; +} + +const VMStateInfo slirp_vmstate_info_uint8 = { + .name = "uint8", + .get = get_uint8, + .put = put_uint8, +}; + +/* 16 bit unsigned int */ + +static int get_uint16(SlirpIStream *f, void *pv, size_t size, + const VMStateField *field) +{ + uint16_t *v = pv; + *v = slirp_istream_read_u16(f); + return 0; +} + +static int put_uint16(SlirpOStream *f, void *pv, size_t size, + const VMStateField *field) +{ + uint16_t *v = pv; + slirp_ostream_write_u16(f, *v); + return 0; +} + +const VMStateInfo slirp_vmstate_info_uint16 = { + .name = "uint16", + .get = get_uint16, + .put = put_uint16, +}; + +/* 32 bit unsigned int */ + +static int get_uint32(SlirpIStream *f, void *pv, size_t size, + const VMStateField *field) +{ + uint32_t *v = pv; + *v = slirp_istream_read_u32(f); + return 0; +} + +static int put_uint32(SlirpOStream *f, void *pv, size_t size, + const VMStateField *field) +{ + uint32_t *v = pv; + slirp_ostream_write_u32(f, *v); + return 0; +} + +const VMStateInfo slirp_vmstate_info_uint32 = { + .name = "uint32", + .get = get_uint32, + .put = put_uint32, +}; + +/* 16 bit int */ + +static int get_int16(SlirpIStream *f, void *pv, size_t size, const VMStateField *field) +{ + int16_t *v = pv; + *v = slirp_istream_read_i16(f); + return 0; +} + +static int put_int16(SlirpOStream *f, void *pv, size_t size, const VMStateField *field) +{ + int16_t *v = pv; + slirp_ostream_write_i16(f, *v); + return 0; +} + +const VMStateInfo slirp_vmstate_info_int16 = { + .name = "int16", + .get = get_int16, + .put = put_int16, +}; + +/* 32 bit int */ + +static int get_int32(SlirpIStream *f, void *pv, size_t size, const VMStateField *field) +{ + int32_t *v = pv; + *v = slirp_istream_read_i32(f); + return 0; +} + +static int put_int32(SlirpOStream *f, void *pv, size_t size, const VMStateField *field) +{ + int32_t *v = pv; + slirp_ostream_write_i32(f, *v); + return 0; +} + +const VMStateInfo slirp_vmstate_info_int32 = { + .name = "int32", + .get = get_int32, + .put = put_int32, +}; + +/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate + * a temporary buffer and the pre_load/pre_save methods in the child vmsd + * copy stuff from the parent into the child and do calculations to fill + * in fields that don't really exist in the parent but need to be in the + * stream. + */ +static int get_tmp(SlirpIStream *f, void *pv, size_t size, const VMStateField *field) +{ + int ret; + const VMStateDescription *vmsd = field->vmsd; + int version_id = field->version_id; + void *tmp = g_malloc(size); + + /* Writes the parent field which is at the start of the tmp */ + *(void **)tmp = pv; + ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); + g_free(tmp); + return ret; +} + +static int put_tmp(SlirpOStream *f, void *pv, size_t size, const VMStateField *field) +{ + const VMStateDescription *vmsd = field->vmsd; + void *tmp = g_malloc(size); + int ret; + + /* Writes the parent field which is at the start of the tmp */ + *(void **)tmp = pv; + ret = slirp_vmstate_save_state(f, vmsd, tmp); + g_free(tmp); + + return ret; +} + +const VMStateInfo slirp_vmstate_info_tmp = { + .name = "tmp", + .get = get_tmp, + .put = put_tmp, +}; + +/* uint8_t buffers */ + +static int get_buffer(SlirpIStream *f, void *pv, size_t size, + const VMStateField *field) +{ + slirp_istream_read(f, pv, size); + return 0; +} + +static int put_buffer(SlirpOStream *f, void *pv, size_t size, + const VMStateField *field) +{ + slirp_ostream_write(f, pv, size); + return 0; +} + +const VMStateInfo slirp_vmstate_info_buffer = { + .name = "buffer", + .get = get_buffer, + .put = put_buffer, +}; + +static int vmstate_n_elems(void *opaque, const VMStateField *field) +{ + int n_elems = 1; + + if (field->flags & VMS_ARRAY) { + n_elems = field->num; + } else if (field->flags & VMS_VARRAY_INT32) { + n_elems = *(int32_t *)(opaque + field->num_offset); + } else if (field->flags & VMS_VARRAY_UINT32) { + n_elems = *(uint32_t *)(opaque + field->num_offset); + } else if (field->flags & VMS_VARRAY_UINT16) { + n_elems = *(uint16_t *)(opaque + field->num_offset); + } else if (field->flags & VMS_VARRAY_UINT8) { + n_elems = *(uint8_t *)(opaque + field->num_offset); + } + + if (field->flags & VMS_MULTIPLY_ELEMENTS) { + n_elems *= field->num; + } + + return n_elems; +} + +static int vmstate_size(void *opaque, const VMStateField *field) +{ + int size = field->size; + + if (field->flags & VMS_VBUFFER) { + size = *(int32_t *)(opaque + field->size_offset); + if (field->flags & VMS_MULTIPLY) { + size *= field->size; + } + } + + return size; +} + +static int +vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, + void *opaque, int version_id) +{ + int ret = 0; + const VMStateField *field = vmsd->fields; + + if (vmsd->pre_save) { + ret = vmsd->pre_save(opaque); + if (ret) { + g_warning("pre-save failed: %s", vmsd->name); + return ret; + } + } + + while (field->name) { + if ((field->field_exists && + field->field_exists(opaque, version_id)) || + (!field->field_exists && + field->version_id <= version_id)) { + void *first_elem = opaque + field->offset; + int i, n_elems = vmstate_n_elems(opaque, field); + int size = vmstate_size(opaque, field); + + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems || !size); + } + for (i = 0; i < n_elems; i++) { + void *curr_elem = first_elem + size * i; + ret = 0; + + if (field->flags & VMS_ARRAY_OF_POINTER) { + assert(curr_elem); + curr_elem = *(void **)curr_elem; + } + if (!curr_elem && size) { + /* if null pointer write placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, NULL); + } else if (field->flags & VMS_STRUCT) { + ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); + } else if (field->flags & VMS_VSTRUCT) { + ret = vmstate_save_state_v(f, field->vmsd, curr_elem, + field->struct_version_id); + } else { + ret = field->info->put(f, curr_elem, size, field); + } + if (ret) { + g_warning("Save of field %s/%s failed", + vmsd->name, field->name); + return ret; + } + } + } else { + if (field->flags & VMS_MUST_EXIST) { + g_warning("Output state validation failed: %s/%s", + vmsd->name, field->name); + assert(!(field->flags & VMS_MUST_EXIST)); + } + } + field++; + } + + return 0; +} + +int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, + void *opaque) +{ + return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); +} + +static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) +{ + if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { + size_t size = vmstate_size(opaque, field); + size *= vmstate_n_elems(opaque, field); + if (size) { + *(void **)ptr = g_malloc(size); + } + } +} + +int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, + void *opaque, int version_id) +{ + VMStateField *field = vmsd->fields; + int ret = 0; + + if (version_id > vmsd->version_id) { + g_warning("%s: incoming version_id %d is too new " + "for local version_id %d", + vmsd->name, version_id, vmsd->version_id); + return -EINVAL; + } + if (vmsd->pre_load) { + int ret = vmsd->pre_load(opaque); + if (ret) { + return ret; + } + } + while (field->name) { + if ((field->field_exists && + field->field_exists(opaque, version_id)) || + (!field->field_exists && + field->version_id <= version_id)) { + void *first_elem = opaque + field->offset; + int i, n_elems = vmstate_n_elems(opaque, field); + int size = vmstate_size(opaque, field); + + vmstate_handle_alloc(first_elem, field, opaque); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems || !size); + } + for (i = 0; i < n_elems; i++) { + void *curr_elem = first_elem + size * i; + + if (field->flags & VMS_ARRAY_OF_POINTER) { + curr_elem = *(void **)curr_elem; + } + if (!curr_elem && size) { + /* if null pointer check placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, NULL); + } else if (field->flags & VMS_STRUCT) { + ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, + field->vmsd->version_id); + } else if (field->flags & VMS_VSTRUCT) { + ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, + field->struct_version_id); + } else { + ret = field->info->get(f, curr_elem, size, field); + } + if (ret < 0) { + g_warning("Failed to load %s:%s", vmsd->name, + field->name); + return ret; + } + } + } else if (field->flags & VMS_MUST_EXIST) { + g_warning("Input validation failed: %s/%s", + vmsd->name, field->name); + return -1; + } + field++; + } + if (vmsd->post_load) { + ret = vmsd->post_load(opaque, version_id); + } + return ret; +} diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h new file mode 100644 index 0000000000..cfa7b8c825 --- /dev/null +++ b/slirp/src/vmstate.h @@ -0,0 +1,396 @@ +/* + * QEMU migration/snapshot declarations + * + * Copyright (c) 2009-2011 Red Hat, Inc. + * + * Original author: Juan Quintela <quintela@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef VMSTATE_H_ +#define VMSTATE_H_ + +#include <unistd.h> +#include <stdint.h> +#include <stdbool.h> +#include "slirp.h" +#include "stream.h" + +#define stringify(s) tostring(s) +#define tostring(s) #s + +typedef struct VMStateInfo VMStateInfo; +typedef struct VMStateDescription VMStateDescription; +typedef struct VMStateField VMStateField; + +int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, + void *opaque); +int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, + void *opaque, int version_id); + +/* VMStateInfo allows customized migration of objects that don't fit in + * any category in VMStateFlags. Additional information is always passed + * into get and put in terms of field and vmdesc parameters. However + * these two parameters should only be used in cases when customized + * handling is needed, such as QTAILQ. For primitive data types such as + * integer, field and vmdesc parameters should be ignored inside get/put. + */ +struct VMStateInfo { + const char *name; + int (*get)(SlirpIStream *f, void *pv, size_t size, const VMStateField *field); + int (*put)(SlirpOStream *f, void *pv, size_t size, const VMStateField *field); +}; + +enum VMStateFlags { + /* Ignored */ + VMS_SINGLE = 0x001, + + /* The struct member at opaque + VMStateField.offset is a pointer + * to the actual field (e.g. struct a { uint8_t *b; + * }). Dereference the pointer before using it as basis for + * further pointer arithmetic (see e.g. VMS_ARRAY). Does not + * affect the meaning of VMStateField.num_offset or + * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for + * those. */ + VMS_POINTER = 0x002, + + /* The field is an array of fixed size. VMStateField.num contains + * the number of entries in the array. The size of each entry is + * given by VMStateField.size and / or opaque + + * VMStateField.size_offset; see VMS_VBUFFER and + * VMS_MULTIPLY. Each array entry will be processed individually + * (VMStateField.info.get()/put() if VMS_STRUCT is not set, + * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not + * be combined with VMS_VARRAY*. */ + VMS_ARRAY = 0x004, + + /* The field is itself a struct, containing one or more + * fields. Recurse into VMStateField.vmsd. Most useful in + * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each + * array entry. */ + VMS_STRUCT = 0x008, + + /* The field is an array of variable size. The int32_t at opaque + + * VMStateField.num_offset contains the number of entries in the + * array. See the VMS_ARRAY description regarding array handling + * in general. May not be combined with VMS_ARRAY or any other + * VMS_VARRAY*. */ + VMS_VARRAY_INT32 = 0x010, + + /* Ignored */ + VMS_BUFFER = 0x020, + + /* The field is a (fixed-size or variable-size) array of pointers + * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry + * before using it. Note: Does not imply any one of VMS_ARRAY / + * VMS_VARRAY*; these need to be set explicitly. */ + VMS_ARRAY_OF_POINTER = 0x040, + + /* The field is an array of variable size. The uint16_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT16 = 0x080, + + /* The size of the individual entries (a single array entry if + * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if + * neither is set) is variable (i.e. not known at compile-time), + * but the same for all entries. Use the int32_t at opaque + + * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine + * the size of each (and every) entry. */ + VMS_VBUFFER = 0x100, + + /* Multiply the entry size given by the int32_t at opaque + + * VMStateField.size_offset (see VMS_VBUFFER description) with + * VMStateField.size to determine the number of bytes to be + * allocated. Only valid in combination with VMS_VBUFFER. */ + VMS_MULTIPLY = 0x200, + + /* The field is an array of variable size. The uint8_t at opaque + + * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT8 = 0x400, + + /* The field is an array of variable size. The uint32_t at opaque + * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) + * contains the number of entries in the array. See the VMS_ARRAY + * description regarding array handling in general. May not be + * combined with VMS_ARRAY or any other VMS_VARRAY*. */ + VMS_VARRAY_UINT32 = 0x800, + + /* Fail loading the serialised VM state if this field is missing + * from the input. */ + VMS_MUST_EXIST = 0x1000, + + /* When loading serialised VM state, allocate memory for the + * (entire) field. Only valid in combination with + * VMS_POINTER. Note: Not all combinations with other flags are + * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't + * cause the individual entries to be allocated. */ + VMS_ALLOC = 0x2000, + + /* Multiply the number of entries given by the integer at opaque + + * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num + * to determine the number of entries in the array. Only valid in + * combination with one of VMS_VARRAY*. */ + VMS_MULTIPLY_ELEMENTS = 0x4000, + + /* A structure field that is like VMS_STRUCT, but uses + * VMStateField.struct_version_id to tell which version of the + * structure we are referencing to use. */ + VMS_VSTRUCT = 0x8000, +}; + +struct VMStateField { + const char *name; + size_t offset; + size_t size; + size_t start; + int num; + size_t num_offset; + size_t size_offset; + const VMStateInfo *info; + enum VMStateFlags flags; + const VMStateDescription *vmsd; + int version_id; + int struct_version_id; + bool (*field_exists)(void *opaque, int version_id); +}; + +struct VMStateDescription { + const char *name; + int version_id; + int (*pre_load)(void *opaque); + int (*post_load)(void *opaque, int version_id); + int (*pre_save)(void *opaque); + VMStateField *fields; +}; + + +extern const VMStateInfo slirp_vmstate_info_int16; +extern const VMStateInfo slirp_vmstate_info_int32; +extern const VMStateInfo slirp_vmstate_info_uint8; +extern const VMStateInfo slirp_vmstate_info_uint16; +extern const VMStateInfo slirp_vmstate_info_uint32; + +/** Put this in the stream when migrating a null pointer.*/ +#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ +extern const VMStateInfo slirp_vmstate_info_nullptr; + +extern const VMStateInfo slirp_vmstate_info_buffer; +extern const VMStateInfo slirp_vmstate_info_tmp; + +#define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) +#define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0) +#define typeof_field(type, field) typeof(((type *)0)->field) +#define type_check(t1,t2) ((t1*)0 - (t2*)0) + +#define vmstate_offset_value(_state, _field, _type) \ + (offsetof(_state, _field) + \ + type_check(_type, typeof_field(_state, _field))) + +#define vmstate_offset_pointer(_state, _field, _type) \ + (offsetof(_state, _field) + \ + type_check_pointer(_type, typeof_field(_state, _field))) + +#define vmstate_offset_array(_state, _field, _type, _num) \ + (offsetof(_state, _field) + \ + type_check_array(_type, typeof_field(_state, _field), _num)) + +#define vmstate_offset_buffer(_state, _field) \ + vmstate_offset_array(_state, _field, uint8_t, \ + sizeof(typeof_field(_state, _field))) + +/* In the macros below, if there is a _version, that means the macro's + * field will be processed only if the version being received is >= + * the _version specified. In general, if you add a new field, you + * would increment the structure's version and put that version + * number into the new field so it would only be processed with the + * new version. + * + * In particular, for VMSTATE_STRUCT() and friends the _version does + * *NOT* pick the version of the sub-structure. It works just as + * specified above. The version of the top-level structure received + * is passed down to all sub-structures. This means that the + * sub-structures must have version that are compatible with all the + * structures that use them. + * + * If you want to specify the version of the sub-structure, use + * VMSTATE_VSTRUCT(), which allows the specific sub-structure version + * to be directly specified. + */ + +#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = sizeof(_type), \ + .info = &(_info), \ + .flags = VMS_SINGLE, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num = (_num), \ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_ARRAY, \ + .offset = vmstate_offset_array(_state, _field, _type, _num), \ +} + +#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type *), \ + .flags = VMS_STRUCT|VMS_POINTER, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + +#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \ + .name = (stringify(_field)), \ + .num = (_num), \ + .field_exists = (_test), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .flags = VMS_STRUCT|VMS_ARRAY, \ + .offset = vmstate_offset_array(_state, _field, _type, _num),\ +} + +#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size = (_size - _start), \ + .info = &slirp_vmstate_info_buffer, \ + .flags = VMS_BUFFER, \ + .offset = vmstate_offset_buffer(_state, _field) + _start, \ +} + +#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _field_size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .field_exists = (_test), \ + .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\ + .info = &slirp_vmstate_info_buffer, \ + .flags = VMS_VBUFFER|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + +#define QEMU_BUILD_BUG_ON_STRUCT(x) \ + struct { \ + int:(x) ? -1 : 1; \ + } + +#define QEMU_BUILD_BUG_ON_ZERO(x) (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - \ + sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) + +/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state + * and execute the vmsd on the temporary. Note that we're working with + * the whole of _state here, not a field within it. + * We compile time check that: + * That _tmp_type contains a 'parent' member that's a pointer to the + * '_state' type + * That the pointer is right at the start of _tmp_type. + */ +#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) { \ + .name = "tmp", \ + .size = sizeof(_tmp_type) + \ + QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ + type_check_pointer(_state, \ + typeof_field(_tmp_type, parent)), \ + .vmsd = &(_vmsd), \ + .info = &slirp_vmstate_info_tmp, \ +} + +#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ + VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) + +#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ + VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) + +#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ + VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) + +#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ + VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, \ + _vmsd, _type) + +#define VMSTATE_INT16_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) +#define VMSTATE_INT32_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, int32_t) + +#define VMSTATE_UINT8_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) +#define VMSTATE_UINT16_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) +#define VMSTATE_UINT32_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) + +#define VMSTATE_INT16(_f, _s) \ + VMSTATE_INT16_V(_f, _s, 0) +#define VMSTATE_INT32(_f, _s) \ + VMSTATE_INT32_V(_f, _s, 0) + +#define VMSTATE_UINT8(_f, _s) \ + VMSTATE_UINT8_V(_f, _s, 0) +#define VMSTATE_UINT16(_f, _s) \ + VMSTATE_UINT16_V(_f, _s, 0) +#define VMSTATE_UINT32(_f, _s) \ + VMSTATE_UINT32_V(_f, _s, 0) + +#define VMSTATE_UINT16_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) + +#define VMSTATE_UINT32_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) + +#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ + VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) + +#define VMSTATE_INT16_ARRAY(_f, _s, _n) \ + VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) + +#define VMSTATE_BUFFER_V(_f, _s, _v) \ + VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) + +#define VMSTATE_BUFFER(_f, _s) \ + VMSTATE_BUFFER_V(_f, _s, 0) + +#define VMSTATE_END_OF_LIST() \ + {} + +#endif |