diff options
author | Dmitry Fleytman <dmitry.fleytman@ravellosystems.com> | 2016-06-01 11:23:41 +0300 |
---|---|---|
committer | Jason Wang <jasowang@redhat.com> | 2016-06-02 10:42:28 +0800 |
commit | eb700029c7836798046191d62d595363d92c84d4 (patch) | |
tree | 7c6b3b8c3de169297291adba1ad959bc87d75a5b /hw | |
parent | 66409b7c8bd0ebb075a6af8cbc7846fc0a95107d (diff) |
net_pkt: Extend packet abstraction as required by e1000e functionality
This patch extends the TX/RX packet abstractions with features that will
be used by the e1000e device implementation.
Changes are:
1. Support iovec lists for RX buffers
2. Deeper RX packets parsing
3. Loopback option for TX packets
4. Extended VLAN headers handling
5. RSS processing for RX packets
Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/net/net_rx_pkt.c | 473 | ||||
-rw-r--r-- | hw/net/net_rx_pkt.h | 193 | ||||
-rw-r--r-- | hw/net/net_tx_pkt.c | 204 | ||||
-rw-r--r-- | hw/net/net_tx_pkt.h | 60 |
4 files changed, 813 insertions, 117 deletions
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c index 8a4f29fe04..1019b50c18 100644 --- a/hw/net/net_rx_pkt.c +++ b/hw/net/net_rx_pkt.c @@ -16,24 +16,16 @@ */ #include "qemu/osdep.h" +#include "trace.h" #include "net_rx_pkt.h" -#include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" -/* - * RX packet may contain up to 2 fragments - rebuilt eth header - * in case of VLAN tag stripping - * and payload received from QEMU - in any case - */ -#define NET_MAX_RX_PACKET_FRAGMENTS (2) - struct NetRxPkt { struct virtio_net_hdr virt_hdr; - uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; - struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS]; + uint8_t ehdr_buf[sizeof(struct eth_header)]; + struct iovec *vec; + uint16_t vec_len_total; uint16_t vec_len; uint32_t tot_len; uint16_t tci; @@ -46,17 +38,31 @@ struct NetRxPkt { bool isip6; bool isudp; bool istcp; + + size_t l3hdr_off; + size_t l4hdr_off; + size_t l5hdr_off; + + eth_ip6_hdr_info ip6hdr_info; + eth_ip4_hdr_info ip4hdr_info; + eth_l4_hdr_info l4hdr_info; }; void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) { struct NetRxPkt *p = g_malloc0(sizeof *p); p->has_virt_hdr = has_virt_hdr; + p->vec = NULL; + p->vec_len_total = 0; *pkt = p; } void net_rx_pkt_uninit(struct NetRxPkt *pkt) { + if (pkt->vec_len_total != 0) { + g_free(pkt->vec); + } + g_free(pkt); } @@ -66,33 +72,88 @@ struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) return &pkt->virt_hdr; } -void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan) +static inline void +net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, + int new_iov_len) +{ + if (pkt->vec_len_total < new_iov_len) { + g_free(pkt->vec); + pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); + pkt->vec_len_total = new_iov_len; + } +} + +static void +net_rx_pkt_pull_data(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t ploff) +{ + if (pkt->vlan_stripped) { + net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); + + pkt->vec[0].iov_base = pkt->ehdr_buf; + pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf); + + pkt->tot_len = + iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header); + + pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, + iov, iovcnt, ploff, pkt->tot_len); + } else { + net_rx_pkt_iovec_realloc(pkt, iovcnt); + + pkt->tot_len = iov_size(iov, iovcnt) - ploff; + pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, + iov, iovcnt, ploff, pkt->tot_len); + } + + eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); + + trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, + pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); +} + +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan) { uint16_t tci = 0; - uint16_t ploff; + uint16_t ploff = iovoff; assert(pkt); pkt->vlan_stripped = false; if (strip_vlan) { - pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); + pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, + &ploff, &tci); } - if (pkt->vlan_stripped) { - pkt->vec[0].iov_base = pkt->ehdr_buf; - pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); - pkt->vec[1].iov_base = (uint8_t *) data + ploff; - pkt->vec[1].iov_len = len - ploff; - pkt->vec_len = 2; - pkt->tot_len = len - ploff + sizeof(struct eth_header); - } else { - pkt->vec[0].iov_base = (void *)data; - pkt->vec[0].iov_len = len; - pkt->vec_len = 1; - pkt->tot_len = len; + pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); +} + +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet) +{ + uint16_t tci = 0; + uint16_t ploff = iovoff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, + pkt->ehdr_buf, + &ploff, &tci); } pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); } void net_rx_pkt_dump(struct NetRxPkt *pkt) @@ -132,10 +193,17 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, size_t len) { + const struct iovec iov = { + .iov_base = (void *)data, + .iov_len = len + }; + assert(pkt); - eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, - &pkt->isudp, &pkt->istcp); + eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); } void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, @@ -150,6 +218,180 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, *istcp = pkt->istcp; } +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l3hdr_off; +} + +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l4hdr_off; +} + +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l5hdr_off; +} + +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) +{ + return &pkt->ip6hdr_info; +} + +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) +{ + return &pkt->ip4hdr_info; +} + +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) +{ + return &pkt->l4hdr_info; +} + +static inline void +_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, + void *ptr, size_t size) +{ + memcpy(&rss_input[*bytes_written], ptr, size); + trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); + *bytes_written += size; +} + +static inline void +_net_rx_rss_prepare_ip4(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_src, sizeof(uint32_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_dst, sizeof(uint32_t)); +} + +static inline void +_net_rx_rss_prepare_ip6(uint8_t *rss_input, + struct NetRxPkt *pkt, + bool ipv6ex, size_t *bytes_written) +{ + eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src + : &ip6info->ip6_hdr.ip6_src, + sizeof(struct in6_address)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst + : &ip6info->ip6_hdr.ip6_dst, + sizeof(struct in6_address)); +} + +static inline void +_net_rx_rss_prepare_tcp(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_sport, sizeof(uint16_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_dport, sizeof(uint16_t)); +} + +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key) +{ + uint8_t rss_input[36]; + size_t rss_length = 0; + uint32_t rss_hash = 0; + net_toeplitz_key key_data; + + switch (type) { + case NetPktRssIpV4: + assert(pkt->isip4); + trace_net_rx_pkt_rss_ip4(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV4Tcp: + assert(pkt->isip4); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip4_tcp(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6Tcp: + assert(pkt->isip6); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip6_tcp(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); + break; + case NetPktRssIpV6Ex: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6_ex(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + break; + default: + assert(false); + break; + } + + net_toeplitz_key_init(&key_data, key); + net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); + + trace_net_rx_pkt_rss_hash(rss_length, rss_hash); + + return rss_hash; +} + +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->isip4) { + return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); + } + + return 0; +} + +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; + } + + return false; +} + +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return pkt->l4hdr_info.has_tcp_data; + } + + return false; +} + struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) { assert(pkt); @@ -157,6 +399,13 @@ struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) return pkt->vec; } +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec_len; +} + void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, struct virtio_net_hdr *vhdr) { @@ -165,6 +414,14 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); } +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt) +{ + assert(pkt); + + iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); +} + bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) { assert(pkt); @@ -185,3 +442,159 @@ uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) return pkt->tci; } + +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint32_t cntr; + uint16_t csum; + uint32_t csl; + + trace_net_rx_pkt_l3_csum_validate_entry(); + + if (!pkt->isip4) { + trace_net_rx_pkt_l3_csum_validate_not_ip4(); + return false; + } + + csl = pkt->l4hdr_off - pkt->l3hdr_off; + + cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l3hdr_off, + csl, 0); + + csum = net_checksum_finish(cntr); + + *csum_valid = (csum == 0); + + trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, + cntr, csum, *csum_valid); + + return true; +} + +static uint16_t +_net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) +{ + uint32_t cntr; + uint16_t csum; + uint16_t csl; + uint32_t cso; + + trace_net_rx_pkt_l4_csum_calc_entry(); + + if (pkt->isip4) { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip4_udp(); + } else { + csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - + IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); + trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); + } + + cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, + csl, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } else { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip6_udp(); + } else { + struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; + size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; + size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + + csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - + ip6opts_len; + trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); + } + + cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, + pkt->ip6hdr_info.l4proto, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } + + cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l4hdr_off, csl, cso); + + csum = net_checksum_finish(cntr); + + trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); + + return csum; +} + +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint16_t csum; + + trace_net_rx_pkt_l4_csum_validate_entry(); + + if (!pkt->istcp && !pkt->isudp) { + trace_net_rx_pkt_l4_csum_validate_not_xxp(); + return false; + } + + if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { + trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); + return false; + } + + csum = _net_rx_pkt_calc_l4_csum(pkt); + + *csum_valid = ((csum == 0) || (csum == 0xFFFF)); + + trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); + + return true; +} + +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) +{ + uint16_t csum = 0; + uint32_t l4_cso; + + trace_net_rx_pkt_l4_csum_fix_entry(); + + if (pkt->istcp) { + l4_cso = offsetof(struct tcp_header, th_sum); + trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); + } else if (pkt->isudp) { + if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { + trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); + return false; + } + l4_cso = offsetof(struct udp_header, uh_sum); + trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); + } else { + trace_net_rx_pkt_l4_csum_fix_not_xxp(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); + return false; + } + + /* Set zero to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + /* Calculate L4 checksum */ + csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); + + /* Set calculated checksum to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); + + return true; +} diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h index 897330a157..7adf0fad51 100644 --- a/hw/net/net_rx_pkt.h +++ b/hw/net/net_rx_pkt.h @@ -78,6 +78,103 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, bool *isudp, bool *istcp); /** +* fetches L3 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L4 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L5 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt); + +/** + * fetches IP6 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt); + +/** + * fetches IP4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt); + +/** + * fetches L4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt); + +typedef enum { + NetPktRssIpV4, + NetPktRssIpV4Tcp, + NetPktRssIpV6Tcp, + NetPktRssIpV6, + NetPktRssIpV6Ex +} NetRxPktRssType; + +/** +* calculates RSS hash for packet +* +* @pkt: packet +* @type: RSS hash type +* +* Return: Toeplitz RSS hash. +* +*/ +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key); + +/** +* fetches IP identification for the packet +* +* @pkt: packet +* +*/ +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt); + +/** +* check if given packet is a TCP ACK packet +* +* @pkt: packet +* +*/ +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt); + +/** +* check if given packet contains TCP data +* +* @pkt: packet +* +*/ +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt); + +/** * returns virtio header stored in rx context * * @pkt: packet @@ -123,6 +220,37 @@ bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt); bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); /** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* +*/ +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, + int iovcnt, size_t iovoff, + bool strip_vlan); + +/** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* @vet: VLAN tag Ethernet type +* +*/ +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet); + +/** * attach data to rx packet * * @pkt: packet @@ -131,8 +259,17 @@ bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); * @strip_vlan: should the module strip vlan from data * */ -void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan); +static inline void +net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan) +{ + const struct iovec iov = { + .iov_base = (void *) data, + .iov_len = len + }; + + net_rx_pkt_attach_iovec(pkt, &iov, 1, 0, strip_vlan); +} /** * returns io vector that holds the attached data @@ -144,6 +281,15 @@ void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt); /** +* returns io vector length that holds the attached data +* +* @pkt: packet +* @ret: IOVec length +* +*/ +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt); + +/** * prints rx packet data if debug is enabled * * @pkt: packet @@ -162,6 +308,17 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, struct virtio_net_hdr *vhdr); /** +* copy passed vhdr data to packet context +* +* @pkt: packet +* @iov: VHDR iov +* @iovcnt: VHDR iov array size +* +*/ +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt); + +/** * save packet type in packet context * * @pkt: packet @@ -171,4 +328,36 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, eth_pkt_types_e packet_type); +/** +* validate TCP/UDP checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* validate IPv4 checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* fix IPv4 checksum of the packet +* +* @pkt: packet +* @ret: true if checksum was fixed, false in case packet is +* not TCP/UDP or checksum correction is not possible +* +*/ +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt); + #endif diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index 94c7e3d3f6..a64f51cbef 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -15,12 +15,8 @@ * */ -#include "qemu/osdep.h" -#include "hw/hw.h" #include "net_tx_pkt.h" #include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" #include "net/net.h" @@ -44,6 +40,7 @@ struct NetTxPkt { struct iovec *vec; uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN]; uint32_t payload_len; @@ -53,6 +50,8 @@ struct NetTxPkt { uint16_t hdr_len; eth_pkt_types_e packet_type; uint8_t l4proto; + + bool is_loopback; }; void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, @@ -72,8 +71,7 @@ void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = p->has_virt_hdr ? sizeof p->virt_hdr : 0; p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; - p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - p->vec[NET_TX_PKT_L3HDR_FRAG].iov_len = 0; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr; *pkt = p; } @@ -87,38 +85,52 @@ void net_tx_pkt_uninit(struct NetTxPkt *pkt) } } -void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt) { uint16_t csum; - uint32_t ph_raw_csum; assert(pkt); - uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; struct ip_header *ip_hdr; - - if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && - VIRTIO_NET_HDR_GSO_UDP != gso_type) { - return; - } - ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > - ETH_MAX_IP_DGRAM_LEN) { - return; - } - ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); - /* Calculate IP header checksum */ ip_hdr->ip_sum = 0; csum = net_raw_checksum((uint8_t *)ip_hdr, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); ip_hdr->ip_sum = cpu_to_be16(csum); +} + +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +{ + uint16_t csum; + uint32_t cntr, cso; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + + if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > + ETH_MAX_IP_DGRAM_LEN) { + return; + } + + if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || + gso_type == VIRTIO_NET_HDR_GSO_UDP) { + /* Calculate IP header checksum */ + net_tx_pkt_update_ip_hdr_checksum(pkt); + + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len, + IP_PROTO_TCP, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else { + return; + } - /* Calculate IP pseudo header checksum */ - ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); - csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); } @@ -160,15 +172,19 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) if (bytes_read < l2_hdr->iov_len) { l2_hdr->iov_len = 0; + l3_hdr->iov_len = 0; + pkt->packet_type = ETH_PKT_UCAST; return false; + } else { + l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN; + l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); } - l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len); switch (l3_proto) { case ETH_P_IP: - l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, sizeof(struct ip_header)); @@ -178,27 +194,45 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) } l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); - pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; - /* copy optional IPv4 header data */ - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, - l2_hdr->iov_len + sizeof(struct ip_header), - l3_hdr->iov_base + sizeof(struct ip_header), - l3_hdr->iov_len - sizeof(struct ip_header)); - if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + if (l3_hdr->iov_len < sizeof(struct ip_header)) { l3_hdr->iov_len = 0; return false; } + + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) { + /* copy optional IPv4 header data if any*/ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + } + break; case ETH_P_IPV6: + { + eth_ip6_hdr_info hdrinfo; + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - &pkt->l4proto, &full_ip6hdr_len)) { + &hdrinfo)) { l3_hdr->iov_len = 0; return false; } - l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + pkt->l4proto = hdrinfo.l4proto; + full_ip6hdr_len = hdrinfo.full_hdr_len; + + if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) { + l3_hdr->iov_len = 0; + return false; + } bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, full_ip6hdr_len); @@ -210,40 +244,35 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) l3_hdr->iov_len = full_ip6hdr_len; } break; - + } default: l3_hdr->iov_len = 0; break; } net_tx_pkt_calculate_hdr_len(pkt); - pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); return true; } -static bool net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) +static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) { - size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; - + pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->max_payload_frags, pkt->raw, pkt->raw_frags, - pkt->hdr_len, payload_len); + pkt->hdr_len, pkt->payload_len); +} - if (pkt->payload_frags != (uint32_t) -1) { - pkt->payload_len = payload_len; +bool net_tx_pkt_parse(struct NetTxPkt *pkt) +{ + if (net_tx_pkt_parse_headers(pkt)) { + net_tx_pkt_rebuild_payload(pkt); return true; } else { return false; } } -bool net_tx_pkt_parse(struct NetTxPkt *pkt) -{ - return net_tx_pkt_parse_headers(pkt) && - net_tx_pkt_rebuild_payload(pkt); -} - struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) { assert(pkt); @@ -256,7 +285,7 @@ static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; uint16_t l3_proto; - l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1, pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); if (!tso_enable) { @@ -288,7 +317,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, break; case VIRTIO_NET_HDR_GSO_UDP: - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); break; @@ -297,7 +326,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr)); pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; break; default: @@ -322,13 +351,14 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, } } -void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype) { bool is_new; assert(pkt); - eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, - vlan, &is_new); + eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, vlan_ethtype, &is_new); /* update l2hdrlen */ if (is_new) { @@ -354,14 +384,19 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, mapped_len = len; ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); - ventry->iov_len = mapped_len; - pkt->raw_frags += !!ventry->iov_base; - if ((ventry->iov_base == NULL) || (len != mapped_len)) { + if ((ventry->iov_base != NULL) && (len == mapped_len)) { + ventry->iov_len = mapped_len; + pkt->raw_frags++; + return true; + } else { return false; } +} - return true; +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt) +{ + return pkt->raw_frags > 0; } eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) @@ -401,14 +436,8 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); - g_free(pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base); - pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - assert(pkt->vec); - for (i = NET_TX_PKT_L2HDR_FRAG; - i < pkt->payload_frags + NET_TX_PKT_PL_START_FRAG; i++) { - pkt->vec[i].iov_len = 0; - } + pkt->payload_len = 0; pkt->payload_frags = 0; @@ -417,12 +446,10 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) assert(pkt->raw[i].iov_base); cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, false, pkt->raw[i].iov_len); - pkt->raw[i].iov_len = 0; } pkt->raw_frags = 0; pkt->hdr_len = 0; - pkt->packet_type = 0; pkt->l4proto = 0; } @@ -431,6 +458,7 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; uint32_t csum_cntr; uint16_t csum = 0; + uint32_t cso; /* num of iovec without vhdr */ uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; uint16_t csl; @@ -443,12 +471,13 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) /* Calculate L4 TCP/UDP checksum */ csl = pkt->payload_len; - /* data checksum */ - csum_cntr = - net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); /* add pseudo header to csum */ iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso); + + /* data checksum */ + csum_cntr += + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso); /* Put the checksum obtained into the packet */ csum = cpu_to_be16(net_checksum_finish(csum_cntr)); @@ -471,7 +500,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; - while (fetched < pkt->virt_hdr.gso_size) { + while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) { /* no more place in fragment iov */ if (*dst_idx == NET_MAX_FRAG_SG_LIST) { @@ -486,7 +515,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, - pkt->virt_hdr.gso_size - fetched); + IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size) - fetched); *src_offset += dst[*dst_idx].iov_len; fetched += dst[*dst_idx].iov_len; @@ -502,6 +531,16 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, return fetched; } +static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt, + NetClientState *nc, const struct iovec *iov, int iov_cnt) +{ + if (pkt->is_loopback) { + nc->info->receive_iov(nc, iov, iov_cnt); + } else { + qemu_sendv_packet(nc, iov, iov_cnt); + } +} + static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, NetClientState *nc) { @@ -540,7 +579,7 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); - qemu_sendv_packet(nc, fragment, dst_idx); + net_tx_pkt_sendv(pkt, nc, fragment, dst_idx); fragment_offset += fragment_len; @@ -572,10 +611,21 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) if (pkt->has_virt_hdr || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { - qemu_sendv_packet(nc, pkt->vec, + net_tx_pkt_sendv(pkt, nc, pkt->vec, pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); return true; } return net_tx_pkt_do_sw_fragmentation(pkt, nc); } + +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc) +{ + bool res; + + pkt->is_loopback = true; + res = net_tx_pkt_send(pkt, nc); + pkt->is_loopback = false; + + return res; +} diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h index be2e117b7d..e49772d238 100644 --- a/hw/net/net_tx_pkt.h +++ b/hw/net/net_tx_pkt.h @@ -18,6 +18,7 @@ #ifndef NET_TX_PKT_H #define NET_TX_PKT_H +#include "qemu/osdep.h" #include "net/eth.h" #include "exec/hwaddr.h" @@ -64,13 +65,29 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, bool csum_enable, uint32_t gso_size); /** - * updates vlan tag, and adds vlan header in case it is missing - * - * @pkt: packet - * @vlan: VLAN tag - * - */ -void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan); +* updates vlan tag, and adds vlan header with custom ethernet type +* in case it is missing. +* +* @pkt: packet +* @vlan: VLAN tag +* @vlan_ethtype: VLAN header Ethernet type +* +*/ +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype); + +/** +* updates vlan tag, and adds vlan header in case it is missing +* +* @pkt: packet +* @vlan: VLAN tag +* +*/ +static inline void +net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +{ + net_tx_pkt_setup_vlan_header_ex(pkt, vlan, ETH_P_VLAN); +} /** * populate data fragment into pkt context. @@ -84,7 +101,7 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, size_t len); /** - * fix ip header fields and calculate checksums needed. + * Fix ip header fields and calculate IP header and pseudo header checksums. * * @pkt: packet * @@ -92,6 +109,14 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt); /** + * Calculate the IP header checksum. + * + * @pkt: packet + * + */ +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt); + +/** * get length of all populated data. * * @pkt: packet @@ -136,6 +161,17 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt); bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); /** +* Redirect packet directly to receive path (emulate loopback phy). +* Handles sw offloads if vhdr is not supported. +* +* @pkt: packet +* @nc: NetClientState +* @ret: operation result +* +*/ +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc); + +/** * parse raw packet data and analyze offload requirements. * * @pkt: packet @@ -143,4 +179,12 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); */ bool net_tx_pkt_parse(struct NetTxPkt *pkt); +/** +* indicates if there are data fragments held by this packet object. +* +* @pkt: packet +* +*/ +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt); + #endif |