diff options
46 files changed, 9306 insertions, 1636 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 091272e1a2..df990a8ff6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -954,6 +954,14 @@ S: Maintained F: hw/*/xilinx_* F: include/hw/xilinx.h +Network packet abstractions +M: Dmitry Fleytman <dmitry@daynix.com> +S: Maintained +F: include/net/eth.h +F: net/eth.c +F: hw/net/net_rx_pkt* +F: hw/net/net_tx_pkt* + Vmware M: Dmitry Fleytman <dmitry@daynix.com> S: Maintained @@ -973,6 +981,16 @@ F: hw/acpi/nvdimm.c F: hw/mem/nvdimm.c F: include/hw/mem/nvdimm.h +e1000x +M: Dmitry Fleytman <dmitry@daynix.com> +S: Maintained +F: hw/net/e1000x* + +e1000e +M: Dmitry Fleytman <dmitry@daynix.com> +S: Maintained +F: hw/net/e1000e* + Subsystems ---------- Audio diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 9c8bc68c4c..fff7ce3b14 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -18,6 +18,7 @@ CONFIG_MEGASAS_SCSI_PCI=y CONFIG_MPTSAS_SCSI_PCI=y CONFIG_RTL8139_PCI=y CONFIG_E1000_PCI=y +CONFIG_E1000E_PCI=y CONFIG_VMXNET3_PCI=y CONFIG_IDE_CORE=y CONFIG_IDE_QDEV=y diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index 2f878b935d..1cd749aa4b 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -191,6 +191,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp) } qdev_set_nic_properties(DEVICE(&s->fec), &nd_table[0]); + object_property_set_bool(OBJECT(&s->fec), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index a5331bfd33..0c00e7a560 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -105,6 +105,10 @@ static void fsl_imx6_init(Object *obj) snprintf(name, NAME_SIZE, "spi%d", i + 1); object_property_add_child(obj, name, OBJECT(&s->spi[i]), NULL); } + + object_initialize(&s->eth, sizeof(s->eth), TYPE_IMX_ENET); + qdev_set_parent_bus(DEVICE(&s->eth), sysbus_get_default()); + object_property_add_child(obj, "eth", OBJECT(&s->eth), NULL); } static void fsl_imx6_realize(DeviceState *dev, Error **errp) @@ -381,6 +385,19 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp) spi_table[i].irq)); } + object_property_set_bool(OBJECT(&s->eth), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->eth), 0, FSL_IMX6_ENET_ADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->eth), 0, + qdev_get_gpio_in(DEVICE(&s->a9mpcore), + FSL_IMX6_ENET_MAC_IRQ)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->eth), 1, + qdev_get_gpio_in(DEVICE(&s->a9mpcore), + FSL_IMX6_ENET_MAC_1588_IRQ)); + /* ROM memory */ memory_region_init_rom_device(&s->rom, NULL, NULL, NULL, "imx6.rom", FSL_IMX6_ROM_SIZE, &err); diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index 64d044923c..fe61e9fb2b 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -6,9 +6,10 @@ common-obj-$(CONFIG_NE2000_PCI) += ne2000.o common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o -common-obj-$(CONFIG_E1000_PCI) += e1000.o +common-obj-$(CONFIG_E1000_PCI) += e1000.o e1000x_common.o +common-obj-$(CONFIG_E1000E_PCI) += e1000e.o e1000e_core.o e1000x_common.o common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o -common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet_tx_pkt.o vmxnet_rx_pkt.o +common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o common-obj-$(CONFIG_SMC91C111) += smc91c111.o diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 8e79b550e6..36e3dbe347 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -36,7 +36,7 @@ #include "qemu/iov.h" #include "qemu/range.h" -#include "e1000_regs.h" +#include "e1000x_common.h" static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -64,11 +64,6 @@ static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL); #define PNPMMIO_SIZE 0x20000 #define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */ -/* this is the size past which hardware will drop packets when setting LPE=0 */ -#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 -/* this is the size past which hardware will drop packets when setting LPE=1 */ -#define MAXIMUM_ETHERNET_LPE_SIZE 16384 - #define MAXIMUM_ETHERNET_HDR_LEN (14+4) /* @@ -102,22 +97,9 @@ typedef struct E1000State_st { unsigned char vlan[4]; unsigned char data[0x10000]; uint16_t size; - unsigned char sum_needed; unsigned char vlan_needed; - uint8_t ipcss; - uint8_t ipcso; - uint16_t ipcse; - uint8_t tucss; - uint8_t tucso; - uint16_t tucse; - uint8_t hdr_len; - uint16_t mss; - uint32_t paylen; + e1000x_txd_props props; uint16_t tso_frames; - char tse; - int8_t ip; - int8_t tcp; - char cptse; // current packet tse bit } tx; struct { @@ -162,52 +144,19 @@ typedef struct E1000BaseClass { #define E1000_DEVICE_GET_CLASS(obj) \ OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE) -#define defreg(x) x = (E1000_##x>>2) -enum { - defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), - defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), - defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), - defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH), - defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT), - defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), - defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), - defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), - defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), - defreg(RA), defreg(MTA), defreg(CRCERRS), defreg(VFTA), - defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), - defreg(ITR), defreg(FCRUC), defreg(TDFH), defreg(TDFT), - defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(RDFH), - defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC), - defreg(IPAV), defreg(WUC), defreg(WUS), defreg(AIT), - defreg(IP6AT), defreg(IP4AT), defreg(FFLT), defreg(FFMT), - defreg(FFVT), defreg(WUPM), defreg(PBM), defreg(SCC), - defreg(ECOL), defreg(MCC), defreg(LATECOL), defreg(COLC), - defreg(DC), defreg(TNCRS), defreg(SEC), defreg(CEXTERR), - defreg(RLEC), defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), - defreg(XOFFTXC), defreg(RFC), defreg(RJC), defreg(RNBC), - defreg(TSCTFC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC), - defreg(RUC), defreg(ROC), defreg(GORCL), defreg(GORCH), - defreg(GOTCL), defreg(GOTCH), defreg(BPRC), defreg(MPRC), - defreg(TSCTC), defreg(PRC64), defreg(PRC127), defreg(PRC255), - defreg(PRC511), defreg(PRC1023), defreg(PRC1522), defreg(PTC64), - defreg(PTC127), defreg(PTC255), defreg(PTC511), defreg(PTC1023), - defreg(PTC1522), defreg(MPTC), defreg(BPTC) -}; - static void -e1000_link_down(E1000State *s) +e1000_link_up(E1000State *s) { - s->mac_reg[STATUS] &= ~E1000_STATUS_LU; - s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS; - s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; - s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK; + e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg); + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static void -e1000_link_up(E1000State *s) +e1000_autoneg_done(E1000State *s) { - s->mac_reg[STATUS] |= E1000_STATUS_LU; - s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg); /* E1000_STATUS_LU is tested by e1000_can_receive() */ qemu_flush_queued_packets(qemu_get_queue(s->nic)); @@ -233,10 +182,7 @@ set_phy_ctrl(E1000State *s, int index, uint16_t val) * down. */ if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) { - e1000_link_down(s); - DBGOUT(PHY, "Start link auto negotiation\n"); - timer_mod(s->autoneg_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer); } } @@ -401,43 +347,16 @@ e1000_autoneg_timer(void *opaque) { E1000State *s = opaque; if (!qemu_get_queue(s->nic)->link_down) { - e1000_link_up(s); - s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; - s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; - DBGOUT(PHY, "Auto negotiation is completed\n"); + e1000_autoneg_done(s); set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */ } } -static int -rxbufsize(uint32_t v) -{ - v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 | - E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 | - E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256; - switch (v) { - case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384: - return 16384; - case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192: - return 8192; - case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096: - return 4096; - case E1000_RCTL_SZ_1024: - return 1024; - case E1000_RCTL_SZ_512: - return 512; - case E1000_RCTL_SZ_256: - return 256; - } - return 2048; -} - static void e1000_reset(void *opaque) { E1000State *d = opaque; E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d); uint8_t *macaddr = d->conf.macaddr.a; - int i; timer_del(d->autoneg_timer); timer_del(d->mit_timer); @@ -453,17 +372,10 @@ static void e1000_reset(void *opaque) memset(&d->tx, 0, sizeof d->tx); if (qemu_get_queue(d->nic)->link_down) { - e1000_link_down(d); + e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg); } - /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */ - d->mac_reg[RA] = 0; - d->mac_reg[RA + 1] = E1000_RAH_AV; - for (i = 0; i < 4; i++) { - d->mac_reg[RA] |= macaddr[i] << (8 * i); - d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0; - } - qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr); + e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr); } static void @@ -477,7 +389,7 @@ static void set_rx_control(E1000State *s, int index, uint32_t val) { s->mac_reg[RCTL] = val; - s->rxbuf_size = rxbufsize(val); + s->rxbuf_size = e1000x_rxbufsize(val); s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], s->mac_reg[RCTL]); @@ -598,89 +510,15 @@ putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse) } static inline void -inc_reg_if_not_full(E1000State *s, int index) -{ - if (s->mac_reg[index] != 0xffffffff) { - s->mac_reg[index]++; - } -} - -static inline void inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr) { if (!memcmp(arr, bcast, sizeof bcast)) { - inc_reg_if_not_full(s, BPTC); + e1000x_inc_reg_if_not_full(s->mac_reg, BPTC); } else if (arr[0] & 1) { - inc_reg_if_not_full(s, MPTC); - } -} - -static void -grow_8reg_if_not_full(E1000State *s, int index, int size) -{ - uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32; - - if (sum + size < sum) { - sum = ~0ULL; - } else { - sum += size; - } - s->mac_reg[index] = sum; - s->mac_reg[index+1] = sum >> 32; -} - -static void -increase_size_stats(E1000State *s, const int *size_regs, int size) -{ - if (size > 1023) { - inc_reg_if_not_full(s, size_regs[5]); - } else if (size > 511) { - inc_reg_if_not_full(s, size_regs[4]); - } else if (size > 255) { - inc_reg_if_not_full(s, size_regs[3]); - } else if (size > 127) { - inc_reg_if_not_full(s, size_regs[2]); - } else if (size > 64) { - inc_reg_if_not_full(s, size_regs[1]); - } else if (size == 64) { - inc_reg_if_not_full(s, size_regs[0]); + e1000x_inc_reg_if_not_full(s->mac_reg, MPTC); } } -static inline int -vlan_enabled(E1000State *s) -{ - return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0); -} - -static inline int -vlan_rx_filter_enabled(E1000State *s) -{ - return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0); -} - -static inline int -is_vlan_packet(E1000State *s, const uint8_t *buf) -{ - return (be16_to_cpup((uint16_t *)(buf + 12)) == - le16_to_cpu(s->mac_reg[VET])); -} - -static inline int -is_vlan_txd(uint32_t txd_lower) -{ - return ((txd_lower & E1000_TXD_CMD_VLE) != 0); -} - -/* FCS aka Ethernet CRC-32. We don't get it from backends and can't - * fill it in, just pad descriptor length by 4 bytes unless guest - * told us to strip it off the packet. */ -static inline int -fcs_len(E1000State *s) -{ - return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4; -} - static void e1000_send_packet(E1000State *s, const uint8_t *buf, int size) { @@ -694,7 +532,7 @@ e1000_send_packet(E1000State *s, const uint8_t *buf, int size) qemu_send_packet(nc, buf, size); } inc_tx_bcast_or_mcast_count(s, buf); - increase_size_stats(s, PTCregs, size); + e1000x_increase_size_stats(s->mac_reg, PTCregs, size); } static void @@ -704,34 +542,34 @@ xmit_seg(E1000State *s) unsigned int frames = s->tx.tso_frames, css, sofar; struct e1000_tx *tp = &s->tx; - if (tp->tse && tp->cptse) { - css = tp->ipcss; + if (tp->props.tse && tp->props.cptse) { + css = tp->props.ipcss; DBGOUT(TXSUM, "frames %d size %d ipcss %d\n", frames, tp->size, css); - if (tp->ip) { /* IPv4 */ + if (tp->props.ip) { /* IPv4 */ stw_be_p(tp->data+css+2, tp->size - css); stw_be_p(tp->data+css+4, be16_to_cpup((uint16_t *)(tp->data+css+4))+frames); } else { /* IPv6 */ stw_be_p(tp->data+css+4, tp->size - css); } - css = tp->tucss; + css = tp->props.tucss; len = tp->size - css; - DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len); - if (tp->tcp) { - sofar = frames * tp->mss; + DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->props.tcp, css, len); + if (tp->props.tcp) { + sofar = frames * tp->props.mss; stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */ - if (tp->paylen - sofar > tp->mss) { + if (tp->props.paylen - sofar > tp->props.mss) { tp->data[css + 13] &= ~9; /* PSH, FIN */ } else if (frames) { - inc_reg_if_not_full(s, TSCTC); + e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC); } } else /* UDP */ stw_be_p(tp->data+css+4, len); - if (tp->sum_needed & E1000_TXD_POPTS_TXSM) { + if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) { unsigned int phsum; // add pseudo-header length before checksum calculation - sp = (uint16_t *)(tp->data + tp->tucso); + sp = (uint16_t *)(tp->data + tp->props.tucso); phsum = be16_to_cpup(sp) + len; phsum = (phsum >> 16) + (phsum & 0xffff); stw_be_p(sp, phsum); @@ -739,10 +577,14 @@ xmit_seg(E1000State *s) tp->tso_frames++; } - if (tp->sum_needed & E1000_TXD_POPTS_TXSM) - putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse); - if (tp->sum_needed & E1000_TXD_POPTS_IXSM) - putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse); + if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) { + putsum(tp->data, tp->size, tp->props.tucso, + tp->props.tucss, tp->props.tucse); + } + if (tp->props.sum_needed & E1000_TXD_POPTS_IXSM) { + putsum(tp->data, tp->size, tp->props.ipcso, + tp->props.ipcss, tp->props.ipcse); + } if (tp->vlan_needed) { memmove(tp->vlan, tp->data, 4); memmove(tp->data, tp->data + 4, 8); @@ -752,8 +594,8 @@ xmit_seg(E1000State *s) e1000_send_packet(s, tp->data, tp->size); } - inc_reg_if_not_full(s, TPT); - grow_8reg_if_not_full(s, TOTL, s->tx.size); + e1000x_inc_reg_if_not_full(s->mac_reg, TPT); + e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size); s->mac_reg[GPTC] = s->mac_reg[TPT]; s->mac_reg[GOTCL] = s->mac_reg[TOTL]; s->mac_reg[GOTCH] = s->mac_reg[TOTH]; @@ -765,7 +607,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) PCIDevice *d = PCI_DEVICE(s); uint32_t txd_lower = le32_to_cpu(dp->lower.data); uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); - unsigned int split_size = txd_lower & 0xffff, bytes, sz, op; + unsigned int split_size = txd_lower & 0xffff, bytes, sz; unsigned int msh = 0xfffff; uint64_t addr; struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; @@ -773,38 +615,27 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE); if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */ - op = le32_to_cpu(xp->cmd_and_length); - tp->ipcss = xp->lower_setup.ip_fields.ipcss; - tp->ipcso = xp->lower_setup.ip_fields.ipcso; - tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse); - tp->tucss = xp->upper_setup.tcp_fields.tucss; - tp->tucso = xp->upper_setup.tcp_fields.tucso; - tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse); - tp->paylen = op & 0xfffff; - tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len; - tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss); - tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0; - tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; - tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; + e1000x_read_tx_ctx_descr(xp, &tp->props); tp->tso_frames = 0; - if (tp->tucso == 0) { /* this is probably wrong */ + if (tp->props.tucso == 0) { /* this is probably wrong */ DBGOUT(TXSUM, "TCP/UDP: cso 0!\n"); - tp->tucso = tp->tucss + (tp->tcp ? 16 : 6); + tp->props.tucso = tp->props.tucss + (tp->props.tcp ? 16 : 6); } return; } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { // data descriptor if (tp->size == 0) { - tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8; + tp->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8; } - tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0; + tp->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0; } else { // legacy descriptor - tp->cptse = 0; + tp->props.cptse = 0; } - if (vlan_enabled(s) && is_vlan_txd(txd_lower) && - (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) { + if (e1000x_vlan_enabled(s->mac_reg) && + e1000x_is_vlan_txd(txd_lower) && + (tp->props.cptse || txd_lower & E1000_TXD_CMD_EOP)) { tp->vlan_needed = 1; stw_be_p(tp->vlan_header, le16_to_cpu(s->mac_reg[VET])); @@ -813,8 +644,8 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) } addr = le64_to_cpu(dp->buffer_addr); - if (tp->tse && tp->cptse) { - msh = tp->hdr_len + tp->mss; + if (tp->props.tse && tp->props.cptse) { + msh = tp->props.hdr_len + tp->props.mss; do { bytes = split_size; if (tp->size + bytes > msh) @@ -823,19 +654,19 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) bytes = MIN(sizeof(tp->data) - tp->size, bytes); pci_dma_read(d, addr, tp->data + tp->size, bytes); sz = tp->size + bytes; - if (sz >= tp->hdr_len && tp->size < tp->hdr_len) { - memmove(tp->header, tp->data, tp->hdr_len); + if (sz >= tp->props.hdr_len && tp->size < tp->props.hdr_len) { + memmove(tp->header, tp->data, tp->props.hdr_len); } tp->size = sz; addr += bytes; if (sz == msh) { xmit_seg(s); - memmove(tp->data, tp->header, tp->hdr_len); - tp->size = tp->hdr_len; + memmove(tp->data, tp->header, tp->props.hdr_len); + tp->size = tp->props.hdr_len; } split_size -= bytes; } while (bytes && split_size); - } else if (!tp->tse && tp->cptse) { + } else if (!tp->props.tse && tp->props.cptse) { // context descriptor TSE is not set, while data descriptor TSE is set DBGOUT(TXERR, "TCP segmentation error\n"); } else { @@ -846,14 +677,14 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) if (!(txd_lower & E1000_TXD_CMD_EOP)) return; - if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) { + if (!(tp->props.tse && tp->props.cptse && tp->size < tp->props.hdr_len)) { xmit_seg(s); } tp->tso_frames = 0; - tp->sum_needed = 0; + tp->props.sum_needed = 0; tp->vlan_needed = 0; tp->size = 0; - tp->cptse = 0; + tp->props.cptse = 0; } static uint32_t @@ -925,11 +756,11 @@ start_xmit(E1000State *s) static int receive_filter(E1000State *s, const uint8_t *buf, int size) { - static const int mta_shift[] = {4, 3, 2, 0}; - uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp; + uint32_t rctl = s->mac_reg[RCTL]; int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1); - if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) { + if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) && + e1000x_vlan_rx_filter_enabled(s->mac_reg)) { uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) + ((vid >> 5) & 0x7f)); @@ -942,44 +773,16 @@ receive_filter(E1000State *s, const uint8_t *buf, int size) } if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */ - inc_reg_if_not_full(s, MPRC); + e1000x_inc_reg_if_not_full(s->mac_reg, MPRC); return 1; } if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */ - inc_reg_if_not_full(s, BPRC); - return 1; - } - - for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) { - if (!(rp[1] & E1000_RAH_AV)) - continue; - ra[0] = cpu_to_le32(rp[0]); - ra[1] = cpu_to_le32(rp[1]); - if (!memcmp(buf, (uint8_t *)ra, 6)) { - DBGOUT(RXFILTER, - "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n", - (int)(rp - s->mac_reg - RA)/2, - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - return 1; - } - } - DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n", - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - - f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; - f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; - if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) { - inc_reg_if_not_full(s, MPRC); + e1000x_inc_reg_if_not_full(s->mac_reg, BPRC); return 1; } - DBGOUT(RXFILTER, - "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n", - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], - (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5, - s->mac_reg[MTA + (f >> 5)]); - return 0; + return e1000x_rx_group_filter(s->mac_reg, buf); } static void @@ -989,13 +792,11 @@ e1000_set_link_status(NetClientState *nc) uint32_t old_status = s->mac_reg[STATUS]; if (nc->link_down) { - e1000_link_down(s); + e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg); } else { if (have_autoneg(s) && !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { - /* emulate auto-negotiation if supported */ - timer_mod(s->autoneg_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer); } else { e1000_link_up(s); } @@ -1028,9 +829,7 @@ e1000_can_receive(NetClientState *nc) { E1000State *s = qemu_get_nic_opaque(nc); - return (s->mac_reg[STATUS] & E1000_STATUS_LU) && - (s->mac_reg[RCTL] & E1000_RCTL_EN) && - (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) && + return e1000x_rx_ready(&s->parent_obj, s->mac_reg) && e1000_has_rxbufs(s, 1); } @@ -1061,14 +860,8 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) size_t desc_offset; size_t desc_size; size_t total_size; - static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511, - PRC1023, PRC1522 }; - - if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) { - return -1; - } - if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) { + if (!e1000x_hw_rx_enabled(s->mac_reg)) { return -1; } @@ -1076,7 +869,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) if (size < sizeof(min_buf)) { iov_to_buf(iov, iovcnt, 0, min_buf, size); memset(&min_buf[size], 0, sizeof(min_buf) - size); - inc_reg_if_not_full(s, RUC); + e1000x_inc_reg_if_not_full(s->mac_reg, RUC); min_iov.iov_base = filter_buf = min_buf; min_iov.iov_len = size = sizeof(min_buf); iovcnt = 1; @@ -1088,11 +881,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) } /* Discard oversized packets if !LPE and !SBP. */ - if ((size > MAXIMUM_ETHERNET_LPE_SIZE || - (size > MAXIMUM_ETHERNET_VLAN_SIZE - && !(s->mac_reg[RCTL] & E1000_RCTL_LPE))) - && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) { - inc_reg_if_not_full(s, ROC); + if (e1000x_is_oversized(s->mac_reg, size)) { return size; } @@ -1100,7 +889,8 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) return size; } - if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) { + if (e1000x_vlan_enabled(s->mac_reg) && + e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) { vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf + 14))); iov_ofs = 4; @@ -1119,7 +909,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) rdh_start = s->mac_reg[RDH]; desc_offset = 0; - total_size = size + fcs_len(s); + total_size = size + e1000x_fcs_len(s->mac_reg); if (!e1000_has_rxbufs(s, total_size)) { set_ics(s, 0, E1000_ICS_RXO); return -1; @@ -1179,17 +969,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) } } while (desc_offset < total_size); - increase_size_stats(s, PRCregs, total_size); - inc_reg_if_not_full(s, TPR); - s->mac_reg[GPRC] = s->mac_reg[TPR]; - /* TOR - Total Octets Received: - * This register includes bytes received in a packet from the <Destination - * Address> field through the <CRC> field, inclusively. - * Always include FCS length (4) in size. - */ - grow_8reg_if_not_full(s, TORL, size+4); - s->mac_reg[GORCL] = s->mac_reg[TORL]; - s->mac_reg[GORCH] = s->mac_reg[TORH]; + e1000x_update_rx_total_stats(s->mac_reg, size, total_size); n = E1000_ICS_RXT0; if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) @@ -1670,20 +1450,20 @@ static const VMStateDescription vmstate_e1000 = { VMSTATE_UINT16(eecd_state.bitnum_out, E1000State), VMSTATE_UINT16(eecd_state.reading, E1000State), VMSTATE_UINT32(eecd_state.old_eecd, E1000State), - VMSTATE_UINT8(tx.ipcss, E1000State), - VMSTATE_UINT8(tx.ipcso, E1000State), - VMSTATE_UINT16(tx.ipcse, E1000State), - VMSTATE_UINT8(tx.tucss, E1000State), - VMSTATE_UINT8(tx.tucso, E1000State), - VMSTATE_UINT16(tx.tucse, E1000State), - VMSTATE_UINT32(tx.paylen, E1000State), - VMSTATE_UINT8(tx.hdr_len, E1000State), - VMSTATE_UINT16(tx.mss, E1000State), + VMSTATE_UINT8(tx.props.ipcss, E1000State), + VMSTATE_UINT8(tx.props.ipcso, E1000State), + VMSTATE_UINT16(tx.props.ipcse, E1000State), + VMSTATE_UINT8(tx.props.tucss, E1000State), + VMSTATE_UINT8(tx.props.tucso, E1000State), + VMSTATE_UINT16(tx.props.tucse, E1000State), + VMSTATE_UINT32(tx.props.paylen, E1000State), + VMSTATE_UINT8(tx.props.hdr_len, E1000State), + VMSTATE_UINT16(tx.props.mss, E1000State), VMSTATE_UINT16(tx.size, E1000State), VMSTATE_UINT16(tx.tso_frames, E1000State), - VMSTATE_UINT8(tx.sum_needed, E1000State), - VMSTATE_INT8(tx.ip, E1000State), - VMSTATE_INT8(tx.tcp, E1000State), + VMSTATE_UINT8(tx.props.sum_needed, E1000State), + VMSTATE_INT8(tx.props.ip, E1000State), + VMSTATE_INT8(tx.props.tcp, E1000State), VMSTATE_BUFFER(tx.header, E1000State), VMSTATE_BUFFER(tx.data, E1000State), VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64), @@ -1806,15 +1586,11 @@ static void e1000_write_config(PCIDevice *pci_dev, uint32_t address, } } - static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) { DeviceState *dev = DEVICE(pci_dev); E1000State *d = E1000(pci_dev); - PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev); uint8_t *pci_conf; - uint16_t checksum = 0; - int i; uint8_t *macaddr; pci_dev->config_write = e1000_write_config; @@ -1832,17 +1608,14 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io); - memmove(d->eeprom_data, e1000_eeprom_template, - sizeof e1000_eeprom_template); qemu_macaddr_default_if_unset(&d->conf.macaddr); macaddr = d->conf.macaddr.a; - for (i = 0; i < 3; i++) - d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i]; - d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id; - for (i = 0; i < EEPROM_CHECKSUM_REG; i++) - checksum += d->eeprom_data[i]; - checksum = (uint16_t) EEPROM_SUM - checksum; - d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum; + + e1000x_core_prepare_eeprom(d->eeprom_data, + e1000_eeprom_template, + sizeof(e1000_eeprom_template), + PCI_DEVICE_GET_CLASS(pci_dev)->device_id, + macaddr); d->nic = qemu_new_nic(&net_e1000_info, &d->conf, object_get_typename(OBJECT(d)), dev->id, d); diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index 1c40244ab5..c1acd458f2 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -85,6 +85,7 @@ #define E1000_DEV_ID_82573E 0x108B #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A +#define E1000_DEV_ID_82574L 0x10D3 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 @@ -104,6 +105,7 @@ #define E1000_PHY_ID2_82544x 0xC30 #define E1000_PHY_ID2_8254xx_DEFAULT 0xC20 /* 82540x, 82545x, and 82546x */ #define E1000_PHY_ID2_82573x 0xCC0 +#define E1000_PHY_ID2_82574x 0xCB1 /* Register Set. (82543, 82544) * @@ -135,8 +137,11 @@ #define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ #define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ #define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ +#define E1000_EIAC 0x000DC /* Ext. Interrupt Auto Clear - RW */ #define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ #define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ +#define E1000_IVAR 0x000E4 /* Interrupt Vector Allocation Register - RW */ +#define E1000_EITR 0x000E8 /* Extended Interrupt Throttling Rate - RW */ #define E1000_RCTL 0x00100 /* RX Control - RW */ #define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */ #define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */ @@ -145,6 +150,7 @@ #define E1000_RDH1 0x02910 /* RX Descriptor Head (1) - RW */ #define E1000_RDT1 0x02918 /* RX Descriptor Tail (1) - RW */ #define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ +#define E1000_FCRTV 0x05F40 /* Flow Control Refresh Timer Value - RW */ #define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ #define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ #define E1000_TCTL 0x00400 /* TX Control - RW */ @@ -161,6 +167,10 @@ #define E1000_PBM 0x10000 /* Packet Buffer Memory - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size - RW */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEMNGDATA 0x01014 /* MNG EEPROM Read/Write data */ +#define E1000_FLMNGCTL 0x01018 /* MNG Flash Control */ +#define E1000_FLMNGDATA 0x0101C /* MNG FLASH Read data */ +#define E1000_FLMNGCNT 0x01020 /* MNG FLASH Read Counter */ #define E1000_FLASH_UPDATES 1000 #define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ #define E1000_FLASHT 0x01028 /* FLASH Timer Register */ @@ -169,9 +179,12 @@ #define E1000_FLSWDATA 0x01034 /* FLASH data register */ #define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ #define E1000_FLOP 0x0103C /* FLASH Opcode Register */ +#define E1000_FLOL 0x01050 /* FEEP Auto Load */ #define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ #define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ +#define E1000_FCRTL_A 0x00168 /* Alias to FCRTL */ #define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ +#define E1000_FCRTH_A 0x00160 /* Alias to FCRTH */ #define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ #define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ #define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ @@ -179,11 +192,17 @@ #define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ #define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ #define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ +#define E1000_RDTR_A 0x00108 /* Alias to RDTR */ #define E1000_RDBAL0 E1000_RDBAL /* RX Desc Base Address Low (0) - RW */ +#define E1000_RDBAL0_A 0x00110 /* Alias to RDBAL0 */ #define E1000_RDBAH0 E1000_RDBAH /* RX Desc Base Address High (0) - RW */ +#define E1000_RDBAH0_A 0x00114 /* Alias to RDBAH0 */ #define E1000_RDLEN0 E1000_RDLEN /* RX Desc Length (0) - RW */ +#define E1000_RDLEN0_A 0x00118 /* Alias to RDLEN0 */ #define E1000_RDH0 E1000_RDH /* RX Desc Head (0) - RW */ +#define E1000_RDH0_A 0x00120 /* Alias to RDH0 */ #define E1000_RDT0 E1000_RDT /* RX Desc Tail (0) - RW */ +#define E1000_RDT0_A 0x00128 /* Alias to RDT0 */ #define E1000_RDTR0 E1000_RDTR /* RX Delay Timer (0) - RW */ #define E1000_RXDCTL 0x02828 /* RX Descriptor Control queue 0 - RW */ #define E1000_RXDCTL1 0x02928 /* RX Descriptor Control queue 1 - RW */ @@ -192,22 +211,33 @@ #define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ #define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ #define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ +#define E1000_POEMB 0x00F10 /* PHY OEM Bits Register - RW */ #define E1000_RDFH 0x02410 /* Receive Data FIFO Head Register - RW */ +#define E1000_RDFH_A 0x08000 /* Alias to RDFH */ #define E1000_RDFT 0x02418 /* Receive Data FIFO Tail Register - RW */ +#define E1000_RDFT_A 0x08008 /* Alias to RDFT */ #define E1000_RDFHS 0x02420 /* Receive Data FIFO Head Saved Register - RW */ #define E1000_RDFTS 0x02428 /* Receive Data FIFO Tail Saved Register - RW */ #define E1000_RDFPC 0x02430 /* Receive Data FIFO Packet Count - RW */ #define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ +#define E1000_TDFH_A 0x08010 /* Alias to TDFH */ #define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ +#define E1000_TDFT_A 0x08018 /* Alias to TDFT */ #define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ #define E1000_TDFTS 0x03428 /* TX Data FIFO Tail Saved - RW */ #define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ #define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ +#define E1000_TDBAL_A 0x00420 /* Alias to TDBAL */ #define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ +#define E1000_TDBAH_A 0x00424 /* Alias to TDBAH */ #define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ +#define E1000_TDLEN_A 0x00428 /* Alias to TDLEN */ #define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ +#define E1000_TDH_A 0x00430 /* Alias to TDH */ #define E1000_TDT 0x03818 /* TX Descripotr Tail - RW */ +#define E1000_TDT_A 0x00438 /* Alias to TDT */ #define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ +#define E1000_TIDV_A 0x00440 /* Alias to TIDV */ #define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ #define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ #define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ @@ -288,9 +318,15 @@ #define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ #define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ #define E1000_RFCTL 0x05008 /* Receive Filter Control*/ +#define E1000_MAVTV0 0x05010 /* Management VLAN TAG Value 0 */ +#define E1000_MAVTV1 0x05014 /* Management VLAN TAG Value 1 */ +#define E1000_MAVTV2 0x05018 /* Management VLAN TAG Value 2 */ +#define E1000_MAVTV3 0x0501c /* Management VLAN TAG Value 3 */ #define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ #define E1000_RA 0x05400 /* Receive Address - RW Array */ +#define E1000_RA_A 0x00040 /* Alias to RA */ #define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ +#define E1000_VFTA_A 0x00600 /* Alias to VFTA */ #define E1000_WUC 0x05800 /* Wakeup Control - RW */ #define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ #define E1000_WUS 0x05810 /* Wakeup Status - RO */ @@ -300,27 +336,57 @@ #define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ #define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ #define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ +#define E1000_MFUTP01 0x05828 /* Management Flex UDP/TCP Ports 0/1 - RW */ +#define E1000_MFUTP23 0x05830 /* Management Flex UDP/TCP Ports 2/3 - RW */ +#define E1000_MFVAL 0x05824 /* Manageability Filters Valid - RW */ +#define E1000_MDEF 0x05890 /* Manageability Decision Filters - RW Array */ #define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ #define E1000_HOST_IF 0x08800 /* Host Interface */ #define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ +#define E1000_FTFT 0x09400 /* Flexible TCO Filter Table - RW Array */ #define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ #define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ -#define E1000_MDPHYA 0x0003C /* PHY address - RW */ -#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ +#define E1000_MDPHYA 0x0003C /* PHY address - RW */ +#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ #define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ #define E1000_GCR 0x05B00 /* PCI-Ex Control */ +#define E1000_FUNCTAG 0x05B08 /* Function-Tag Register */ #define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ #define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ #define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ #define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ +#define E1000_GSCN_0 0x05B20 /* 3GIO Statistic Counter Register #0 */ +#define E1000_GSCN_1 0x05B24 /* 3GIO Statistic Counter Register #1 */ +#define E1000_GSCN_2 0x05B28 /* 3GIO Statistic Counter Register #2 */ +#define E1000_GSCN_3 0x05B2C /* 3GIO Statistic Counter Register #3 */ #define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ #define E1000_SWSM 0x05B50 /* SW Semaphore */ +#define E1000_GCR2 0x05B64 /* 3GIO Control Register 2 */ #define E1000_FWSM 0x05B54 /* FW Semaphore */ +#define E1000_PBACLR 0x05B68 /* MSI-X PBA Clear */ #define E1000_FFLT_DBG 0x05F04 /* Debug Register */ #define E1000_HICR 0x08F00 /* Host Inteface Control */ +#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ +#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ +#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ +#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ +#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ +#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ +#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ +#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ +#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ +#define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ +#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ +#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ +#define E1000_TIMADJL 0x0B60C /* Time Adjustment Offset register Low - RW */ +#define E1000_TIMADJH 0x0B610 /* Time Adjustment Offset register High - RW */ +#define E1000_RXCFGL 0x0B634 /* RX Ethertype and Message Type - RW*/ + /* RSS registers */ #define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ #define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ @@ -329,6 +395,85 @@ #define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ #define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ +#define E1000_MRQC_ENABLED(mrqc) (((mrqc) & (BIT(0) | BIT(1))) == BIT(0)) + +#define E1000_RETA_IDX(hash) ((hash) & (BIT(7) - 1)) +#define E1000_RETA_VAL(reta, hash) (((uint8_t *)(reta))[E1000_RETA_IDX(hash)]) +#define E1000_RSS_QUEUE(reta, hash) ((E1000_RETA_VAL(reta, hash) & BIT(7)) >> 7) + +#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16)) +#define E1000_MRQC_EN_IPV4(mrqc) ((mrqc) & BIT(17)) +#define E1000_MRQC_EN_TCPIPV6(mrqc) ((mrqc) & BIT(18)) +#define E1000_MRQC_EN_IPV6EX(mrqc) ((mrqc) & BIT(19)) +#define E1000_MRQC_EN_IPV6(mrqc) ((mrqc) & BIT(20)) + +#define E1000_MRQ_RSS_TYPE_NONE (0) +#define E1000_MRQ_RSS_TYPE_IPV4TCP (1) +#define E1000_MRQ_RSS_TYPE_IPV4 (2) +#define E1000_MRQ_RSS_TYPE_IPV6TCP (3) +#define E1000_MRQ_RSS_TYPE_IPV6EX (4) +#define E1000_MRQ_RSS_TYPE_IPV6 (5) + +#define E1000_ICR_ASSERTED BIT(31) +#define E1000_EIAC_MASK 0x01F00000 + +/* [TR]DBAL and [TR]DLEN masks */ +#define E1000_XDBAL_MASK (~(BIT(4) - 1)) +#define E1000_XDLEN_MASK ((BIT(20) - 1) & (~(BIT(7) - 1))) + +/* IVAR register parsing helpers */ +#define E1000_IVAR_INT_ALLOC_VALID (0x8) + +#define E1000_IVAR_RXQ0_SHIFT (0) +#define E1000_IVAR_RXQ1_SHIFT (4) +#define E1000_IVAR_TXQ0_SHIFT (8) +#define E1000_IVAR_TXQ1_SHIFT (12) +#define E1000_IVAR_OTHER_SHIFT (16) + +#define E1000_IVAR_ENTRY_MASK (0xF) +#define E1000_IVAR_ENTRY_VALID_MASK E1000_IVAR_INT_ALLOC_VALID +#define E1000_IVAR_ENTRY_VEC_MASK (0x7) + +#define E1000_IVAR_RXQ0(x) ((x) >> E1000_IVAR_RXQ0_SHIFT) +#define E1000_IVAR_RXQ1(x) ((x) >> E1000_IVAR_RXQ1_SHIFT) +#define E1000_IVAR_TXQ0(x) ((x) >> E1000_IVAR_TXQ0_SHIFT) +#define E1000_IVAR_TXQ1(x) ((x) >> E1000_IVAR_TXQ1_SHIFT) +#define E1000_IVAR_OTHER(x) ((x) >> E1000_IVAR_OTHER_SHIFT) + +#define E1000_IVAR_ENTRY_VALID(x) ((x) & E1000_IVAR_ENTRY_VALID_MASK) +#define E1000_IVAR_ENTRY_VEC(x) ((x) & E1000_IVAR_ENTRY_VEC_MASK) + +#define E1000_IVAR_TX_INT_EVERY_WB BIT(31) + +/* RFCTL register bits */ +#define E1000_RFCTL_ISCSI_DIS 0x00000001 +#define E1000_RFCTL_NFSW_DIS 0x00000040 +#define E1000_RFCTL_NFSR_DIS 0x00000080 +#define E1000_RFCTL_IPV6_DIS 0x00000400 +#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800 +#define E1000_RFCTL_ACK_DIS 0x00001000 +#define E1000_RFCTL_ACK_DATA_DIS 0x00002000 +#define E1000_RFCTL_IPFRSP_DIS 0x00004000 +#define E1000_RFCTL_EXTEN 0x00008000 +#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 +#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 + +/* PSRCTL parsing */ +#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F +#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 +#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 +#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 + +#define E1000_PSRCTL_BSIZE0_SHIFT 0 +#define E1000_PSRCTL_BSIZE1_SHIFT 8 +#define E1000_PSRCTL_BSIZE2_SHIFT 16 +#define E1000_PSRCTL_BSIZE3_SHIFT 24 + +#define E1000_PSRCTL_BUFFS_PER_DESC 4 + +/* TARC* parsing */ +#define E1000_TARC_ENABLE BIT(10) + /* PHY 1000 MII Register/Bit Definitions */ /* PHY Registers defined by IEEE */ #define PHY_CTRL 0x00 /* Control Register */ @@ -344,6 +489,40 @@ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ #define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ +/* 82574-specific registers */ +#define PHY_COPPER_CTRL1 0x10 /* Copper Specific Control Register 1 */ +#define PHY_COPPER_STAT1 0x11 /* Copper Specific Status Register 1 */ +#define PHY_COPPER_INT_ENABLE 0x12 /* Interrupt Enable Register */ +#define PHY_COPPER_STAT2 0x13 /* Copper Specific Status Register 2 */ +#define PHY_COPPER_CTRL3 0x14 /* Copper Specific Control Register 3 */ +#define PHY_COPPER_CTRL2 0x1A /* Copper Specific Control Register 2 */ +#define PHY_RX_ERR_CNTR 0x15 /* Receive Error Counter */ +#define PHY_PAGE 0x16 /* Page Address (Any page) */ +#define PHY_OEM_BITS 0x19 /* OEM Bits (Page 0) */ +#define PHY_BIAS_1 0x1d /* Bias Setting Register */ +#define PHY_BIAS_2 0x1e /* Bias Setting Register */ + +/* 82574-specific registers - page 2 */ +#define PHY_MAC_CTRL1 0x10 /* MAC Specific Control Register 1 */ +#define PHY_MAC_INT_ENABLE 0x12 /* MAC Interrupt Enable Register */ +#define PHY_MAC_STAT 0x13 /* MAC Specific Status Register */ +#define PHY_MAC_CTRL2 0x15 /* MAC Specific Control Register 2 */ + +/* 82574-specific registers - page 3 */ +#define PHY_LED_03_FUNC_CTRL1 0x10 /* LED[3:0] Function Control */ +#define PHY_LED_03_POL_CTRL 0x11 /* LED[3:0] Polarity Control */ +#define PHY_LED_TIMER_CTRL 0x12 /* LED Timer Control */ +#define PHY_LED_45_CTRL 0x13 /* LED[5:4] Function Control and Polarity */ + +/* 82574-specific registers - page 5 */ +#define PHY_1000T_SKEW 0x14 /* 1000 BASE - T Pair Skew Register */ +#define PHY_1000T_SWAP 0x15 /* 1000 BASE - T Pair Swap and Polarity */ + +/* 82574-specific registers - page 6 */ +#define PHY_CRC_COUNTERS 0x11 /* CRC Counters */ + +#define PHY_PAGE_RW_MASK 0x7F /* R/W part of page address register */ + #define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ #define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */ @@ -423,6 +602,18 @@ #define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ #define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ #define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ +#define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ +#define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ +#define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ +#define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ +#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ + +#define E1000_ICR_OTHER_CAUSES (E1000_ICR_LSC | \ + E1000_ICR_RXO | \ + E1000_ICR_MDAC | \ + E1000_ICR_SRPD | \ + E1000_ICR_ACK | \ + E1000_ICR_MNG) /* Interrupt Cause Set */ #define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ @@ -471,6 +662,11 @@ #define E1000_IMS_SRPD E1000_ICR_SRPD #define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ #define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ +#define E1000_IMS_RXQ0 E1000_ICR_RXQ0 +#define E1000_IMS_RXQ1 E1000_ICR_RXQ1 +#define E1000_IMS_TXQ0 E1000_ICR_TXQ0 +#define E1000_IMS_TXQ1 E1000_ICR_TXQ1 +#define E1000_IMS_OTHER E1000_ICR_OTHER #define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ #define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ #define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ @@ -562,6 +758,15 @@ #define E1000_EEPROM_RW_ADDR_SHIFT 8 /* Shift to the address bits */ #define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */ #define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */ + +/* 82574 EERD/EEWR registers layout */ +#define E1000_EERW_START BIT(0) +#define E1000_EERW_DONE BIT(1) +#define E1000_EERW_ADDR_SHIFT 2 +#define E1000_EERW_ADDR_MASK ((1L << 14) - 1) +#define E1000_EERW_DATA_SHIFT 16 +#define E1000_EERW_DATA_MASK ((1L << 16) - 1) + /* Register Bit Masks */ /* Device Control */ #define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ @@ -584,7 +789,17 @@ #define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ #define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ #define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ +#define E1000_CTRL_SPD_SHIFT 8 /* Speed Select Shift */ + +#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* auto speed detection check */ +#define E1000_CTRL_EXT_EE_RST 0x00002000 /* EEPROM reset */ #define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ +#define E1000_CTRL_EXT_EIAME 0x01000000 +#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ +#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ +#define E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA 0x20000000 +#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ + #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ @@ -593,6 +808,7 @@ #define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ #define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ #define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ #define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ #define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ @@ -617,9 +833,13 @@ #define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by EEPROM/Flash */ #define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ +#define E1000_STATUS_ASDV_10 0x00000000 /* ASDV 10Mb */ +#define E1000_STATUS_ASDV_100 0x00000100 /* ASDV 100Mb */ +#define E1000_STATUS_ASDV_1000 0x00000200 /* ASDV 1Gb */ #define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ #define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ +#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ #define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ #define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ @@ -634,6 +854,8 @@ #define E1000_STATUS_FUSE_9 0x08000000 #define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ #define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ +#define E1000_STATUS_SPEED_SHIFT 6 +#define E1000_STATUS_ASDV_SHIFT 8 /* EEPROM/Flash Control */ #define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ @@ -664,6 +886,8 @@ #define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ #define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ #define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ + + #define E1000_EECD_SECVAL_SHIFT 22 #define E1000_STM_OPCODE 0xDB00 #define E1000_HICR_FW_RESET 0xC0 @@ -684,6 +908,18 @@ #define E1000_MDIC_INT_EN 0x20000000 #define E1000_MDIC_ERROR 0x40000000 +/* Rx Interrupt Delay Timer */ +#define E1000_RDTR_FPD BIT(31) + +/* Tx Interrupt Delay Timer */ +#define E1000_TIDV_FPD BIT(31) + +/* Delay increments in nanoseconds for delayed interrupts registers */ +#define E1000_INTR_DELAY_NS_RES (1024) + +/* Delay increments in nanoseconds for interrupt throttling registers */ +#define E1000_INTR_THROTTLING_NS_RES (256) + /* EEPROM Commands - Microwire */ #define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */ #define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */ @@ -711,6 +947,21 @@ #define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ #define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ +/* PCI Express Control */ +/* 3GIO Control Register - GCR (0x05B00; RW) */ +#define E1000_L0S_ADJUST (1 << 9) +#define E1000_L1_ENTRY_LATENCY_MSB (1 << 23) +#define E1000_L1_ENTRY_LATENCY_LSB (1 << 25 | 1 << 26) + +#define E1000_L0S_ADJUST (1 << 9) +#define E1000_L1_ENTRY_LATENCY_MSB (1 << 23) +#define E1000_L1_ENTRY_LATENCY_LSB (1 << 25 | 1 << 26) + +#define E1000_GCR_RO_BITS (1 << 23 | 1 << 25 | 1 << 26) + +/* MSI-X PBA Clear register */ +#define E1000_PBACLR_VALID_MASK (BIT(5) - 1) + /* Transmit Descriptor */ struct e1000_tx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ @@ -752,7 +1003,9 @@ struct e1000_tx_desc { #define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ #define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ #define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ +#define E1000_TXD_CMD_SNAP 0x40000000 /* Update SNAP header */ #define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ +#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ /* Transmit Control */ #define E1000_TCTL_RST 0x00000001 /* software reset */ @@ -767,7 +1020,7 @@ struct e1000_tx_desc { #define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ #define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ -/* Receive Descriptor */ +/* Legacy Receive Descriptor */ struct e1000_rx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ uint16_t length; /* Length of data DMAed into data buffer */ @@ -777,6 +1030,78 @@ struct e1000_rx_desc { uint16_t special; }; +/* Extended Receive Descriptor */ +union e1000_rx_desc_extended { + struct { + uint64_t buffer_addr; + uint64_t reserved; + } read; + struct { + struct { + uint32_t mrq; /* Multiple Rx Queues */ + union { + uint32_t rss; /* RSS Hash */ + struct { + uint16_t ip_id; /* IP id */ + uint16_t csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + uint32_t status_error; /* ext status/error */ + uint16_t length; + uint16_t vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +#define MAX_PS_BUFFERS 4 + +/* Number of packet split data buffers (not including the header buffer) */ +#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) + +/* Receive Descriptor - Packet Split */ +union e1000_rx_desc_packet_split { + struct { + /* one buffer for protocol header(s), three data buffers */ + uint64_t buffer_addr[MAX_PS_BUFFERS]; + } read; + struct { + struct { + uint32_t mrq; /* Multiple Rx Queues */ + union { + uint32_t rss; /* RSS Hash */ + struct { + uint16_t ip_id; /* IP id */ + uint16_t csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + uint32_t status_error; /* ext status/error */ + uint16_t length0; /* length of buffer 0 */ + uint16_t vlan; /* VLAN tag */ + } middle; + struct { + uint16_t header_status; + /* length of buffers 1-3 */ + uint16_t length[PS_PAGE_BUFFERS]; + } upper; + uint64_t reserved; + } wb; /* writeback */ +}; + +/* Receive Checksum Control bits */ +#define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ +#define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ +#define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ + +#define E1000_RING_DESC_LEN (16) +#define E1000_RING_DESC_LEN_SHIFT (4) + +#define E1000_MIN_RX_DESC_LEN E1000_RING_DESC_LEN +#define E1000_MAX_RX_DESC_LEN (sizeof(union e1000_rx_desc_packet_split)) + /* Receive Descriptor bit definitions */ #define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ #define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ @@ -802,6 +1127,15 @@ struct e1000_rx_desc { #define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ #define E1000_RXD_SPC_CFI_SHIFT 12 +/* RX packet types */ +#define E1000_RXD_PKT_MAC (0) +#define E1000_RXD_PKT_IP4 (1) +#define E1000_RXD_PKT_IP4_XDP (2) +#define E1000_RXD_PKT_IP6 (5) +#define E1000_RXD_PKT_IP6_XDP (6) + +#define E1000_RXD_PKT_TYPE(t) ((t) << 16) + #define E1000_RXDEXT_STATERR_CE 0x01000000 #define E1000_RXDEXT_STATERR_SE 0x02000000 #define E1000_RXDEXT_STATERR_SEQ 0x04000000 @@ -879,6 +1213,8 @@ struct e1000_data_desc { #define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery * Filtering */ #define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ +#define E1000_MANC_DIS_IP_CHK_ARP 0x10000000 /* Disable IP address chacking */ + /*for ARP packets - in 82574 */ #define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ #define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ #define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ @@ -902,7 +1238,14 @@ struct e1000_data_desc { #define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ #define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ +/* FACTPS Control */ +#define E1000_FACTPS_LAN0_ON 0x00000004 /* Lan 0 enable */ + /* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ #define EEPROM_SUM 0xBABA +/* I/O-Mapped Access to Internal Registers, Memories, and Flash */ +#define E1000_IOADDR 0x00 +#define E1000_IODATA 0x04 + #endif /* _E1000_HW_H_ */ diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c new file mode 100644 index 0000000000..61bcbb6083 --- /dev/null +++ b/hw/net/e1000e.c @@ -0,0 +1,739 @@ +/* +* QEMU INTEL 82574 GbE NIC emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman <dmitry@daynix.com> +* Leonid Bloch <leonid@daynix.com> +* Yan Vugenfirer <yan@daynix.com> +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "qemu/osdep.h" +#include "net/net.h" +#include "net/tap.h" +#include "qemu/range.h" +#include "sysemu/sysemu.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#include "hw/net/e1000_regs.h" + +#include "e1000x_common.h" +#include "e1000e_core.h" + +#include "trace.h" + +#define TYPE_E1000E "e1000e" +#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E) + +typedef struct E1000EState { + PCIDevice parent_obj; + NICState *nic; + NICConf conf; + + MemoryRegion mmio; + MemoryRegion flash; + MemoryRegion io; + MemoryRegion msix; + + uint32_t ioaddr; + + uint16_t subsys_ven; + uint16_t subsys; + + uint16_t subsys_ven_used; + uint16_t subsys_used; + + uint32_t intr_state; + bool disable_vnet; + + E1000ECore core; + +} E1000EState; + +#define E1000E_MMIO_IDX 0 +#define E1000E_FLASH_IDX 1 +#define E1000E_IO_IDX 2 +#define E1000E_MSIX_IDX 3 + +#define E1000E_MMIO_SIZE (128 * 1024) +#define E1000E_FLASH_SIZE (128 * 1024) +#define E1000E_IO_SIZE (32) +#define E1000E_MSIX_SIZE (16 * 1024) + +#define E1000E_MSIX_TABLE (0x0000) +#define E1000E_MSIX_PBA (0x2000) + +#define E1000E_USE_MSI BIT(0) +#define E1000E_USE_MSIX BIT(1) + +static uint64_t +e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000EState *s = opaque; + return e1000e_core_read(&s->core, addr, size); +} + +static void +e1000e_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000EState *s = opaque; + e1000e_core_write(&s->core, addr, val, size); +} + +static bool +e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx) +{ + if (s->ioaddr < 0x1FFFF) { + *idx = s->ioaddr; + return true; + } + + if (s->ioaddr < 0x7FFFF) { + trace_e1000e_wrn_io_addr_undefined(s->ioaddr); + return false; + } + + if (s->ioaddr < 0xFFFFF) { + trace_e1000e_wrn_io_addr_flash(s->ioaddr); + return false; + } + + trace_e1000e_wrn_io_addr_unknown(s->ioaddr); + return false; +} + +static uint64_t +e1000e_io_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000EState *s = opaque; + uint32_t idx; + uint64_t val; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_read_addr(s->ioaddr); + return s->ioaddr; + case E1000_IODATA: + if (e1000e_io_get_reg_index(s, &idx)) { + val = e1000e_core_read(&s->core, idx, sizeof(val)); + trace_e1000e_io_read_data(idx, val); + return val; + } + return 0; + default: + trace_e1000e_wrn_io_read_unknown(addr); + return 0; + } +} + +static void +e1000e_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000EState *s = opaque; + uint32_t idx; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_write_addr(val); + s->ioaddr = (uint32_t) val; + return; + case E1000_IODATA: + if (e1000e_io_get_reg_index(s, &idx)) { + trace_e1000e_io_write_data(idx, val); + e1000e_core_write(&s->core, idx, val, sizeof(val)); + } + return; + default: + trace_e1000e_wrn_io_write_unknown(addr); + return; + } +} + +static const MemoryRegionOps mmio_ops = { + .read = e1000e_mmio_read, + .write = e1000e_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps io_ops = { + .read = e1000e_io_read, + .write = e1000e_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static int +e1000e_nc_can_receive(NetClientState *nc) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_can_receive(&s->core); +} + +static ssize_t +e1000e_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_receive_iov(&s->core, iov, iovcnt); +} + +static ssize_t +e1000e_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_receive(&s->core, buf, size); +} + +static void +e1000e_set_link_status(NetClientState *nc) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + e1000e_core_set_link_status(&s->core); +} + +static NetClientInfo net_e1000e_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = e1000e_nc_can_receive, + .receive = e1000e_nc_receive, + .receive_iov = e1000e_nc_receive_iov, + .link_status_changed = e1000e_set_link_status, +}; + +/* +* EEPROM (NVM) contents documented in Table 36, section 6.1 +* and generally 6.1.2 Software accessed words. +*/ +static const uint16_t e1000e_eeprom_template[64] = { + /* Address | Compat. | ImVer | Compat. */ + 0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff, + /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */ + 0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058, + /* NVM words 1,2,3 |-------------------------------|PCI-EID*/ + 0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704, + /* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */ + 0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706, + /* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/ + 0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff, + /* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */ + 0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff, + /* SW Section */ + 0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff, + /* SW Section |CHKSUM */ + 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000, +}; + +static void e1000e_core_realize(E1000EState *s) +{ + s->core.owner = &s->parent_obj; + s->core.owner_nic = s->nic; +} + +static void +e1000e_init_msi(E1000EState *s) +{ + int res; + + res = msi_init(PCI_DEVICE(s), + 0xD0, /* MSI capability offset */ + 1, /* MAC MSI interrupts */ + true, /* 64-bit message addresses supported */ + false); /* Per vector mask supported */ + + if (res > 0) { + s->intr_state |= E1000E_USE_MSI; + } else { + trace_e1000e_msi_init_fail(res); + } +} + +static void +e1000e_cleanup_msi(E1000EState *s) +{ + if (s->intr_state & E1000E_USE_MSI) { + msi_uninit(PCI_DEVICE(s)); + } +} + +static void +e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors) +{ + int i; + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(PCI_DEVICE(s), i); + } +} + +static bool +e1000e_use_msix_vectors(E1000EState *s, int num_vectors) +{ + int i; + for (i = 0; i < num_vectors; i++) { + int res = msix_vector_use(PCI_DEVICE(s), i); + if (res < 0) { + trace_e1000e_msix_use_vector_fail(i, res); + e1000e_unuse_msix_vectors(s, i); + return false; + } + } + return true; +} + +static void +e1000e_init_msix(E1000EState *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM, + &s->msix, + E1000E_MSIX_IDX, E1000E_MSIX_TABLE, + &s->msix, + E1000E_MSIX_IDX, E1000E_MSIX_PBA, + 0xA0); + + if (res < 0) { + trace_e1000e_msix_init_fail(res); + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); + } else { + s->intr_state |= E1000E_USE_MSIX; + } + } +} + +static void +e1000e_cleanup_msix(E1000EState *s) +{ + if (s->intr_state & E1000E_USE_MSIX) { + e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM); + msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix); + } +} + +static void +e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) +{ + DeviceState *dev = DEVICE(pci_dev); + NetClientState *nc; + int i; + + s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + + s->core.max_queue_num = s->conf.peers.queues - 1; + + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr); + + /* Setup virtio headers */ + if (s->disable_vnet) { + s->core.has_vnet = false; + trace_e1000e_cfg_support_virtio(false); + return; + } else { + s->core.has_vnet = true; + } + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) { + s->core.has_vnet = false; + trace_e1000e_cfg_support_virtio(false); + return; + } + } + + trace_e1000e_cfg_support_virtio(true); + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr)); + qemu_using_vnet_hdr(nc->peer, true); + } +} + +static inline uint64_t +e1000e_gen_dsn(uint8_t *mac) +{ + return (uint64_t)(mac[5]) | + (uint64_t)(mac[4]) << 8 | + (uint64_t)(mac[3]) << 16 | + (uint64_t)(0x00FF) << 24 | + (uint64_t)(0x00FF) << 32 | + (uint64_t)(mac[2]) << 40 | + (uint64_t)(mac[1]) << 48 | + (uint64_t)(mac[0]) << 56; +} + +static int +e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) +{ + int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF); + + if (ret >= 0) { + pci_set_word(pdev->config + offset + PCI_PM_PMC, + PCI_PM_CAP_VER_1_1 | + pmc); + + pci_set_word(pdev->wmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK | + PCI_PM_CTRL_PME_ENABLE | + PCI_PM_CTRL_DATA_SEL_MASK); + + pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_PME_STATUS); + } + + return ret; +} + +static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) +{ + E1000EState *s = E1000E(pci_dev); + + pci_default_write_config(pci_dev, address, val, len); + + if (range_covers_byte(address, len, PCI_COMMAND) && + (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } +} + +static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +{ + static const uint16_t e1000e_pmrb_offset = 0x0C8; + static const uint16_t e1000e_pcie_offset = 0x0E0; + static const uint16_t e1000e_aer_offset = 0x100; + static const uint16_t e1000e_dsn_offset = 0x140; + E1000EState *s = E1000E(pci_dev); + uint8_t *macaddr; + + trace_e1000e_cb_pci_realize(); + + pci_dev->config_write = e1000e_write_config; + + pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10; + pci_dev->config[PCI_INTERRUPT_PIN] = 1; + + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys); + + s->subsys_ven_used = s->subsys_ven; + s->subsys_used = s->subsys; + + /* Define IO/MMIO regions */ + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s, + "e1000e-mmio", E1000E_MMIO_SIZE); + pci_register_bar(pci_dev, E1000E_MMIO_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio); + + /* + * We provide a dummy implementation for the flash BAR + * for drivers that may theoretically probe for its presence. + */ + memory_region_init(&s->flash, OBJECT(s), + "e1000e-flash", E1000E_FLASH_SIZE); + pci_register_bar(pci_dev, E1000E_FLASH_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash); + + memory_region_init_io(&s->io, OBJECT(s), &io_ops, s, + "e1000e-io", E1000E_IO_SIZE); + pci_register_bar(pci_dev, E1000E_IO_IDX, + PCI_BASE_ADDRESS_SPACE_IO, &s->io); + + memory_region_init(&s->msix, OBJECT(s), "e1000e-msix", + E1000E_MSIX_SIZE); + pci_register_bar(pci_dev, E1000E_MSIX_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix); + + /* Create networking backend */ + qemu_macaddr_default_if_unset(&s->conf.macaddr); + macaddr = s->conf.macaddr.a; + + e1000e_init_msix(s); + + if (pcie_endpoint_cap_v1_init(pci_dev, e1000e_pcie_offset) < 0) { + hw_error("Failed to initialize PCIe capability"); + } + + e1000e_init_msi(s); + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, + PCI_PM_CAP_DSI) < 0) { + hw_error("Failed to initialize PM capability"); + } + + if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF) < 0) { + hw_error("Failed to initialize AER capability"); + } + + pcie_dev_ser_num_init(pci_dev, e1000e_dsn_offset, + e1000e_gen_dsn(macaddr)); + + e1000e_init_net_peer(s, pci_dev, macaddr); + + /* Initialize core */ + e1000e_core_realize(s); + + e1000e_core_pci_realize(&s->core, + e1000e_eeprom_template, + sizeof(e1000e_eeprom_template), + macaddr); +} + +static void e1000e_pci_uninit(PCIDevice *pci_dev) +{ + E1000EState *s = E1000E(pci_dev); + + trace_e1000e_cb_pci_uninit(); + + e1000e_core_pci_uninit(&s->core); + + pcie_aer_exit(pci_dev); + pcie_cap_exit(pci_dev); + + qemu_del_nic(s->nic); + + e1000e_cleanup_msix(s); + e1000e_cleanup_msi(s); +} + +static void e1000e_qdev_reset(DeviceState *dev) +{ + E1000EState *s = E1000E(dev); + + trace_e1000e_cb_qdev_reset(); + + e1000e_core_reset(&s->core); +} + +static void e1000e_pre_save(void *opaque) +{ + E1000EState *s = opaque; + + trace_e1000e_cb_pre_save(); + + e1000e_core_pre_save(&s->core); +} + +static int e1000e_post_load(void *opaque, int version_id) +{ + E1000EState *s = opaque; + + trace_e1000e_cb_post_load(); + + if ((s->subsys != s->subsys_used) || + (s->subsys_ven != s->subsys_ven_used)) { + fprintf(stderr, + "ERROR: Cannot migrate while device properties " + "(subsys/subsys_ven) differ"); + return -1; + } + + return e1000e_core_post_load(&s->core); +} + +static const VMStateDescription e1000e_vmstate_tx = { + .name = "e1000e-tx", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(props.sum_needed, struct e1000e_tx), + VMSTATE_UINT8(props.ipcss, struct e1000e_tx), + VMSTATE_UINT8(props.ipcso, struct e1000e_tx), + VMSTATE_UINT16(props.ipcse, struct e1000e_tx), + VMSTATE_UINT8(props.tucss, struct e1000e_tx), + VMSTATE_UINT8(props.tucso, struct e1000e_tx), + VMSTATE_UINT16(props.tucse, struct e1000e_tx), + VMSTATE_UINT8(props.hdr_len, struct e1000e_tx), + VMSTATE_UINT16(props.mss, struct e1000e_tx), + VMSTATE_UINT32(props.paylen, struct e1000e_tx), + VMSTATE_INT8(props.ip, struct e1000e_tx), + VMSTATE_INT8(props.tcp, struct e1000e_tx), + VMSTATE_BOOL(props.tse, struct e1000e_tx), + VMSTATE_BOOL(props.cptse, struct e1000e_tx), + VMSTATE_BOOL(skip_cp, struct e1000e_tx), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription e1000e_vmstate_intr_timer = { + .name = "e1000e-intr-timer", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(timer, E1000IntrDelayTimer), + VMSTATE_BOOL(running, E1000IntrDelayTimer), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_E1000E_INTR_DELAY_TIMER(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +#define VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \ + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = e1000e_pre_save, + .post_load = e1000e_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCIE_DEVICE(parent_obj, E1000EState), + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), + VMSTATE_UINT32(intr_state, E1000EState), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, + E1000_PSRCTL_BUFFS_PER_DESC), + VMSTATE_UINT32(core.rx_desc_buf_size, E1000EState), + VMSTATE_UINT16_ARRAY(core.eeprom, E1000EState, E1000E_EEPROM_SIZE), + VMSTATE_UINT16_2DARRAY(core.phy, E1000EState, + E1000E_PHY_PAGES, E1000E_PHY_PAGE_SIZE), + VMSTATE_UINT32_ARRAY(core.mac, E1000EState, E1000E_MAC_SIZE), + VMSTATE_UINT8_ARRAY(core.permanent_mac, E1000EState, ETH_ALEN), + + VMSTATE_UINT32(core.delayed_causes, E1000EState), + + VMSTATE_UINT16(subsys, E1000EState), + VMSTATE_UINT16(subsys_ven, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER(core.rdtr, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.radv, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.raid, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.tadv, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.tidv, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER(core.itr, E1000EState), + VMSTATE_BOOL(core.itr_intr_pending, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(core.eitr, E1000EState, + E1000E_MSIX_VEC_NUM), + VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState, + E1000E_MSIX_VEC_NUM), + + VMSTATE_UINT32(core.itr_guest_value, E1000EState), + VMSTATE_UINT32_ARRAY(core.eitr_guest_value, E1000EState, + E1000E_MSIX_VEC_NUM), + + VMSTATE_UINT16(core.vet, E1000EState), + + VMSTATE_STRUCT_ARRAY(core.tx, E1000EState, E1000E_NUM_QUEUES, 0, + e1000e_vmstate_tx, struct e1000e_tx), + VMSTATE_END_OF_LIST() + } +}; + +static PropertyInfo e1000e_prop_disable_vnet, + e1000e_prop_subsys_ven, + e1000e_prop_subsys; + +static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), + DEFINE_PROP_DEFAULT("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_DEFAULT("subsys_ven", E1000EState, subsys_ven, + PCI_VENDOR_ID_INTEL, + e1000e_prop_subsys_ven, uint16_t), + DEFINE_PROP_DEFAULT("subsys", E1000EState, subsys, 0, + e1000e_prop_subsys, uint16_t), + DEFINE_PROP_END_OF_LIST(), +}; + +static void e1000e_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIDeviceClass *c = PCI_DEVICE_CLASS(class); + + c->realize = e1000e_pci_realize; + c->exit = e1000e_pci_uninit; + c->vendor_id = PCI_VENDOR_ID_INTEL; + c->device_id = E1000_DEV_ID_82574L; + c->revision = 0; + c->class_id = PCI_CLASS_NETWORK_ETHERNET; + c->is_express = 1; + + dc->desc = "Intel 82574L GbE Controller"; + dc->reset = e1000e_qdev_reset; + dc->vmsd = &e1000e_vmstate; + dc->props = e1000e_properties; + + e1000e_prop_disable_vnet = qdev_prop_uint8; + e1000e_prop_disable_vnet.description = "Do not use virtio headers, " + "perform SW offloads emulation " + "instead"; + + e1000e_prop_subsys_ven = qdev_prop_uint16; + e1000e_prop_subsys_ven.description = "PCI device Subsystem Vendor ID"; + + e1000e_prop_subsys = qdev_prop_uint16; + e1000e_prop_subsys.description = "PCI device Subsystem ID"; + + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static void e1000e_instance_init(Object *obj) +{ + E1000EState *s = E1000E(obj); + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static const TypeInfo e1000e_info = { + .name = TYPE_E1000E, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(E1000EState), + .class_init = e1000e_class_init, + .instance_init = e1000e_instance_init, +}; + +static void e1000e_register_types(void) +{ + type_register_static(&e1000e_info); +} + +type_init(e1000e_register_types) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c new file mode 100644 index 0000000000..6a44ea1c3f --- /dev/null +++ b/hw/net/e1000e_core.c @@ -0,0 +1,3476 @@ +/* +* Core code for QEMU e1000e emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman <dmitry@daynix.com> +* Leonid Bloch <leonid@daynix.com> +* Yan Vugenfirer <yan@daynix.com> +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "net/tap.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#include "net_tx_pkt.h" +#include "net_rx_pkt.h" + +#include "e1000x_common.h" +#include "e1000e_core.h" + +#include "trace.h" + +#define E1000E_MIN_XITR (500) /* No more then 7813 interrupts per + second according to spec 10.2.4.2 */ +#define E1000E_MAX_TX_FRAGS (64) + +static void +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val); + +static inline void +e1000e_process_ts_option(E1000ECore *core, struct e1000_tx_desc *dp) +{ + if (le32_to_cpu(dp->upper.data) & E1000_TXD_EXTCMD_TSTAMP) { + trace_e1000e_wrn_no_ts_support(); + } +} + +static inline void +e1000e_process_snap_option(E1000ECore *core, uint32_t cmd_and_length) +{ + if (cmd_and_length & E1000_TXD_CMD_SNAP) { + trace_e1000e_wrn_no_snap_support(); + } +} + +static inline void +e1000e_raise_legacy_irq(E1000ECore *core) +{ + trace_e1000e_irq_legacy_notify(true); + e1000x_inc_reg_if_not_full(core->mac, IAC); + pci_set_irq(core->owner, 1); +} + +static inline void +e1000e_lower_legacy_irq(E1000ECore *core) +{ + trace_e1000e_irq_legacy_notify(false); + pci_set_irq(core->owner, 0); +} + +static inline void +e1000e_intrmgr_rearm_timer(E1000IntrDelayTimer *timer) +{ + int64_t delay_ns = (int64_t) timer->core->mac[timer->delay_reg] * + timer->delay_resolution_ns; + + trace_e1000e_irq_rearm_timer(timer->delay_reg << 2, delay_ns); + + timer_mod(timer->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns); + + timer->running = true; +} + +static void +e1000e_intmgr_timer_resume(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + e1000e_intrmgr_rearm_timer(timer); + } +} + +static void +e1000e_intmgr_timer_pause(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + timer_del(timer->timer); + } +} + +static inline void +e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + timer_del(timer->timer); + timer->running = false; + } +} + +static inline void +e1000e_intrmgr_fire_delayed_interrupts(E1000ECore *core) +{ + trace_e1000e_irq_fire_delayed_interrupts(); + e1000e_set_interrupt_cause(core, 0); +} + +static void +e1000e_intrmgr_on_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + + trace_e1000e_irq_throttling_timer(timer->delay_reg << 2); + + timer->running = false; + e1000e_intrmgr_fire_delayed_interrupts(timer->core); +} + +static void +e1000e_intrmgr_on_throttling_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + + assert(!msix_enabled(timer->core->owner)); + + timer->running = false; + + if (!timer->core->itr_intr_pending) { + trace_e1000e_irq_throttling_no_pending_interrupts(); + return; + } + + if (msi_enabled(timer->core->owner)) { + trace_e1000e_irq_msi_notify_postponed(); + e1000e_set_interrupt_cause(timer->core, 0); + } else { + trace_e1000e_irq_legacy_notify_postponed(); + e1000e_set_interrupt_cause(timer->core, 0); + } +} + +static void +e1000e_intrmgr_on_msix_throttling_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + int idx = timer - &timer->core->eitr[0]; + + assert(msix_enabled(timer->core->owner)); + + timer->running = false; + + if (!timer->core->eitr_intr_pending[idx]) { + trace_e1000e_irq_throttling_no_pending_vec(idx); + return; + } + + trace_e1000e_irq_msix_notify_postponed_vec(idx); + msix_notify(timer->core->owner, idx); +} + +static void +e1000e_intrmgr_initialize_all_timers(E1000ECore *core, bool create) +{ + int i; + + core->radv.delay_reg = RADV; + core->rdtr.delay_reg = RDTR; + core->raid.delay_reg = RAID; + core->tadv.delay_reg = TADV; + core->tidv.delay_reg = TIDV; + + core->radv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->rdtr.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->raid.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->tadv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->tidv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + + core->radv.core = core; + core->rdtr.core = core; + core->raid.core = core; + core->tadv.core = core; + core->tidv.core = core; + + core->itr.core = core; + core->itr.delay_reg = ITR; + core->itr.delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES; + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + core->eitr[i].core = core; + core->eitr[i].delay_reg = EITR + i; + core->eitr[i].delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES; + } + + if (!create) { + return; + } + + core->radv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->radv); + core->rdtr.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->rdtr); + core->raid.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->raid); + + core->tadv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tadv); + core->tidv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tidv); + + core->itr.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + e1000e_intrmgr_on_throttling_timer, + &core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + core->eitr[i].timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + e1000e_intrmgr_on_msix_throttling_timer, + &core->eitr[i]); + } +} + +static inline void +e1000e_intrmgr_stop_delay_timers(E1000ECore *core) +{ + e1000e_intrmgr_stop_timer(&core->radv); + e1000e_intrmgr_stop_timer(&core->rdtr); + e1000e_intrmgr_stop_timer(&core->raid); + e1000e_intrmgr_stop_timer(&core->tidv); + e1000e_intrmgr_stop_timer(&core->tadv); +} + +static bool +e1000e_intrmgr_delay_rx_causes(E1000ECore *core, uint32_t *causes) +{ + uint32_t delayable_causes; + uint32_t rdtr = core->mac[RDTR]; + uint32_t radv = core->mac[RADV]; + uint32_t raid = core->mac[RAID]; + + if (msix_enabled(core->owner)) { + return false; + } + + delayable_causes = E1000_ICR_RXQ0 | + E1000_ICR_RXQ1 | + E1000_ICR_RXT0; + + if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS)) { + delayable_causes |= E1000_ICR_ACK; + } + + /* Clean up all causes that may be delayed */ + core->delayed_causes |= *causes & delayable_causes; + *causes &= ~delayable_causes; + + /* Check if delayed RX interrupts disabled by client + or if there are causes that cannot be delayed */ + if ((rdtr == 0) || (causes != 0)) { + return false; + } + + /* Check if delayed RX ACK interrupts disabled by client + and there is an ACK packet received */ + if ((raid == 0) && (core->delayed_causes & E1000_ICR_ACK)) { + return false; + } + + /* All causes delayed */ + e1000e_intrmgr_rearm_timer(&core->rdtr); + + if (!core->radv.running && (radv != 0)) { + e1000e_intrmgr_rearm_timer(&core->radv); + } + + if (!core->raid.running && (core->delayed_causes & E1000_ICR_ACK)) { + e1000e_intrmgr_rearm_timer(&core->raid); + } + + return true; +} + +static bool +e1000e_intrmgr_delay_tx_causes(E1000ECore *core, uint32_t *causes) +{ + static const uint32_t delayable_causes = E1000_ICR_TXQ0 | + E1000_ICR_TXQ1 | + E1000_ICR_TXQE | + E1000_ICR_TXDW; + + if (msix_enabled(core->owner)) { + return false; + } + + /* Clean up all causes that may be delayed */ + core->delayed_causes |= *causes & delayable_causes; + *causes &= ~delayable_causes; + + /* If there are causes that cannot be delayed */ + if (causes != 0) { + return false; + } + + /* All causes delayed */ + e1000e_intrmgr_rearm_timer(&core->tidv); + + if (!core->tadv.running && (core->mac[TADV] != 0)) { + e1000e_intrmgr_rearm_timer(&core->tadv); + } + + return true; +} + +static uint32_t +e1000e_intmgr_collect_delayed_causes(E1000ECore *core) +{ + uint32_t res; + + if (msix_enabled(core->owner)) { + assert(core->delayed_causes == 0); + return 0; + } + + res = core->delayed_causes; + core->delayed_causes = 0; + + e1000e_intrmgr_stop_delay_timers(core); + + return res; +} + +static void +e1000e_intrmgr_fire_all_timers(E1000ECore *core) +{ + int i; + uint32_t val = e1000e_intmgr_collect_delayed_causes(core); + + trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]); + core->mac[ICR] |= val; + + if (core->itr.running) { + timer_del(core->itr.timer); + e1000e_intrmgr_on_throttling_timer(&core->itr); + } + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + if (core->eitr[i].running) { + timer_del(core->eitr[i].timer); + e1000e_intrmgr_on_msix_throttling_timer(&core->eitr[i]); + } + } +} + +static void +e1000e_intrmgr_resume(E1000ECore *core) +{ + int i; + + e1000e_intmgr_timer_resume(&core->radv); + e1000e_intmgr_timer_resume(&core->rdtr); + e1000e_intmgr_timer_resume(&core->raid); + e1000e_intmgr_timer_resume(&core->tidv); + e1000e_intmgr_timer_resume(&core->tadv); + + e1000e_intmgr_timer_resume(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intmgr_timer_resume(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_pause(E1000ECore *core) +{ + int i; + + e1000e_intmgr_timer_pause(&core->radv); + e1000e_intmgr_timer_pause(&core->rdtr); + e1000e_intmgr_timer_pause(&core->raid); + e1000e_intmgr_timer_pause(&core->tidv); + e1000e_intmgr_timer_pause(&core->tadv); + + e1000e_intmgr_timer_pause(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intmgr_timer_pause(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_reset(E1000ECore *core) +{ + int i; + + core->delayed_causes = 0; + + e1000e_intrmgr_stop_delay_timers(core); + + e1000e_intrmgr_stop_timer(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intrmgr_stop_timer(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_pci_unint(E1000ECore *core) +{ + int i; + + timer_del(core->radv.timer); + timer_free(core->radv.timer); + timer_del(core->rdtr.timer); + timer_free(core->rdtr.timer); + timer_del(core->raid.timer); + timer_free(core->raid.timer); + + timer_del(core->tadv.timer); + timer_free(core->tadv.timer); + timer_del(core->tidv.timer); + timer_free(core->tidv.timer); + + timer_del(core->itr.timer); + timer_free(core->itr.timer); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + timer_del(core->eitr[i].timer); + timer_free(core->eitr[i].timer); + } +} + +static void +e1000e_intrmgr_pci_realize(E1000ECore *core) +{ + e1000e_intrmgr_initialize_all_timers(core, true); +} + +static inline bool +e1000e_rx_csum_enabled(E1000ECore *core) +{ + return (core->mac[RXCSUM] & E1000_RXCSUM_PCSD) ? false : true; +} + +static inline bool +e1000e_rx_use_legacy_descriptor(E1000ECore *core) +{ + return (core->mac[RFCTL] & E1000_RFCTL_EXTEN) ? false : true; +} + +static inline bool +e1000e_rx_use_ps_descriptor(E1000ECore *core) +{ + return !e1000e_rx_use_legacy_descriptor(core) && + (core->mac[RCTL] & E1000_RCTL_DTYP_PS); +} + +static inline bool +e1000e_rss_enabled(E1000ECore *core) +{ + return E1000_MRQC_ENABLED(core->mac[MRQC]) && + !e1000e_rx_csum_enabled(core) && + !e1000e_rx_use_legacy_descriptor(core); +} + +typedef struct E1000E_RSSInfo_st { + bool enabled; + uint32_t hash; + uint32_t queue; + uint32_t type; +} E1000E_RSSInfo; + +static uint32_t +e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt) +{ + bool isip4, isip6, isudp, istcp; + + assert(e1000e_rss_enabled(core)); + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + + if (isip4) { + bool fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + + trace_e1000e_rx_rss_ip4(fragment, istcp, core->mac[MRQC], + E1000_MRQC_EN_TCPIPV4(core->mac[MRQC]), + E1000_MRQC_EN_IPV4(core->mac[MRQC])); + + if (!fragment && istcp && E1000_MRQC_EN_TCPIPV4(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV4TCP; + } + + if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV4; + } + } else if (isip6) { + eth_ip6_hdr_info *ip6info = net_rx_pkt_get_ip6_info(pkt); + + bool ex_dis = core->mac[RFCTL] & E1000_RFCTL_IPV6_EX_DIS; + bool new_ex_dis = core->mac[RFCTL] & E1000_RFCTL_NEW_IPV6_EXT_DIS; + + trace_e1000e_rx_rss_ip6(core->mac[RFCTL], + ex_dis, new_ex_dis, istcp, + ip6info->has_ext_hdrs, + ip6info->rss_ex_dst_valid, + ip6info->rss_ex_src_valid, + core->mac[MRQC], + E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]), + E1000_MRQC_EN_IPV6EX(core->mac[MRQC]), + E1000_MRQC_EN_IPV6(core->mac[MRQC])); + + if ((!ex_dis || !ip6info->has_ext_hdrs) && + (!new_ex_dis || !(ip6info->rss_ex_dst_valid || + ip6info->rss_ex_src_valid))) { + + if (istcp && !ip6info->fragment && + E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6TCP; + } + + if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6EX; + } + + } + + if (E1000_MRQC_EN_IPV6(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6; + } + + } + + return E1000_MRQ_RSS_TYPE_NONE; +} + +static uint32_t +e1000e_rss_calc_hash(E1000ECore *core, + struct NetRxPkt *pkt, + E1000E_RSSInfo *info) +{ + NetRxPktRssType type; + + assert(e1000e_rss_enabled(core)); + + switch (info->type) { + case E1000_MRQ_RSS_TYPE_IPV4: + type = NetPktRssIpV4; + break; + case E1000_MRQ_RSS_TYPE_IPV4TCP: + type = NetPktRssIpV4Tcp; + break; + case E1000_MRQ_RSS_TYPE_IPV6TCP: + type = NetPktRssIpV6Tcp; + break; + case E1000_MRQ_RSS_TYPE_IPV6: + type = NetPktRssIpV6; + break; + case E1000_MRQ_RSS_TYPE_IPV6EX: + type = NetPktRssIpV6Ex; + break; + default: + assert(false); + return 0; + } + + return net_rx_pkt_calc_rss_hash(pkt, type, (uint8_t *) &core->mac[RSSRK]); +} + +static void +e1000e_rss_parse_packet(E1000ECore *core, + struct NetRxPkt *pkt, + E1000E_RSSInfo *info) +{ + trace_e1000e_rx_rss_started(); + + if (!e1000e_rss_enabled(core)) { + info->enabled = false; + info->hash = 0; + info->queue = 0; + info->type = 0; + trace_e1000e_rx_rss_disabled(); + return; + } + + info->enabled = true; + + info->type = e1000e_rss_get_hash_type(core, pkt); + + trace_e1000e_rx_rss_type(info->type); + + if (info->type == E1000_MRQ_RSS_TYPE_NONE) { + info->hash = 0; + info->queue = 0; + return; + } + + info->hash = e1000e_rss_calc_hash(core, pkt, info); + info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash); +} + +static void +e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx) +{ + if (tx->props.tse && tx->props.cptse) { + net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->props.mss); + net_tx_pkt_update_ip_checksums(tx->tx_pkt); + e1000x_inc_reg_if_not_full(core->mac, TSCTC); + return; + } + + if (tx->props.sum_needed & E1000_TXD_POPTS_TXSM) { + net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0); + } + + if (tx->props.sum_needed & E1000_TXD_POPTS_IXSM) { + net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt); + } +} + +static bool +e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index) +{ + int target_queue = MIN(core->max_queue_num, queue_index); + NetClientState *queue = qemu_get_subqueue(core->owner_nic, target_queue); + + e1000e_setup_tx_offloads(core, tx); + + net_tx_pkt_dump(tx->tx_pkt); + + if ((core->phy[0][PHY_CTRL] & MII_CR_LOOPBACK) || + ((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) { + return net_tx_pkt_send_loopback(tx->tx_pkt, queue); + } else { + return net_tx_pkt_send(tx->tx_pkt, queue); + } +} + +static void +e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt) +{ + static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511, + PTC1023, PTC1522 }; + + size_t tot_len = net_tx_pkt_get_total_len(tx_pkt); + + e1000x_increase_size_stats(core->mac, PTCregs, tot_len); + e1000x_inc_reg_if_not_full(core->mac, TPT); + e1000x_grow_8reg_if_not_full(core->mac, TOTL, tot_len); + + switch (net_tx_pkt_get_packet_type(tx_pkt)) { + case ETH_PKT_BCAST: + e1000x_inc_reg_if_not_full(core->mac, BPTC); + break; + case ETH_PKT_MCAST: + e1000x_inc_reg_if_not_full(core->mac, MPTC); + break; + case ETH_PKT_UCAST: + break; + default: + g_assert_not_reached(); + } + + core->mac[GPTC] = core->mac[TPT]; + core->mac[GOTCL] = core->mac[TOTL]; + core->mac[GOTCH] = core->mac[TOTH]; +} + +static void +e1000e_process_tx_desc(E1000ECore *core, + struct e1000e_tx *tx, + struct e1000_tx_desc *dp, + int queue_index) +{ + uint32_t txd_lower = le32_to_cpu(dp->lower.data); + uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); + unsigned int split_size = txd_lower & 0xffff; + uint64_t addr; + struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; + bool eop = txd_lower & E1000_TXD_CMD_EOP; + + if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */ + e1000x_read_tx_ctx_descr(xp, &tx->props); + e1000e_process_snap_option(core, le32_to_cpu(xp->cmd_and_length)); + return; + } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { + /* data descriptor */ + tx->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8; + tx->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0; + e1000e_process_ts_option(core, dp); + } else { + /* legacy descriptor */ + e1000e_process_ts_option(core, dp); + tx->props.cptse = 0; + } + + addr = le64_to_cpu(dp->buffer_addr); + + if (!tx->skip_cp) { + if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) { + tx->skip_cp = true; + } + } + + if (eop) { + if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) { + if (e1000x_vlan_enabled(core->mac) && + e1000x_is_vlan_txd(txd_lower)) { + net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, + le16_to_cpu(dp->upper.fields.special), core->vet); + } + if (e1000e_tx_pkt_send(core, tx, queue_index)) { + e1000e_on_tx_done_update_stats(core, tx->tx_pkt); + } + } + + tx->skip_cp = false; + net_tx_pkt_reset(tx->tx_pkt); + + tx->props.sum_needed = 0; + tx->props.cptse = 0; + } +} + +static inline uint32_t +e1000e_tx_wb_interrupt_cause(E1000ECore *core, int queue_idx) +{ + if (!msix_enabled(core->owner)) { + return E1000_ICR_TXDW; + } + + return (queue_idx == 0) ? E1000_ICR_TXQ0 : E1000_ICR_TXQ1; +} + +static inline uint32_t +e1000e_rx_wb_interrupt_cause(E1000ECore *core, int queue_idx, + bool min_threshold_hit) +{ + if (!msix_enabled(core->owner)) { + return E1000_ICS_RXT0 | (min_threshold_hit ? E1000_ICS_RXDMT0 : 0); + } + + return (queue_idx == 0) ? E1000_ICR_RXQ0 : E1000_ICR_RXQ1; +} + +static uint32_t +e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base, + struct e1000_tx_desc *dp, bool *ide, int queue_idx) +{ + uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data); + + if (!(txd_lower & E1000_TXD_CMD_RS) && + !(core->mac[IVAR] & E1000_IVAR_TX_INT_EVERY_WB)) { + return 0; + } + + *ide = (txd_lower & E1000_TXD_CMD_IDE) ? true : false; + + txd_upper = le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD; + + dp->upper.data = cpu_to_le32(txd_upper); + pci_dma_write(core->owner, base + ((char *)&dp->upper - (char *)dp), + &dp->upper, sizeof(dp->upper)); + return e1000e_tx_wb_interrupt_cause(core, queue_idx); +} + +typedef struct E1000E_RingInfo_st { + int dbah; + int dbal; + int dlen; + int dh; + int dt; + int idx; +} E1000E_RingInfo; + +static inline bool +e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dh] == core->mac[r->dt]; +} + +static inline uint64_t +e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) +{ + uint64_t bah = core->mac[r->dbah]; + uint64_t bal = core->mac[r->dbal]; + + return (bah << 32) + bal; +} + +static inline uint64_t +e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r) +{ + return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; +} + +static inline void +e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) +{ + core->mac[r->dh] += count; + + if (core->mac[r->dh] * E1000_RING_DESC_LEN >= core->mac[r->dlen]) { + core->mac[r->dh] = 0; + } +} + +static inline uint32_t +e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) +{ + trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], + core->mac[r->dh], core->mac[r->dt]); + + if (core->mac[r->dh] <= core->mac[r->dt]) { + return core->mac[r->dt] - core->mac[r->dh]; + } + + if (core->mac[r->dh] > core->mac[r->dt]) { + return core->mac[r->dlen] / E1000_RING_DESC_LEN + + core->mac[r->dt] - core->mac[r->dh]; + } + + g_assert_not_reached(); + return 0; +} + +static inline bool +e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dlen] > 0; +} + +static inline uint32_t +e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dlen]; +} + +typedef struct E1000E_TxRing_st { + const E1000E_RingInfo *i; + struct e1000e_tx *tx; +} E1000E_TxRing; + +static inline int +e1000e_mq_queue_idx(int base_reg_idx, int reg_idx) +{ + return (reg_idx - base_reg_idx) / (0x100 >> 2); +} + +static inline void +e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) +{ + static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + { TDBAH, TDBAL, TDLEN, TDH, TDT, 0 }, + { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 } + }; + + assert(idx < ARRAY_SIZE(i)); + + txr->i = &i[idx]; + txr->tx = &core->tx[idx]; +} + +typedef struct E1000E_RxRing_st { + const E1000E_RingInfo *i; +} E1000E_RxRing; + +static inline void +e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx) +{ + static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, + { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 } + }; + + assert(idx < ARRAY_SIZE(i)); + + rxr->i = &i[idx]; +} + +static void +e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) +{ + dma_addr_t base; + struct e1000_tx_desc desc; + bool ide = false; + const E1000E_RingInfo *txi = txr->i; + uint32_t cause = E1000_ICS_TXQE; + + if (!(core->mac[TCTL] & E1000_TCTL_EN)) { + trace_e1000e_tx_disabled(); + return; + } + + while (!e1000e_ring_empty(core, txi)) { + base = e1000e_ring_head_descr(core, txi); + + pci_dma_read(core->owner, base, &desc, sizeof(desc)); + + trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr, + desc.lower.data, desc.upper.data); + + e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx); + cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx); + + e1000e_ring_advance(core, txi, 1); + } + + if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) { + e1000e_set_interrupt_cause(core, cause); + } +} + +static bool +e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, + size_t total_size) +{ + uint32_t bufs = e1000e_ring_free_descr_num(core, r); + + trace_e1000e_rx_has_buffers(r->idx, bufs, total_size, + core->rx_desc_buf_size); + + return total_size <= bufs / (core->rx_desc_len / E1000_MIN_RX_DESC_LEN) * + core->rx_desc_buf_size; +} + +static inline void +e1000e_start_recv(E1000ECore *core) +{ + int i; + + trace_e1000e_rx_start_recv(); + + for (i = 0; i <= core->max_queue_num; i++) { + qemu_flush_queued_packets(qemu_get_subqueue(core->owner_nic, i)); + } +} + +int +e1000e_can_receive(E1000ECore *core) +{ + int i; + + if (!e1000x_rx_ready(core->owner, core->mac)) { + return false; + } + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + E1000E_RxRing rxr; + + e1000e_rx_ring_init(core, &rxr, i); + if (e1000e_ring_enabled(core, rxr.i) && + e1000e_has_rxbufs(core, rxr.i, 1)) { + trace_e1000e_rx_can_recv(); + return true; + } + } + + trace_e1000e_rx_can_recv_rings_full(); + return false; +} + +ssize_t +e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return e1000e_receive_iov(core, &iov, 1); +} + +static inline bool +e1000e_rx_l3_cso_enabled(E1000ECore *core) +{ + return !!(core->mac[RXCSUM] & E1000_RXCSUM_IPOFLD); +} + +static inline bool +e1000e_rx_l4_cso_enabled(E1000ECore *core) +{ + return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD); +} + +static bool +e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size) +{ + uint32_t rctl = core->mac[RCTL]; + + if (e1000x_is_vlan_packet(buf, core->vet) && + e1000x_vlan_rx_filter_enabled(core->mac)) { + uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); + uint32_t vfta = le32_to_cpup((uint32_t *)(core->mac + VFTA) + + ((vid >> 5) & 0x7f)); + if ((vfta & (1 << (vid & 0x1f))) == 0) { + trace_e1000e_rx_flt_vlan_mismatch(vid); + return false; + } else { + trace_e1000e_rx_flt_vlan_match(vid); + } + } + + switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { + case ETH_PKT_UCAST: + if (rctl & E1000_RCTL_UPE) { + return true; /* promiscuous ucast */ + } + break; + + case ETH_PKT_BCAST: + if (rctl & E1000_RCTL_BAM) { + return true; /* broadcast enabled */ + } + break; + + case ETH_PKT_MCAST: + if (rctl & E1000_RCTL_MPE) { + return true; /* promiscuous mcast */ + } + break; + + default: + g_assert_not_reached(); + } + + return e1000x_rx_group_filter(core->mac, buf); +} + +static inline void +e1000e_read_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr) +{ + struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; + *buff_addr = le64_to_cpu(d->buffer_addr); +} + +static inline void +e1000e_read_ext_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr) +{ + union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; + *buff_addr = le64_to_cpu(d->read.buffer_addr); +} + +static inline void +e1000e_read_ps_rx_descr(E1000ECore *core, uint8_t *desc, + hwaddr (*buff_addr)[MAX_PS_BUFFERS]) +{ + int i; + union e1000_rx_desc_packet_split *d = + (union e1000_rx_desc_packet_split *) desc; + + for (i = 0; i < MAX_PS_BUFFERS; i++) { + (*buff_addr)[i] = le64_to_cpu(d->read.buffer_addr[i]); + } + + trace_e1000e_rx_desc_ps_read((*buff_addr)[0], (*buff_addr)[1], + (*buff_addr)[2], (*buff_addr)[3]); +} + +static inline void +e1000e_read_rx_descr(E1000ECore *core, uint8_t *desc, + hwaddr (*buff_addr)[MAX_PS_BUFFERS]) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + e1000e_read_lgcy_rx_descr(core, desc, &(*buff_addr)[0]); + (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0; + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + e1000e_read_ps_rx_descr(core, desc, buff_addr); + } else { + e1000e_read_ext_rx_descr(core, desc, &(*buff_addr)[0]); + (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0; + } + } +} + +static void +e1000e_verify_csum_in_sw(E1000ECore *core, + struct NetRxPkt *pkt, + uint32_t *status_flags, + bool istcp, bool isudp) +{ + bool csum_valid; + uint32_t csum_error; + + if (e1000e_rx_l3_cso_enabled(core)) { + if (!net_rx_pkt_validate_l3_csum(pkt, &csum_valid)) { + trace_e1000e_rx_metadata_l3_csum_validation_failed(); + } else { + csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_IPE; + *status_flags |= E1000_RXD_STAT_IPCS | csum_error; + } + } else { + trace_e1000e_rx_metadata_l3_cso_disabled(); + } + + if (!e1000e_rx_l4_cso_enabled(core)) { + trace_e1000e_rx_metadata_l4_cso_disabled(); + return; + } + + if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) { + trace_e1000e_rx_metadata_l4_csum_validation_failed(); + return; + } + + csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_TCPE; + + if (istcp) { + *status_flags |= E1000_RXD_STAT_TCPCS | + csum_error; + } else if (isudp) { + *status_flags |= E1000_RXD_STAT_TCPCS | + E1000_RXD_STAT_UDPCS | + csum_error; + } +} + +static inline bool +e1000e_is_tcp_ack(E1000ECore *core, struct NetRxPkt *rx_pkt) +{ + if (!net_rx_pkt_is_tcp_ack(rx_pkt)) { + return false; + } + + if (core->mac[RFCTL] & E1000_RFCTL_ACK_DATA_DIS) { + return !net_rx_pkt_has_tcp_data(rx_pkt); + } + + return true; +} + +static void +e1000e_build_rx_metadata(E1000ECore *core, + struct NetRxPkt *pkt, + bool is_eop, + const E1000E_RSSInfo *rss_info, + uint32_t *rss, uint32_t *mrq, + uint32_t *status_flags, + uint16_t *ip_id, + uint16_t *vlan_tag) +{ + struct virtio_net_hdr *vhdr; + bool isip4, isip6, istcp, isudp; + uint32_t pkt_type; + + *status_flags = E1000_RXD_STAT_DD; + + /* No additional metadata needed for non-EOP descriptors */ + if (!is_eop) { + goto func_exit; + } + + *status_flags |= E1000_RXD_STAT_EOP; + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + trace_e1000e_rx_metadata_protocols(isip4, isip6, isudp, istcp); + + /* VLAN state */ + if (net_rx_pkt_is_vlan_stripped(pkt)) { + *status_flags |= E1000_RXD_STAT_VP; + *vlan_tag = cpu_to_le16(net_rx_pkt_get_vlan_tag(pkt)); + trace_e1000e_rx_metadata_vlan(*vlan_tag); + } + + /* Packet parsing results */ + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { + if (rss_info->enabled) { + *rss = cpu_to_le32(rss_info->hash); + *mrq = cpu_to_le32(rss_info->type | (rss_info->queue << 8)); + trace_e1000e_rx_metadata_rss(*rss, *mrq); + } + } else if (isip4) { + *status_flags |= E1000_RXD_STAT_IPIDV; + *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id(*ip_id); + } + + if (istcp && e1000e_is_tcp_ack(core, pkt)) { + *status_flags |= E1000_RXD_STAT_ACK; + trace_e1000e_rx_metadata_ack(); + } + + if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + trace_e1000e_rx_metadata_ipv6_filtering_disabled(); + pkt_type = E1000_RXD_PKT_MAC; + } else if (istcp || isudp) { + pkt_type = isip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP; + } else if (isip4 || isip6) { + pkt_type = isip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6; + } else { + pkt_type = E1000_RXD_PKT_MAC; + } + + *status_flags |= E1000_RXD_PKT_TYPE(pkt_type); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + + /* RX CSO information */ + if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { + trace_e1000e_rx_metadata_ipv6_sum_disabled(); + goto func_exit; + } + + if (!net_rx_pkt_has_virt_hdr(pkt)) { + trace_e1000e_rx_metadata_no_virthdr(); + e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp); + goto func_exit; + } + + vhdr = net_rx_pkt_get_vhdr(pkt); + + if (!(vhdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) && + !(vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { + trace_e1000e_rx_metadata_virthdr_no_csum_info(); + e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp); + goto func_exit; + } + + if (e1000e_rx_l3_cso_enabled(core)) { + *status_flags |= isip4 ? E1000_RXD_STAT_IPCS : 0; + } else { + trace_e1000e_rx_metadata_l3_cso_disabled(); + } + + if (e1000e_rx_l4_cso_enabled(core)) { + if (istcp) { + *status_flags |= E1000_RXD_STAT_TCPCS; + } else if (isudp) { + *status_flags |= E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS; + } + } else { + trace_e1000e_rx_metadata_l4_cso_disabled(); + } + + trace_e1000e_rx_metadata_status_flags(*status_flags); + +func_exit: + *status_flags = cpu_to_le32(*status_flags); +} + +static inline void +e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t length) +{ + uint32_t status_flags, rss, mrq; + uint16_t ip_id; + + struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; + + memset(d, 0, sizeof(*d)); + + assert(!rss_info->enabled); + + d->length = cpu_to_le16(length); + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &rss, &mrq, + &status_flags, &ip_id, + &d->special); + d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); + d->status = (uint8_t) le32_to_cpu(status_flags); +} + +static inline void +e1000e_write_ext_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t length) +{ + union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; + + memset(d, 0, sizeof(*d)); + + d->wb.upper.length = cpu_to_le16(length); + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &d->wb.lower.hi_dword.rss, + &d->wb.lower.mrq, + &d->wb.upper.status_error, + &d->wb.lower.hi_dword.csum_ip.ip_id, + &d->wb.upper.vlan); +} + +static inline void +e1000e_write_ps_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + size_t ps_hdr_len, + uint16_t(*written)[MAX_PS_BUFFERS]) +{ + int i; + union e1000_rx_desc_packet_split *d = + (union e1000_rx_desc_packet_split *) desc; + + memset(d, 0, sizeof(*d)); + + d->wb.middle.length0 = cpu_to_le16((*written)[0]); + + for (i = 0; i < PS_PAGE_BUFFERS; i++) { + d->wb.upper.length[i] = cpu_to_le16((*written)[i + 1]); + } + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &d->wb.lower.hi_dword.rss, + &d->wb.lower.mrq, + &d->wb.middle.status_error, + &d->wb.lower.hi_dword.csum_ip.ip_id, + &d->wb.middle.vlan); + + d->wb.upper.header_status = + cpu_to_le16(ps_hdr_len | (ps_hdr_len ? E1000_RXDPS_HDRSTAT_HDRSP : 0)); + + trace_e1000e_rx_desc_ps_write((*written)[0], (*written)[1], + (*written)[2], (*written)[3]); +} + +static inline void +e1000e_write_rx_descr(E1000ECore *core, uint8_t *desc, +struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, + size_t ps_hdr_len, uint16_t(*written)[MAX_PS_BUFFERS]) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + assert(ps_hdr_len == 0); + e1000e_write_lgcy_rx_descr(core, desc, pkt, rss_info, (*written)[0]); + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + e1000e_write_ps_rx_descr(core, desc, pkt, rss_info, + ps_hdr_len, written); + } else { + assert(ps_hdr_len == 0); + e1000e_write_ext_rx_descr(core, desc, pkt, rss_info, + (*written)[0]); + } + } +} + +typedef struct e1000e_ba_state_st { + uint16_t written[MAX_PS_BUFFERS]; + uint8_t cur_idx; +} e1000e_ba_state; + +static inline void +e1000e_write_hdr_to_rx_buffers(E1000ECore *core, + hwaddr (*ba)[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]); + + pci_dma_write(core->owner, (*ba)[0] + bastate->written[0], data, data_len); + bastate->written[0] += data_len; + + bastate->cur_idx = 1; +} + +static void +e1000e_write_to_rx_buffers(E1000ECore *core, + hwaddr (*ba)[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) +{ + while (data_len > 0) { + uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; + uint32_t cur_buf_bytes_left = cur_buf_len - + bastate->written[bastate->cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_e1000e_rx_desc_buff_write(bastate->cur_idx, + (*ba)[bastate->cur_idx], + bastate->written[bastate->cur_idx], + data, + bytes_to_write); + + pci_dma_write(core->owner, + (*ba)[bastate->cur_idx] + bastate->written[bastate->cur_idx], + data, bytes_to_write); + + bastate->written[bastate->cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (bastate->written[bastate->cur_idx] == cur_buf_len) { + bastate->cur_idx++; + } + + assert(bastate->cur_idx < MAX_PS_BUFFERS); + } +} + +static void +e1000e_update_rx_stats(E1000ECore *core, + size_t data_size, + size_t data_fcs_size) +{ + e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); + + switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { + case ETH_PKT_BCAST: + e1000x_inc_reg_if_not_full(core->mac, BPRC); + break; + + case ETH_PKT_MCAST: + e1000x_inc_reg_if_not_full(core->mac, MPRC); + break; + + default: + break; + } +} + +static inline bool +e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi) +{ + return e1000e_ring_free_descr_num(core, rxi) == + e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift; +} + +static bool +e1000e_do_ps(E1000ECore *core, struct NetRxPkt *pkt, size_t *hdr_len) +{ + bool isip4, isip6, isudp, istcp; + bool fragment; + + if (!e1000e_rx_use_ps_descriptor(core)) { + return false; + } + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + + if (isip4) { + fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (isip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + return false; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + return false; + } + + if (!fragment && (isudp || istcp)) { + *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + if ((*hdr_len > core->rxbuf_sizes[0]) || + (*hdr_len > net_rx_pkt_get_total_len(pkt))) { + return false; + } + + return true; +} + +static void +e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, + const E1000E_RxRing *rxr, + const E1000E_RSSInfo *rss_info) +{ + PCIDevice *d = core->owner; + dma_addr_t base; + uint8_t desc[E1000_MAX_RX_DESC_LEN]; + size_t desc_size; + size_t desc_offset = 0; + size_t iov_ofs = 0; + + struct iovec *iov = net_rx_pkt_get_iovec(pkt); + size_t size = net_rx_pkt_get_total_len(pkt); + size_t total_size = size + e1000x_fcs_len(core->mac); + const E1000E_RingInfo *rxi; + size_t ps_hdr_len = 0; + bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len); + + rxi = rxr->i; + + do { + hwaddr ba[MAX_PS_BUFFERS]; + e1000e_ba_state bastate = { { 0 } }; + bool is_last = false; + bool is_first = true; + + desc_size = total_size - desc_offset; + + if (desc_size > core->rx_desc_buf_size) { + desc_size = core->rx_desc_buf_size; + } + + base = e1000e_ring_head_descr(core, rxi); + + pci_dma_read(d, base, &desc, core->rx_desc_len); + + trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); + + e1000e_read_rx_descr(core, desc, &ba); + + if (ba[0]) { + if (desc_offset < size) { + static const uint32_t fcs_pad; + size_t iov_copy; + size_t copy_size = size - desc_offset; + if (copy_size > core->rx_desc_buf_size) { + copy_size = core->rx_desc_buf_size; + } + + /* For PS mode copy the packet header first */ + if (do_ps) { + if (is_first) { + size_t ps_hdr_copied = 0; + do { + iov_copy = MIN(ps_hdr_len - ps_hdr_copied, + iov->iov_len - iov_ofs); + + e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate, + iov->iov_base, iov_copy); + + copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } while (ps_hdr_copied < ps_hdr_len); + + is_first = false; + } else { + /* Leave buffer 0 of each descriptor except first */ + /* empty as per spec 7.1.5.1 */ + e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate, + NULL, 0); + } + } + + /* Copy packet payload */ + while (copy_size) { + iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); + + e1000e_write_to_rx_buffers(core, &ba, &bastate, + iov->iov_base + iov_ofs, iov_copy); + + copy_size -= iov_copy; + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } + + if (desc_offset + desc_size >= total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + e1000e_write_to_rx_buffers(core, &ba, &bastate, + (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); + } + } + desc_offset += desc_size; + if (desc_offset >= total_size) { + is_last = true; + } + } else { /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + } + + e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, + rss_info, do_ps ? ps_hdr_len : 0, &bastate.written); + pci_dma_write(d, base, &desc, core->rx_desc_len); + + e1000e_ring_advance(core, rxi, + core->rx_desc_len / E1000_MIN_RX_DESC_LEN); + + } while (desc_offset < total_size); + + e1000e_update_rx_stats(core, size, total_size); +} + +static inline void +e1000e_rx_fix_l4_csum(E1000ECore *core, struct NetRxPkt *pkt) +{ + if (net_rx_pkt_has_virt_hdr(pkt)) { + struct virtio_net_hdr *vhdr = net_rx_pkt_get_vhdr(pkt); + + if (vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + net_rx_pkt_fix_l4_csum(pkt); + } + } +} + +ssize_t +e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) +{ + static const int maximum_ethernet_hdr_len = (14 + 4); + /* Min. octets in an ethernet frame sans FCS */ + static const int min_buf_size = 60; + + uint32_t n = 0; + uint8_t min_buf[min_buf_size]; + struct iovec min_iov; + uint8_t *filter_buf; + size_t size, orig_size; + size_t iov_ofs = 0; + E1000E_RxRing rxr; + E1000E_RSSInfo rss_info; + size_t total_size; + ssize_t retval; + bool rdmts_hit; + + trace_e1000e_rx_receive_iov(iovcnt); + + if (!e1000x_hw_rx_enabled(core->mac)) { + return -1; + } + + /* Pull virtio header in */ + if (core->has_vnet) { + net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt); + iov_ofs = sizeof(struct virtio_net_hdr); + } + + filter_buf = iov->iov_base + iov_ofs; + orig_size = iov_size(iov, iovcnt); + size = orig_size - iov_ofs; + + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + e1000x_inc_reg_if_not_full(core->mac, RUC); + min_iov.iov_base = filter_buf = min_buf; + min_iov.iov_len = size = sizeof(min_buf); + iovcnt = 1; + iov = &min_iov; + iov_ofs = 0; + } else if (iov->iov_len < maximum_ethernet_hdr_len) { + /* This is very unlikely, but may happen. */ + iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len); + filter_buf = min_buf; + } + + /* Discard oversized packets if !LPE and !SBP. */ + if (e1000x_is_oversized(core->mac, size)) { + return orig_size; + } + + net_rx_pkt_set_packet_type(core->rx_pkt, + get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf))); + + if (!e1000e_receive_filter(core, filter_buf, size)) { + trace_e1000e_rx_flt_dropped(); + return orig_size; + } + + net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, + e1000x_vlan_enabled(core->mac), core->vet); + + e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info); + e1000e_rx_ring_init(core, &rxr, rss_info.queue); + + trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx); + + total_size = net_rx_pkt_get_total_len(core->rx_pkt) + + e1000x_fcs_len(core->mac); + + if (e1000e_has_rxbufs(core, rxr.i, total_size)) { + e1000e_rx_fix_l4_csum(core, core->rx_pkt); + + e1000e_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); + + retval = orig_size; + + /* Perform small receive detection (RSRPD) */ + if (total_size < core->mac[RSRPD]) { + n |= E1000_ICS_SRPD; + } + + /* Perform ACK receive detection */ + if (e1000e_is_tcp_ack(core, core->rx_pkt)) { + n |= E1000_ICS_ACK; + } + + /* Check if receive descriptor minimum threshold hit */ + rdmts_hit = e1000e_rx_descr_threshold_hit(core, rxr.i); + n |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit); + + trace_e1000e_rx_written_to_guest(n); + } else { + n |= E1000_ICS_RXO; + retval = 0; + + trace_e1000e_rx_not_written_to_guest(n); + } + + if (!e1000e_intrmgr_delay_rx_causes(core, &n)) { + trace_e1000e_rx_interrupt_set(n); + e1000e_set_interrupt_cause(core, n); + } else { + trace_e1000e_rx_interrupt_delayed(n); + } + + return retval; +} + +static inline bool +e1000e_have_autoneg(E1000ECore *core) +{ + return core->phy[0][PHY_CTRL] & MII_CR_AUTO_NEG_EN; +} + +static void e1000e_update_flowctl_status(E1000ECore *core) +{ + if (e1000e_have_autoneg(core) && + core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE) { + trace_e1000e_link_autoneg_flowctl(true); + core->mac[CTRL] |= E1000_CTRL_TFCE | E1000_CTRL_RFCE; + } else { + trace_e1000e_link_autoneg_flowctl(false); + } +} + +static inline void +e1000e_link_down(E1000ECore *core) +{ + e1000x_update_regs_on_link_down(core->mac, core->phy[0]); + e1000e_update_flowctl_status(core); +} + +static inline void +e1000e_set_phy_ctrl(E1000ECore *core, int index, uint16_t val) +{ + /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */ + core->phy[0][PHY_CTRL] = val & ~(0x3f | + MII_CR_RESET | + MII_CR_RESTART_AUTO_NEG); + + if ((val & MII_CR_RESTART_AUTO_NEG) && + e1000e_have_autoneg(core)) { + e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer); + } +} + +static void +e1000e_set_phy_oem_bits(E1000ECore *core, int index, uint16_t val) +{ + core->phy[0][PHY_OEM_BITS] = val & ~BIT(10); + + if (val & BIT(10)) { + e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer); + } +} + +static void +e1000e_set_phy_page(E1000ECore *core, int index, uint16_t val) +{ + core->phy[0][PHY_PAGE] = val & PHY_PAGE_RW_MASK; +} + +void +e1000e_core_set_link_status(E1000ECore *core) +{ + NetClientState *nc = qemu_get_queue(core->owner_nic); + uint32_t old_status = core->mac[STATUS]; + + trace_e1000e_link_status_changed(nc->link_down ? false : true); + + if (nc->link_down) { + e1000x_update_regs_on_link_down(core->mac, core->phy[0]); + } else { + if (e1000e_have_autoneg(core) && + !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + e1000x_restart_autoneg(core->mac, core->phy[0], + core->autoneg_timer); + } else { + e1000x_update_regs_on_link_up(core->mac, core->phy[0]); + } + } + + if (core->mac[STATUS] != old_status) { + e1000e_set_interrupt_cause(core, E1000_ICR_LSC); + } +} + +static void +e1000e_set_ctrl(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_core_ctrl_write(index, val); + + /* RST is self clearing */ + core->mac[CTRL] = val & ~E1000_CTRL_RST; + core->mac[CTRL_DUP] = core->mac[CTRL]; + + trace_e1000e_link_set_params( + !!(val & E1000_CTRL_ASDE), + (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT, + !!(val & E1000_CTRL_FRCSPD), + !!(val & E1000_CTRL_FRCDPX), + !!(val & E1000_CTRL_RFCE), + !!(val & E1000_CTRL_TFCE)); + + if (val & E1000_CTRL_RST) { + trace_e1000e_core_ctrl_sw_reset(); + e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); + } + + if (val & E1000_CTRL_PHY_RST) { + trace_e1000e_core_ctrl_phy_reset(); + core->mac[STATUS] |= E1000_STATUS_PHYRA; + } +} + +static void +e1000e_set_rfctl(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_rx_set_rfctl(val); + + if (!(val & E1000_RFCTL_ISCSI_DIS)) { + trace_e1000e_wrn_iscsi_filtering_not_supported(); + } + + if (!(val & E1000_RFCTL_NFSW_DIS)) { + trace_e1000e_wrn_nfsw_filtering_not_supported(); + } + + if (!(val & E1000_RFCTL_NFSR_DIS)) { + trace_e1000e_wrn_nfsr_filtering_not_supported(); + } + + core->mac[RFCTL] = val; +} + +static void +e1000e_calc_per_desc_buf_size(E1000ECore *core) +{ + int i; + core->rx_desc_buf_size = 0; + + for (i = 0; i < ARRAY_SIZE(core->rxbuf_sizes); i++) { + core->rx_desc_buf_size += core->rxbuf_sizes[i]; + } +} + +static void +e1000e_parse_rxbufsize(E1000ECore *core) +{ + uint32_t rctl = core->mac[RCTL]; + + memset(core->rxbuf_sizes, 0, sizeof(core->rxbuf_sizes)); + + if (rctl & E1000_RCTL_DTYP_MASK) { + uint32_t bsize; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE0_MASK; + core->rxbuf_sizes[0] = (bsize >> E1000_PSRCTL_BSIZE0_SHIFT) * 128; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE1_MASK; + core->rxbuf_sizes[1] = (bsize >> E1000_PSRCTL_BSIZE1_SHIFT) * 1024; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE2_MASK; + core->rxbuf_sizes[2] = (bsize >> E1000_PSRCTL_BSIZE2_SHIFT) * 1024; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE3_MASK; + core->rxbuf_sizes[3] = (bsize >> E1000_PSRCTL_BSIZE3_SHIFT) * 1024; + } else if (rctl & E1000_RCTL_FLXBUF_MASK) { + int flxbuf = rctl & E1000_RCTL_FLXBUF_MASK; + core->rxbuf_sizes[0] = (flxbuf >> E1000_RCTL_FLXBUF_SHIFT) * 1024; + } else { + core->rxbuf_sizes[0] = e1000x_rxbufsize(rctl); + } + + trace_e1000e_rx_desc_buff_sizes(core->rxbuf_sizes[0], core->rxbuf_sizes[1], + core->rxbuf_sizes[2], core->rxbuf_sizes[3]); + + e1000e_calc_per_desc_buf_size(core); +} + +static void +e1000e_calc_rxdesclen(E1000ECore *core) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + core->rx_desc_len = sizeof(struct e1000_rx_desc); + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + core->rx_desc_len = sizeof(union e1000_rx_desc_packet_split); + } else { + core->rx_desc_len = sizeof(union e1000_rx_desc_extended); + } + } + trace_e1000e_rx_desc_len(core->rx_desc_len); +} + +static void +e1000e_set_rx_control(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RCTL] = val; + trace_e1000e_rx_set_rctl(core->mac[RCTL]); + + if (val & E1000_RCTL_EN) { + e1000e_parse_rxbufsize(core); + e1000e_calc_rxdesclen(core); + core->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1 + + E1000_RING_DESC_LEN_SHIFT; + + e1000e_start_recv(core); + } +} + +static +void(*e1000e_phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]) +(E1000ECore *, int, uint16_t) = { + [0] = { + [PHY_CTRL] = e1000e_set_phy_ctrl, + [PHY_PAGE] = e1000e_set_phy_page, + [PHY_OEM_BITS] = e1000e_set_phy_oem_bits + } +}; + +static inline void +e1000e_clear_ims_bits(E1000ECore *core, uint32_t bits) +{ + trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits); + core->mac[IMS] &= ~bits; +} + +static inline bool +e1000e_postpone_interrupt(bool *interrupt_pending, + E1000IntrDelayTimer *timer) +{ + if (timer->running) { + trace_e1000e_irq_postponed_by_xitr(timer->delay_reg << 2); + + *interrupt_pending = true; + return true; + } + + if (timer->core->mac[timer->delay_reg] != 0) { + e1000e_intrmgr_rearm_timer(timer); + } + + return false; +} + +static inline bool +e1000e_itr_should_postpone(E1000ECore *core) +{ + return e1000e_postpone_interrupt(&core->itr_intr_pending, &core->itr); +} + +static inline bool +e1000e_eitr_should_postpone(E1000ECore *core, int idx) +{ + return e1000e_postpone_interrupt(&core->eitr_intr_pending[idx], + &core->eitr[idx]); +} + +static void +e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) +{ + uint32_t effective_eiac; + + if (E1000_IVAR_ENTRY_VALID(int_cfg)) { + uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg); + if (vec < E1000E_MSIX_VEC_NUM) { + if (!e1000e_eitr_should_postpone(core, vec)) { + trace_e1000e_irq_msix_notify_vec(vec); + msix_notify(core->owner, vec); + } + } else { + trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg); + } + } else { + trace_e1000e_wrn_msix_invalid(cause, int_cfg); + } + + if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { + trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause); + e1000e_clear_ims_bits(core, core->mac[IAM] & cause); + } + + trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); + + if (core->mac[EIAC] & E1000_ICR_OTHER) { + effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) | + E1000_ICR_OTHER_CAUSES; + } else { + effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK; + } + core->mac[ICR] &= ~effective_eiac; +} + +static void +e1000e_msix_notify(E1000ECore *core, uint32_t causes) +{ + if (causes & E1000_ICR_RXQ0) { + e1000e_msix_notify_one(core, E1000_ICR_RXQ0, + E1000_IVAR_RXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_RXQ1) { + e1000e_msix_notify_one(core, E1000_ICR_RXQ1, + E1000_IVAR_RXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ0) { + e1000e_msix_notify_one(core, E1000_ICR_TXQ0, + E1000_IVAR_TXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ1) { + e1000e_msix_notify_one(core, E1000_ICR_TXQ1, + E1000_IVAR_TXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_OTHER) { + e1000e_msix_notify_one(core, E1000_ICR_OTHER, + E1000_IVAR_OTHER(core->mac[IVAR])); + } +} + +static void +e1000e_msix_clear_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) +{ + if (E1000_IVAR_ENTRY_VALID(int_cfg)) { + uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg); + if (vec < E1000E_MSIX_VEC_NUM) { + trace_e1000e_irq_msix_pending_clearing(cause, int_cfg, vec); + msix_clr_pending(core->owner, vec); + } else { + trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg); + } + } else { + trace_e1000e_wrn_msix_invalid(cause, int_cfg); + } +} + +static void +e1000e_msix_clear(E1000ECore *core, uint32_t causes) +{ + if (causes & E1000_ICR_RXQ0) { + e1000e_msix_clear_one(core, E1000_ICR_RXQ0, + E1000_IVAR_RXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_RXQ1) { + e1000e_msix_clear_one(core, E1000_ICR_RXQ1, + E1000_IVAR_RXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ0) { + e1000e_msix_clear_one(core, E1000_ICR_TXQ0, + E1000_IVAR_TXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ1) { + e1000e_msix_clear_one(core, E1000_ICR_TXQ1, + E1000_IVAR_TXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_OTHER) { + e1000e_msix_clear_one(core, E1000_ICR_OTHER, + E1000_IVAR_OTHER(core->mac[IVAR])); + } +} + +static inline void +e1000e_fix_icr_asserted(E1000ECore *core) +{ + core->mac[ICR] &= ~E1000_ICR_ASSERTED; + if (core->mac[ICR]) { + core->mac[ICR] |= E1000_ICR_ASSERTED; + } + + trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]); +} + +static void +e1000e_send_msi(E1000ECore *core, bool msix) +{ + uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED; + + if (msix) { + e1000e_msix_notify(core, causes); + } else { + if (!e1000e_itr_should_postpone(core)) { + trace_e1000e_irq_msi_notify(causes); + msi_notify(core->owner, 0); + } + } +} + +static void +e1000e_update_interrupt_state(E1000ECore *core) +{ + bool interrupts_pending; + bool is_msix = msix_enabled(core->owner); + + /* Set ICR[OTHER] for MSI-X */ + if (is_msix) { + if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) { + core->mac[ICR] |= E1000_ICR_OTHER; + trace_e1000e_irq_add_msi_other(core->mac[ICR]); + } + } + + e1000e_fix_icr_asserted(core); + + /* + * Make sure ICR and ICS registers have the same value. + * The spec says that the ICS register is write-only. However in practice, + * on real hardware ICS is readable, and for reads it has the same value as + * ICR (except that ICS does not have the clear on read behaviour of ICR). + * + * The VxWorks PRO/1000 driver uses this behaviour. + */ + core->mac[ICS] = core->mac[ICR]; + + interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false; + + trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], + core->mac[ICR], core->mac[IMS]); + + if (is_msix || msi_enabled(core->owner)) { + if (interrupts_pending) { + e1000e_send_msi(core, is_msix); + } + } else { + if (interrupts_pending) { + if (!e1000e_itr_should_postpone(core)) { + e1000e_raise_legacy_irq(core); + } + } else { + e1000e_lower_legacy_irq(core); + } + } +} + +static inline void +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) +{ + trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); + + val |= e1000e_intmgr_collect_delayed_causes(core); + core->mac[ICR] |= val; + + trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]); + + e1000e_update_interrupt_state(core); +} + +static inline void +e1000e_autoneg_timer(void *opaque) +{ + E1000ECore *core = opaque; + if (!qemu_get_queue(core->owner_nic)->link_down) { + e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); + e1000e_update_flowctl_status(core); + /* signal link status change to the guest */ + e1000e_set_interrupt_cause(core, E1000_ICR_LSC); + } +} + +static inline uint16_t +e1000e_get_reg_index_with_offset(const uint16_t *mac_reg_access, hwaddr addr) +{ + uint16_t index = (addr & 0x1ffff) >> 2; + return index + (mac_reg_access[index] & 0xfffe); +} + +static const char e1000e_phy_regcap[E1000E_PHY_PAGES][0x20] = { + [0] = { + [PHY_CTRL] = PHY_ANYPAGE | PHY_RW, + [PHY_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_ID1] = PHY_ANYPAGE | PHY_R, + [PHY_ID2] = PHY_ANYPAGE | PHY_R, + [PHY_AUTONEG_ADV] = PHY_ANYPAGE | PHY_RW, + [PHY_LP_ABILITY] = PHY_ANYPAGE | PHY_R, + [PHY_AUTONEG_EXP] = PHY_ANYPAGE | PHY_R, + [PHY_NEXT_PAGE_TX] = PHY_ANYPAGE | PHY_RW, + [PHY_LP_NEXT_PAGE] = PHY_ANYPAGE | PHY_R, + [PHY_1000T_CTRL] = PHY_ANYPAGE | PHY_RW, + [PHY_1000T_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_EXT_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_PAGE] = PHY_ANYPAGE | PHY_RW, + + [PHY_COPPER_CTRL1] = PHY_RW, + [PHY_COPPER_STAT1] = PHY_R, + [PHY_COPPER_CTRL3] = PHY_RW, + [PHY_RX_ERR_CNTR] = PHY_R, + [PHY_OEM_BITS] = PHY_RW, + [PHY_BIAS_1] = PHY_RW, + [PHY_BIAS_2] = PHY_RW, + [PHY_COPPER_INT_ENABLE] = PHY_RW, + [PHY_COPPER_STAT2] = PHY_R, + [PHY_COPPER_CTRL2] = PHY_RW + }, + [2] = { + [PHY_MAC_CTRL1] = PHY_RW, + [PHY_MAC_INT_ENABLE] = PHY_RW, + [PHY_MAC_STAT] = PHY_R, + [PHY_MAC_CTRL2] = PHY_RW + }, + [3] = { + [PHY_LED_03_FUNC_CTRL1] = PHY_RW, + [PHY_LED_03_POL_CTRL] = PHY_RW, + [PHY_LED_TIMER_CTRL] = PHY_RW, + [PHY_LED_45_CTRL] = PHY_RW + }, + [5] = { + [PHY_1000T_SKEW] = PHY_R, + [PHY_1000T_SWAP] = PHY_R + }, + [6] = { + [PHY_CRC_COUNTERS] = PHY_R + } +}; + +static bool +e1000e_phy_reg_check_cap(E1000ECore *core, uint32_t addr, + char cap, uint8_t *page) +{ + *page = + (e1000e_phy_regcap[0][addr] & PHY_ANYPAGE) ? 0 + : core->phy[0][PHY_PAGE]; + + if (*page >= E1000E_PHY_PAGES) { + return false; + } + + return e1000e_phy_regcap[*page][addr] & cap; +} + +static void +e1000e_phy_reg_write(E1000ECore *core, uint8_t page, + uint32_t addr, uint16_t data) +{ + assert(page < E1000E_PHY_PAGES); + assert(addr < E1000E_PHY_PAGE_SIZE); + + if (e1000e_phyreg_writeops[page][addr]) { + e1000e_phyreg_writeops[page][addr](core, addr, data); + } else { + core->phy[page][addr] = data; + } +} + +static void +e1000e_set_mdic(E1000ECore *core, int index, uint32_t val) +{ + uint32_t data = val & E1000_MDIC_DATA_MASK; + uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + uint8_t page; + + if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) { /* phy # */ + val = core->mac[MDIC] | E1000_MDIC_ERROR; + } else if (val & E1000_MDIC_OP_READ) { + if (!e1000e_phy_reg_check_cap(core, addr, PHY_R, &page)) { + trace_e1000e_core_mdic_read_unhandled(page, addr); + val |= E1000_MDIC_ERROR; + } else { + val = (val ^ data) | core->phy[page][addr]; + trace_e1000e_core_mdic_read(page, addr, val); + } + } else if (val & E1000_MDIC_OP_WRITE) { + if (!e1000e_phy_reg_check_cap(core, addr, PHY_W, &page)) { + trace_e1000e_core_mdic_write_unhandled(page, addr); + val |= E1000_MDIC_ERROR; + } else { + trace_e1000e_core_mdic_write(page, addr, data); + e1000e_phy_reg_write(core, page, addr, data); + } + } + core->mac[MDIC] = val | E1000_MDIC_READY; + + if (val & E1000_MDIC_INT_EN) { + e1000e_set_interrupt_cause(core, E1000_ICR_MDAC); + } +} + +static void +e1000e_set_rdt(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xffff; + trace_e1000e_rx_set_rdt(e1000e_mq_queue_idx(RDT0, index), val); + e1000e_start_recv(core); +} + +static void +e1000e_set_status(E1000ECore *core, int index, uint32_t val) +{ + if ((val & E1000_STATUS_PHYRA) == 0) { + core->mac[index] &= ~E1000_STATUS_PHYRA; + } +} + +static void +e1000e_set_ctrlext(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), + !!(val & E1000_CTRL_EXT_SPD_BYPS)); + + /* Zero self-clearing bits */ + val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST); + core->mac[CTRL_EXT] = val; +} + +static void +e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val) +{ + int i; + + core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; + + if (msix_enabled(core->owner)) { + return; + } + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + if (core->mac[PBACLR] & BIT(i)) { + msix_clr_pending(core->owner, i); + } + } +} + +static void +e1000e_set_fcrth(E1000ECore *core, int index, uint32_t val) +{ + core->mac[FCRTH] = val & 0xFFF8; +} + +static void +e1000e_set_fcrtl(E1000ECore *core, int index, uint32_t val) +{ + core->mac[FCRTL] = val & 0x8000FFF8; +} + +static inline void +e1000e_set_16bit(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xffff; +} + +static void +e1000e_set_12bit(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xfff; +} + +static void +e1000e_set_vet(E1000ECore *core, int index, uint32_t val) +{ + core->mac[VET] = val & 0xffff; + core->vet = le16_to_cpu(core->mac[VET]); + trace_e1000e_vlan_vet(core->vet); +} + +static void +e1000e_set_dlen(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & E1000_XDLEN_MASK; +} + +static void +e1000e_set_dbal(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & E1000_XDBAL_MASK; +} + +static void +e1000e_set_tctl(E1000ECore *core, int index, uint32_t val) +{ + E1000E_TxRing txr; + core->mac[index] = val; + + if (core->mac[TARC0] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, 0); + e1000e_start_xmit(core, &txr); + } + + if (core->mac[TARC1] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, 1); + e1000e_start_xmit(core, &txr); + } +} + +static void +e1000e_set_tdt(E1000ECore *core, int index, uint32_t val) +{ + E1000E_TxRing txr; + int qidx = e1000e_mq_queue_idx(TDT, index); + uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1; + + core->mac[index] = val & 0xffff; + + if (core->mac[tarc_reg] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, qidx); + e1000e_start_xmit(core, &txr); + } +} + +static void +e1000e_set_ics(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_irq_write_ics(val); + e1000e_set_interrupt_cause(core, val); +} + +static void +e1000e_set_icr(E1000ECore *core, int index, uint32_t val) +{ + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + trace_e1000e_irq_icr_process_iame(); + e1000e_clear_ims_bits(core, core->mac[IAM]); + } + + trace_e1000e_irq_icr_write(val, core->mac[ICR], core->mac[ICR] & ~val); + core->mac[ICR] &= ~val; + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_imc(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_irq_ims_clear_set_imc(val); + e1000e_clear_ims_bits(core, val); + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_ims(E1000ECore *core, int index, uint32_t val) +{ + static const uint32_t ims_ext_mask = + E1000_IMS_RXQ0 | E1000_IMS_RXQ1 | + E1000_IMS_TXQ0 | E1000_IMS_TXQ1 | + E1000_IMS_OTHER; + + static const uint32_t ims_valid_mask = + E1000_IMS_TXDW | E1000_IMS_TXQE | E1000_IMS_LSC | + E1000_IMS_RXDMT0 | E1000_IMS_RXO | E1000_IMS_RXT0 | + E1000_IMS_MDAC | E1000_IMS_TXD_LOW | E1000_IMS_SRPD | + E1000_IMS_ACK | E1000_IMS_MNG | E1000_IMS_RXQ0 | + E1000_IMS_RXQ1 | E1000_IMS_TXQ0 | E1000_IMS_TXQ1 | + E1000_IMS_OTHER; + + uint32_t valid_val = val & ims_valid_mask; + + trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val); + core->mac[IMS] |= valid_val; + + if ((valid_val & ims_ext_mask) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) && + msix_enabled(core->owner)) { + e1000e_msix_clear(core, valid_val); + } + + if ((valid_val == ims_valid_mask) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA)) { + trace_e1000e_irq_fire_all_timers(val); + e1000e_intrmgr_fire_all_timers(core); + } + + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_rdtr(E1000ECore *core, int index, uint32_t val) +{ + e1000e_set_16bit(core, index, val); + + if ((val & E1000_RDTR_FPD) && (core->rdtr.running)) { + trace_e1000e_irq_rdtr_fpd_running(); + e1000e_intrmgr_fire_delayed_interrupts(core); + } else { + trace_e1000e_irq_rdtr_fpd_not_running(); + } +} + +static void +e1000e_set_tidv(E1000ECore *core, int index, uint32_t val) +{ + e1000e_set_16bit(core, index, val); + + if ((val & E1000_TIDV_FPD) && (core->tidv.running)) { + trace_e1000e_irq_tidv_fpd_running(); + e1000e_intrmgr_fire_delayed_interrupts(core); + } else { + trace_e1000e_irq_tidv_fpd_not_running(); + } +} + +static uint32_t +e1000e_mac_readreg(E1000ECore *core, int index) +{ + return core->mac[index]; +} + +static uint32_t +e1000e_mac_ics_read(E1000ECore *core, int index) +{ + trace_e1000e_irq_read_ics(core->mac[ICS]); + return core->mac[ICS]; +} + +static uint32_t +e1000e_mac_ims_read(E1000ECore *core, int index) +{ + trace_e1000e_irq_read_ims(core->mac[IMS]); + return core->mac[IMS]; +} + +#define E1000E_LOW_BITS_READ_FUNC(num) \ + static uint32_t \ + e1000e_mac_low##num##_read(E1000ECore *core, int index) \ + { \ + return core->mac[index] & (BIT(num) - 1); \ + } \ + +#define E1000E_LOW_BITS_READ(num) \ + e1000e_mac_low##num##_read + +E1000E_LOW_BITS_READ_FUNC(4); +E1000E_LOW_BITS_READ_FUNC(6); +E1000E_LOW_BITS_READ_FUNC(11); +E1000E_LOW_BITS_READ_FUNC(13); +E1000E_LOW_BITS_READ_FUNC(16); + +static uint32_t +e1000e_mac_swsm_read(E1000ECore *core, int index) +{ + uint32_t val = core->mac[SWSM]; + core->mac[SWSM] = val | 1; + return val; +} + +static uint32_t +e1000e_mac_itr_read(E1000ECore *core, int index) +{ + return core->itr_guest_value; +} + +static uint32_t +e1000e_mac_eitr_read(E1000ECore *core, int index) +{ + return core->eitr_guest_value[index - EITR]; +} + +static uint32_t +e1000e_mac_icr_read(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[ICR]; + trace_e1000e_irq_icr_read_entry(ret); + + if (core->mac[IMS] == 0) { + trace_e1000e_irq_icr_clear_zero_ims(); + core->mac[ICR] = 0; + } + + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + trace_e1000e_irq_icr_clear_iame(); + core->mac[ICR] = 0; + trace_e1000e_irq_icr_process_iame(); + e1000e_clear_ims_bits(core, core->mac[IAM]); + } + + trace_e1000e_irq_icr_read_exit(core->mac[ICR]); + e1000e_update_interrupt_state(core); + return ret; +} + +static uint32_t +e1000e_mac_read_clr4(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[index]; + + core->mac[index] = 0; + return ret; +} + +static uint32_t +e1000e_mac_read_clr8(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[index]; + + core->mac[index] = 0; + core->mac[index - 1] = 0; + return ret; +} + +static uint32_t +e1000e_get_ctrl(E1000ECore *core, int index) +{ + uint32_t val = core->mac[CTRL]; + + trace_e1000e_link_read_params( + !!(val & E1000_CTRL_ASDE), + (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT, + !!(val & E1000_CTRL_FRCSPD), + !!(val & E1000_CTRL_FRCDPX), + !!(val & E1000_CTRL_RFCE), + !!(val & E1000_CTRL_TFCE)); + + return val; +} + +static uint32_t +e1000e_get_status(E1000ECore *core, int index) +{ + uint32_t res = core->mac[STATUS]; + + if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) { + res |= E1000_STATUS_GIO_MASTER_ENABLE; + } + + if (core->mac[CTRL] & E1000_CTRL_FRCDPX) { + res |= (core->mac[CTRL] & E1000_CTRL_FD) ? E1000_STATUS_FD : 0; + } else { + res |= E1000_STATUS_FD; + } + + if ((core->mac[CTRL] & E1000_CTRL_FRCSPD) || + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_SPD_BYPS)) { + switch (core->mac[CTRL] & E1000_CTRL_SPD_SEL) { + case E1000_CTRL_SPD_10: + res |= E1000_STATUS_SPEED_10; + break; + case E1000_CTRL_SPD_100: + res |= E1000_STATUS_SPEED_100; + break; + case E1000_CTRL_SPD_1000: + default: + res |= E1000_STATUS_SPEED_1000; + break; + } + } else { + res |= E1000_STATUS_SPEED_1000; + } + + trace_e1000e_link_status( + !!(res & E1000_STATUS_LU), + !!(res & E1000_STATUS_FD), + (res & E1000_STATUS_SPEED_MASK) >> E1000_STATUS_SPEED_SHIFT, + (res & E1000_STATUS_ASDV) >> E1000_STATUS_ASDV_SHIFT); + + return res; +} + +static uint32_t +e1000e_get_tarc(E1000ECore *core, int index) +{ + return core->mac[index] & ((BIT(11) - 1) | + BIT(27) | + BIT(28) | + BIT(29) | + BIT(30)); +} + +static void +e1000e_mac_writereg(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val; +} + +static void +e1000e_mac_setmacaddr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t macaddr[2]; + + core->mac[index] = val; + + macaddr[0] = cpu_to_le32(core->mac[RA]); + macaddr[1] = cpu_to_le32(core->mac[RA + 1]); + qemu_format_nic_info_str(qemu_get_queue(core->owner_nic), + (uint8_t *) macaddr); + + trace_e1000e_mac_set_sw(MAC_ARG(macaddr)); +} + +static void +e1000e_set_eecd(E1000ECore *core, int index, uint32_t val) +{ + static const uint32_t ro_bits = E1000_EECD_PRES | + E1000_EECD_AUTO_RD | + E1000_EECD_SIZE_EX_MASK; + + core->mac[EECD] = (core->mac[EECD] & ro_bits) | (val & ~ro_bits); +} + +static void +e1000e_set_eerd(E1000ECore *core, int index, uint32_t val) +{ + uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK; + uint32_t flags = 0; + uint32_t data = 0; + + if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) { + data = core->eeprom[addr]; + flags = E1000_EERW_DONE; + } + + core->mac[EERD] = flags | + (addr << E1000_EERW_ADDR_SHIFT) | + (data << E1000_EERW_DATA_SHIFT); +} + +static void +e1000e_set_eewr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK; + uint32_t data = (val >> E1000_EERW_DATA_SHIFT) & E1000_EERW_DATA_MASK; + uint32_t flags = 0; + + if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) { + core->eeprom[addr] = data; + flags = E1000_EERW_DONE; + } + + core->mac[EERD] = flags | + (addr << E1000_EERW_ADDR_SHIFT) | + (data << E1000_EERW_DATA_SHIFT); +} + +static void +e1000e_set_rxdctl(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RXDCTL] = core->mac[RXDCTL1] = val; +} + +static void +e1000e_set_itr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t interval = val & 0xffff; + + trace_e1000e_irq_itr_set(val); + + core->itr_guest_value = interval; + core->mac[index] = MAX(interval, E1000E_MIN_XITR); +} + +static void +e1000e_set_eitr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t interval = val & 0xffff; + uint32_t eitr_num = index - EITR; + + trace_e1000e_irq_eitr_set(eitr_num, val); + + core->eitr_guest_value[eitr_num] = interval; + core->mac[index] = MAX(interval, E1000E_MIN_XITR); +} + +static void +e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val) +{ + if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) { + hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero"); + } + + if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) { + hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero"); + } + + core->mac[PSRCTL] = val; +} + +static void +e1000e_update_rx_offloads(E1000ECore *core) +{ + int cso_state = e1000e_rx_l4_cso_enabled(core); + + trace_e1000e_rx_set_cso(cso_state); + + if (core->has_vnet) { + qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, + cso_state, 0, 0, 0, 0); + } +} + +static void +e1000e_set_rxcsum(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RXCSUM] = val; + e1000e_update_rx_offloads(core); +} + +static void +e1000e_set_gcr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t ro_bits = core->mac[GCR] & E1000_GCR_RO_BITS; + core->mac[GCR] = (val & ~E1000_GCR_RO_BITS) | ro_bits; +} + +#define e1000e_getreg(x) [x] = e1000e_mac_readreg +static uint32_t (*e1000e_macreg_readops[])(E1000ECore *, int) = { + e1000e_getreg(PBA), + e1000e_getreg(WUFC), + e1000e_getreg(MANC), + e1000e_getreg(TOTL), + e1000e_getreg(RDT0), + e1000e_getreg(RDBAH0), + e1000e_getreg(TDBAL1), + e1000e_getreg(RDLEN0), + e1000e_getreg(RDH1), + e1000e_getreg(LATECOL), + e1000e_getreg(SEC), + e1000e_getreg(XONTXC), + e1000e_getreg(WUS), + e1000e_getreg(GORCL), + e1000e_getreg(MGTPRC), + e1000e_getreg(EERD), + e1000e_getreg(EIAC), + e1000e_getreg(PSRCTL), + e1000e_getreg(MANC2H), + e1000e_getreg(RXCSUM), + e1000e_getreg(GSCL_3), + e1000e_getreg(GSCN_2), + e1000e_getreg(RSRPD), + e1000e_getreg(RDBAL1), + e1000e_getreg(FCAH), + e1000e_getreg(FCRTH), + e1000e_getreg(FLOP), + e1000e_getreg(FLASHT), + e1000e_getreg(RXSTMPH), + e1000e_getreg(TXSTMPL), + e1000e_getreg(TIMADJL), + e1000e_getreg(TXDCTL), + e1000e_getreg(RDH0), + e1000e_getreg(TDT1), + e1000e_getreg(TNCRS), + e1000e_getreg(RJC), + e1000e_getreg(IAM), + e1000e_getreg(GSCL_2), + e1000e_getreg(RDBAH1), + e1000e_getreg(FLSWDATA), + e1000e_getreg(RXSATRH), + e1000e_getreg(TIPG), + e1000e_getreg(FLMNGCTL), + e1000e_getreg(FLMNGCNT), + e1000e_getreg(TSYNCTXCTL), + e1000e_getreg(EXTCNF_SIZE), + e1000e_getreg(EXTCNF_CTRL), + e1000e_getreg(EEMNGDATA), + e1000e_getreg(CTRL_EXT), + e1000e_getreg(SYSTIMH), + e1000e_getreg(EEMNGCTL), + e1000e_getreg(FLMNGDATA), + e1000e_getreg(TSYNCRXCTL), + e1000e_getreg(TDH), + e1000e_getreg(LEDCTL), + e1000e_getreg(STATUS), + e1000e_getreg(TCTL), + e1000e_getreg(TDBAL), + e1000e_getreg(TDLEN), + e1000e_getreg(TDH1), + e1000e_getreg(RADV), + e1000e_getreg(ECOL), + e1000e_getreg(DC), + e1000e_getreg(RLEC), + e1000e_getreg(XOFFTXC), + e1000e_getreg(RFC), + e1000e_getreg(RNBC), + e1000e_getreg(MGTPTC), + e1000e_getreg(TIMINCA), + e1000e_getreg(RXCFGL), + e1000e_getreg(MFUTP01), + e1000e_getreg(FACTPS), + e1000e_getreg(GSCL_1), + e1000e_getreg(GSCN_0), + e1000e_getreg(GCR2), + e1000e_getreg(RDT1), + e1000e_getreg(PBACLR), + e1000e_getreg(FCTTV), + e1000e_getreg(EEWR), + e1000e_getreg(FLSWCTL), + e1000e_getreg(RXDCTL1), + e1000e_getreg(RXSATRL), + e1000e_getreg(SYSTIML), + e1000e_getreg(RXUDP), + e1000e_getreg(TORL), + e1000e_getreg(TDLEN1), + e1000e_getreg(MCC), + e1000e_getreg(WUC), + e1000e_getreg(EECD), + e1000e_getreg(MFUTP23), + e1000e_getreg(RAID), + e1000e_getreg(FCRTV), + e1000e_getreg(TXDCTL1), + e1000e_getreg(RCTL), + e1000e_getreg(TDT), + e1000e_getreg(MDIC), + e1000e_getreg(FCRUC), + e1000e_getreg(VET), + e1000e_getreg(RDBAL0), + e1000e_getreg(TDBAH1), + e1000e_getreg(RDTR), + e1000e_getreg(SCC), + e1000e_getreg(COLC), + e1000e_getreg(CEXTERR), + e1000e_getreg(XOFFRXC), + e1000e_getreg(IPAV), + e1000e_getreg(GOTCL), + e1000e_getreg(MGTPDC), + e1000e_getreg(GCR), + e1000e_getreg(IVAR), + e1000e_getreg(POEMB), + e1000e_getreg(MFVAL), + e1000e_getreg(FUNCTAG), + e1000e_getreg(GSCL_4), + e1000e_getreg(GSCN_3), + e1000e_getreg(MRQC), + e1000e_getreg(RDLEN1), + e1000e_getreg(FCT), + e1000e_getreg(FLA), + e1000e_getreg(FLOL), + e1000e_getreg(RXDCTL), + e1000e_getreg(RXSTMPL), + e1000e_getreg(TXSTMPH), + e1000e_getreg(TIMADJH), + e1000e_getreg(FCRTL), + e1000e_getreg(TDBAH), + e1000e_getreg(TADV), + e1000e_getreg(XONRXC), + e1000e_getreg(TSCTFC), + e1000e_getreg(RFCTL), + e1000e_getreg(GSCN_1), + e1000e_getreg(FCAL), + e1000e_getreg(FLSWCNT), + + [TOTH] = e1000e_mac_read_clr8, + [GOTCH] = e1000e_mac_read_clr8, + [PRC64] = e1000e_mac_read_clr4, + [PRC255] = e1000e_mac_read_clr4, + [PRC1023] = e1000e_mac_read_clr4, + [PTC64] = e1000e_mac_read_clr4, + [PTC255] = e1000e_mac_read_clr4, + [PTC1023] = e1000e_mac_read_clr4, + [GPRC] = e1000e_mac_read_clr4, + [TPT] = e1000e_mac_read_clr4, + [RUC] = e1000e_mac_read_clr4, + [BPRC] = e1000e_mac_read_clr4, + [MPTC] = e1000e_mac_read_clr4, + [IAC] = e1000e_mac_read_clr4, + [ICR] = e1000e_mac_icr_read, + [RDFH] = E1000E_LOW_BITS_READ(13), + [RDFHS] = E1000E_LOW_BITS_READ(13), + [RDFPC] = E1000E_LOW_BITS_READ(13), + [TDFH] = E1000E_LOW_BITS_READ(13), + [TDFHS] = E1000E_LOW_BITS_READ(13), + [STATUS] = e1000e_get_status, + [TARC0] = e1000e_get_tarc, + [PBS] = E1000E_LOW_BITS_READ(6), + [ICS] = e1000e_mac_ics_read, + [AIT] = E1000E_LOW_BITS_READ(16), + [TORH] = e1000e_mac_read_clr8, + [GORCH] = e1000e_mac_read_clr8, + [PRC127] = e1000e_mac_read_clr4, + [PRC511] = e1000e_mac_read_clr4, + [PRC1522] = e1000e_mac_read_clr4, + [PTC127] = e1000e_mac_read_clr4, + [PTC511] = e1000e_mac_read_clr4, + [PTC1522] = e1000e_mac_read_clr4, + [GPTC] = e1000e_mac_read_clr4, + [TPR] = e1000e_mac_read_clr4, + [ROC] = e1000e_mac_read_clr4, + [MPRC] = e1000e_mac_read_clr4, + [BPTC] = e1000e_mac_read_clr4, + [TSCTC] = e1000e_mac_read_clr4, + [ITR] = e1000e_mac_itr_read, + [RDFT] = E1000E_LOW_BITS_READ(13), + [RDFTS] = E1000E_LOW_BITS_READ(13), + [TDFPC] = E1000E_LOW_BITS_READ(13), + [TDFT] = E1000E_LOW_BITS_READ(13), + [TDFTS] = E1000E_LOW_BITS_READ(13), + [CTRL] = e1000e_get_ctrl, + [TARC1] = e1000e_get_tarc, + [SWSM] = e1000e_mac_swsm_read, + [IMS] = e1000e_mac_ims_read, + + [CRCERRS ... MPC] = e1000e_mac_readreg, + [IP6AT ... IP6AT + 3] = e1000e_mac_readreg, + [IP4AT ... IP4AT + 6] = e1000e_mac_readreg, + [RA ... RA + 31] = e1000e_mac_readreg, + [WUPM ... WUPM + 31] = e1000e_mac_readreg, + [MTA ... MTA + 127] = e1000e_mac_readreg, + [VFTA ... VFTA + 127] = e1000e_mac_readreg, + [FFMT ... FFMT + 254] = E1000E_LOW_BITS_READ(4), + [FFVT ... FFVT + 254] = e1000e_mac_readreg, + [MDEF ... MDEF + 7] = e1000e_mac_readreg, + [FFLT ... FFLT + 10] = E1000E_LOW_BITS_READ(11), + [FTFT ... FTFT + 254] = e1000e_mac_readreg, + [PBM ... PBM + 10239] = e1000e_mac_readreg, + [RETA ... RETA + 31] = e1000e_mac_readreg, + [RSSRK ... RSSRK + 31] = e1000e_mac_readreg, + [MAVTV0 ... MAVTV3] = e1000e_mac_readreg, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_mac_eitr_read +}; +enum { E1000E_NREADOPS = ARRAY_SIZE(e1000e_macreg_readops) }; + +#define e1000e_putreg(x) [x] = e1000e_mac_writereg +static void (*e1000e_macreg_writeops[])(E1000ECore *, int, uint32_t) = { + e1000e_putreg(PBA), + e1000e_putreg(SWSM), + e1000e_putreg(WUFC), + e1000e_putreg(RDBAH1), + e1000e_putreg(TDBAH), + e1000e_putreg(TXDCTL), + e1000e_putreg(RDBAH0), + e1000e_putreg(LEDCTL), + e1000e_putreg(FCAL), + e1000e_putreg(FCRUC), + e1000e_putreg(AIT), + e1000e_putreg(TDFH), + e1000e_putreg(TDFT), + e1000e_putreg(TDFHS), + e1000e_putreg(TDFTS), + e1000e_putreg(TDFPC), + e1000e_putreg(WUC), + e1000e_putreg(WUS), + e1000e_putreg(RDFH), + e1000e_putreg(RDFT), + e1000e_putreg(RDFHS), + e1000e_putreg(RDFTS), + e1000e_putreg(RDFPC), + e1000e_putreg(IPAV), + e1000e_putreg(TDBAH1), + e1000e_putreg(TIMINCA), + e1000e_putreg(IAM), + e1000e_putreg(EIAC), + e1000e_putreg(IVAR), + e1000e_putreg(TARC0), + e1000e_putreg(TARC1), + e1000e_putreg(FLSWDATA), + e1000e_putreg(POEMB), + e1000e_putreg(PBS), + e1000e_putreg(MFUTP01), + e1000e_putreg(MFUTP23), + e1000e_putreg(MANC), + e1000e_putreg(MANC2H), + e1000e_putreg(MFVAL), + e1000e_putreg(EXTCNF_CTRL), + e1000e_putreg(FACTPS), + e1000e_putreg(FUNCTAG), + e1000e_putreg(GSCL_1), + e1000e_putreg(GSCL_2), + e1000e_putreg(GSCL_3), + e1000e_putreg(GSCL_4), + e1000e_putreg(GSCN_0), + e1000e_putreg(GSCN_1), + e1000e_putreg(GSCN_2), + e1000e_putreg(GSCN_3), + e1000e_putreg(GCR2), + e1000e_putreg(MRQC), + e1000e_putreg(FLOP), + e1000e_putreg(FLOL), + e1000e_putreg(FLSWCTL), + e1000e_putreg(FLSWCNT), + e1000e_putreg(FLA), + e1000e_putreg(RXDCTL1), + e1000e_putreg(TXDCTL1), + e1000e_putreg(TIPG), + e1000e_putreg(RXSTMPH), + e1000e_putreg(RXSTMPL), + e1000e_putreg(RXSATRL), + e1000e_putreg(RXSATRH), + e1000e_putreg(TXSTMPL), + e1000e_putreg(TXSTMPH), + e1000e_putreg(SYSTIML), + e1000e_putreg(SYSTIMH), + e1000e_putreg(TIMADJL), + e1000e_putreg(TIMADJH), + e1000e_putreg(RXUDP), + e1000e_putreg(RXCFGL), + e1000e_putreg(TSYNCRXCTL), + e1000e_putreg(TSYNCTXCTL), + e1000e_putreg(FLSWDATA), + e1000e_putreg(EXTCNF_SIZE), + e1000e_putreg(EEMNGCTL), + e1000e_putreg(RA), + + [TDH1] = e1000e_set_16bit, + [TDT1] = e1000e_set_tdt, + [TCTL] = e1000e_set_tctl, + [TDT] = e1000e_set_tdt, + [MDIC] = e1000e_set_mdic, + [ICS] = e1000e_set_ics, + [TDH] = e1000e_set_16bit, + [RDH0] = e1000e_set_16bit, + [RDT0] = e1000e_set_rdt, + [IMC] = e1000e_set_imc, + [IMS] = e1000e_set_ims, + [ICR] = e1000e_set_icr, + [EECD] = e1000e_set_eecd, + [RCTL] = e1000e_set_rx_control, + [CTRL] = e1000e_set_ctrl, + [RDTR] = e1000e_set_rdtr, + [RADV] = e1000e_set_16bit, + [TADV] = e1000e_set_16bit, + [ITR] = e1000e_set_itr, + [EERD] = e1000e_set_eerd, + [GCR] = e1000e_set_gcr, + [PSRCTL] = e1000e_set_psrctl, + [RXCSUM] = e1000e_set_rxcsum, + [RAID] = e1000e_set_16bit, + [RSRPD] = e1000e_set_12bit, + [TIDV] = e1000e_set_tidv, + [TDLEN1] = e1000e_set_dlen, + [TDLEN] = e1000e_set_dlen, + [RDLEN0] = e1000e_set_dlen, + [RDLEN1] = e1000e_set_dlen, + [TDBAL] = e1000e_set_dbal, + [TDBAL1] = e1000e_set_dbal, + [RDBAL0] = e1000e_set_dbal, + [RDBAL1] = e1000e_set_dbal, + [RDH1] = e1000e_set_16bit, + [RDT1] = e1000e_set_rdt, + [STATUS] = e1000e_set_status, + [PBACLR] = e1000e_set_pbaclr, + [CTRL_EXT] = e1000e_set_ctrlext, + [FCAH] = e1000e_set_16bit, + [FCT] = e1000e_set_16bit, + [FCTTV] = e1000e_set_16bit, + [FCRTV] = e1000e_set_16bit, + [FCRTH] = e1000e_set_fcrth, + [FCRTL] = e1000e_set_fcrtl, + [VET] = e1000e_set_vet, + [RXDCTL] = e1000e_set_rxdctl, + [FLASHT] = e1000e_set_16bit, + [EEWR] = e1000e_set_eewr, + [CTRL_DUP] = e1000e_set_ctrl, + [RFCTL] = e1000e_set_rfctl, + [RA + 1] = e1000e_mac_setmacaddr, + + [IP6AT ... IP6AT + 3] = e1000e_mac_writereg, + [IP4AT ... IP4AT + 6] = e1000e_mac_writereg, + [RA + 2 ... RA + 31] = e1000e_mac_writereg, + [WUPM ... WUPM + 31] = e1000e_mac_writereg, + [MTA ... MTA + 127] = e1000e_mac_writereg, + [VFTA ... VFTA + 127] = e1000e_mac_writereg, + [FFMT ... FFMT + 254] = e1000e_mac_writereg, + [FFVT ... FFVT + 254] = e1000e_mac_writereg, + [PBM ... PBM + 10239] = e1000e_mac_writereg, + [MDEF ... MDEF + 7] = e1000e_mac_writereg, + [FFLT ... FFLT + 10] = e1000e_mac_writereg, + [FTFT ... FTFT + 254] = e1000e_mac_writereg, + [RETA ... RETA + 31] = e1000e_mac_writereg, + [RSSRK ... RSSRK + 31] = e1000e_mac_writereg, + [MAVTV0 ... MAVTV3] = e1000e_mac_writereg, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_set_eitr +}; +enum { E1000E_NWRITEOPS = ARRAY_SIZE(e1000e_macreg_writeops) }; + +enum { MAC_ACCESS_PARTIAL = 1 }; + +/* The array below combines alias offsets of the index values for the + * MAC registers that have aliases, with the indication of not fully + * implemented registers (lowest bit). This combination is possible + * because all of the offsets are even. */ +static const uint16_t mac_reg_access[E1000E_MAC_SIZE] = { + /* Alias index offsets */ + [FCRTL_A] = 0x07fe, [FCRTH_A] = 0x0802, + [RDH0_A] = 0x09bc, [RDT0_A] = 0x09bc, [RDTR_A] = 0x09c6, + [RDFH_A] = 0xe904, [RDFT_A] = 0xe904, + [TDH_A] = 0x0cf8, [TDT_A] = 0x0cf8, [TIDV_A] = 0x0cf8, + [TDFH_A] = 0xed00, [TDFT_A] = 0xed00, + [RA_A ... RA_A + 31] = 0x14f0, + [VFTA_A ... VFTA_A + 127] = 0x1400, + [RDBAL0_A ... RDLEN0_A] = 0x09bc, + [TDBAL_A ... TDLEN_A] = 0x0cf8, + /* Access options */ + [RDFH] = MAC_ACCESS_PARTIAL, [RDFT] = MAC_ACCESS_PARTIAL, + [RDFHS] = MAC_ACCESS_PARTIAL, [RDFTS] = MAC_ACCESS_PARTIAL, + [RDFPC] = MAC_ACCESS_PARTIAL, + [TDFH] = MAC_ACCESS_PARTIAL, [TDFT] = MAC_ACCESS_PARTIAL, + [TDFHS] = MAC_ACCESS_PARTIAL, [TDFTS] = MAC_ACCESS_PARTIAL, + [TDFPC] = MAC_ACCESS_PARTIAL, [EECD] = MAC_ACCESS_PARTIAL, + [PBM] = MAC_ACCESS_PARTIAL, [FLA] = MAC_ACCESS_PARTIAL, + [FCAL] = MAC_ACCESS_PARTIAL, [FCAH] = MAC_ACCESS_PARTIAL, + [FCT] = MAC_ACCESS_PARTIAL, [FCTTV] = MAC_ACCESS_PARTIAL, + [FCRTV] = MAC_ACCESS_PARTIAL, [FCRTL] = MAC_ACCESS_PARTIAL, + [FCRTH] = MAC_ACCESS_PARTIAL, [TXDCTL] = MAC_ACCESS_PARTIAL, + [TXDCTL1] = MAC_ACCESS_PARTIAL, + [MAVTV0 ... MAVTV3] = MAC_ACCESS_PARTIAL +}; + +void +e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size) +{ + uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr); + + if (index < E1000E_NWRITEOPS && e1000e_macreg_writeops[index]) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + trace_e1000e_wrn_regs_write_trivial(index << 2); + } + trace_e1000e_core_write(index << 2, size, val); + e1000e_macreg_writeops[index](core, index, val); + } else if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) { + trace_e1000e_wrn_regs_write_ro(index << 2, size, val); + } else { + trace_e1000e_wrn_regs_write_unknown(index << 2, size, val); + } +} + +uint64_t +e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size) +{ + uint64_t val; + uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr); + + if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + trace_e1000e_wrn_regs_read_trivial(index << 2); + } + val = e1000e_macreg_readops[index](core, index); + trace_e1000e_core_read(index << 2, size, val); + return val; + } else { + trace_e1000e_wrn_regs_read_unknown(index << 2, size); + } + return 0; +} + +static inline void +e1000e_autoneg_pause(E1000ECore *core) +{ + timer_del(core->autoneg_timer); +} + +static void +e1000e_autoneg_resume(E1000ECore *core) +{ + if (e1000e_have_autoneg(core) && + !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + qemu_get_queue(core->owner_nic)->link_down = false; + timer_mod(core->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void +e1000e_vm_state_change(void *opaque, int running, RunState state) +{ + E1000ECore *core = opaque; + + if (running) { + trace_e1000e_vm_state_running(); + e1000e_intrmgr_resume(core); + e1000e_autoneg_resume(core); + } else { + trace_e1000e_vm_state_stopped(); + e1000e_autoneg_pause(core); + e1000e_intrmgr_pause(core); + } +} + +void +e1000e_core_pci_realize(E1000ECore *core, + const uint16_t *eeprom_templ, + uint32_t eeprom_size, + const uint8_t *macaddr) +{ + int i; + + core->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + e1000e_autoneg_timer, core); + e1000e_intrmgr_pci_realize(core); + + core->vmstate = + qemu_add_vm_change_state_handler(e1000e_vm_state_change, core); + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, + E1000E_MAX_TX_FRAGS, core->has_vnet); + } + + net_rx_pkt_init(&core->rx_pkt, core->has_vnet); + + e1000x_core_prepare_eeprom(core->eeprom, + eeprom_templ, + eeprom_size, + PCI_DEVICE_GET_CLASS(core->owner)->device_id, + macaddr); + e1000e_update_rx_offloads(core); +} + +void +e1000e_core_pci_uninit(E1000ECore *core) +{ + int i; + + timer_del(core->autoneg_timer); + timer_free(core->autoneg_timer); + + e1000e_intrmgr_pci_unint(core); + + qemu_del_vm_change_state_handler(core->vmstate); + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + net_tx_pkt_reset(core->tx[i].tx_pkt); + net_tx_pkt_uninit(core->tx[i].tx_pkt); + } + + net_rx_pkt_uninit(core->rx_pkt); +} + +static const uint16_t +e1000e_phy_reg_init[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE] = { + [0] = { + [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB | + MII_CR_FULL_DUPLEX | + MII_CR_AUTO_NEG_EN, + + [PHY_STATUS] = MII_SR_EXTENDED_CAPS | + MII_SR_LINK_STATUS | + MII_SR_AUTONEG_CAPS | + MII_SR_PREAMBLE_SUPPRESS | + MII_SR_EXTENDED_STATUS | + MII_SR_10T_HD_CAPS | + MII_SR_10T_FD_CAPS | + MII_SR_100X_HD_CAPS | + MII_SR_100X_FD_CAPS, + + [PHY_ID1] = 0x141, + [PHY_ID2] = E1000_PHY_ID2_82574x, + [PHY_AUTONEG_ADV] = 0xde1, + [PHY_LP_ABILITY] = 0x7e0, + [PHY_AUTONEG_EXP] = BIT(2), + [PHY_NEXT_PAGE_TX] = BIT(0) | BIT(13), + [PHY_1000T_CTRL] = BIT(8) | BIT(9) | BIT(10) | BIT(11), + [PHY_1000T_STATUS] = 0x3c00, + [PHY_EXT_STATUS] = BIT(12) | BIT(13), + + [PHY_COPPER_CTRL1] = BIT(5) | BIT(6) | BIT(8) | BIT(9) | + BIT(12) | BIT(13), + [PHY_COPPER_STAT1] = BIT(3) | BIT(10) | BIT(11) | BIT(13) | BIT(15) + }, + [2] = { + [PHY_MAC_CTRL1] = BIT(3) | BIT(7), + [PHY_MAC_CTRL2] = BIT(1) | BIT(2) | BIT(6) | BIT(12) + }, + [3] = { + [PHY_LED_TIMER_CTRL] = BIT(0) | BIT(2) | BIT(14) + } +}; + +static const uint32_t e1000e_mac_reg_init[] = { + [PBA] = 0x00140014, + [LEDCTL] = BIT(1) | BIT(8) | BIT(9) | BIT(15) | BIT(17) | BIT(18), + [EXTCNF_CTRL] = BIT(3), + [EEMNGCTL] = BIT(31), + [FLASHT] = 0x2, + [FLSWCTL] = BIT(30) | BIT(31), + [FLOL] = BIT(0), + [RXDCTL] = BIT(16), + [RXDCTL1] = BIT(16), + [TIPG] = 0x8 | (0x8 << 10) | (0x6 << 20), + [RXCFGL] = 0x88F7, + [RXUDP] = 0x319, + [CTRL] = E1000_CTRL_FD | E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | + E1000_CTRL_SPD_1000 | E1000_CTRL_SLU | + E1000_CTRL_ADVD3WUC, + [STATUS] = E1000_STATUS_ASDV_1000 | E1000_STATUS_LU, + [PSRCTL] = (2 << E1000_PSRCTL_BSIZE0_SHIFT) | + (4 << E1000_PSRCTL_BSIZE1_SHIFT) | + (4 << E1000_PSRCTL_BSIZE2_SHIFT), + [TARC0] = 0x3 | E1000_TARC_ENABLE, + [TARC1] = 0x3 | E1000_TARC_ENABLE, + [EECD] = E1000_EECD_AUTO_RD | E1000_EECD_PRES, + [EERD] = E1000_EERW_DONE, + [EEWR] = E1000_EERW_DONE, + [GCR] = E1000_L0S_ADJUST | + E1000_L1_ENTRY_LATENCY_MSB | + E1000_L1_ENTRY_LATENCY_LSB, + [TDFH] = 0x600, + [TDFT] = 0x600, + [TDFHS] = 0x600, + [TDFTS] = 0x600, + [POEMB] = 0x30D, + [PBS] = 0x028, + [MANC] = E1000_MANC_DIS_IP_CHK_ARP, + [FACTPS] = E1000_FACTPS_LAN0_ON | 0x20000000, + [SWSM] = 1, + [RXCSUM] = E1000_RXCSUM_IPOFLD | E1000_RXCSUM_TUOFLD, + [ITR] = E1000E_MIN_XITR, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = E1000E_MIN_XITR, +}; + +void +e1000e_core_reset(E1000ECore *core) +{ + int i; + + timer_del(core->autoneg_timer); + + e1000e_intrmgr_reset(core); + + memset(core->phy, 0, sizeof core->phy); + memmove(core->phy, e1000e_phy_reg_init, sizeof e1000e_phy_reg_init); + memset(core->mac, 0, sizeof core->mac); + memmove(core->mac, e1000e_mac_reg_init, sizeof e1000e_mac_reg_init); + + core->rxbuf_min_shift = 1 + E1000_RING_DESC_LEN_SHIFT; + + if (qemu_get_queue(core->owner_nic)->link_down) { + e1000e_link_down(core); + } + + e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); + + for (i = 0; i < ARRAY_SIZE(core->tx); i++) { + net_tx_pkt_reset(core->tx[i].tx_pkt); + memset(&core->tx[i].props, 0, sizeof(core->tx[i].props)); + core->tx[i].skip_cp = false; + } +} + +void e1000e_core_pre_save(E1000ECore *core) +{ + int i; + NetClientState *nc = qemu_get_queue(core->owner_nic); + + /* + * If link is down and auto-negotiation is supported and ongoing, + * complete auto-negotiation immediately. This allows us to look + * at MII_SR_AUTONEG_COMPLETE to infer link status on load. + */ + if (nc->link_down && e1000e_have_autoneg(core)) { + core->phy[0][PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + e1000e_update_flowctl_status(core); + } + + for (i = 0; i < ARRAY_SIZE(core->tx); i++) { + if (net_tx_pkt_has_fragments(core->tx[i].tx_pkt)) { + core->tx[i].skip_cp = true; + } + } +} + +int +e1000e_core_post_load(E1000ECore *core) +{ + NetClientState *nc = qemu_get_queue(core->owner_nic); + + /* nc.link_down can't be migrated, so infer link_down according + * to link status bit in core.mac[STATUS]. + */ + nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0; + + return 0; +} diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h new file mode 100644 index 0000000000..5f413a9e08 --- /dev/null +++ b/hw/net/e1000e_core.h @@ -0,0 +1,146 @@ +/* +* Core code for QEMU e1000e emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman <dmitry@daynix.com> +* Leonid Bloch <leonid@daynix.com> +* Yan Vugenfirer <yan@daynix.com> +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#define E1000E_PHY_PAGE_SIZE (0x20) +#define E1000E_PHY_PAGES (0x07) +#define E1000E_MAC_SIZE (0x8000) +#define E1000E_EEPROM_SIZE (64) +#define E1000E_MSIX_VEC_NUM (5) +#define E1000E_NUM_QUEUES (2) + +typedef struct E1000Core E1000ECore; + +enum { PHY_R = BIT(0), + PHY_W = BIT(1), + PHY_RW = PHY_R | PHY_W, + PHY_ANYPAGE = BIT(2) }; + +typedef struct E1000IntrDelayTimer_st { + QEMUTimer *timer; + bool running; + uint32_t delay_reg; + uint32_t delay_resolution_ns; + E1000ECore *core; +} E1000IntrDelayTimer; + +struct E1000Core { + uint32_t mac[E1000E_MAC_SIZE]; + uint16_t phy[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]; + uint16_t eeprom[E1000E_EEPROM_SIZE]; + + uint32_t rxbuf_sizes[E1000_PSRCTL_BUFFS_PER_DESC]; + uint32_t rx_desc_buf_size; + uint32_t rxbuf_min_shift; + uint8_t rx_desc_len; + + QEMUTimer *autoneg_timer; + + struct e1000e_tx { + e1000x_txd_props props; + + bool skip_cp; + struct NetTxPkt *tx_pkt; + } tx[E1000E_NUM_QUEUES]; + + struct NetRxPkt *rx_pkt; + + bool has_vnet; + int max_queue_num; + + /* Interrupt moderation management */ + uint32_t delayed_causes; + + E1000IntrDelayTimer radv; + E1000IntrDelayTimer rdtr; + E1000IntrDelayTimer raid; + + E1000IntrDelayTimer tadv; + E1000IntrDelayTimer tidv; + + E1000IntrDelayTimer itr; + bool itr_intr_pending; + + E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM]; + bool eitr_intr_pending[E1000E_MSIX_VEC_NUM]; + + VMChangeStateEntry *vmstate; + + uint32_t itr_guest_value; + uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM]; + + uint16_t vet; + + uint8_t permanent_mac[ETH_ALEN]; + + NICState *owner_nic; + PCIDevice *owner; + void (*owner_start_recv)(PCIDevice *d); +}; + +void +e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size); + +uint64_t +e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size); + +void +e1000e_core_pci_realize(E1000ECore *regs, + const uint16_t *eeprom_templ, + uint32_t eeprom_size, + const uint8_t *macaddr); + +void +e1000e_core_reset(E1000ECore *core); + +void +e1000e_core_pre_save(E1000ECore *core); + +int +e1000e_core_post_load(E1000ECore *core); + +void +e1000e_core_set_link_status(E1000ECore *core); + +void +e1000e_core_pci_uninit(E1000ECore *core); + +int +e1000e_can_receive(E1000ECore *core); + +ssize_t +e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size); + +ssize_t +e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c new file mode 100644 index 0000000000..94f85c98c8 --- /dev/null +++ b/hw/net/e1000x_common.c @@ -0,0 +1,267 @@ +/* +* QEMU e1000(e) emulation - shared code +* +* Copyright (c) 2008 Qumranet +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "net/net.h" + +#include "e1000x_common.h" + +#include "trace.h" + +bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac) +{ + bool link_up = mac[STATUS] & E1000_STATUS_LU; + bool rx_enabled = mac[RCTL] & E1000_RCTL_EN; + bool pci_master = d->config[PCI_COMMAND] & PCI_COMMAND_MASTER; + + if (!link_up || !rx_enabled || !pci_master) { + trace_e1000x_rx_can_recv_disabled(link_up, rx_enabled, pci_master); + return false; + } + + return true; +} + +bool e1000x_is_vlan_packet(const uint8_t *buf, uint16_t vet) +{ + uint16_t eth_proto = be16_to_cpup((uint16_t *)(buf + 12)); + bool res = (eth_proto == vet); + + trace_e1000x_vlan_is_vlan_pkt(res, eth_proto, vet); + + return res; +} + +bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf) +{ + static const int mta_shift[] = { 4, 3, 2, 0 }; + uint32_t f, ra[2], *rp, rctl = mac[RCTL]; + + for (rp = mac + RA; rp < mac + RA + 32; rp += 2) { + if (!(rp[1] & E1000_RAH_AV)) { + continue; + } + ra[0] = cpu_to_le32(rp[0]); + ra[1] = cpu_to_le32(rp[1]); + if (!memcmp(buf, (uint8_t *)ra, 6)) { + trace_e1000x_rx_flt_ucast_match((int)(rp - mac - RA) / 2, + MAC_ARG(buf)); + return true; + } + } + trace_e1000x_rx_flt_ucast_mismatch(MAC_ARG(buf)); + + f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; + f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; + if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) { + e1000x_inc_reg_if_not_full(mac, MPRC); + return true; + } + + trace_e1000x_rx_flt_inexact_mismatch(MAC_ARG(buf), + (rctl >> E1000_RCTL_MO_SHIFT) & 3, + f >> 5, + mac[MTA + (f >> 5)]); + + return false; +} + +bool e1000x_hw_rx_enabled(uint32_t *mac) +{ + if (!(mac[STATUS] & E1000_STATUS_LU)) { + trace_e1000x_rx_link_down(mac[STATUS]); + return false; + } + + if (!(mac[RCTL] & E1000_RCTL_EN)) { + trace_e1000x_rx_disabled(mac[RCTL]); + return false; + } + + return true; +} + +bool e1000x_is_oversized(uint32_t *mac, size_t size) +{ + /* this is the size past which hardware will + drop packets when setting LPE=0 */ + static const int maximum_ethernet_vlan_size = 1522; + /* this is the size past which hardware will + drop packets when setting LPE=1 */ + static const int maximum_ethernet_lpe_size = 16384; + + if ((size > maximum_ethernet_lpe_size || + (size > maximum_ethernet_vlan_size + && !(mac[RCTL] & E1000_RCTL_LPE))) + && !(mac[RCTL] & E1000_RCTL_SBP)) { + e1000x_inc_reg_if_not_full(mac, ROC); + trace_e1000x_rx_oversized(size); + return true; + } + + return false; +} + +void e1000x_restart_autoneg(uint32_t *mac, uint16_t *phy, QEMUTimer *timer) +{ + e1000x_update_regs_on_link_down(mac, phy); + trace_e1000x_link_negotiation_start(); + timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); +} + +void e1000x_reset_mac_addr(NICState *nic, uint32_t *mac_regs, + uint8_t *mac_addr) +{ + int i; + + mac_regs[RA] = 0; + mac_regs[RA + 1] = E1000_RAH_AV; + for (i = 0; i < 4; i++) { + mac_regs[RA] |= mac_addr[i] << (8 * i); + mac_regs[RA + 1] |= + (i < 2) ? mac_addr[i + 4] << (8 * i) : 0; + } + + qemu_format_nic_info_str(qemu_get_queue(nic), mac_addr); + trace_e1000x_mac_indicate(MAC_ARG(mac_addr)); +} + +void e1000x_update_regs_on_autoneg_done(uint32_t *mac, uint16_t *phy) +{ + e1000x_update_regs_on_link_up(mac, phy); + phy[PHY_LP_ABILITY] |= MII_LPAR_LPACK; + phy[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + trace_e1000x_link_negotiation_done(); +} + +void +e1000x_core_prepare_eeprom(uint16_t *eeprom, + const uint16_t *templ, + uint32_t templ_size, + uint16_t dev_id, + const uint8_t *macaddr) +{ + uint16_t checksum = 0; + int i; + + memmove(eeprom, templ, templ_size); + + for (i = 0; i < 3; i++) { + eeprom[i] = (macaddr[2 * i + 1] << 8) | macaddr[2 * i]; + } + + eeprom[11] = eeprom[13] = dev_id; + + for (i = 0; i < EEPROM_CHECKSUM_REG; i++) { + checksum += eeprom[i]; + } + + checksum = (uint16_t) EEPROM_SUM - checksum; + + eeprom[EEPROM_CHECKSUM_REG] = checksum; +} + +uint32_t +e1000x_rxbufsize(uint32_t rctl) +{ + rctl &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 | + E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 | + E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256; + switch (rctl) { + case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384: + return 16384; + case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192: + return 8192; + case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096: + return 4096; + case E1000_RCTL_SZ_1024: + return 1024; + case E1000_RCTL_SZ_512: + return 512; + case E1000_RCTL_SZ_256: + return 256; + } + return 2048; +} + +void +e1000x_update_rx_total_stats(uint32_t *mac, + size_t data_size, + size_t data_fcs_size) +{ + static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511, + PRC1023, PRC1522 }; + + e1000x_increase_size_stats(mac, PRCregs, data_fcs_size); + e1000x_inc_reg_if_not_full(mac, TPR); + mac[GPRC] = mac[TPR]; + /* TOR - Total Octets Received: + * This register includes bytes received in a packet from the <Destination + * Address> field through the <CRC> field, inclusively. + * Always include FCS length (4) in size. + */ + e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4); + mac[GORCL] = mac[TORL]; + mac[GORCH] = mac[TORH]; +} + +void +e1000x_increase_size_stats(uint32_t *mac, const int *size_regs, int size) +{ + if (size > 1023) { + e1000x_inc_reg_if_not_full(mac, size_regs[5]); + } else if (size > 511) { + e1000x_inc_reg_if_not_full(mac, size_regs[4]); + } else if (size > 255) { + e1000x_inc_reg_if_not_full(mac, size_regs[3]); + } else if (size > 127) { + e1000x_inc_reg_if_not_full(mac, size_regs[2]); + } else if (size > 64) { + e1000x_inc_reg_if_not_full(mac, size_regs[1]); + } else if (size == 64) { + e1000x_inc_reg_if_not_full(mac, size_regs[0]); + } +} + +void +e1000x_read_tx_ctx_descr(struct e1000_context_desc *d, + e1000x_txd_props *props) +{ + uint32_t op = le32_to_cpu(d->cmd_and_length); + + props->ipcss = d->lower_setup.ip_fields.ipcss; + props->ipcso = d->lower_setup.ip_fields.ipcso; + props->ipcse = le16_to_cpu(d->lower_setup.ip_fields.ipcse); + props->tucss = d->upper_setup.tcp_fields.tucss; + props->tucso = d->upper_setup.tcp_fields.tucso; + props->tucse = le16_to_cpu(d->upper_setup.tcp_fields.tucse); + props->paylen = op & 0xfffff; + props->hdr_len = d->tcp_seg_setup.fields.hdr_len; + props->mss = le16_to_cpu(d->tcp_seg_setup.fields.mss); + props->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0; + props->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; + props->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; +} diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h new file mode 100644 index 0000000000..21bf28e0cc --- /dev/null +++ b/hw/net/e1000x_common.h @@ -0,0 +1,213 @@ +/* +* QEMU e1000(e) emulation - shared code +* +* Copyright (c) 2008 Qumranet +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "e1000_regs.h" + +#define defreg(x) x = (E1000_##x >> 2) +enum { + defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), + defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), + defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), + defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH0), + defreg(RDBAL0), defreg(RDH0), defreg(RDLEN0), defreg(RDT0), + defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), + defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), + defreg(TDLEN1), defreg(TDBAL1), defreg(TDBAH1), defreg(TDH1), + defreg(TDT1), defreg(TORH), defreg(TORL), defreg(TOTH), + defreg(TOTL), defreg(TPR), defreg(TPT), defreg(TXDCTL), + defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS), + defreg(VFTA), defreg(VET), defreg(RDTR), defreg(RADV), + defreg(TADV), defreg(ITR), defreg(SCC), defreg(ECOL), + defreg(MCC), defreg(LATECOL), defreg(COLC), defreg(DC), + defreg(TNCRS), defreg(SEC), defreg(CEXTERR), defreg(RLEC), + defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), defreg(XOFFTXC), + defreg(FCRUC), defreg(AIT), defreg(TDFH), defreg(TDFT), + defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(WUC), + defreg(WUS), defreg(POEMB), defreg(PBS), defreg(RDFH), + defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC), + defreg(PBM), defreg(IPAV), defreg(IP4AT), defreg(IP6AT), + defreg(WUPM), defreg(FFLT), defreg(FFMT), defreg(FFVT), + defreg(TARC0), defreg(TARC1), defreg(IAM), defreg(EXTCNF_CTRL), + defreg(GCR), defreg(TIMINCA), defreg(EIAC), defreg(CTRL_EXT), + defreg(IVAR), defreg(MFUTP01), defreg(MFUTP23), defreg(MANC2H), + defreg(MFVAL), defreg(MDEF), defreg(FACTPS), defreg(FTFT), + defreg(RUC), defreg(ROC), defreg(RFC), defreg(RJC), + defreg(PRC64), defreg(PRC127), defreg(PRC255), defreg(PRC511), + defreg(PRC1023), defreg(PRC1522), defreg(PTC64), defreg(PTC127), + defreg(PTC255), defreg(PTC511), defreg(PTC1023), defreg(PTC1522), + defreg(GORCL), defreg(GORCH), defreg(GOTCL), defreg(GOTCH), + defreg(RNBC), defreg(BPRC), defreg(MPRC), defreg(RFCTL), + defreg(PSRCTL), defreg(MPTC), defreg(BPTC), defreg(TSCTFC), + defreg(IAC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC), + defreg(TSCTC), defreg(RXCSUM), defreg(FUNCTAG), defreg(GSCL_1), + defreg(GSCL_2), defreg(GSCL_3), defreg(GSCL_4), defreg(GSCN_0), + defreg(GSCN_1), defreg(GSCN_2), defreg(GSCN_3), defreg(GCR2), + defreg(RAID), defreg(RSRPD), defreg(TIDV), defreg(EITR), + defreg(MRQC), defreg(RETA), defreg(RSSRK), defreg(RDBAH1), + defreg(RDBAL1), defreg(RDLEN1), defreg(RDH1), defreg(RDT1), + defreg(PBACLR), defreg(FCAL), defreg(FCAH), defreg(FCT), + defreg(FCRTH), defreg(FCRTL), defreg(FCTTV), defreg(FCRTV), + defreg(FLA), defreg(EEWR), defreg(FLOP), defreg(FLOL), + defreg(FLSWCTL), defreg(FLSWCNT), defreg(RXDCTL), defreg(RXDCTL1), + defreg(MAVTV0), defreg(MAVTV1), defreg(MAVTV2), defreg(MAVTV3), + defreg(TXSTMPL), defreg(TXSTMPH), defreg(SYSTIML), defreg(SYSTIMH), + defreg(RXCFGL), defreg(RXUDP), defreg(TIMADJL), defreg(TIMADJH), + defreg(RXSTMPH), defreg(RXSTMPL), defreg(RXSATRL), defreg(RXSATRH), + defreg(FLASHT), defreg(TIPG), defreg(RDH), defreg(RDT), + defreg(RDLEN), defreg(RDBAH), defreg(RDBAL), + defreg(TXDCTL1), + defreg(FLSWDATA), + defreg(CTRL_DUP), + defreg(EXTCNF_SIZE), + defreg(EEMNGCTL), + defreg(EEMNGDATA), + defreg(FLMNGCTL), + defreg(FLMNGDATA), + defreg(FLMNGCNT), + defreg(TSYNCRXCTL), + defreg(TSYNCTXCTL), + + /* Aliases */ + defreg(RDH0_A), defreg(RDT0_A), defreg(RDTR_A), defreg(RDFH_A), + defreg(RDFT_A), defreg(TDH_A), defreg(TDT_A), defreg(TIDV_A), + defreg(TDFH_A), defreg(TDFT_A), defreg(RA_A), defreg(RDBAL0_A), + defreg(TDBAL_A), defreg(TDLEN_A), defreg(VFTA_A), defreg(RDLEN0_A), + defreg(FCRTL_A), defreg(FCRTH_A) +}; + +static inline void +e1000x_inc_reg_if_not_full(uint32_t *mac, int index) +{ + if (mac[index] != 0xffffffff) { + mac[index]++; + } +} + +static inline void +e1000x_grow_8reg_if_not_full(uint32_t *mac, int index, int size) +{ + uint64_t sum = mac[index] | (uint64_t)mac[index + 1] << 32; + + if (sum + size < sum) { + sum = ~0ULL; + } else { + sum += size; + } + mac[index] = sum; + mac[index + 1] = sum >> 32; +} + +static inline int +e1000x_vlan_enabled(uint32_t *mac) +{ + return ((mac[CTRL] & E1000_CTRL_VME) != 0); +} + +static inline int +e1000x_is_vlan_txd(uint32_t txd_lower) +{ + return ((txd_lower & E1000_TXD_CMD_VLE) != 0); +} + +static inline int +e1000x_vlan_rx_filter_enabled(uint32_t *mac) +{ + return ((mac[RCTL] & E1000_RCTL_VFE) != 0); +} + +static inline int +e1000x_fcs_len(uint32_t *mac) +{ + /* FCS aka Ethernet CRC-32. We don't get it from backends and can't + * fill it in, just pad descriptor length by 4 bytes unless guest + * told us to strip it off the packet. */ + return (mac[RCTL] & E1000_RCTL_SECRC) ? 0 : 4; +} + +static inline void +e1000x_update_regs_on_link_down(uint32_t *mac, uint16_t *phy) +{ + mac[STATUS] &= ~E1000_STATUS_LU; + phy[PHY_STATUS] &= ~MII_SR_LINK_STATUS; + phy[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; + phy[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK; +} + +static inline void +e1000x_update_regs_on_link_up(uint32_t *mac, uint16_t *phy) +{ + mac[STATUS] |= E1000_STATUS_LU; + phy[PHY_STATUS] |= MII_SR_LINK_STATUS; +} + +void e1000x_update_rx_total_stats(uint32_t *mac, + size_t data_size, + size_t data_fcs_size); + +void e1000x_core_prepare_eeprom(uint16_t *eeprom, + const uint16_t *templ, + uint32_t templ_size, + uint16_t dev_id, + const uint8_t *macaddr); + +uint32_t e1000x_rxbufsize(uint32_t rctl); + +bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac); + +bool e1000x_is_vlan_packet(const uint8_t *buf, uint16_t vet); + +bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf); + +bool e1000x_hw_rx_enabled(uint32_t *mac); + +bool e1000x_is_oversized(uint32_t *mac, size_t size); + +void e1000x_restart_autoneg(uint32_t *mac, uint16_t *phy, QEMUTimer *timer); + +void e1000x_reset_mac_addr(NICState *nic, uint32_t *mac_regs, + uint8_t *mac_addr); + +void e1000x_update_regs_on_autoneg_done(uint32_t *mac, uint16_t *phy); + +void e1000x_increase_size_stats(uint32_t *mac, const int *size_regs, int size); + +typedef struct e1000x_txd_props { + unsigned char sum_needed; + uint8_t ipcss; + uint8_t ipcso; + uint16_t ipcse; + uint8_t tucss; + uint8_t tucso; + uint16_t tucse; + uint32_t paylen; + uint8_t hdr_len; + uint16_t mss; + int8_t ip; + int8_t tcp; + bool tse; + bool cptse; +} e1000x_txd_props; + +void e1000x_read_tx_ctx_descr(struct e1000_context_desc *d, + e1000x_txd_props *props); diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 9055ea89a9..d91e029382 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -25,6 +25,8 @@ #include "hw/net/imx_fec.h" #include "sysemu/dma.h" #include "qemu/log.h" +#include "net/checksum.h" +#include "net/eth.h" /* For crc32 */ #include <zlib.h> @@ -53,30 +55,153 @@ } \ } while (0) -static const VMStateDescription vmstate_imx_fec = { +static const char *imx_default_reg_name(IMXFECState *s, uint32_t index) +{ + static char tmp[20]; + sprintf(tmp, "index %d", index); + return tmp; +} + +static const char *imx_fec_reg_name(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_FRBR: + return "FRBR"; + case ENET_FRSR: + return "FRSR"; + case ENET_MIIGSK_CFGR: + return "MIIGSK_CFGR"; + case ENET_MIIGSK_ENR: + return "MIIGSK_ENR"; + default: + return imx_default_reg_name(s, index); + } +} + +static const char *imx_enet_reg_name(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_RSFL: + return "RSFL"; + case ENET_RSEM: + return "RSEM"; + case ENET_RAEM: + return "RAEM"; + case ENET_RAFL: + return "RAFL"; + case ENET_TSEM: + return "TSEM"; + case ENET_TAEM: + return "TAEM"; + case ENET_TAFL: + return "TAFL"; + case ENET_TIPG: + return "TIPG"; + case ENET_FTRL: + return "FTRL"; + case ENET_TACC: + return "TACC"; + case ENET_RACC: + return "RACC"; + case ENET_ATCR: + return "ATCR"; + case ENET_ATVR: + return "ATVR"; + case ENET_ATOFF: + return "ATOFF"; + case ENET_ATPER: + return "ATPER"; + case ENET_ATCOR: + return "ATCOR"; + case ENET_ATINC: + return "ATINC"; + case ENET_ATSTMP: + return "ATSTMP"; + case ENET_TGSR: + return "TGSR"; + case ENET_TCSR0: + return "TCSR0"; + case ENET_TCCR0: + return "TCCR0"; + case ENET_TCSR1: + return "TCSR1"; + case ENET_TCCR1: + return "TCCR1"; + case ENET_TCSR2: + return "TCSR2"; + case ENET_TCCR2: + return "TCCR2"; + case ENET_TCSR3: + return "TCSR3"; + case ENET_TCCR3: + return "TCCR3"; + default: + return imx_default_reg_name(s, index); + } +} + +static const char *imx_eth_reg_name(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_EIR: + return "EIR"; + case ENET_EIMR: + return "EIMR"; + case ENET_RDAR: + return "RDAR"; + case ENET_TDAR: + return "TDAR"; + case ENET_ECR: + return "ECR"; + case ENET_MMFR: + return "MMFR"; + case ENET_MSCR: + return "MSCR"; + case ENET_MIBC: + return "MIBC"; + case ENET_RCR: + return "RCR"; + case ENET_TCR: + return "TCR"; + case ENET_PALR: + return "PALR"; + case ENET_PAUR: + return "PAUR"; + case ENET_OPD: + return "OPD"; + case ENET_IAUR: + return "IAUR"; + case ENET_IALR: + return "IALR"; + case ENET_GAUR: + return "GAUR"; + case ENET_GALR: + return "GALR"; + case ENET_TFWR: + return "TFWR"; + case ENET_RDSR: + return "RDSR"; + case ENET_TDSR: + return "TDSR"; + case ENET_MRBR: + return "MRBR"; + default: + if (s->is_fec) { + return imx_fec_reg_name(s, index); + } else { + return imx_enet_reg_name(s, index); + } + } +} + +static const VMStateDescription vmstate_imx_eth = { .name = TYPE_IMX_FEC, - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { - VMSTATE_UINT32(irq_state, IMXFECState), - VMSTATE_UINT32(eir, IMXFECState), - VMSTATE_UINT32(eimr, IMXFECState), - VMSTATE_UINT32(rx_enabled, IMXFECState), + VMSTATE_UINT32_ARRAY(regs, IMXFECState, ENET_MAX), VMSTATE_UINT32(rx_descriptor, IMXFECState), VMSTATE_UINT32(tx_descriptor, IMXFECState), - VMSTATE_UINT32(ecr, IMXFECState), - VMSTATE_UINT32(mmfr, IMXFECState), - VMSTATE_UINT32(mscr, IMXFECState), - VMSTATE_UINT32(mibc, IMXFECState), - VMSTATE_UINT32(rcr, IMXFECState), - VMSTATE_UINT32(tcr, IMXFECState), - VMSTATE_UINT32(tfwr, IMXFECState), - VMSTATE_UINT32(frsr, IMXFECState), - VMSTATE_UINT32(erdsr, IMXFECState), - VMSTATE_UINT32(etdsr, IMXFECState), - VMSTATE_UINT32(emrbr, IMXFECState), - VMSTATE_UINT32(miigsk_cfgr, IMXFECState), - VMSTATE_UINT32(miigsk_enr, IMXFECState), VMSTATE_UINT32(phy_status, IMXFECState), VMSTATE_UINT32(phy_control, IMXFECState), @@ -95,7 +220,7 @@ static const VMStateDescription vmstate_imx_fec = { #define PHY_INT_PARFAULT (1 << 2) #define PHY_INT_AUTONEG_PAGE (1 << 1) -static void imx_fec_update(IMXFECState *s); +static void imx_eth_update(IMXFECState *s); /* * The MII phy could raise a GPIO to the processor which in turn @@ -105,7 +230,7 @@ static void imx_fec_update(IMXFECState *s); */ static void phy_update_irq(IMXFECState *s) { - imx_fec_update(s); + imx_eth_update(s); } static void phy_update_link(IMXFECState *s) @@ -124,7 +249,7 @@ static void phy_update_link(IMXFECState *s) phy_update_irq(s); } -static void imx_fec_set_link(NetClientState *nc) +static void imx_eth_set_link(NetClientState *nc) { phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc))); } @@ -250,23 +375,35 @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr) dma_memory_write(&address_space_memory, addr, bd, sizeof(*bd)); } -static void imx_fec_update(IMXFECState *s) +static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr) { - uint32_t active; - uint32_t changed; + dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd)); +} - active = s->eir & s->eimr; - changed = active ^ s->irq_state; - if (changed) { - qemu_set_irq(s->irq, active); +static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr) +{ + dma_memory_write(&address_space_memory, addr, bd, sizeof(*bd)); +} + +static void imx_eth_update(IMXFECState *s) +{ + if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] & ENET_INT_TS_TIMER) { + qemu_set_irq(s->irq[1], 1); + } else { + qemu_set_irq(s->irq[1], 0); + } + + if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] & ENET_INT_MAC) { + qemu_set_irq(s->irq[0], 1); + } else { + qemu_set_irq(s->irq[0], 0); } - s->irq_state = active; } static void imx_fec_do_tx(IMXFECState *s) { int frame_size = 0; - uint8_t frame[FEC_MAX_FRAME_SIZE]; + uint8_t frame[ENET_MAX_FRAME_SIZE]; uint8_t *ptr = frame; uint32_t addr = s->tx_descriptor; @@ -277,272 +414,521 @@ static void imx_fec_do_tx(IMXFECState *s) imx_fec_read_bd(&bd, addr); FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n", addr, bd.flags, bd.length, bd.data); - if ((bd.flags & FEC_BD_R) == 0) { + if ((bd.flags & ENET_BD_R) == 0) { /* Run out of descriptors to transmit. */ + FEC_PRINTF("tx_bd ran out of descriptors to transmit\n"); break; } len = bd.length; - if (frame_size + len > FEC_MAX_FRAME_SIZE) { - len = FEC_MAX_FRAME_SIZE - frame_size; - s->eir |= FEC_INT_BABT; + if (frame_size + len > ENET_MAX_FRAME_SIZE) { + len = ENET_MAX_FRAME_SIZE - frame_size; + s->regs[ENET_EIR] |= ENET_INT_BABT; } dma_memory_read(&address_space_memory, bd.data, ptr, len); ptr += len; frame_size += len; - if (bd.flags & FEC_BD_L) { + if (bd.flags & ENET_BD_L) { /* Last buffer in frame. */ qemu_send_packet(qemu_get_queue(s->nic), frame, len); ptr = frame; frame_size = 0; - s->eir |= FEC_INT_TXF; + s->regs[ENET_EIR] |= ENET_INT_TXF; } - s->eir |= FEC_INT_TXB; - bd.flags &= ~FEC_BD_R; + s->regs[ENET_EIR] |= ENET_INT_TXB; + bd.flags &= ~ENET_BD_R; /* Write back the modified descriptor. */ imx_fec_write_bd(&bd, addr); /* Advance to the next descriptor. */ - if ((bd.flags & FEC_BD_W) != 0) { - addr = s->etdsr; + if ((bd.flags & ENET_BD_W) != 0) { + addr = s->regs[ENET_TDSR]; } else { - addr += 8; + addr += sizeof(bd); } } s->tx_descriptor = addr; - imx_fec_update(s); + imx_eth_update(s); } -static void imx_fec_enable_rx(IMXFECState *s) +static void imx_enet_do_tx(IMXFECState *s) +{ + int frame_size = 0; + uint8_t frame[ENET_MAX_FRAME_SIZE]; + uint8_t *ptr = frame; + uint32_t addr = s->tx_descriptor; + + while (1) { + IMXENETBufDesc bd; + int len; + + imx_enet_read_bd(&bd, addr); + FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x " + "status %04x\n", addr, bd.flags, bd.length, bd.data, + bd.option, bd.status); + if ((bd.flags & ENET_BD_R) == 0) { + /* Run out of descriptors to transmit. */ + break; + } + len = bd.length; + if (frame_size + len > ENET_MAX_FRAME_SIZE) { + len = ENET_MAX_FRAME_SIZE - frame_size; + s->regs[ENET_EIR] |= ENET_INT_BABT; + } + dma_memory_read(&address_space_memory, bd.data, ptr, len); + ptr += len; + frame_size += len; + if (bd.flags & ENET_BD_L) { + if (bd.option & ENET_BD_PINS) { + struct ip_header *ip_hd = PKT_GET_IP_HDR(frame); + if (IP_HEADER_VERSION(ip_hd) == 4) { + net_checksum_calculate(frame, frame_size); + } + } + if (bd.option & ENET_BD_IINS) { + struct ip_header *ip_hd = PKT_GET_IP_HDR(frame); + /* We compute checksum only for IPv4 frames */ + if (IP_HEADER_VERSION(ip_hd) == 4) { + uint16_t csum; + ip_hd->ip_sum = 0; + csum = net_raw_checksum((uint8_t *)ip_hd, sizeof(*ip_hd)); + ip_hd->ip_sum = cpu_to_be16(csum); + } + } + /* Last buffer in frame. */ + qemu_send_packet(qemu_get_queue(s->nic), frame, len); + ptr = frame; + frame_size = 0; + if (bd.option & ENET_BD_TX_INT) { + s->regs[ENET_EIR] |= ENET_INT_TXF; + } + } + if (bd.option & ENET_BD_TX_INT) { + s->regs[ENET_EIR] |= ENET_INT_TXB; + } + bd.flags &= ~ENET_BD_R; + /* Write back the modified descriptor. */ + imx_enet_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if ((bd.flags & ENET_BD_W) != 0) { + addr = s->regs[ENET_TDSR]; + } else { + addr += sizeof(bd); + } + } + + s->tx_descriptor = addr; + + imx_eth_update(s); +} + +static void imx_eth_do_tx(IMXFECState *s) +{ + if (!s->is_fec && (s->regs[ENET_ECR] & ENET_ECR_EN1588)) { + imx_enet_do_tx(s); + } else { + imx_fec_do_tx(s); + } +} + +static void imx_eth_enable_rx(IMXFECState *s) { IMXFECBufDesc bd; - uint32_t tmp; + bool tmp; imx_fec_read_bd(&bd, s->rx_descriptor); - tmp = ((bd.flags & FEC_BD_E) != 0); + tmp = ((bd.flags & ENET_BD_E) != 0); if (!tmp) { FEC_PRINTF("RX buffer full\n"); - } else if (!s->rx_enabled) { + } else if (!s->regs[ENET_RDAR]) { qemu_flush_queued_packets(qemu_get_queue(s->nic)); } - s->rx_enabled = tmp; + s->regs[ENET_RDAR] = tmp ? ENET_RDAR_RDAR : 0; } -static void imx_fec_reset(DeviceState *d) +static void imx_eth_reset(DeviceState *d) { IMXFECState *s = IMX_FEC(d); - /* Reset the FEC */ - s->eir = 0; - s->eimr = 0; - s->rx_enabled = 0; - s->ecr = 0; - s->mscr = 0; - s->mibc = 0xc0000000; - s->rcr = 0x05ee0001; - s->tcr = 0; - s->tfwr = 0; - s->frsr = 0x500; - s->miigsk_cfgr = 0; - s->miigsk_enr = 0x6; + /* Reset the Device */ + memset(s->regs, 0, sizeof(s->regs)); + s->regs[ENET_ECR] = 0xf0000000; + s->regs[ENET_MIBC] = 0xc0000000; + s->regs[ENET_RCR] = 0x05ee0001; + s->regs[ENET_OPD] = 0x00010000; + + s->regs[ENET_PALR] = (s->conf.macaddr.a[0] << 24) + | (s->conf.macaddr.a[1] << 16) + | (s->conf.macaddr.a[2] << 8) + | s->conf.macaddr.a[3]; + s->regs[ENET_PAUR] = (s->conf.macaddr.a[4] << 24) + | (s->conf.macaddr.a[5] << 16) + | 0x8808; + + if (s->is_fec) { + s->regs[ENET_FRBR] = 0x00000600; + s->regs[ENET_FRSR] = 0x00000500; + s->regs[ENET_MIIGSK_ENR] = 0x00000006; + } else { + s->regs[ENET_RAEM] = 0x00000004; + s->regs[ENET_RAFL] = 0x00000004; + s->regs[ENET_TAEM] = 0x00000004; + s->regs[ENET_TAFL] = 0x00000008; + s->regs[ENET_TIPG] = 0x0000000c; + s->regs[ENET_FTRL] = 0x000007ff; + s->regs[ENET_ATPER] = 0x3b9aca00; + } + + s->rx_descriptor = 0; + s->tx_descriptor = 0; /* We also reset the PHY */ phy_reset(s); } -static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size) +static uint32_t imx_default_read(IMXFECState *s, uint32_t index) +{ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%" + PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); + return 0; +} + +static uint32_t imx_fec_read(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_FRBR: + case ENET_FRSR: + case ENET_MIIGSK_CFGR: + case ENET_MIIGSK_ENR: + return s->regs[index]; + default: + return imx_default_read(s, index); + } +} + +static uint32_t imx_enet_read(IMXFECState *s, uint32_t index) { + switch (index) { + case ENET_RSFL: + case ENET_RSEM: + case ENET_RAEM: + case ENET_RAFL: + case ENET_TSEM: + case ENET_TAEM: + case ENET_TAFL: + case ENET_TIPG: + case ENET_FTRL: + case ENET_TACC: + case ENET_RACC: + case ENET_ATCR: + case ENET_ATVR: + case ENET_ATOFF: + case ENET_ATPER: + case ENET_ATCOR: + case ENET_ATINC: + case ENET_ATSTMP: + case ENET_TGSR: + case ENET_TCSR0: + case ENET_TCCR0: + case ENET_TCSR1: + case ENET_TCCR1: + case ENET_TCSR2: + case ENET_TCCR2: + case ENET_TCSR3: + case ENET_TCCR3: + return s->regs[index]; + default: + return imx_default_read(s, index); + } +} + +static uint64_t imx_eth_read(void *opaque, hwaddr offset, unsigned size) +{ + uint32_t value = 0; IMXFECState *s = IMX_FEC(opaque); + uint32_t index = offset >> 2; + + switch (index) { + case ENET_EIR: + case ENET_EIMR: + case ENET_RDAR: + case ENET_TDAR: + case ENET_ECR: + case ENET_MMFR: + case ENET_MSCR: + case ENET_MIBC: + case ENET_RCR: + case ENET_TCR: + case ENET_PALR: + case ENET_PAUR: + case ENET_OPD: + case ENET_IAUR: + case ENET_IALR: + case ENET_GAUR: + case ENET_GALR: + case ENET_TFWR: + case ENET_RDSR: + case ENET_TDSR: + case ENET_MRBR: + value = s->regs[index]; + break; + default: + if (s->is_fec) { + value = imx_fec_read(s, index); + } else { + value = imx_enet_read(s, index); + } + break; + } - FEC_PRINTF("reading from @ 0x%" HWADDR_PRIx "\n", addr); - - switch (addr & 0x3ff) { - case 0x004: - return s->eir; - case 0x008: - return s->eimr; - case 0x010: - return s->rx_enabled ? (1 << 24) : 0; /* RDAR */ - case 0x014: - return 0; /* TDAR */ - case 0x024: - return s->ecr; - case 0x040: - return s->mmfr; - case 0x044: - return s->mscr; - case 0x064: - return s->mibc; /* MIBC */ - case 0x084: - return s->rcr; - case 0x0c4: - return s->tcr; - case 0x0e4: /* PALR */ - return (s->conf.macaddr.a[0] << 24) - | (s->conf.macaddr.a[1] << 16) - | (s->conf.macaddr.a[2] << 8) - | s->conf.macaddr.a[3]; - break; - case 0x0e8: /* PAUR */ - return (s->conf.macaddr.a[4] << 24) - | (s->conf.macaddr.a[5] << 16) - | 0x8808; - case 0x0ec: - return 0x10000; /* OPD */ - case 0x118: - return 0; - case 0x11c: - return 0; - case 0x120: - return 0; - case 0x124: - return 0; - case 0x144: - return s->tfwr; - case 0x14c: - return 0x600; - case 0x150: - return s->frsr; - case 0x180: - return s->erdsr; - case 0x184: - return s->etdsr; - case 0x188: - return s->emrbr; - case 0x300: - return s->miigsk_cfgr; - case 0x308: - return s->miigsk_enr; + FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), + value); + + return value; +} + +static void imx_default_write(IMXFECState *s, uint32_t index, uint32_t value) +{ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" + PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); + return; +} + +static void imx_fec_write(IMXFECState *s, uint32_t index, uint32_t value) +{ + switch (index) { + case ENET_FRBR: + /* FRBR is read only */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Register FRBR is read only\n", + TYPE_IMX_FEC, __func__); + break; + case ENET_FRSR: + s->regs[index] = (value & 0x000003fc) | 0x00000400; + break; + case ENET_MIIGSK_CFGR: + s->regs[index] = value & 0x00000053; + break; + case ENET_MIIGSK_ENR: + s->regs[index] = (value & 0x00000002) ? 0x00000006 : 0; + break; default: - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" - HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr); - return 0; + imx_default_write(s, index, value); + break; } } -static void imx_fec_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size) +static void imx_enet_write(IMXFECState *s, uint32_t index, uint32_t value) +{ + switch (index) { + case ENET_RSFL: + case ENET_RSEM: + case ENET_RAEM: + case ENET_RAFL: + case ENET_TSEM: + case ENET_TAEM: + case ENET_TAFL: + s->regs[index] = value & 0x000001ff; + break; + case ENET_TIPG: + s->regs[index] = value & 0x0000001f; + break; + case ENET_FTRL: + s->regs[index] = value & 0x00003fff; + break; + case ENET_TACC: + s->regs[index] = value & 0x00000019; + break; + case ENET_RACC: + s->regs[index] = value & 0x000000C7; + break; + case ENET_ATCR: + s->regs[index] = value & 0x00002a9d; + break; + case ENET_ATVR: + case ENET_ATOFF: + case ENET_ATPER: + s->regs[index] = value; + break; + case ENET_ATSTMP: + /* ATSTMP is read only */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Register ATSTMP is read only\n", + TYPE_IMX_FEC, __func__); + break; + case ENET_ATCOR: + s->regs[index] = value & 0x7fffffff; + break; + case ENET_ATINC: + s->regs[index] = value & 0x00007f7f; + break; + case ENET_TGSR: + /* implement clear timer flag */ + value = value & 0x0000000f; + break; + case ENET_TCSR0: + case ENET_TCSR1: + case ENET_TCSR2: + case ENET_TCSR3: + value = value & 0x000000fd; + break; + case ENET_TCCR0: + case ENET_TCCR1: + case ENET_TCCR2: + case ENET_TCCR3: + s->regs[index] = value; + break; + default: + imx_default_write(s, index, value); + break; + } +} + +static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) { IMXFECState *s = IMX_FEC(opaque); + uint32_t index = offset >> 2; - FEC_PRINTF("writing 0x%08x @ 0x%" HWADDR_PRIx "\n", (int)value, addr); + FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), + (uint32_t)value); - switch (addr & 0x3ff) { - case 0x004: /* EIR */ - s->eir &= ~value; + switch (index) { + case ENET_EIR: + s->regs[index] &= ~value; break; - case 0x008: /* EIMR */ - s->eimr = value; + case ENET_EIMR: + s->regs[index] = value; break; - case 0x010: /* RDAR */ - if ((s->ecr & FEC_EN) && !s->rx_enabled) { - imx_fec_enable_rx(s); + case ENET_RDAR: + if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { + if (!s->regs[index]) { + s->regs[index] = ENET_RDAR_RDAR; + imx_eth_enable_rx(s); + } + } else { + s->regs[index] = 0; } break; - case 0x014: /* TDAR */ - if (s->ecr & FEC_EN) { - imx_fec_do_tx(s); + case ENET_TDAR: + if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { + s->regs[index] = ENET_TDAR_TDAR; + imx_eth_do_tx(s); } + s->regs[index] = 0; break; - case 0x024: /* ECR */ - s->ecr = value; - if (value & FEC_RESET) { - imx_fec_reset(DEVICE(s)); + case ENET_ECR: + if (value & ENET_ECR_RESET) { + return imx_eth_reset(DEVICE(s)); } - if ((s->ecr & FEC_EN) == 0) { - s->rx_enabled = 0; + s->regs[index] = value; + if ((s->regs[index] & ENET_ECR_ETHEREN) == 0) { + s->regs[ENET_RDAR] = 0; + s->rx_descriptor = s->regs[ENET_RDSR]; + s->regs[ENET_TDAR] = 0; + s->tx_descriptor = s->regs[ENET_TDSR]; } break; - case 0x040: /* MMFR */ - /* store the value */ - s->mmfr = value; - if (extract32(value, 28, 1)) { - do_phy_write(s, extract32(value, 18, 9), extract32(value, 0, 16)); + case ENET_MMFR: + s->regs[index] = value; + if (extract32(value, 29, 1)) { + /* This is a read operation */ + s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16, + do_phy_read(s, + extract32(value, + 18, 10))); } else { - s->mmfr = do_phy_read(s, extract32(value, 18, 9)); + /* This a write operation */ + do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); } /* raise the interrupt as the PHY operation is done */ - s->eir |= FEC_INT_MII; + s->regs[ENET_EIR] |= ENET_INT_MII; break; - case 0x044: /* MSCR */ - s->mscr = value & 0xfe; + case ENET_MSCR: + s->regs[index] = value & 0xfe; break; - case 0x064: /* MIBC */ + case ENET_MIBC: /* TODO: Implement MIB. */ - s->mibc = (value & 0x80000000) ? 0xc0000000 : 0; + s->regs[index] = (value & 0x80000000) ? 0xc0000000 : 0; break; - case 0x084: /* RCR */ - s->rcr = value & 0x07ff003f; + case ENET_RCR: + s->regs[index] = value & 0x07ff003f; /* TODO: Implement LOOP mode. */ break; - case 0x0c4: /* TCR */ + case ENET_TCR: /* We transmit immediately, so raise GRA immediately. */ - s->tcr = value; + s->regs[index] = value; if (value & 1) { - s->eir |= FEC_INT_GRA; + s->regs[ENET_EIR] |= ENET_INT_GRA; } break; - case 0x0e4: /* PALR */ + case ENET_PALR: + s->regs[index] = value; s->conf.macaddr.a[0] = value >> 24; s->conf.macaddr.a[1] = value >> 16; s->conf.macaddr.a[2] = value >> 8; s->conf.macaddr.a[3] = value; break; - case 0x0e8: /* PAUR */ + case ENET_PAUR: + s->regs[index] = (value | 0x0000ffff) & 0xffff8808; s->conf.macaddr.a[4] = value >> 24; s->conf.macaddr.a[5] = value >> 16; break; - case 0x0ec: /* OPDR */ + case ENET_OPD: + s->regs[index] = (value & 0x0000ffff) | 0x00010000; break; - case 0x118: /* IAUR */ - case 0x11c: /* IALR */ - case 0x120: /* GAUR */ - case 0x124: /* GALR */ + case ENET_IAUR: + case ENET_IALR: + case ENET_GAUR: + case ENET_GALR: /* TODO: implement MAC hash filtering. */ break; - case 0x144: /* TFWR */ - s->tfwr = value & 3; - break; - case 0x14c: /* FRBR */ - /* FRBR writes ignored. */ - break; - case 0x150: /* FRSR */ - s->frsr = (value & 0x3fc) | 0x400; - break; - case 0x180: /* ERDSR */ - s->erdsr = value & ~3; - s->rx_descriptor = s->erdsr; - break; - case 0x184: /* ETDSR */ - s->etdsr = value & ~3; - s->tx_descriptor = s->etdsr; + case ENET_TFWR: + if (s->is_fec) { + s->regs[index] = value & 0x3; + } else { + s->regs[index] = value & 0x13f; + } break; - case 0x188: /* EMRBR */ - s->emrbr = value & 0x7f0; + case ENET_RDSR: + if (s->is_fec) { + s->regs[index] = value & ~3; + } else { + s->regs[index] = value & ~7; + } + s->rx_descriptor = s->regs[index]; break; - case 0x300: /* MIIGSK_CFGR */ - s->miigsk_cfgr = value & 0x53; + case ENET_TDSR: + if (s->is_fec) { + s->regs[index] = value & ~3; + } else { + s->regs[index] = value & ~7; + } + s->tx_descriptor = s->regs[index]; break; - case 0x308: /* MIIGSK_ENR */ - s->miigsk_enr = (value & 0x2) ? 0x6 : 0; + case ENET_MRBR: + s->regs[index] = value & 0x00003ff0; break; default: - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" - HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr); - break; + if (s->is_fec) { + imx_fec_write(s, index, value); + } else { + imx_enet_write(s, index, value); + } + return; } - imx_fec_update(s); + imx_eth_update(s); } -static int imx_fec_can_receive(NetClientState *nc) +static int imx_eth_can_receive(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); - return s->rx_enabled; + FEC_PRINTF("\n"); + + return s->regs[ENET_RDAR] ? 1 : 0; } static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, @@ -560,7 +946,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, FEC_PRINTF("len %d\n", (int)size); - if (!s->rx_enabled) { + if (!s->regs[ENET_RDAR]) { qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", TYPE_IMX_FEC, __func__); return 0; @@ -571,21 +957,21 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, crc = cpu_to_be32(crc32(~0, buf, size)); crc_ptr = (uint8_t *) &crc; - /* Huge frames are truncted. */ - if (size > FEC_MAX_FRAME_SIZE) { - size = FEC_MAX_FRAME_SIZE; - flags |= FEC_BD_TR | FEC_BD_LG; + /* Huge frames are truncated. */ + if (size > ENET_MAX_FRAME_SIZE) { + size = ENET_MAX_FRAME_SIZE; + flags |= ENET_BD_TR | ENET_BD_LG; } /* Frames larger than the user limit just set error flags. */ - if (size > (s->rcr >> 16)) { - flags |= FEC_BD_LG; + if (size > (s->regs[ENET_RCR] >> 16)) { + flags |= ENET_BD_LG; } addr = s->rx_descriptor; while (size > 0) { imx_fec_read_bd(&bd, addr); - if ((bd.flags & FEC_BD_E) == 0) { + if ((bd.flags & ENET_BD_E) == 0) { /* No descriptors available. Bail out. */ /* * FIXME: This is wrong. We should probably either @@ -596,7 +982,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, TYPE_IMX_FEC, __func__); break; } - buf_len = (size <= s->emrbr) ? size : s->emrbr; + buf_len = (size <= s->regs[ENET_MRBR]) ? size : s->regs[ENET_MRBR]; bd.length = buf_len; size -= buf_len; @@ -614,99 +1000,232 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, crc_ptr, 4 - size); crc_ptr += 4 - size; } - bd.flags &= ~FEC_BD_E; + bd.flags &= ~ENET_BD_E; if (size == 0) { /* Last buffer in frame. */ - bd.flags |= flags | FEC_BD_L; + bd.flags |= flags | ENET_BD_L; FEC_PRINTF("rx frame flags %04x\n", bd.flags); - s->eir |= FEC_INT_RXF; + s->regs[ENET_EIR] |= ENET_INT_RXF; } else { - s->eir |= FEC_INT_RXB; + s->regs[ENET_EIR] |= ENET_INT_RXB; } imx_fec_write_bd(&bd, addr); /* Advance to the next descriptor. */ - if ((bd.flags & FEC_BD_W) != 0) { - addr = s->erdsr; + if ((bd.flags & ENET_BD_W) != 0) { + addr = s->regs[ENET_RDSR]; + } else { + addr += sizeof(bd); + } + } + s->rx_descriptor = addr; + imx_eth_enable_rx(s); + imx_eth_update(s); + return len; +} + +static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf, + size_t len) +{ + IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); + IMXENETBufDesc bd; + uint32_t flags = 0; + uint32_t addr; + uint32_t crc; + uint32_t buf_addr; + uint8_t *crc_ptr; + unsigned int buf_len; + size_t size = len; + + FEC_PRINTF("len %d\n", (int)size); + + if (!s->regs[ENET_RDAR]) { + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", + TYPE_IMX_FEC, __func__); + return 0; + } + + /* 4 bytes for the CRC. */ + size += 4; + crc = cpu_to_be32(crc32(~0, buf, size)); + crc_ptr = (uint8_t *) &crc; + + /* Huge frames are truncted. */ + if (size > ENET_MAX_FRAME_SIZE) { + size = ENET_MAX_FRAME_SIZE; + flags |= ENET_BD_TR | ENET_BD_LG; + } + + /* Frames larger than the user limit just set error flags. */ + if (size > (s->regs[ENET_RCR] >> 16)) { + flags |= ENET_BD_LG; + } + + addr = s->rx_descriptor; + while (size > 0) { + imx_enet_read_bd(&bd, addr); + if ((bd.flags & ENET_BD_E) == 0) { + /* No descriptors available. Bail out. */ + /* + * FIXME: This is wrong. We should probably either + * save the remainder for when more RX buffers are + * available, or flag an error. + */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Lost end of frame\n", + TYPE_IMX_FEC, __func__); + break; + } + buf_len = (size <= s->regs[ENET_MRBR]) ? size : s->regs[ENET_MRBR]; + bd.length = buf_len; + size -= buf_len; + + FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length); + + /* The last 4 bytes are the CRC. */ + if (size < 4) { + buf_len += size - 4; + } + buf_addr = bd.data; + dma_memory_write(&address_space_memory, buf_addr, buf, buf_len); + buf += buf_len; + if (size < 4) { + dma_memory_write(&address_space_memory, buf_addr + buf_len, + crc_ptr, 4 - size); + crc_ptr += 4 - size; + } + bd.flags &= ~ENET_BD_E; + if (size == 0) { + /* Last buffer in frame. */ + bd.flags |= flags | ENET_BD_L; + FEC_PRINTF("rx frame flags %04x\n", bd.flags); + if (bd.option & ENET_BD_RX_INT) { + s->regs[ENET_EIR] |= ENET_INT_RXF; + } + } else { + if (bd.option & ENET_BD_RX_INT) { + s->regs[ENET_EIR] |= ENET_INT_RXB; + } + } + imx_enet_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if ((bd.flags & ENET_BD_W) != 0) { + addr = s->regs[ENET_RDSR]; } else { - addr += 8; + addr += sizeof(bd); } } s->rx_descriptor = addr; - imx_fec_enable_rx(s); - imx_fec_update(s); + imx_eth_enable_rx(s); + imx_eth_update(s); return len; } -static const MemoryRegionOps imx_fec_ops = { - .read = imx_fec_read, - .write = imx_fec_write, +static ssize_t imx_eth_receive(NetClientState *nc, const uint8_t *buf, + size_t len) +{ + IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); + + if (!s->is_fec && (s->regs[ENET_ECR] & ENET_ECR_EN1588)) { + return imx_enet_receive(nc, buf, len); + } else { + return imx_fec_receive(nc, buf, len); + } +} + +static const MemoryRegionOps imx_eth_ops = { + .read = imx_eth_read, + .write = imx_eth_write, .valid.min_access_size = 4, .valid.max_access_size = 4, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_NATIVE_ENDIAN, }; -static void imx_fec_cleanup(NetClientState *nc) +static void imx_eth_cleanup(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); s->nic = NULL; } -static NetClientInfo net_imx_fec_info = { - .type = NET_CLIENT_OPTIONS_KIND_NIC, - .size = sizeof(NICState), - .can_receive = imx_fec_can_receive, - .receive = imx_fec_receive, - .cleanup = imx_fec_cleanup, - .link_status_changed = imx_fec_set_link, +static NetClientInfo imx_eth_net_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = imx_eth_can_receive, + .receive = imx_eth_receive, + .cleanup = imx_eth_cleanup, + .link_status_changed = imx_eth_set_link, }; -static void imx_fec_realize(DeviceState *dev, Error **errp) +static void imx_eth_realize(DeviceState *dev, Error **errp) { IMXFECState *s = IMX_FEC(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - memory_region_init_io(&s->iomem, OBJECT(dev), &imx_fec_ops, s, + memory_region_init_io(&s->iomem, OBJECT(dev), &imx_eth_ops, s, TYPE_IMX_FEC, 0x400); sysbus_init_mmio(sbd, &s->iomem); - sysbus_init_irq(sbd, &s->irq); + sysbus_init_irq(sbd, &s->irq[0]); + sysbus_init_irq(sbd, &s->irq[1]); + qemu_macaddr_default_if_unset(&s->conf.macaddr); s->conf.peers.ncs[0] = nd_table[0].netdev; - s->nic = qemu_new_nic(&net_imx_fec_info, &s->conf, - object_get_typename(OBJECT(dev)), DEVICE(dev)->id, - s); + s->nic = qemu_new_nic(&imx_eth_net_info, &s->conf, + object_get_typename(OBJECT(dev)), + DEVICE(dev)->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); } -static Property imx_fec_properties[] = { +static Property imx_eth_properties[] = { DEFINE_NIC_PROPERTIES(IMXFECState, conf), DEFINE_PROP_END_OF_LIST(), }; -static void imx_fec_class_init(ObjectClass *klass, void *data) +static void imx_eth_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->vmsd = &vmstate_imx_fec; - dc->reset = imx_fec_reset; - dc->props = imx_fec_properties; - dc->realize = imx_fec_realize; - dc->desc = "i.MX FEC Ethernet Controller"; + dc->vmsd = &vmstate_imx_eth; + dc->reset = imx_eth_reset; + dc->props = imx_eth_properties; + dc->realize = imx_eth_realize; + dc->desc = "i.MX FEC/ENET Ethernet Controller"; +} + +static void imx_fec_init(Object *obj) +{ + IMXFECState *s = IMX_FEC(obj); + + s->is_fec = true; +} + +static void imx_enet_init(Object *obj) +{ + IMXFECState *s = IMX_FEC(obj); + + s->is_fec = false; } static const TypeInfo imx_fec_info = { - .name = TYPE_IMX_FEC, - .parent = TYPE_SYS_BUS_DEVICE, + .name = TYPE_IMX_FEC, + .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(IMXFECState), - .class_init = imx_fec_class_init, + .instance_init = imx_fec_init, + .class_init = imx_eth_class_init, +}; + +static const TypeInfo imx_enet_info = { + .name = TYPE_IMX_ENET, + .parent = TYPE_IMX_FEC, + .instance_init = imx_enet_init, }; -static void imx_fec_register_types(void) +static void imx_eth_register_types(void) { type_register_static(&imx_fec_info); + type_register_static(&imx_enet_info); } -type_init(imx_fec_register_types) +type_init(imx_eth_register_types) diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c index 740cd98ff1..cf8b8236df 100644 --- a/hw/net/mipsnet.c +++ b/hw/net/mipsnet.c @@ -83,6 +83,9 @@ static ssize_t mipsnet_receive(NetClientState *nc, const uint8_t *buf, size_t si if (!mipsnet_can_receive(nc)) return 0; + if (size >= sizeof(s->rx_buffer)) { + return 0; + } s->busy = 1; /* Just accept everything. */ diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c new file mode 100644 index 0000000000..1019b50c18 --- /dev/null +++ b/hw/net/net_rx_pkt.c @@ -0,0 +1,600 @@ +/* + * QEMU RX packets abstractions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "net_rx_pkt.h" +#include "net/checksum.h" +#include "net/tap.h" + +struct NetRxPkt { + struct virtio_net_hdr virt_hdr; + uint8_t ehdr_buf[sizeof(struct eth_header)]; + struct iovec *vec; + uint16_t vec_len_total; + uint16_t vec_len; + uint32_t tot_len; + uint16_t tci; + bool vlan_stripped; + bool has_virt_hdr; + eth_pkt_types_e packet_type; + + /* Analysis results */ + bool isip4; + bool isip6; + bool isudp; + bool istcp; + + size_t l3hdr_off; + size_t l4hdr_off; + size_t l5hdr_off; + + eth_ip6_hdr_info ip6hdr_info; + eth_ip4_hdr_info ip4hdr_info; + eth_l4_hdr_info l4hdr_info; +}; + +void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) +{ + struct NetRxPkt *p = g_malloc0(sizeof *p); + p->has_virt_hdr = has_virt_hdr; + p->vec = NULL; + p->vec_len_total = 0; + *pkt = p; +} + +void net_rx_pkt_uninit(struct NetRxPkt *pkt) +{ + if (pkt->vec_len_total != 0) { + g_free(pkt->vec); + } + + g_free(pkt); +} + +struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) +{ + assert(pkt); + return &pkt->virt_hdr; +} + +static inline void +net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, + int new_iov_len) +{ + if (pkt->vec_len_total < new_iov_len) { + g_free(pkt->vec); + pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); + pkt->vec_len_total = new_iov_len; + } +} + +static void +net_rx_pkt_pull_data(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t ploff) +{ + if (pkt->vlan_stripped) { + net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); + + pkt->vec[0].iov_base = pkt->ehdr_buf; + pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf); + + pkt->tot_len = + iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header); + + pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, + iov, iovcnt, ploff, pkt->tot_len); + } else { + net_rx_pkt_iovec_realloc(pkt, iovcnt); + + pkt->tot_len = iov_size(iov, iovcnt) - ploff; + pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, + iov, iovcnt, ploff, pkt->tot_len); + } + + eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); + + trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, + pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); +} + +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan) +{ + uint16_t tci = 0; + uint16_t ploff = iovoff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, + &ploff, &tci); + } + + pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); +} + +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet) +{ + uint16_t tci = 0; + uint16_t ploff = iovoff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, + pkt->ehdr_buf, + &ploff, &tci); + } + + pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); +} + +void net_rx_pkt_dump(struct NetRxPkt *pkt) +{ +#ifdef NET_RX_PKT_DEBUG + NetRxPkt *pkt = (NetRxPkt *)pkt; + assert(pkt); + + printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n", + pkt->tot_len, pkt->vlan_stripped, pkt->tci); +#endif +} + +void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, + eth_pkt_types_e packet_type) +{ + assert(pkt); + + pkt->packet_type = packet_type; + +} + +eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->packet_type; +} + +size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->tot_len; +} + +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, + size_t len) +{ + const struct iovec iov = { + .iov_base = (void *)data, + .iov_len = len + }; + + assert(pkt); + + eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); +} + +void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp) +{ + assert(pkt); + + *isip4 = pkt->isip4; + *isip6 = pkt->isip6; + *isudp = pkt->isudp; + *istcp = pkt->istcp; +} + +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l3hdr_off; +} + +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l4hdr_off; +} + +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l5hdr_off; +} + +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) +{ + return &pkt->ip6hdr_info; +} + +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) +{ + return &pkt->ip4hdr_info; +} + +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) +{ + return &pkt->l4hdr_info; +} + +static inline void +_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, + void *ptr, size_t size) +{ + memcpy(&rss_input[*bytes_written], ptr, size); + trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); + *bytes_written += size; +} + +static inline void +_net_rx_rss_prepare_ip4(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_src, sizeof(uint32_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_dst, sizeof(uint32_t)); +} + +static inline void +_net_rx_rss_prepare_ip6(uint8_t *rss_input, + struct NetRxPkt *pkt, + bool ipv6ex, size_t *bytes_written) +{ + eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src + : &ip6info->ip6_hdr.ip6_src, + sizeof(struct in6_address)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst + : &ip6info->ip6_hdr.ip6_dst, + sizeof(struct in6_address)); +} + +static inline void +_net_rx_rss_prepare_tcp(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_sport, sizeof(uint16_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_dport, sizeof(uint16_t)); +} + +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key) +{ + uint8_t rss_input[36]; + size_t rss_length = 0; + uint32_t rss_hash = 0; + net_toeplitz_key key_data; + + switch (type) { + case NetPktRssIpV4: + assert(pkt->isip4); + trace_net_rx_pkt_rss_ip4(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV4Tcp: + assert(pkt->isip4); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip4_tcp(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6Tcp: + assert(pkt->isip6); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip6_tcp(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); + break; + case NetPktRssIpV6Ex: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6_ex(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + break; + default: + assert(false); + break; + } + + net_toeplitz_key_init(&key_data, key); + net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); + + trace_net_rx_pkt_rss_hash(rss_length, rss_hash); + + return rss_hash; +} + +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->isip4) { + return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); + } + + return 0; +} + +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; + } + + return false; +} + +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return pkt->l4hdr_info.has_tcp_data; + } + + return false; +} + +struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec; +} + +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec_len; +} + +void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, + struct virtio_net_hdr *vhdr) +{ + assert(pkt); + + memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); +} + +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt) +{ + assert(pkt); + + iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); +} + +bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vlan_stripped; +} + +bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->has_virt_hdr; +} + +uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->tci; +} + +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint32_t cntr; + uint16_t csum; + uint32_t csl; + + trace_net_rx_pkt_l3_csum_validate_entry(); + + if (!pkt->isip4) { + trace_net_rx_pkt_l3_csum_validate_not_ip4(); + return false; + } + + csl = pkt->l4hdr_off - pkt->l3hdr_off; + + cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l3hdr_off, + csl, 0); + + csum = net_checksum_finish(cntr); + + *csum_valid = (csum == 0); + + trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, + cntr, csum, *csum_valid); + + return true; +} + +static uint16_t +_net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) +{ + uint32_t cntr; + uint16_t csum; + uint16_t csl; + uint32_t cso; + + trace_net_rx_pkt_l4_csum_calc_entry(); + + if (pkt->isip4) { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip4_udp(); + } else { + csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - + IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); + trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); + } + + cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, + csl, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } else { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip6_udp(); + } else { + struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; + size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; + size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + + csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - + ip6opts_len; + trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); + } + + cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, + pkt->ip6hdr_info.l4proto, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } + + cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l4hdr_off, csl, cso); + + csum = net_checksum_finish(cntr); + + trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); + + return csum; +} + +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint16_t csum; + + trace_net_rx_pkt_l4_csum_validate_entry(); + + if (!pkt->istcp && !pkt->isudp) { + trace_net_rx_pkt_l4_csum_validate_not_xxp(); + return false; + } + + if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { + trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); + return false; + } + + csum = _net_rx_pkt_calc_l4_csum(pkt); + + *csum_valid = ((csum == 0) || (csum == 0xFFFF)); + + trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); + + return true; +} + +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) +{ + uint16_t csum = 0; + uint32_t l4_cso; + + trace_net_rx_pkt_l4_csum_fix_entry(); + + if (pkt->istcp) { + l4_cso = offsetof(struct tcp_header, th_sum); + trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); + } else if (pkt->isudp) { + if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { + trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); + return false; + } + l4_cso = offsetof(struct udp_header, uh_sum); + trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); + } else { + trace_net_rx_pkt_l4_csum_fix_not_xxp(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); + return false; + } + + /* Set zero to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + /* Calculate L4 checksum */ + csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); + + /* Set calculated checksum to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); + + return true; +} diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h new file mode 100644 index 0000000000..7adf0fad51 --- /dev/null +++ b/hw/net/net_rx_pkt.h @@ -0,0 +1,363 @@ +/* + * QEMU RX packets abstraction + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef NET_RX_PKT_H +#define NET_RX_PKT_H + +#include "net/eth.h" + +/* defines to enable packet dump functions */ +/*#define NET_RX_PKT_DEBUG*/ + +struct NetRxPkt; + +/** + * Clean all rx packet resources + * + * @pkt: packet + * + */ +void net_rx_pkt_uninit(struct NetRxPkt *pkt); + +/** + * Init function for rx packet functionality + * + * @pkt: packet pointer + * @has_virt_hdr: device uses virtio header + * + */ +void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr); + +/** + * returns total length of data attached to rx context + * + * @pkt: packet + * + * Return: nothing + * + */ +size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt); + +/** + * parse and set packet analysis results + * + * @pkt: packet + * @data: pointer to the data buffer to be parsed + * @len: data length + * + */ +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, + size_t len); + +/** + * fetches packet analysis results + * + * @pkt: packet + * @isip4: whether the packet given is IPv4 + * @isip6: whether the packet given is IPv6 + * @isudp: whether the packet given is UDP + * @istcp: whether the packet given is TCP + * + */ +void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp); + +/** +* fetches L3 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L4 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L5 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt); + +/** + * fetches IP6 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt); + +/** + * fetches IP4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt); + +/** + * fetches L4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt); + +typedef enum { + NetPktRssIpV4, + NetPktRssIpV4Tcp, + NetPktRssIpV6Tcp, + NetPktRssIpV6, + NetPktRssIpV6Ex +} NetRxPktRssType; + +/** +* calculates RSS hash for packet +* +* @pkt: packet +* @type: RSS hash type +* +* Return: Toeplitz RSS hash. +* +*/ +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key); + +/** +* fetches IP identification for the packet +* +* @pkt: packet +* +*/ +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt); + +/** +* check if given packet is a TCP ACK packet +* +* @pkt: packet +* +*/ +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt); + +/** +* check if given packet contains TCP data +* +* @pkt: packet +* +*/ +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt); + +/** + * returns virtio header stored in rx context + * + * @pkt: packet + * @ret: virtio header + * + */ +struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt); + +/** + * returns packet type + * + * @pkt: packet + * @ret: packet type + * + */ +eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt); + +/** + * returns vlan tag + * + * @pkt: packet + * @ret: VLAN tag + * + */ +uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt); + +/** + * tells whether vlan was stripped from the packet + * + * @pkt: packet + * @ret: VLAN stripped sign + * + */ +bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt); + +/** + * notifies caller if the packet has virtio header + * + * @pkt: packet + * @ret: true if packet has virtio header, false otherwize + * + */ +bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); + +/** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* +*/ +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, + int iovcnt, size_t iovoff, + bool strip_vlan); + +/** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* @vet: VLAN tag Ethernet type +* +*/ +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet); + +/** + * attach data to rx packet + * + * @pkt: packet + * @data: pointer to the data buffer + * @len: data length + * @strip_vlan: should the module strip vlan from data + * + */ +static inline void +net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan) +{ + const struct iovec iov = { + .iov_base = (void *) data, + .iov_len = len + }; + + net_rx_pkt_attach_iovec(pkt, &iov, 1, 0, strip_vlan); +} + +/** + * returns io vector that holds the attached data + * + * @pkt: packet + * @ret: pointer to IOVec + * + */ +struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt); + +/** +* returns io vector length that holds the attached data +* +* @pkt: packet +* @ret: IOVec length +* +*/ +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt); + +/** + * prints rx packet data if debug is enabled + * + * @pkt: packet + * + */ +void net_rx_pkt_dump(struct NetRxPkt *pkt); + +/** + * copy passed vhdr data to packet context + * + * @pkt: packet + * @vhdr: VHDR buffer + * + */ +void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, + struct virtio_net_hdr *vhdr); + +/** +* copy passed vhdr data to packet context +* +* @pkt: packet +* @iov: VHDR iov +* @iovcnt: VHDR iov array size +* +*/ +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt); + +/** + * save packet type in packet context + * + * @pkt: packet + * @packet_type: the packet type + * + */ +void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, + eth_pkt_types_e packet_type); + +/** +* validate TCP/UDP checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* validate IPv4 checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* fix IPv4 checksum of the packet +* +* @pkt: packet +* @ret: true if checksum was fixed, false in case packet is +* not TCP/UDP or checksum correction is not possible +* +*/ +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt); + +#endif diff --git a/hw/net/vmxnet_tx_pkt.c b/hw/net/net_tx_pkt.c index 91e1e08fd9..e4478bead8 100644 --- a/hw/net/vmxnet_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -1,5 +1,5 @@ /* - * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstractions + * QEMU TX packets abstractions * * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) * @@ -15,25 +15,24 @@ * */ -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "vmxnet_tx_pkt.h" +#include "net_tx_pkt.h" #include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" #include "net/net.h" +#include "hw/pci/pci.h" enum { - VMXNET_TX_PKT_VHDR_FRAG = 0, - VMXNET_TX_PKT_L2HDR_FRAG, - VMXNET_TX_PKT_L3HDR_FRAG, - VMXNET_TX_PKT_PL_START_FRAG + NET_TX_PKT_VHDR_FRAG = 0, + NET_TX_PKT_L2HDR_FRAG, + NET_TX_PKT_L3HDR_FRAG, + NET_TX_PKT_PL_START_FRAG }; /* TX packet private context */ -struct VmxnetTxPkt { +struct NetTxPkt { + PCIDevice *pci_dev; + struct virtio_net_hdr virt_hdr; bool has_virt_hdr; @@ -44,6 +43,7 @@ struct VmxnetTxPkt { struct iovec *vec; uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN]; uint32_t payload_len; @@ -53,32 +53,35 @@ struct VmxnetTxPkt { uint16_t hdr_len; eth_pkt_types_e packet_type; uint8_t l4proto; + + bool is_loopback; }; -void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr) +void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, + uint32_t max_frags, bool has_virt_hdr) { - struct VmxnetTxPkt *p = g_malloc0(sizeof *p); + struct NetTxPkt *p = g_malloc0(sizeof *p); + + p->pci_dev = pci_dev; p->vec = g_malloc((sizeof *p->vec) * - (max_frags + VMXNET_TX_PKT_PL_START_FRAG)); + (max_frags + NET_TX_PKT_PL_START_FRAG)); p->raw = g_malloc((sizeof *p->raw) * max_frags); p->max_payload_frags = max_frags; p->max_raw_frags = max_frags; p->has_virt_hdr = has_virt_hdr; - p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; - p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_len = + p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; + p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = p->has_virt_hdr ? sizeof p->virt_hdr : 0; - p->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; - p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len = 0; + p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr; *pkt = p; } -void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt) +void net_tx_pkt_uninit(struct NetTxPkt *pkt) { if (pkt) { g_free(pkt->vec); @@ -87,49 +90,63 @@ void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt) } } -void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt) +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt) { uint16_t csum; - uint32_t ph_raw_csum; assert(pkt); - uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; struct ip_header *ip_hdr; + ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && - VIRTIO_NET_HDR_GSO_UDP != gso_type) { - return; - } + ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); - ip_hdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; + ip_hdr->ip_sum = 0; + csum = net_raw_checksum((uint8_t *)ip_hdr, + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); + ip_hdr->ip_sum = cpu_to_be16(csum); +} + +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +{ + uint16_t csum; + uint32_t cntr, cso; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - if (pkt->payload_len + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len > + if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > ETH_MAX_IP_DGRAM_LEN) { return; } - ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len); - - /* Calculate IP header checksum */ - ip_hdr->ip_sum = 0; - csum = net_raw_checksum((uint8_t *)ip_hdr, - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len); - ip_hdr->ip_sum = cpu_to_be16(csum); + if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || + gso_type == VIRTIO_NET_HDR_GSO_UDP) { + /* Calculate IP header checksum */ + net_tx_pkt_update_ip_hdr_checksum(pkt); + + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len, + IP_PROTO_TCP, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else { + return; + } - /* Calculate IP pseudo header checksum */ - ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); - csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); - iov_from_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); } -static void vmxnet_tx_pkt_calculate_hdr_len(struct VmxnetTxPkt *pkt) +static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt) { - pkt->hdr_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len + - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len; + pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len + + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; } -static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) +static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) { struct iovec *l2_hdr, *l3_hdr; size_t bytes_read; @@ -138,8 +155,8 @@ static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) assert(pkt); - l2_hdr = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG]; - l3_hdr = &pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG]; + l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; + l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG]; bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, ETH_MAX_L2_HDR_LEN); @@ -160,15 +177,19 @@ static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) if (bytes_read < l2_hdr->iov_len) { l2_hdr->iov_len = 0; + l3_hdr->iov_len = 0; + pkt->packet_type = ETH_PKT_UCAST; return false; + } else { + l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN; + l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); } - l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len); switch (l3_proto) { case ETH_P_IP: - l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, sizeof(struct ip_header)); @@ -178,27 +199,45 @@ static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) } l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); - pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; - /* copy optional IPv4 header data */ - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, - l2_hdr->iov_len + sizeof(struct ip_header), - l3_hdr->iov_base + sizeof(struct ip_header), - l3_hdr->iov_len - sizeof(struct ip_header)); - if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + if (l3_hdr->iov_len < sizeof(struct ip_header)) { l3_hdr->iov_len = 0; return false; } + + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) { + /* copy optional IPv4 header data if any*/ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + } + break; case ETH_P_IPV6: + { + eth_ip6_hdr_info hdrinfo; + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - &pkt->l4proto, &full_ip6hdr_len)) { + &hdrinfo)) { l3_hdr->iov_len = 0; return false; } - l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + pkt->l4proto = hdrinfo.l4proto; + full_ip6hdr_len = hdrinfo.full_hdr_len; + + if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) { + l3_hdr->iov_len = 0; + return false; + } bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, full_ip6hdr_len); @@ -210,67 +249,62 @@ static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) l3_hdr->iov_len = full_ip6hdr_len; } break; - + } default: l3_hdr->iov_len = 0; break; } - vmxnet_tx_pkt_calculate_hdr_len(pkt); - pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); + net_tx_pkt_calculate_hdr_len(pkt); return true; } -static bool vmxnet_tx_pkt_rebuild_payload(struct VmxnetTxPkt *pkt) +static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) { - size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; - - pkt->payload_frags = iov_copy(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], + pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; + pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->max_payload_frags, pkt->raw, pkt->raw_frags, - pkt->hdr_len, payload_len); + pkt->hdr_len, pkt->payload_len); +} - if (pkt->payload_frags != (uint32_t) -1) { - pkt->payload_len = payload_len; +bool net_tx_pkt_parse(struct NetTxPkt *pkt) +{ + if (net_tx_pkt_parse_headers(pkt)) { + net_tx_pkt_rebuild_payload(pkt); return true; } else { return false; } } -bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt) -{ - return vmxnet_tx_pkt_parse_headers(pkt) && - vmxnet_tx_pkt_rebuild_payload(pkt); -} - -struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt) +struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) { assert(pkt); return &pkt->virt_hdr; } -static uint8_t vmxnet_tx_pkt_get_gso_type(struct VmxnetTxPkt *pkt, +static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, bool tso_enable) { uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; uint16_t l3_proto; - l3_proto = eth_get_l3_proto(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base, - pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len); + l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1, + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); if (!tso_enable) { goto func_exit; } - rc = eth_get_gso_type(l3_proto, pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base, + rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, pkt->l4proto); func_exit: return rc; } -void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, +void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, bool csum_enable, uint32_t gso_size) { struct tcp_hdr l4hdr; @@ -279,7 +313,7 @@ void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, /* csum has to be enabled if tso is. */ assert(csum_enable || !tso_enable); - pkt->virt_hdr.gso_type = vmxnet_tx_pkt_get_gso_type(pkt, tso_enable); + pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable); switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_NONE: @@ -288,16 +322,16 @@ void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, break; case VIRTIO_NET_HDR_GSO_UDP: - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); break; case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_TCPV6: - iov_to_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr)); pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; break; default: @@ -322,23 +356,24 @@ void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, } } -void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan) +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype) { bool is_new; assert(pkt); - eth_setup_vlan_headers(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base, - vlan, &is_new); + eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, vlan_ethtype, &is_new); /* update l2hdrlen */ if (is_new) { pkt->hdr_len += sizeof(struct vlan_header); - pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len += + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len += sizeof(struct vlan_header); } } -bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, size_t len) { hwaddr mapped_len = 0; @@ -353,44 +388,50 @@ bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, ventry = &pkt->raw[pkt->raw_frags]; mapped_len = len; - ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); - ventry->iov_len = mapped_len; - pkt->raw_frags += !!ventry->iov_base; + ventry->iov_base = pci_dma_map(pkt->pci_dev, pa, + &mapped_len, DMA_DIRECTION_TO_DEVICE); - if ((ventry->iov_base == NULL) || (len != mapped_len)) { + if ((ventry->iov_base != NULL) && (len == mapped_len)) { + ventry->iov_len = mapped_len; + pkt->raw_frags++; + return true; + } else { return false; } +} - return true; +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt) +{ + return pkt->raw_frags > 0; } -eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt) +eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) { assert(pkt); return pkt->packet_type; } -size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt) +size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt) { assert(pkt); return pkt->hdr_len + pkt->payload_len; } -void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt) +void net_tx_pkt_dump(struct NetTxPkt *pkt) { -#ifdef VMXNET_TX_PKT_DEBUG +#ifdef NET_TX_PKT_DEBUG assert(pkt); printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, " "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type, - pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len, - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len); + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len, + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len); #endif } -void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt) +void net_tx_pkt_reset(struct NetTxPkt *pkt) { int i; @@ -401,38 +442,31 @@ void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt) memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); - g_free(pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base); - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - assert(pkt->vec); - for (i = VMXNET_TX_PKT_L2HDR_FRAG; - i < pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG; i++) { - pkt->vec[i].iov_len = 0; - } + pkt->payload_len = 0; pkt->payload_frags = 0; assert(pkt->raw); for (i = 0; i < pkt->raw_frags; i++) { assert(pkt->raw[i].iov_base); - cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, - false, pkt->raw[i].iov_len); - pkt->raw[i].iov_len = 0; + pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, pkt->raw[i].iov_len, + DMA_DIRECTION_TO_DEVICE, 0); } pkt->raw_frags = 0; pkt->hdr_len = 0; - pkt->packet_type = 0; pkt->l4proto = 0; } -static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt) +static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) { - struct iovec *iov = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG]; + struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; uint32_t csum_cntr; uint16_t csum = 0; + uint32_t cso; /* num of iovec without vhdr */ - uint32_t iov_len = pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG - 1; + uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; uint16_t csl; struct ip_header *iphdr; size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; @@ -443,12 +477,13 @@ static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt) /* Calculate L4 TCP/UDP checksum */ csl = pkt->payload_len; - /* data checksum */ - csum_cntr = - net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); /* add pseudo header to csum */ - iphdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; - csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso); + + /* data checksum */ + csum_cntr += + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso); /* Put the checksum obtained into the packet */ csum = cpu_to_be16(net_checksum_finish(csum_cntr)); @@ -456,37 +491,37 @@ static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt) } enum { - VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, - VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS, - VMXNET_TX_PKT_FRAGMENT_HEADER_NUM + NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, + NET_TX_PKT_FRAGMENT_L3_HDR_POS, + NET_TX_PKT_FRAGMENT_HEADER_NUM }; -#define VMXNET_MAX_FRAG_SG_LIST (64) +#define NET_MAX_FRAG_SG_LIST (64) -static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt, +static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx) { size_t fetched = 0; struct iovec *src = pkt->vec; - *dst_idx = VMXNET_TX_PKT_FRAGMENT_HEADER_NUM; + *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; - while (fetched < pkt->virt_hdr.gso_size) { + while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) { /* no more place in fragment iov */ - if (*dst_idx == VMXNET_MAX_FRAG_SG_LIST) { + if (*dst_idx == NET_MAX_FRAG_SG_LIST) { break; } /* no more data in iovec */ - if (*src_idx == (pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG)) { + if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) { break; } dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, - pkt->virt_hdr.gso_size - fetched); + IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size) - fetched); *src_offset += dst[*dst_idx].iov_len; fetched += dst[*dst_idx].iov_len; @@ -502,35 +537,45 @@ static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt, return fetched; } -static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt, +static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt, + NetClientState *nc, const struct iovec *iov, int iov_cnt) +{ + if (pkt->is_loopback) { + nc->info->receive_iov(nc, iov, iov_cnt); + } else { + qemu_sendv_packet(nc, iov, iov_cnt); + } +} + +static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, NetClientState *nc) { - struct iovec fragment[VMXNET_MAX_FRAG_SG_LIST]; + struct iovec fragment[NET_MAX_FRAG_SG_LIST]; size_t fragment_len = 0; bool more_frags = false; /* some pointers for shorter code */ void *l2_iov_base, *l3_iov_base; size_t l2_iov_len, l3_iov_len; - int src_idx = VMXNET_TX_PKT_PL_START_FRAG, dst_idx; + int src_idx = NET_TX_PKT_PL_START_FRAG, dst_idx; size_t src_offset = 0; size_t fragment_offset = 0; - l2_iov_base = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base; - l2_iov_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len; - l3_iov_base = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; - l3_iov_len = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len; + l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base; + l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len; + l3_iov_base = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; /* Copy headers */ - fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; - fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; - fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; - fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; + fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; + fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; + fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; + fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; /* Put as much data as possible and send */ do { - fragment_len = vmxnet_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, + fragment_len = net_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, fragment, &dst_idx); more_frags = (fragment_offset + fragment_len < pkt->payload_len); @@ -540,7 +585,7 @@ static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt, eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); - qemu_sendv_packet(nc, fragment, dst_idx); + net_tx_pkt_sendv(pkt, nc, fragment, dst_idx); fragment_offset += fragment_len; @@ -549,13 +594,13 @@ static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt, return true; } -bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc) +bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) { assert(pkt); if (!pkt->has_virt_hdr && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - vmxnet_tx_pkt_do_sw_csum(pkt); + net_tx_pkt_do_sw_csum(pkt); } /* @@ -565,17 +610,28 @@ bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc) if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) { if (pkt->payload_len > ETH_MAX_IP_DGRAM_LEN - - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len) { + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) { return false; } } if (pkt->has_virt_hdr || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { - qemu_sendv_packet(nc, pkt->vec, - pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG); + net_tx_pkt_sendv(pkt, nc, pkt->vec, + pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); return true; } - return vmxnet_tx_pkt_do_sw_fragmentation(pkt, nc); + return net_tx_pkt_do_sw_fragmentation(pkt, nc); +} + +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc) +{ + bool res; + + pkt->is_loopback = true; + res = net_tx_pkt_send(pkt, nc); + pkt->is_loopback = false; + + return res; } diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h new file mode 100644 index 0000000000..07b9a2098b --- /dev/null +++ b/hw/net/net_tx_pkt.h @@ -0,0 +1,191 @@ +/* + * QEMU TX packets abstraction + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Tamir Shomer <tamirs@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef NET_TX_PKT_H +#define NET_TX_PKT_H + +#include "qemu/osdep.h" +#include "net/eth.h" +#include "exec/hwaddr.h" + +/* define to enable packet dump functions */ +/*#define NET_TX_PKT_DEBUG*/ + +struct NetTxPkt; + +/** + * Init function for tx packet functionality + * + * @pkt: packet pointer + * @pci_dev: PCI device processing this packet + * @max_frags: max tx ip fragments + * @has_virt_hdr: device uses virtio header. + */ +void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, + uint32_t max_frags, bool has_virt_hdr); + +/** + * Clean all tx packet resources. + * + * @pkt: packet. + */ +void net_tx_pkt_uninit(struct NetTxPkt *pkt); + +/** + * get virtio header + * + * @pkt: packet + * @ret: virtio header + */ +struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt); + +/** + * build virtio header (will be stored in module context) + * + * @pkt: packet + * @tso_enable: TSO enabled + * @csum_enable: CSO enabled + * @gso_size: MSS size for TSO + * + */ +void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, + bool csum_enable, uint32_t gso_size); + +/** +* updates vlan tag, and adds vlan header with custom ethernet type +* in case it is missing. +* +* @pkt: packet +* @vlan: VLAN tag +* @vlan_ethtype: VLAN header Ethernet type +* +*/ +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype); + +/** +* updates vlan tag, and adds vlan header in case it is missing +* +* @pkt: packet +* @vlan: VLAN tag +* +*/ +static inline void +net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +{ + net_tx_pkt_setup_vlan_header_ex(pkt, vlan, ETH_P_VLAN); +} + +/** + * populate data fragment into pkt context. + * + * @pkt: packet + * @pa: physical address of fragment + * @len: length of fragment + * + */ +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, + size_t len); + +/** + * Fix ip header fields and calculate IP header and pseudo header checksums. + * + * @pkt: packet + * + */ +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt); + +/** + * Calculate the IP header checksum. + * + * @pkt: packet + * + */ +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt); + +/** + * get length of all populated data. + * + * @pkt: packet + * @ret: total data length + * + */ +size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt); + +/** + * get packet type + * + * @pkt: packet + * @ret: packet type + * + */ +eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt); + +/** + * prints packet data if debug is enabled + * + * @pkt: packet + * + */ +void net_tx_pkt_dump(struct NetTxPkt *pkt); + +/** + * reset tx packet private context (needed to be called between packets) + * + * @pkt: packet + * + */ +void net_tx_pkt_reset(struct NetTxPkt *pkt); + +/** + * Send packet to qemu. handles sw offloads if vhdr is not supported. + * + * @pkt: packet + * @nc: NetClientState + * @ret: operation result + * + */ +bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); + +/** +* Redirect packet directly to receive path (emulate loopback phy). +* Handles sw offloads if vhdr is not supported. +* +* @pkt: packet +* @nc: NetClientState +* @ret: operation result +* +*/ +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc); + +/** + * parse raw packet data and analyze offload requirements. + * + * @pkt: packet + * + */ +bool net_tx_pkt_parse(struct NetTxPkt *pkt); + +/** +* indicates if there are data fragments held by this packet object. +* +* @pkt: packet +* +*/ +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt); + +#endif diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 1e5ec149fa..562c1fded2 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -1867,11 +1867,6 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor) return 1; } -/* structures and macros for task offloading */ -#define TCP_HEADER_DATA_OFFSET(tcp) (((be16_to_cpu(tcp->th_offset_flags) >> 12)&0xf) << 2) -#define TCP_FLAGS_ONLY(flags) ((flags)&0x3f) -#define TCP_HEADER_FLAGS(tcp) TCP_FLAGS_ONLY(be16_to_cpu(tcp->th_offset_flags)) - #define TCP_HEADER_CLEAR_FLAGS(tcp, off) ((tcp)->th_offset_flags &= cpu_to_be16(~TCP_FLAGS_ONLY(off))) /* produces ones' complement sum of data */ diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 20f26b7d89..16645e6c23 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -30,8 +30,8 @@ #include "vmxnet3.h" #include "vmxnet_debug.h" #include "vmware_utils.h" -#include "vmxnet_tx_pkt.h" -#include "vmxnet_rx_pkt.h" +#include "net_tx_pkt.h" +#include "net_rx_pkt.h" #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 #define VMXNET3_MSIX_BAR_SIZE 0x2000 @@ -314,13 +314,13 @@ typedef struct { bool peer_has_vhdr; /* TX packets to QEMU interface */ - struct VmxnetTxPkt *tx_pkt; + struct NetTxPkt *tx_pkt; uint32_t offload_mode; uint32_t cso_or_gso_size; uint16_t tci; bool needs_vlan; - struct VmxnetRxPkt *rx_pkt; + struct NetRxPkt *rx_pkt; bool tx_sop; bool skip_current_tx_pkt; @@ -474,7 +474,7 @@ static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l) s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0); s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1); - VMW_CFPRN("Variable MAC: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a)); + VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a)); qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); } @@ -546,18 +546,18 @@ vmxnet3_setup_tx_offloads(VMXNET3State *s) { switch (s->offload_mode) { case VMXNET3_OM_NONE: - vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, false, 0); + net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0); break; case VMXNET3_OM_CSUM: - vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, true, 0); + net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0); VMW_PKPRN("L4 CSO requested\n"); break; case VMXNET3_OM_TSO: - vmxnet_tx_pkt_build_vheader(s->tx_pkt, true, true, + net_tx_pkt_build_vheader(s->tx_pkt, true, true, s->cso_or_gso_size); - vmxnet_tx_pkt_update_ip_checksums(s->tx_pkt); + net_tx_pkt_update_ip_checksums(s->tx_pkt); VMW_PKPRN("GSO offload requested."); break; @@ -590,12 +590,12 @@ static void vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx, Vmxnet3PktStatus status) { - size_t tot_len = vmxnet_tx_pkt_get_total_len(s->tx_pkt); + size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt); struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats; switch (status) { case VMXNET3_PKT_STATUS_OK: - switch (vmxnet_tx_pkt_get_packet_type(s->tx_pkt)) { + switch (net_tx_pkt_get_packet_type(s->tx_pkt)) { case ETH_PKT_BCAST: stats->bcastPktsTxOK++; stats->bcastBytesTxOK += tot_len; @@ -643,7 +643,7 @@ vmxnet3_on_rx_done_update_stats(VMXNET3State *s, Vmxnet3PktStatus status) { struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats; - size_t tot_len = vmxnet_rx_pkt_get_total_len(s->rx_pkt); + size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt); switch (status) { case VMXNET3_PKT_STATUS_OUT_OF_BUF: @@ -654,7 +654,7 @@ vmxnet3_on_rx_done_update_stats(VMXNET3State *s, stats->pktsRxError++; break; case VMXNET3_PKT_STATUS_OK: - switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) { + switch (net_rx_pkt_get_packet_type(s->rx_pkt)) { case ETH_PKT_BCAST: stats->bcastPktsRxOK++; stats->bcastBytesRxOK += tot_len; @@ -715,10 +715,10 @@ vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx) } /* debug prints */ - vmxnet3_dump_virt_hdr(vmxnet_tx_pkt_get_vhdr(s->tx_pkt)); - vmxnet_tx_pkt_dump(s->tx_pkt); + vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt)); + net_tx_pkt_dump(s->tx_pkt); - if (!vmxnet_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) { + if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) { status = VMXNET3_PKT_STATUS_DISCARD; goto func_exit; } @@ -746,7 +746,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE; data_pa = le64_to_cpu(txd.addr); - if (!vmxnet_tx_pkt_add_raw_fragment(s->tx_pkt, + if (!net_tx_pkt_add_raw_fragment(s->tx_pkt, data_pa, data_len)) { s->skip_current_tx_pkt = true; @@ -759,9 +759,9 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) } if (txd.eop) { - if (!s->skip_current_tx_pkt && vmxnet_tx_pkt_parse(s->tx_pkt)) { + if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) { if (s->needs_vlan) { - vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); + net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); } vmxnet3_send_packet(s, qidx); @@ -773,7 +773,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) vmxnet3_complete_packet(s, qidx, txd_idx); s->tx_sop = true; s->skip_current_tx_pkt = false; - vmxnet_tx_pkt_reset(s->tx_pkt); + net_tx_pkt_reset(s->tx_pkt); } } } @@ -802,7 +802,9 @@ vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen) hwaddr daddr = vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring); - cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + pci_dma_read(PCI_DEVICE(s), daddr, + &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring); if (rxcd.gen != ring_gen) { @@ -928,7 +930,7 @@ vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head, * in the case the host OS performs forwarding, it will forward an * incorrectly checksummed packet. */ -static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, +static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt, const void *pkt_data, size_t pkt_len) { @@ -937,16 +939,16 @@ static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, uint8_t *data; int len; - if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) { + if (!net_rx_pkt_has_virt_hdr(pkt)) { return; } - vhdr = vmxnet_rx_pkt_get_vhdr(pkt); + vhdr = net_rx_pkt_get_vhdr(pkt); if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) { return; } - vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); if (!(isip4 || isip6) || !(istcp || isudp)) { return; } @@ -970,7 +972,7 @@ static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; } -static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, +static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt, struct Vmxnet3_RxCompDesc *rxcd) { int csum_ok, is_gso; @@ -978,16 +980,16 @@ static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, struct virtio_net_hdr *vhdr; uint8_t offload_type; - if (vmxnet_rx_pkt_is_vlan_stripped(pkt)) { + if (net_rx_pkt_is_vlan_stripped(pkt)) { rxcd->ts = 1; - rxcd->tci = vmxnet_rx_pkt_get_vlan_tag(pkt); + rxcd->tci = net_rx_pkt_get_vlan_tag(pkt); } - if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) { + if (!net_rx_pkt_has_virt_hdr(pkt)) { goto nocsum; } - vhdr = vmxnet_rx_pkt_get_vhdr(pkt); + vhdr = net_rx_pkt_get_vhdr(pkt); /* * Checksum is valid when lower level tell so or when lower level * requires checksum offload telling that packet produced/bridged @@ -1004,7 +1006,7 @@ static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, goto nocsum; } - vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); if ((!istcp && !isudp) || (!isip4 && !isip6)) { goto nocsum; } @@ -1023,10 +1025,11 @@ nocsum: } static void -vmxnet3_physical_memory_writev(const struct iovec *iov, - size_t start_iov_off, - hwaddr target_addr, - size_t bytes_to_copy) +vmxnet3_pci_dma_writev(PCIDevice *pci_dev, + const struct iovec *iov, + size_t start_iov_off, + hwaddr target_addr, + size_t bytes_to_copy) { size_t curr_off = 0; size_t copied = 0; @@ -1036,9 +1039,9 @@ vmxnet3_physical_memory_writev(const struct iovec *iov, size_t chunk_len = MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy); - cpu_physical_memory_write(target_addr + copied, - iov->iov_base + start_iov_off - curr_off, - chunk_len); + pci_dma_write(pci_dev, target_addr + copied, + iov->iov_base + start_iov_off - curr_off, + chunk_len); copied += chunk_len; start_iov_off += chunk_len; @@ -1063,13 +1066,13 @@ vmxnet3_indicate_packet(VMXNET3State *s) uint32_t new_rxcd_gen = VMXNET3_INIT_GEN; hwaddr new_rxcd_pa = 0; hwaddr ready_rxcd_pa = 0; - struct iovec *data = vmxnet_rx_pkt_get_iovec(s->rx_pkt); + struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt); size_t bytes_copied = 0; - size_t bytes_left = vmxnet_rx_pkt_get_total_len(s->rx_pkt); + size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt); uint16_t num_frags = 0; size_t chunk_size; - vmxnet_rx_pkt_dump(s->rx_pkt); + net_rx_pkt_dump(s->rx_pkt); while (bytes_left > 0) { @@ -1088,15 +1091,15 @@ vmxnet3_indicate_packet(VMXNET3State *s) } chunk_size = MIN(bytes_left, rxd.len); - vmxnet3_physical_memory_writev(data, bytes_copied, - le64_to_cpu(rxd.addr), chunk_size); + vmxnet3_pci_dma_writev(PCI_DEVICE(s), data, bytes_copied, + le64_to_cpu(rxd.addr), chunk_size); bytes_copied += chunk_size; bytes_left -= chunk_size; vmxnet3_dump_rx_descr(&rxd); if (ready_rxcd_pa != 0) { - cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + pci_dma_write(PCI_DEVICE(s), ready_rxcd_pa, &rxcd, sizeof(rxcd)); } memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc)); @@ -1127,7 +1130,8 @@ vmxnet3_indicate_packet(VMXNET3State *s) if (ready_rxcd_pa != 0) { rxcd.eop = 1; rxcd.err = (bytes_left != 0); - cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + + pci_dma_write(PCI_DEVICE(s), ready_rxcd_pa, &rxcd, sizeof(rxcd)); /* Flush RX descriptor changes */ smp_wmb(); @@ -1219,16 +1223,16 @@ static void vmxnet3_reset_interrupt_states(VMXNET3State *s) static void vmxnet3_reset_mac(VMXNET3State *s) { memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a)); - VMW_CFPRN("MAC address set to: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a)); + VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a)); } static void vmxnet3_deactivate_device(VMXNET3State *s) { if (s->device_active) { VMW_CBPRN("Deactivating vmxnet3..."); - vmxnet_tx_pkt_reset(s->tx_pkt); - vmxnet_tx_pkt_uninit(s->tx_pkt); - vmxnet_rx_pkt_uninit(s->rx_pkt); + net_tx_pkt_reset(s->tx_pkt); + net_tx_pkt_uninit(s->tx_pkt); + net_rx_pkt_uninit(s->rx_pkt); s->device_active = false; } } @@ -1298,10 +1302,11 @@ static void vmxnet3_update_mcast_filters(VMXNET3State *s) VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.rxFilterConf.mfTablePA); - cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes); + pci_dma_read(PCI_DEVICE(s), mcast_list_pa, s->mcast_list, list_bytes); + VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len); for (i = 0; i < s->mcast_list_len; i++) { - VMW_CFPRN("\t" VMXNET_MF, VMXNET_MA(s->mcast_list[i].a)); + VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a)); } } } @@ -1328,15 +1333,17 @@ static void vmxnet3_fill_stats(VMXNET3State *s) return; for (i = 0; i < s->txq_num; i++) { - cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa, - &s->txq_descr[i].txq_stats, - sizeof(s->txq_descr[i].txq_stats)); + pci_dma_write(PCI_DEVICE(s), + s->txq_descr[i].tx_stats_pa, + &s->txq_descr[i].txq_stats, + sizeof(s->txq_descr[i].txq_stats)); } for (i = 0; i < s->rxq_num; i++) { - cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa, - &s->rxq_descr[i].rxq_stats, - sizeof(s->rxq_descr[i].rxq_stats)); + pci_dma_write(PCI_DEVICE(s), + s->rxq_descr[i].rx_stats_pa, + &s->rxq_descr[i].rxq_stats, + sizeof(s->rxq_descr[i].rxq_stats)); } } @@ -1558,8 +1565,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Preallocate TX packet wrapper */ VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); - vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); - vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), + s->max_tx_frags, s->peer_has_vhdr); + net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); /* Read rings memory locations for RX queues */ for (i = 0; i < s->rxq_num; i++) { @@ -1965,7 +1973,7 @@ vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data, return false; } - switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) { + switch (net_rx_pkt_get_packet_type(s->rx_pkt)) { case ETH_PKT_UCAST: if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) { return false; @@ -2013,7 +2021,7 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) } if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); buf += sizeof(struct virtio_net_hdr); size -= sizeof(struct virtio_net_hdr); } @@ -2026,13 +2034,13 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, + net_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); if (vmxnet3_rx_filter_may_indicate(s, buf, size)) { - vmxnet_rx_pkt_set_protocols(s->rx_pkt, buf, size); + net_rx_pkt_set_protocols(s->rx_pkt, buf, size); vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size); - vmxnet_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); + net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1; if (bytes_indicated < size) { VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size); @@ -2102,7 +2110,7 @@ static void vmxnet3_net_init(VMXNET3State *s) s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP; - VMW_CFPRN("Permanent MAC: " VMXNET_MF, VMXNET_MA(s->perm_mac.a)); + VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a)); s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf, object_get_typename(OBJECT(s)), @@ -2255,9 +2263,9 @@ static const MemoryRegionOps b1_ops = { }, }; -static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s) +static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) { - static uint64_t dsn_payload; + uint64_t dsn_payload; uint8_t *dsnp = (uint8_t *)&dsn_payload; dsnp[0] = 0xfe; @@ -2268,7 +2276,7 @@ static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s) dsnp[5] = s->conf.macaddr.a[1]; dsnp[6] = s->conf.macaddr.a[2]; dsnp[7] = 0xff; - return dsnp; + return dsn_payload; } static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) @@ -2313,10 +2321,8 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET); } - pcie_add_capability(pci_dev, PCI_EXT_CAP_ID_DSN, 0x1, - VMXNET3_DSN_OFFSET, PCI_EXT_CAP_DSN_SIZEOF); - memcpy(pci_dev->config + VMXNET3_DSN_OFFSET + 4, - vmxnet3_device_serial_num(s), sizeof(uint64_t)); + pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, + vmxnet3_device_serial_num(s)); } register_savevm(dev, "vmxnet3-msix", -1, 1, @@ -2538,8 +2544,9 @@ static int vmxnet3_post_load(void *opaque, int version_id) VMXNET3State *s = opaque; PCIDevice *d = PCI_DEVICE(s); - vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); - vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), + s->max_tx_frags, s->peer_has_vhdr); + net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); if (s->msix_used) { if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { diff --git a/hw/net/vmxnet_debug.h b/hw/net/vmxnet_debug.h index 96495dbb12..5aab00b050 100644 --- a/hw/net/vmxnet_debug.h +++ b/hw/net/vmxnet_debug.h @@ -142,7 +142,4 @@ } \ } while (0) -#define VMXNET_MF "%02X:%02X:%02X:%02X:%02X:%02X" -#define VMXNET_MA(a) (a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5] - #endif /* _QEMU_VMXNET3_DEBUG_H */ diff --git a/hw/net/vmxnet_rx_pkt.c b/hw/net/vmxnet_rx_pkt.c deleted file mode 100644 index 21bb46e685..0000000000 --- a/hw/net/vmxnet_rx_pkt.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstractions - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman <dmitry@daynix.com> - * Tamir Shomer <tamirs@daynix.com> - * Yan Vugenfirer <yan@daynix.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "vmxnet_rx_pkt.h" -#include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" -#include "net/checksum.h" -#include "net/tap.h" - -/* - * RX packet may contain up to 2 fragments - rebuilt eth header - * in case of VLAN tag stripping - * and payload received from QEMU - in any case - */ -#define VMXNET_MAX_RX_PACKET_FRAGMENTS (2) - -struct VmxnetRxPkt { - struct virtio_net_hdr virt_hdr; - uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; - struct iovec vec[VMXNET_MAX_RX_PACKET_FRAGMENTS]; - uint16_t vec_len; - uint32_t tot_len; - uint16_t tci; - bool vlan_stripped; - bool has_virt_hdr; - eth_pkt_types_e packet_type; - - /* Analysis results */ - bool isip4; - bool isip6; - bool isudp; - bool istcp; -}; - -void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr) -{ - struct VmxnetRxPkt *p = g_malloc0(sizeof *p); - p->has_virt_hdr = has_virt_hdr; - *pkt = p; -} - -void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt) -{ - g_free(pkt); -} - -struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - return &pkt->virt_hdr; -} - -void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan) -{ - uint16_t tci = 0; - uint16_t ploff; - assert(pkt); - pkt->vlan_stripped = false; - - if (strip_vlan) { - pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); - } - - if (pkt->vlan_stripped) { - pkt->vec[0].iov_base = pkt->ehdr_buf; - pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); - pkt->vec[1].iov_base = (uint8_t *) data + ploff; - pkt->vec[1].iov_len = len - ploff; - pkt->vec_len = 2; - pkt->tot_len = len - ploff + sizeof(struct eth_header); - } else { - pkt->vec[0].iov_base = (void *)data; - pkt->vec[0].iov_len = len; - pkt->vec_len = 1; - pkt->tot_len = len; - } - - pkt->tci = tci; -} - -void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt) -{ -#ifdef VMXNET_RX_PKT_DEBUG - VmxnetRxPkt *pkt = (VmxnetRxPkt *)pkt; - assert(pkt); - - printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n", - pkt->tot_len, pkt->vlan_stripped, pkt->tci); -#endif -} - -void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt, - eth_pkt_types_e packet_type) -{ - assert(pkt); - - pkt->packet_type = packet_type; - -} - -eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->packet_type; -} - -size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->tot_len; -} - -void vmxnet_rx_pkt_set_protocols(struct VmxnetRxPkt *pkt, const void *data, - size_t len) -{ - assert(pkt); - - eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, - &pkt->isudp, &pkt->istcp); -} - -void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt, - bool *isip4, bool *isip6, - bool *isudp, bool *istcp) -{ - assert(pkt); - - *isip4 = pkt->isip4; - *isip6 = pkt->isip6; - *isudp = pkt->isudp; - *istcp = pkt->istcp; -} - -struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->vec; -} - -void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt, - struct virtio_net_hdr *vhdr) -{ - assert(pkt); - - memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); -} - -bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->vlan_stripped; -} - -bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->has_virt_hdr; -} - -uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->tci; -} diff --git a/hw/net/vmxnet_rx_pkt.h b/hw/net/vmxnet_rx_pkt.h deleted file mode 100644 index 0a45c1ba00..0000000000 --- a/hw/net/vmxnet_rx_pkt.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstraction - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman <dmitry@daynix.com> - * Tamir Shomer <tamirs@daynix.com> - * Yan Vugenfirer <yan@daynix.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef VMXNET_RX_PKT_H -#define VMXNET_RX_PKT_H - -#include "net/eth.h" - -/* defines to enable packet dump functions */ -/*#define VMXNET_RX_PKT_DEBUG*/ - -struct VmxnetRxPkt; - -/** - * Clean all rx packet resources - * - * @pkt: packet - * - */ -void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt); - -/** - * Init function for rx packet functionality - * - * @pkt: packet pointer - * @has_virt_hdr: device uses virtio header - * - */ -void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr); - -/** - * returns total length of data attached to rx context - * - * @pkt: packet - * - * Return: nothing - * - */ -size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt); - -/** - * parse and set packet analysis results - * - * @pkt: packet - * @data: pointer to the data buffer to be parsed - * @len: data length - * - */ -void vmxnet_rx_pkt_set_protocols(struct VmxnetRxPkt *pkt, const void *data, - size_t len); - -/** - * fetches packet analysis results - * - * @pkt: packet - * @isip4: whether the packet given is IPv4 - * @isip6: whether the packet given is IPv6 - * @isudp: whether the packet given is UDP - * @istcp: whether the packet given is TCP - * - */ -void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt, - bool *isip4, bool *isip6, - bool *isudp, bool *istcp); - -/** - * returns virtio header stored in rx context - * - * @pkt: packet - * @ret: virtio header - * - */ -struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt); - -/** - * returns packet type - * - * @pkt: packet - * @ret: packet type - * - */ -eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt); - -/** - * returns vlan tag - * - * @pkt: packet - * @ret: VLAN tag - * - */ -uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt); - -/** - * tells whether vlan was stripped from the packet - * - * @pkt: packet - * @ret: VLAN stripped sign - * - */ -bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt); - -/** - * notifies caller if the packet has virtio header - * - * @pkt: packet - * @ret: true if packet has virtio header, false otherwize - * - */ -bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt); - -/** - * attach data to rx packet - * - * @pkt: packet - * @data: pointer to the data buffer - * @len: data length - * @strip_vlan: should the module strip vlan from data - * - */ -void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan); - -/** - * returns io vector that holds the attached data - * - * @pkt: packet - * @ret: pointer to IOVec - * - */ -struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt); - -/** - * prints rx packet data if debug is enabled - * - * @pkt: packet - * - */ -void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt); - -/** - * copy passed vhdr data to packet context - * - * @pkt: packet - * @vhdr: VHDR buffer - * - */ -void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt, - struct virtio_net_hdr *vhdr); - -/** - * save packet type in packet context - * - * @pkt: packet - * @packet_type: the packet type - * - */ -void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt, - eth_pkt_types_e packet_type); - -#endif diff --git a/hw/net/vmxnet_tx_pkt.h b/hw/net/vmxnet_tx_pkt.h deleted file mode 100644 index f51e98ad95..0000000000 --- a/hw/net/vmxnet_tx_pkt.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstraction - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman <dmitry@daynix.com> - * Tamir Shomer <tamirs@daynix.com> - * Yan Vugenfirer <yan@daynix.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef VMXNET_TX_PKT_H -#define VMXNET_TX_PKT_H - -#include "net/eth.h" -#include "exec/hwaddr.h" - -/* define to enable packet dump functions */ -/*#define VMXNET_TX_PKT_DEBUG*/ - -struct VmxnetTxPkt; - -/** - * Init function for tx packet functionality - * - * @pkt: packet pointer - * @max_frags: max tx ip fragments - * @has_virt_hdr: device uses virtio header. - */ -void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr); - -/** - * Clean all tx packet resources. - * - * @pkt: packet. - */ -void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt); - -/** - * get virtio header - * - * @pkt: packet - * @ret: virtio header - */ -struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt); - -/** - * build virtio header (will be stored in module context) - * - * @pkt: packet - * @tso_enable: TSO enabled - * @csum_enable: CSO enabled - * @gso_size: MSS size for TSO - * - */ -void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, - bool csum_enable, uint32_t gso_size); - -/** - * updates vlan tag, and adds vlan header in case it is missing - * - * @pkt: packet - * @vlan: VLAN tag - * - */ -void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan); - -/** - * populate data fragment into pkt context. - * - * @pkt: packet - * @pa: physical address of fragment - * @len: length of fragment - * - */ -bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, - size_t len); - -/** - * fix ip header fields and calculate checksums needed. - * - * @pkt: packet - * - */ -void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt); - -/** - * get length of all populated data. - * - * @pkt: packet - * @ret: total data length - * - */ -size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt); - -/** - * get packet type - * - * @pkt: packet - * @ret: packet type - * - */ -eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt); - -/** - * prints packet data if debug is enabled - * - * @pkt: packet - * - */ -void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt); - -/** - * reset tx packet private context (needed to be called between packets) - * - * @pkt: packet - * - */ -void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt); - -/** - * Send packet to qemu. handles sw offloads if vhdr is not supported. - * - * @pkt: packet - * @nc: NetClientState - * @ret: operation result - * - */ -bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc); - -/** - * parse raw packet data and analyze offload requirements. - * - * @pkt: packet - * - */ -bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt); - -#endif diff --git a/hw/pci/msix.c b/hw/pci/msix.c index b75f0e9c47..0ec1cb14fc 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -72,7 +72,7 @@ void msix_set_pending(PCIDevice *dev, unsigned int vector) *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); } -static void msix_clr_pending(PCIDevice *dev, int vector) +void msix_clr_pending(PCIDevice *dev, int vector) { *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 728386ada7..9599fdef57 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -43,26 +43,15 @@ /*************************************************************************** * pci express capability helper functions */ -int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) -{ - int pos; - uint8_t *exp_cap; - - assert(pci_is_express(dev)); - - pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, - PCI_EXP_VER2_SIZEOF); - if (pos < 0) { - return pos; - } - dev->exp.exp_cap = pos; - exp_cap = dev->config + pos; +static void +pcie_cap_v1_fill(uint8_t *exp_cap, uint8_t port, uint8_t type, uint8_t version) +{ /* capability register - interrupt message number defaults to 0 */ + interrupt message number defaults to 0 */ pci_set_word(exp_cap + PCI_EXP_FLAGS, ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) | - PCI_EXP_FLAGS_VER2); + version); /* device capability register * table 7-12: @@ -81,7 +70,27 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) pci_set_word(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA); +} + +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) +{ + /* PCIe cap v2 init */ + int pos; + uint8_t *exp_cap; + assert(pci_is_express(dev)); + + pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, PCI_EXP_VER2_SIZEOF); + if (pos < 0) { + return pos; + } + dev->exp.exp_cap = pos; + exp_cap = dev->config + pos; + + /* Filling values common with v1 */ + pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER2); + + /* Filling v2 specific values */ pci_set_long(exp_cap + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); @@ -89,7 +98,29 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) return pos; } -int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) +int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, uint8_t type, + uint8_t port) +{ + /* PCIe cap v1 init */ + int pos; + uint8_t *exp_cap; + + assert(pci_is_express(dev)); + + pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, PCI_EXP_VER1_SIZEOF); + if (pos < 0) { + return pos; + } + dev->exp.exp_cap = pos; + exp_cap = dev->config + pos; + + pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER1); + + return pos; +} + +static int +pcie_endpoint_cap_common_init(PCIDevice *dev, uint8_t offset, uint8_t cap_size) { uint8_t type = PCI_EXP_TYPE_ENDPOINT; @@ -102,7 +133,19 @@ int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) type = PCI_EXP_TYPE_RC_END; } - return pcie_cap_init(dev, offset, type, 0); + return (cap_size == PCI_EXP_VER1_SIZEOF) + ? pcie_cap_v1_init(dev, offset, type, 0) + : pcie_cap_init(dev, offset, type, 0); +} + +int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) +{ + return pcie_endpoint_cap_common_init(dev, offset, PCI_EXP_VER2_SIZEOF); +} + +int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset) +{ + return pcie_endpoint_cap_common_init(dev, offset, PCI_EXP_VER1_SIZEOF); } void pcie_cap_exit(PCIDevice *dev) @@ -110,6 +153,11 @@ void pcie_cap_exit(PCIDevice *dev) pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF); } +void pcie_cap_v1_exit(PCIDevice *dev) +{ + pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER1_SIZEOF); +} + uint8_t pcie_cap_get_type(const PCIDevice *dev) { uint32_t pos = dev->exp.exp_cap; @@ -647,3 +695,13 @@ void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn) offset, PCI_ARI_SIZEOF); pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8); } + +void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num) +{ + static const int pci_dsn_ver = 1; + static const int pci_dsn_cap = 4; + + pcie_add_capability(dev, PCI_EXT_CAP_ID_DSN, pci_dsn_ver, offset, + PCI_EXT_CAP_DSN_SIZEOF); + pci_set_quad(dev->config + offset + pci_dsn_cap, ser_num); +} diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h index e9157ea4b3..ec6c509d74 100644 --- a/include/hw/arm/fsl-imx6.h +++ b/include/hw/arm/fsl-imx6.h @@ -28,6 +28,7 @@ #include "hw/gpio/imx_gpio.h" #include "hw/sd/sdhci.h" #include "hw/ssi/imx_spi.h" +#include "hw/net/imx_fec.h" #include "exec/memory.h" #include "cpu.h" @@ -58,6 +59,7 @@ typedef struct FslIMX6State { IMXGPIOState gpio[FSL_IMX6_NUM_GPIOS]; SDHCIState esdhc[FSL_IMX6_NUM_ESDHCS]; IMXSPIState spi[FSL_IMX6_NUM_ECSPIS]; + IMXFECState eth; MemoryRegion rom; MemoryRegion caam; MemoryRegion ocram; @@ -436,8 +438,8 @@ typedef struct FslIMX6State { #define FSL_IMX6_HDMI_MASTER_IRQ 115 #define FSL_IMX6_HDMI_CEC_IRQ 116 #define FSL_IMX6_MLB150_LOW_IRQ 117 -#define FSL_IMX6_ENET_MAC_IRQ 118 -#define FSL_IMX6_ENET_MAC_1588_IRQ 119 +#define FSL_IMX6_ENET_MAC_1588_IRQ 118 +#define FSL_IMX6_ENET_MAC_IRQ 119 #define FSL_IMX6_PCIE1_IRQ 120 #define FSL_IMX6_PCIE2_IRQ 121 #define FSL_IMX6_PCIE3_IRQ 122 diff --git a/include/hw/net/imx_fec.h b/include/hw/net/imx_fec.h index cbf86509e8..62ad473b05 100644 --- a/include/hw/net/imx_fec.h +++ b/include/hw/net/imx_fec.h @@ -1,5 +1,5 @@ /* - * i.MX Fast Ethernet Controller emulation. + * i.MX FEC/ENET Ethernet Controller emulation. * * Copyright (c) 2013 Jean-Christophe Dubois. <jcd@tribudubois.net> * @@ -27,27 +27,147 @@ #define TYPE_IMX_FEC "imx.fec" #define IMX_FEC(obj) OBJECT_CHECK(IMXFECState, (obj), TYPE_IMX_FEC) +#define TYPE_IMX_ENET "imx.enet" + #include "hw/sysbus.h" #include "net/net.h" -#define FEC_MAX_FRAME_SIZE 2032 - -#define FEC_INT_HB (1 << 31) -#define FEC_INT_BABR (1 << 30) -#define FEC_INT_BABT (1 << 29) -#define FEC_INT_GRA (1 << 28) -#define FEC_INT_TXF (1 << 27) -#define FEC_INT_TXB (1 << 26) -#define FEC_INT_RXF (1 << 25) -#define FEC_INT_RXB (1 << 24) -#define FEC_INT_MII (1 << 23) -#define FEC_INT_EBERR (1 << 22) -#define FEC_INT_LC (1 << 21) -#define FEC_INT_RL (1 << 20) -#define FEC_INT_UN (1 << 19) - -#define FEC_EN 2 -#define FEC_RESET 1 +#define ENET_EIR 1 +#define ENET_EIMR 2 +#define ENET_RDAR 4 +#define ENET_TDAR 5 +#define ENET_ECR 9 +#define ENET_MMFR 16 +#define ENET_MSCR 17 +#define ENET_MIBC 25 +#define ENET_RCR 33 +#define ENET_TCR 49 +#define ENET_PALR 57 +#define ENET_PAUR 58 +#define ENET_OPD 59 +#define ENET_IAUR 70 +#define ENET_IALR 71 +#define ENET_GAUR 72 +#define ENET_GALR 73 +#define ENET_TFWR 81 +#define ENET_FRBR 83 +#define ENET_FRSR 84 +#define ENET_RDSR 96 +#define ENET_TDSR 97 +#define ENET_MRBR 98 +#define ENET_RSFL 100 +#define ENET_RSEM 101 +#define ENET_RAEM 102 +#define ENET_RAFL 103 +#define ENET_TSEM 104 +#define ENET_TAEM 105 +#define ENET_TAFL 106 +#define ENET_TIPG 107 +#define ENET_FTRL 108 +#define ENET_TACC 112 +#define ENET_RACC 113 +#define ENET_MIIGSK_CFGR 192 +#define ENET_MIIGSK_ENR 194 +#define ENET_ATCR 256 +#define ENET_ATVR 257 +#define ENET_ATOFF 258 +#define ENET_ATPER 259 +#define ENET_ATCOR 260 +#define ENET_ATINC 261 +#define ENET_ATSTMP 262 +#define ENET_TGSR 385 +#define ENET_TCSR0 386 +#define ENET_TCCR0 387 +#define ENET_TCSR1 388 +#define ENET_TCCR1 389 +#define ENET_TCSR2 390 +#define ENET_TCCR2 391 +#define ENET_TCSR3 392 +#define ENET_TCCR3 393 +#define ENET_MAX 400 + +#define ENET_MAX_FRAME_SIZE 2032 + +/* EIR and EIMR */ +#define ENET_INT_HB (1 << 31) +#define ENET_INT_BABR (1 << 30) +#define ENET_INT_BABT (1 << 29) +#define ENET_INT_GRA (1 << 28) +#define ENET_INT_TXF (1 << 27) +#define ENET_INT_TXB (1 << 26) +#define ENET_INT_RXF (1 << 25) +#define ENET_INT_RXB (1 << 24) +#define ENET_INT_MII (1 << 23) +#define ENET_INT_EBERR (1 << 22) +#define ENET_INT_LC (1 << 21) +#define ENET_INT_RL (1 << 20) +#define ENET_INT_UN (1 << 19) +#define ENET_INT_PLR (1 << 18) +#define ENET_INT_WAKEUP (1 << 17) +#define ENET_INT_TS_AVAIL (1 << 16) +#define ENET_INT_TS_TIMER (1 << 15) + +#define ENET_INT_MAC (ENET_INT_HB | ENET_INT_BABR | ENET_INT_BABT | \ + ENET_INT_GRA | ENET_INT_TXF | ENET_INT_TXB | \ + ENET_INT_RXF | ENET_INT_RXB | ENET_INT_MII | \ + ENET_INT_EBERR | ENET_INT_LC | ENET_INT_RL | \ + ENET_INT_UN | ENET_INT_PLR | ENET_INT_WAKEUP | \ + ENET_INT_TS_AVAIL) + +/* RDAR */ +#define ENET_RDAR_RDAR (1 << 24) + +/* TDAR */ +#define ENET_TDAR_TDAR (1 << 24) + +/* ECR */ +#define ENET_ECR_RESET (1 << 0) +#define ENET_ECR_ETHEREN (1 << 1) +#define ENET_ECR_MAGICEN (1 << 2) +#define ENET_ECR_SLEEP (1 << 3) +#define ENET_ECR_EN1588 (1 << 4) +#define ENET_ECR_SPEED (1 << 5) +#define ENET_ECR_DBGEN (1 << 6) +#define ENET_ECR_STOPEN (1 << 7) +#define ENET_ECR_DSBWP (1 << 8) + +/* MIBC */ +#define ENET_MIBC_MIB_DIS (1 << 31) +#define ENET_MIBC_MIB_IDLE (1 << 30) +#define ENET_MIBC_MIB_CLEAR (1 << 29) + +/* RCR */ +#define ENET_RCR_LOOP (1 << 0) +#define ENET_RCR_DRT (1 << 1) +#define ENET_RCR_MII_MODE (1 << 2) +#define ENET_RCR_PROM (1 << 3) +#define ENET_RCR_BC_REJ (1 << 4) +#define ENET_RCR_FCE (1 << 5) +#define ENET_RCR_RGMII_EN (1 << 6) +#define ENET_RCR_RMII_MODE (1 << 8) +#define ENET_RCR_RMII_10T (1 << 9) +#define ENET_RCR_PADEN (1 << 12) +#define ENET_RCR_PAUFWD (1 << 13) +#define ENET_RCR_CRCFWD (1 << 14) +#define ENET_RCR_CFEN (1 << 15) +#define ENET_RCR_MAX_FL_SHIFT (16) +#define ENET_RCR_MAX_FL_LENGTH (14) +#define ENET_RCR_NLC (1 << 30) +#define ENET_RCR_GRS (1 << 31) + +/* TCR */ +#define ENET_TCR_GTS (1 << 0) +#define ENET_TCR_FDEN (1 << 2) +#define ENET_TCR_TFC_PAUSE (1 << 3) +#define ENET_TCR_RFC_PAUSE (1 << 4) +#define ENET_TCR_ADDSEL_SHIFT (5) +#define ENET_TCR_ADDSEL_LENGTH (3) +#define ENET_TCR_CRCFWD (1 << 9) + +/* RDSR */ +#define ENET_TWFR_TFWR_SHIFT (0) +#define ENET_TWFR_TFWR_LENGTH (6) +#define ENET_TWFR_STRFWD (1 << 8) /* Buffer Descriptor. */ typedef struct { @@ -56,22 +176,60 @@ typedef struct { uint32_t data; } IMXFECBufDesc; -#define FEC_BD_R (1 << 15) -#define FEC_BD_E (1 << 15) -#define FEC_BD_O1 (1 << 14) -#define FEC_BD_W (1 << 13) -#define FEC_BD_O2 (1 << 12) -#define FEC_BD_L (1 << 11) -#define FEC_BD_TC (1 << 10) -#define FEC_BD_ABC (1 << 9) -#define FEC_BD_M (1 << 8) -#define FEC_BD_BC (1 << 7) -#define FEC_BD_MC (1 << 6) -#define FEC_BD_LG (1 << 5) -#define FEC_BD_NO (1 << 4) -#define FEC_BD_CR (1 << 2) -#define FEC_BD_OV (1 << 1) -#define FEC_BD_TR (1 << 0) +#define ENET_BD_R (1 << 15) +#define ENET_BD_E (1 << 15) +#define ENET_BD_O1 (1 << 14) +#define ENET_BD_W (1 << 13) +#define ENET_BD_O2 (1 << 12) +#define ENET_BD_L (1 << 11) +#define ENET_BD_TC (1 << 10) +#define ENET_BD_ABC (1 << 9) +#define ENET_BD_M (1 << 8) +#define ENET_BD_BC (1 << 7) +#define ENET_BD_MC (1 << 6) +#define ENET_BD_LG (1 << 5) +#define ENET_BD_NO (1 << 4) +#define ENET_BD_CR (1 << 2) +#define ENET_BD_OV (1 << 1) +#define ENET_BD_TR (1 << 0) + +typedef struct { + uint16_t length; + uint16_t flags; + uint32_t data; + uint16_t status; + uint16_t option; + uint16_t checksum; + uint16_t head_proto; + uint32_t last_buffer; + uint32_t timestamp; + uint32_t reserved[2]; +} IMXENETBufDesc; + +#define ENET_BD_ME (1 << 15) +#define ENET_BD_TX_INT (1 << 14) +#define ENET_BD_TS (1 << 13) +#define ENET_BD_PINS (1 << 12) +#define ENET_BD_IINS (1 << 11) +#define ENET_BD_PE (1 << 10) +#define ENET_BD_CE (1 << 9) +#define ENET_BD_UC (1 << 8) +#define ENET_BD_RX_INT (1 << 7) + +#define ENET_BD_TXE (1 << 15) +#define ENET_BD_UE (1 << 13) +#define ENET_BD_EE (1 << 12) +#define ENET_BD_FE (1 << 11) +#define ENET_BD_LCE (1 << 10) +#define ENET_BD_OE (1 << 9) +#define ENET_BD_TSE (1 << 8) +#define ENET_BD_ICE (1 << 5) +#define ENET_BD_PCR (1 << 4) +#define ENET_BD_VLAN (1 << 2) +#define ENET_BD_IPV6 (1 << 1) +#define ENET_BD_FRAG (1 << 0) + +#define ENET_BD_BDU (1 << 31) typedef struct IMXFECState { /*< private >*/ @@ -80,34 +238,20 @@ typedef struct IMXFECState { /*< public >*/ NICState *nic; NICConf conf; - qemu_irq irq; + qemu_irq irq[2]; MemoryRegion iomem; - uint32_t irq_state; - uint32_t eir; - uint32_t eimr; - uint32_t rx_enabled; + uint32_t regs[ENET_MAX]; uint32_t rx_descriptor; uint32_t tx_descriptor; - uint32_t ecr; - uint32_t mmfr; - uint32_t mscr; - uint32_t mibc; - uint32_t rcr; - uint32_t tcr; - uint32_t tfwr; - uint32_t frsr; - uint32_t erdsr; - uint32_t etdsr; - uint32_t emrbr; - uint32_t miigsk_cfgr; - uint32_t miigsk_enr; uint32_t phy_status; uint32_t phy_control; uint32_t phy_advertise; uint32_t phy_int; uint32_t phy_int_mask; + + bool is_fec; } IMXFECState; #endif diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h index 72e5f931c5..048a29dd2f 100644 --- a/include/hw/pci/msix.h +++ b/include/hw/pci/msix.h @@ -29,6 +29,7 @@ int msix_present(PCIDevice *dev); bool msix_is_masked(PCIDevice *dev, unsigned vector); void msix_set_pending(PCIDevice *dev, unsigned vector); +void msix_clr_pending(PCIDevice *dev, int vector); int msix_vector_use(PCIDevice *dev, unsigned vector); void msix_vector_unuse(PCIDevice *dev, unsigned vector); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index ef6ba51f6c..4420f47598 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -465,16 +465,23 @@ pci_get_long(const uint8_t *config) return ldl_le_p(config); } +/* + * PCI capabilities and/or their fields + * are generally DWORD aligned only so + * mechanism used by pci_set/get_quad() + * must be tolerant to unaligned pointers + * + */ static inline void pci_set_quad(uint8_t *config, uint64_t val) { - cpu_to_le64w((uint64_t *)config, val); + stq_le_p(config, val); } static inline uint64_t pci_get_quad(const uint8_t *config) { - return le64_to_cpup((const uint64_t *)config); + return ldq_le_p(config); } static inline void diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h index ba8cbe9278..7a83142578 100644 --- a/include/hw/pci/pci_regs.h +++ b/include/hw/pci/pci_regs.h @@ -1 +1,3 @@ #include "standard-headers/linux/pci_regs.h" + +#define PCI_PM_CAP_VER_1_1 0x0002 /* PCI PM spec ver. 1.1 */ diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index b48a7a2c5a..056d25e53c 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -80,8 +80,12 @@ struct PCIExpressDevice { /* PCI express capability helper functions */ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port); +int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, + uint8_t type, uint8_t port); int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset); void pcie_cap_exit(PCIDevice *dev); +int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset); +void pcie_cap_v1_exit(PCIDevice *dev); uint8_t pcie_cap_get_type(const PCIDevice *dev); void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector); uint8_t pcie_cap_flags_get_vector(PCIDevice *dev); @@ -115,6 +119,7 @@ void pcie_add_capability(PCIDevice *dev, uint16_t offset, uint16_t size); void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); +void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); extern const VMStateDescription vmstate_pcie_device; diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 6a28b33e69..a95522a13b 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -11,6 +11,7 @@ /* express capability */ +#define PCI_EXP_VER1_SIZEOF 0x14 /* express capability of ver. 1 */ #define PCI_EXP_VER2_SIZEOF 0x3c /* express capability of ver. 2 */ #define PCI_EXT_CAP_VER_SHIFT 16 #define PCI_EXT_CAP_NEXT_SHIFT 20 @@ -26,11 +27,11 @@ (((x) + PCI_EXT_CAP_ALIGN - 1) & ~(PCI_EXT_CAP_ALIGN - 1)) /* PCI_EXP_FLAGS */ -#define PCI_EXP_FLAGS_VER2 2 /* for now, supports only ver. 2 */ +#define PCI_EXP_FLAGS_VER1 1 +#define PCI_EXP_FLAGS_VER2 2 #define PCI_EXP_FLAGS_IRQ_SHIFT ctz32(PCI_EXP_FLAGS_IRQ) #define PCI_EXP_FLAGS_TYPE_SHIFT ctz32(PCI_EXP_FLAGS_TYPE) - /* PCI_EXP_LINK{CAP, STA} */ /* link speed */ #define PCI_EXP_LNK_LS_25 1 diff --git a/include/net/checksum.h b/include/net/checksum.h index 7de1acb79a..7df472c2fe 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -18,6 +18,7 @@ #ifndef QEMU_NET_CHECKSUM_H #define QEMU_NET_CHECKSUM_H +#include "qemu/bswap.h" struct iovec; uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq); @@ -45,9 +46,55 @@ net_raw_checksum(uint8_t *data, int length) * @iov_cnt: number of array elements * @iov_off: starting iov offset for checksumming * @size: length of data to be checksummed + * @csum_offset: offset of the checksum chunk */ uint32_t net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, - uint32_t iov_off, uint32_t size); + uint32_t iov_off, uint32_t size, + uint32_t csum_offset); + +typedef struct toeplitz_key_st { + uint32_t leftmost_32_bits; + uint8_t *next_byte; +} net_toeplitz_key; + +static inline +void net_toeplitz_key_init(net_toeplitz_key *key, uint8_t *key_bytes) +{ + key->leftmost_32_bits = be32_to_cpu(*(uint32_t *)key_bytes); + key->next_byte = key_bytes + sizeof(uint32_t); +} + +static inline +void net_toeplitz_add(uint32_t *result, + uint8_t *input, + uint32_t len, + net_toeplitz_key *key) +{ + register uint32_t accumulator = *result; + register uint32_t leftmost_32_bits = key->leftmost_32_bits; + register uint32_t byte; + + for (byte = 0; byte < len; byte++) { + register uint8_t input_byte = input[byte]; + register uint8_t key_byte = *(key->next_byte++); + register uint8_t bit; + + for (bit = 0; bit < 8; bit++) { + if (input_byte & (1 << 7)) { + accumulator ^= leftmost_32_bits; + } + + leftmost_32_bits = + (leftmost_32_bits << 1) | ((key_byte & (1 << 7)) >> 7); + + input_byte <<= 1; + key_byte <<= 1; + } + } + + key->leftmost_32_bits = leftmost_32_bits; + *result = accumulator; +} #endif /* QEMU_NET_CHECKSUM_H */ diff --git a/include/net/eth.h b/include/net/eth.h index 18d0be3b16..2013175857 100644 --- a/include/net/eth.h +++ b/include/net/eth.h @@ -67,6 +67,16 @@ typedef struct tcp_header { uint16_t th_urp; /* urgent pointer */ } tcp_header; +#define TCP_FLAGS_ONLY(flags) ((flags) & 0x3f) + +#define TCP_HEADER_FLAGS(tcp) \ + TCP_FLAGS_ONLY(be16_to_cpu((tcp)->th_offset_flags)) + +#define TCP_FLAG_ACK 0x10 + +#define TCP_HEADER_DATA_OFFSET(tcp) \ + (((be16_to_cpu((tcp)->th_offset_flags) >> 12) & 0xf) << 2) + typedef struct udp_header { uint16_t uh_sport; /* source port */ uint16_t uh_dport; /* destination port */ @@ -108,11 +118,34 @@ struct ip6_header { struct in6_address ip6_dst; /* destination address */ }; +typedef struct ip6_pseudo_header { + struct in6_address ip6_src; + struct in6_address ip6_dst; + uint32_t len; + uint8_t zero[3]; + uint8_t next_hdr; +} ip6_pseudo_header; + struct ip6_ext_hdr { uint8_t ip6r_nxt; /* next header */ uint8_t ip6r_len; /* length in units of 8 octets */ }; +struct ip6_ext_hdr_routing { + uint8_t nxt; + uint8_t len; + uint8_t rtype; + uint8_t segleft; + uint8_t rsvd[4]; +}; + +struct ip6_option_hdr { +#define IP6_OPT_PAD1 (0x00) +#define IP6_OPT_HOME (0xC9) + uint8_t type; + uint8_t len; +}; + struct udp_hdr { uint16_t uh_sport; /* source port */ uint16_t uh_dport; /* destination port */ @@ -161,19 +194,22 @@ struct tcp_hdr { #define PKT_GET_IP_HDR(p) \ ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p))) #define IP_HDR_GET_LEN(p) \ - ((((struct ip_header *)p)->ip_ver_len & 0x0F) << 2) + ((((struct ip_header *)(p))->ip_ver_len & 0x0F) << 2) #define PKT_GET_IP_HDR_LEN(p) \ (IP_HDR_GET_LEN(PKT_GET_IP_HDR(p))) #define PKT_GET_IP6_HDR(p) \ ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p))) #define IP_HEADER_VERSION(ip) \ - ((ip->ip_ver_len >> 4)&0xf) + (((ip)->ip_ver_len >> 4) & 0xf) +#define IP4_IS_FRAGMENT(ip) \ + ((be16_to_cpu((ip)->ip_off) & (IP_OFFMASK | IP_MF)) != 0) #define ETH_P_IP (0x0800) /* Internet Protocol packet */ #define ETH_P_ARP (0x0806) /* Address Resolution packet */ #define ETH_P_IPV6 (0x86dd) #define ETH_P_VLAN (0x8100) #define ETH_P_DVLAN (0x88a8) +#define ETH_P_UNKNOWN (0xffff) #define VLAN_VID_MASK 0x0fff #define IP_HEADER_VERSION_4 (4) #define IP_HEADER_VERSION_6 (6) @@ -258,7 +294,7 @@ eth_get_l2_hdr_length(const void *p) case ETH_P_VLAN: return sizeof(struct eth_header) + sizeof(struct vlan_header); case ETH_P_DVLAN: - if (hvlan->h_proto == ETH_P_VLAN) { + if (be16_to_cpu(hvlan->h_proto) == ETH_P_VLAN) { return sizeof(struct eth_header) + 2 * sizeof(struct vlan_header); } else { return sizeof(struct eth_header) + sizeof(struct vlan_header); @@ -268,6 +304,19 @@ eth_get_l2_hdr_length(const void *p) } } +static inline uint32_t +eth_get_l2_hdr_length_iov(const struct iovec *iov, int iovcnt) +{ + uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)]; + size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p)); + + if (copied < ARRAY_SIZE(p)) { + return copied; + } + + return eth_get_l2_hdr_length(p); +} + static inline uint16_t eth_get_pkt_tci(const void *p) { @@ -282,51 +331,67 @@ eth_get_pkt_tci(const void *p) } } -static inline bool -eth_strip_vlan(const void *p, uint8_t *new_ehdr_buf, - uint16_t *payload_offset, uint16_t *tci) -{ - uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto); - struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p); - struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; +bool +eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, + uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci); - switch (proto) { - case ETH_P_VLAN: - case ETH_P_DVLAN: - memcpy(new_ehdr->h_source, PKT_GET_ETH_HDR(p)->h_source, ETH_ALEN); - memcpy(new_ehdr->h_dest, PKT_GET_ETH_HDR(p)->h_dest, ETH_ALEN); - new_ehdr->h_proto = hvlan->h_proto; - *tci = be16_to_cpu(hvlan->h_tci); - *payload_offset = - sizeof(struct eth_header) + sizeof(struct vlan_header); - if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { - memcpy(PKT_GET_VLAN_HDR(new_ehdr), - PKT_GET_DVLAN_HDR(p), - sizeof(struct vlan_header)); - *payload_offset += sizeof(struct vlan_header); - } - return true; - default: - return false; - } -} +bool +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, + uint16_t vet, uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci); -static inline uint16_t -eth_get_l3_proto(const void *l2hdr, size_t l2hdr_len) +uint16_t +eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len); + +void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, + uint16_t vlan_ethtype, bool *is_new); + +static inline void +eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, + bool *is_new) { - uint8_t *proto_ptr = (uint8_t *) l2hdr + l2hdr_len - sizeof(uint16_t); - return be16_to_cpup((uint16_t *)proto_ptr); + eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new); } -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, - bool *is_new); uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto); -void eth_get_protocols(const uint8_t *headers, - uint32_t hdr_length, +typedef struct eth_ip6_hdr_info_st { + uint8_t l4proto; + size_t full_hdr_len; + struct ip6_header ip6_hdr; + bool has_ext_hdrs; + bool rss_ex_src_valid; + struct in6_address rss_ex_src; + bool rss_ex_dst_valid; + struct in6_address rss_ex_dst; + bool fragment; +} eth_ip6_hdr_info; + +typedef struct eth_ip4_hdr_info_st { + struct ip_header ip4_hdr; + bool fragment; +} eth_ip4_hdr_info; + +typedef struct eth_l4_hdr_info_st { + union { + struct tcp_header tcp; + struct udp_header udp; + } hdr; + + bool has_tcp_data; +} eth_l4_hdr_info; + +void eth_get_protocols(const struct iovec *iov, int iovcnt, bool *isip4, bool *isip6, - bool *isudp, bool *istcp); + bool *isudp, bool *istcp, + size_t *l3hdr_off, + size_t *l4hdr_off, + size_t *l5hdr_off, + eth_ip6_hdr_info *ip6hdr_info, + eth_ip4_hdr_info *ip4hdr_info, + eth_l4_hdr_info *l4hdr_info); void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, void *l3hdr, size_t l3hdr_len, @@ -337,11 +402,18 @@ void eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len); uint32_t -eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl); +eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, + uint16_t csl, + uint32_t *cso); + +uint32_t +eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, + uint16_t csl, + uint8_t l4_proto, + uint32_t *cso); bool -eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags, - size_t ip6hdr_off, uint8_t *l4proto, - size_t *full_hdr_len); +eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, eth_ip6_hdr_info *info); #endif diff --git a/include/net/net.h b/include/net/net.h index 73e4c466e2..a69e382ba7 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -9,6 +9,11 @@ #include "migration/vmstate.h" #include "qapi-types.h" +#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X" +#define MAC_ARG(x) ((uint8_t *)(x))[0], ((uint8_t *)(x))[1], \ + ((uint8_t *)(x))[2], ((uint8_t *)(x))[3], \ + ((uint8_t *)(x))[4], ((uint8_t *)(x))[5] + #define MAX_QUEUE_NUM 1024 /* Maximum GSO packet size (64k) plus plenty of room for @@ -57,6 +62,8 @@ typedef void (SetOffload)(NetClientState *, int, int, int, int, int); typedef void (SetVnetHdrLen)(NetClientState *, int); typedef int (SetVnetLE)(NetClientState *, bool); typedef int (SetVnetBE)(NetClientState *, bool); +typedef struct SocketReadState SocketReadState; +typedef void (SocketReadStateFinalize)(SocketReadState *rs); typedef struct NetClientInfo { NetClientOptionsKind type; @@ -102,6 +109,15 @@ typedef struct NICState { bool peer_deleted; } NICState; +struct SocketReadState { + int state; /* 0 = getting length, 1 = getting data */ + uint32_t index; + uint32_t packet_len; + uint8_t buf[NET_BUFSIZE]; + SocketReadStateFinalize *finalize; +}; + +int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size); char *qemu_mac_strdup_printf(const uint8_t *macaddr); NetClientState *qemu_find_netdev(const char *id); int qemu_find_net_clients_except(const char *id, NetClientState **ncs, @@ -160,6 +176,8 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, void print_net_client(Monitor *mon, NetClientState *nc); void hmp_info_network(Monitor *mon, const QDict *qdict); +void net_socket_rs_init(SocketReadState *rs, + SocketReadStateFinalize *finalize); /* NIC info */ @@ -178,7 +196,6 @@ struct NICInfo { extern int nb_nics; extern NICInfo nd_table[MAX_NICS]; -extern int default_net; extern const char *host_net_devices[]; /* from net.c */ diff --git a/net/checksum.c b/net/checksum.c index d0fa424cc1..23323b0760 100644 --- a/net/checksum.c +++ b/net/checksum.c @@ -18,9 +18,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "net/checksum.h" - -#define PROTO_TCP 6 -#define PROTO_UDP 17 +#include "net/eth.h" uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq) { @@ -57,50 +55,118 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto, void net_checksum_calculate(uint8_t *data, int length) { - int hlen, plen, proto, csum_offset; - uint16_t csum; + int mac_hdr_len, ip_len; + struct ip_header *ip; + + /* + * Note: We cannot assume "data" is aligned, so the all code uses + * some macros that take care of possible unaligned access for + * struct members (just in case). + */ - /* Ensure data has complete L2 & L3 headers. */ - if (length < 14 + 20) { + /* Ensure we have at least an Eth header */ + if (length < sizeof(struct eth_header)) { return; } - if ((data[14] & 0xf0) != 0x40) - return; /* not IPv4 */ - hlen = (data[14] & 0x0f) * 4; - plen = (data[16] << 8 | data[17]) - hlen; - proto = data[23]; - - switch (proto) { - case PROTO_TCP: - csum_offset = 16; - break; - case PROTO_UDP: - csum_offset = 6; - break; + /* Handle the optionnal VLAN headers */ + switch (lduw_be_p(&PKT_GET_ETH_HDR(data)->h_proto)) { + case ETH_P_VLAN: + mac_hdr_len = sizeof(struct eth_header) + + sizeof(struct vlan_header); + break; + case ETH_P_DVLAN: + if (lduw_be_p(&PKT_GET_VLAN_HDR(data)->h_proto) == ETH_P_VLAN) { + mac_hdr_len = sizeof(struct eth_header) + + 2 * sizeof(struct vlan_header); + } else { + mac_hdr_len = sizeof(struct eth_header) + + sizeof(struct vlan_header); + } + break; default: - return; + mac_hdr_len = sizeof(struct eth_header); + break; } - if (plen < csum_offset + 2 || 14 + hlen + plen > length) { + length -= mac_hdr_len; + + /* Now check we have an IP header (with an optionnal VLAN header) */ + if (length < sizeof(struct ip_header)) { return; } - data[14+hlen+csum_offset] = 0; - data[14+hlen+csum_offset+1] = 0; - csum = net_checksum_tcpudp(plen, proto, data+14+12, data+14+hlen); - data[14+hlen+csum_offset] = csum >> 8; - data[14+hlen+csum_offset+1] = csum & 0xff; + ip = (struct ip_header *)(data + mac_hdr_len); + + if (IP_HEADER_VERSION(ip) != IP_HEADER_VERSION_4) { + return; /* not IPv4 */ + } + + ip_len = lduw_be_p(&ip->ip_len); + + /* Last, check that we have enough data for the all IP frame */ + if (length < ip_len) { + return; + } + + ip_len -= IP_HDR_GET_LEN(ip); + + switch (ip->ip_p) { + case IP_PROTO_TCP: + { + uint16_t csum; + tcp_header *tcp = (tcp_header *)(ip + 1); + + if (ip_len < sizeof(tcp_header)) { + return; + } + + /* Set csum to 0 */ + stw_he_p(&tcp->th_sum, 0); + + csum = net_checksum_tcpudp(ip_len, ip->ip_p, + (uint8_t *)&ip->ip_src, + (uint8_t *)tcp); + + /* Store computed csum */ + stw_be_p(&tcp->th_sum, csum); + + break; + } + case IP_PROTO_UDP: + { + uint16_t csum; + udp_header *udp = (udp_header *)(ip + 1); + + if (ip_len < sizeof(udp_header)) { + return; + } + + /* Set csum to 0 */ + stw_he_p(&udp->uh_sum, 0); + + csum = net_checksum_tcpudp(ip_len, ip->ip_p, + (uint8_t *)&ip->ip_src, + (uint8_t *)udp); + + /* Store computed csum */ + stw_be_p(&udp->uh_sum, csum); + + break; + } + default: + /* Can't handle any other protocol */ + break; + } } uint32_t net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, - uint32_t iov_off, uint32_t size) + uint32_t iov_off, uint32_t size, uint32_t csum_offset) { size_t iovec_off, buf_off; unsigned int i; uint32_t res = 0; - uint32_t seq = 0; iovec_off = 0; buf_off = 0; @@ -109,8 +175,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt, size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off); - res += net_checksum_add_cont(len, chunk_buf, seq); - seq += len; + res += net_checksum_add_cont(len, chunk_buf, csum_offset); + csum_offset += len; buf_off += len; iov_off += len; @@ -21,8 +21,8 @@ #include "qemu-common.h" #include "net/tap.h" -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, - bool *is_new) +void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, + uint16_t vlan_ethtype, bool *is_new) { struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); @@ -36,7 +36,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, default: /* No VLAN header, put a new one */ vhdr->h_proto = ehdr->h_proto; - ehdr->h_proto = cpu_to_be16(ETH_P_VLAN); + ehdr->h_proto = cpu_to_be16(vlan_ethtype); *is_new = true; break; } @@ -79,26 +79,100 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) return VIRTIO_NET_HDR_GSO_NONE | ecn_state; } -void eth_get_protocols(const uint8_t *headers, - uint32_t hdr_length, +uint16_t +eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len) +{ + uint16_t proto; + size_t copied; + size_t size = iov_size(l2hdr_iov, iovcnt); + size_t proto_offset = l2hdr_len - sizeof(proto); + + if (size < proto_offset) { + return ETH_P_UNKNOWN; + } + + copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset, + &proto, sizeof(proto)); + + return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN; +} + +static bool +_eth_copy_chunk(size_t input_size, + const struct iovec *iov, int iovcnt, + size_t offset, size_t length, + void *buffer) +{ + size_t copied; + + if (input_size < offset) { + return false; + } + + copied = iov_to_buf(iov, iovcnt, offset, buffer, length); + + if (copied < length) { + return false; + } + + return true; +} + +static bool +_eth_tcp_has_data(bool is_ip4, + const struct ip_header *ip4_hdr, + const struct ip6_header *ip6_hdr, + size_t full_ip6hdr_len, + const struct tcp_header *tcp) +{ + uint32_t l4len; + + if (is_ip4) { + l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr); + } else { + size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len; + } + + return l4len > TCP_HEADER_DATA_OFFSET(tcp); +} + +void eth_get_protocols(const struct iovec *iov, int iovcnt, bool *isip4, bool *isip6, - bool *isudp, bool *istcp) + bool *isudp, bool *istcp, + size_t *l3hdr_off, + size_t *l4hdr_off, + size_t *l5hdr_off, + eth_ip6_hdr_info *ip6hdr_info, + eth_ip4_hdr_info *ip4hdr_info, + eth_l4_hdr_info *l4hdr_info) { int proto; - size_t l2hdr_len = eth_get_l2_hdr_length(headers); - assert(hdr_length >= eth_get_l2_hdr_length(headers)); + bool fragment = false; + size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); + size_t input_size = iov_size(iov, iovcnt); + size_t copied; + *isip4 = *isip6 = *isudp = *istcp = false; - proto = eth_get_l3_proto(headers, l2hdr_len); + proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); + + *l3hdr_off = l2hdr_len; + if (proto == ETH_P_IP) { - *isip4 = true; + struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; - struct ip_header *iphdr; + if (input_size < l2hdr_len) { + return; + } + + copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); - assert(hdr_length >= - eth_get_l2_hdr_length(headers) + sizeof(struct ip_header)); + *isip4 = true; - iphdr = PKT_GET_IP_HDR(headers); + if (copied < sizeof(*iphdr)) { + return; + } if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { if (iphdr->ip_p == IP_PROTO_TCP) { @@ -107,24 +181,135 @@ void eth_get_protocols(const uint8_t *headers, *isudp = true; } } - } else if (proto == ETH_P_IPV6) { - uint8_t l4proto; - size_t full_ip6hdr_len; - struct iovec hdr_vec; - hdr_vec.iov_base = (void *) headers; - hdr_vec.iov_len = hdr_length; + ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); + *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); + + fragment = ip4hdr_info->fragment; + } else if (proto == ETH_P_IPV6) { *isip6 = true; - if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len, - &l4proto, &full_ip6hdr_len)) { - if (l4proto == IP_PROTO_TCP) { + if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, + ip6hdr_info)) { + if (ip6hdr_info->l4proto == IP_PROTO_TCP) { *istcp = true; - } else if (l4proto == IP_PROTO_UDP) { + } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) { *isudp = true; } + } else { + return; + } + + *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; + fragment = ip6hdr_info->fragment; + } + + if (!fragment) { + if (*istcp) { + *istcp = _eth_copy_chunk(input_size, + iov, iovcnt, + *l4hdr_off, sizeof(l4hdr_info->hdr.tcp), + &l4hdr_info->hdr.tcp); + + if (istcp) { + *l5hdr_off = *l4hdr_off + + TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp); + + l4hdr_info->has_tcp_data = + _eth_tcp_has_data(proto == ETH_P_IP, + &ip4hdr_info->ip4_hdr, + &ip6hdr_info->ip6_hdr, + *l4hdr_off - *l3hdr_off, + &l4hdr_info->hdr.tcp); + } + } else if (*isudp) { + *isudp = _eth_copy_chunk(input_size, + iov, iovcnt, + *l4hdr_off, sizeof(l4hdr_info->hdr.udp), + &l4hdr_info->hdr.udp); + *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); + } + } +} + +bool +eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, + uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci) +{ + struct vlan_header vlan_hdr; + struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; + + size_t copied = iov_to_buf(iov, iovcnt, iovoff, + new_ehdr, sizeof(*new_ehdr)); + + if (copied < sizeof(*new_ehdr)) { + return false; + } + + switch (be16_to_cpu(new_ehdr->h_proto)) { + case ETH_P_VLAN: + case ETH_P_DVLAN: + copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), + &vlan_hdr, sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; + } + + new_ehdr->h_proto = vlan_hdr.h_proto; + + *tci = be16_to_cpu(vlan_hdr.h_tci); + *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); + + if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { + + copied = iov_to_buf(iov, iovcnt, *payload_offset, + PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; + } + + *payload_offset += sizeof(vlan_hdr); + } + return true; + default: + return false; + } +} + +bool +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, + uint16_t vet, uint8_t *new_ehdr_buf, + uint16_t *payload_offset, uint16_t *tci) +{ + struct vlan_header vlan_hdr; + struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; + + size_t copied = iov_to_buf(iov, iovcnt, iovoff, + new_ehdr, sizeof(*new_ehdr)); + + if (copied < sizeof(*new_ehdr)) { + return false; + } + + if (be16_to_cpu(new_ehdr->h_proto) == vet) { + copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), + &vlan_hdr, sizeof(vlan_hdr)); + + if (copied < sizeof(vlan_hdr)) { + return false; } + + new_ehdr->h_proto = vlan_hdr.h_proto; + + *tci = be16_to_cpu(vlan_hdr.h_tci); + *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); + return true; } + + return false; } void @@ -133,7 +318,12 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, size_t l3payload_len, size_t frag_offset, bool more_frags) { - if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) { + const struct iovec l2vec = { + .iov_base = (void *) l2hdr, + .iov_len = l2hdr_len + }; + + if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) { uint16_t orig_flags; struct ip_header *iphdr = (struct ip_header *) l3hdr; uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; @@ -158,7 +348,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) } uint32_t -eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) +eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, + uint16_t csl, + uint32_t *cso) { struct ip_pseudo_header ipph; ipph.ip_src = iphdr->ip_src; @@ -166,7 +358,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl) ipph.ip_payload = cpu_to_be16(csl); ipph.ip_proto = iphdr->ip_p; ipph.zeros = 0; - return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph); + *cso = sizeof(ipph); + return net_checksum_add(*cso, (uint8_t *) &ipph); +} + +uint32_t +eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, + uint16_t csl, + uint8_t l4_proto, + uint32_t *cso) +{ + struct ip6_pseudo_header ipph; + ipph.ip6_src = iphdr->ip6_src; + ipph.ip6_dst = iphdr->ip6_dst; + ipph.len = cpu_to_be16(csl); + ipph.zero[0] = 0; + ipph.zero[1] = 0; + ipph.zero[2] = 0; + ipph.next_hdr = l4_proto; + *cso = sizeof(ipph); + return net_checksum_add(*cso, (uint8_t *)&ipph); } static bool @@ -186,33 +397,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type) } } -bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags, - size_t ip6hdr_off, uint8_t *l4proto, - size_t *full_hdr_len) +static bool +_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + size_t rthdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *dst_addr) +{ + struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; + + if ((rthdr->rtype == 2) && + (rthdr->len == sizeof(struct in6_address) / 8) && + (rthdr->segleft == 1)) { + + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read; + + if (input_size < rthdr_offset + sizeof(*ext_hdr)) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, + rthdr_offset + sizeof(*ext_hdr), + dst_addr, sizeof(dst_addr)); + + return bytes_read == sizeof(dst_addr); + } + + return false; +} + +static bool +_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags, + size_t dsthdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *src_addr) +{ + size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr); + struct ip6_option_hdr opthdr; + size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr); + + while (bytes_left > sizeof(opthdr)) { + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read, optlen; + + if (input_size < opt_offset) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset, + &opthdr, sizeof(opthdr)); + + if (bytes_read != sizeof(opthdr)) { + return false; + } + + optlen = (opthdr.type == IP6_OPT_PAD1) ? 1 + : (opthdr.len + sizeof(opthdr)); + + if (optlen > bytes_left) { + return false; + } + + if (opthdr.type == IP6_OPT_HOME) { + size_t input_size = iov_size(pkt, pkt_frags); + + if (input_size < opt_offset + sizeof(opthdr)) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, + opt_offset + sizeof(opthdr), + src_addr, sizeof(src_addr)); + + return bytes_read == sizeof(src_addr); + } + + opt_offset += optlen; + bytes_left -= optlen; + } + + return false; +} + +bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + size_t ip6hdr_off, eth_ip6_hdr_info *info) { - struct ip6_header ip6_hdr; struct ip6_ext_hdr ext_hdr; size_t bytes_read; + uint8_t curr_ext_hdr_type; + size_t input_size = iov_size(pkt, pkt_frags); + + info->rss_ex_dst_valid = false; + info->rss_ex_src_valid = false; + info->fragment = false; + + if (input_size < ip6hdr_off) { + return false; + } bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, - &ip6_hdr, sizeof(ip6_hdr)); - if (bytes_read < sizeof(ip6_hdr)) { + &info->ip6_hdr, sizeof(info->ip6_hdr)); + if (bytes_read < sizeof(info->ip6_hdr)) { return false; } - *full_hdr_len = sizeof(struct ip6_header); + info->full_hdr_len = sizeof(struct ip6_header); + + curr_ext_hdr_type = info->ip6_hdr.ip6_nxt; - if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) { - *l4proto = ip6_hdr.ip6_nxt; + if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) { + info->l4proto = info->ip6_hdr.ip6_nxt; + info->has_ext_hdrs = false; return true; } + info->has_ext_hdrs = true; + do { - bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len, + if (input_size < ip6hdr_off + info->full_hdr_len) { + return false; + } + + bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len, &ext_hdr, sizeof(ext_hdr)); - *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; - } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt)); - *l4proto = ext_hdr.ip6r_nxt; + if (bytes_read < sizeof(ext_hdr)) { + return false; + } + + if (curr_ext_hdr_type == IP6_ROUTING) { + info->rss_ex_dst_valid = + _eth_get_rss_ex_dst_addr(pkt, pkt_frags, + ip6hdr_off + info->full_hdr_len, + &ext_hdr, &info->rss_ex_dst); + } else if (curr_ext_hdr_type == IP6_DESTINATON) { + info->rss_ex_src_valid = + _eth_get_rss_ex_src_addr(pkt, pkt_frags, + ip6hdr_off + info->full_hdr_len, + &ext_hdr, &info->rss_ex_src); + } else if (curr_ext_hdr_type == IP6_FRAGMENT) { + info->fragment = true; + } + + info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; + curr_ext_hdr_type = ext_hdr.ip6r_nxt; + } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type)); + + info->l4proto = ext_hdr.ip6r_nxt; return true; } diff --git a/net/filter-mirror.c b/net/filter-mirror.c index c0c4dc60b6..35df37451d 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -40,10 +40,7 @@ typedef struct MirrorState { char *outdev; CharDriverState *chr_in; CharDriverState *chr_out; - int state; /* 0 = getting length, 1 = getting data */ - unsigned int index; - unsigned int packet_len; - uint8_t buf[REDIRECTOR_MAX_LEN]; + SocketReadState rs; } MirrorState; static int filter_mirror_send(CharDriverState *chr_out, @@ -108,51 +105,12 @@ static void redirector_chr_read(void *opaque, const uint8_t *buf, int size) { NetFilterState *nf = opaque; MirrorState *s = FILTER_REDIRECTOR(nf); - unsigned int l; - - while (size > 0) { - /* reassemble a packet from the network */ - switch (s->state) { /* 0 = getting length, 1 = getting data */ - case 0: - l = 4 - s->index; - if (l > size) { - l = size; - } - memcpy(s->buf + s->index, buf, l); - buf += l; - size -= l; - s->index += l; - if (s->index == 4) { - /* got length */ - s->packet_len = ntohl(*(uint32_t *)s->buf); - s->index = 0; - s->state = 1; - } - break; - case 1: - l = s->packet_len - s->index; - if (l > size) { - l = size; - } - if (s->index + l <= sizeof(s->buf)) { - memcpy(s->buf + s->index, buf, l); - } else { - error_report("serious error: oversized packet received."); - s->index = s->state = 0; - qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); - return; - } - - s->index += l; - buf += l; - size -= l; - if (s->index >= s->packet_len) { - s->index = 0; - s->state = 0; - redirector_to_filter(nf, s->buf, s->packet_len); - } - break; - } + int ret; + + ret = net_fill_rstate(&s->rs, buf, size); + + if (ret == -1) { + qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); } } @@ -258,6 +216,14 @@ static void filter_mirror_setup(NetFilterState *nf, Error **errp) } } +static void redirector_rs_finalize(SocketReadState *rs) +{ + MirrorState *s = container_of(rs, MirrorState, rs); + NetFilterState *nf = NETFILTER(s); + + redirector_to_filter(nf, rs->buf, rs->packet_len); +} + static void filter_redirector_setup(NetFilterState *nf, Error **errp) { MirrorState *s = FILTER_REDIRECTOR(nf); @@ -274,7 +240,7 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) } } - s->state = s->index = 0; + net_socket_rs_init(&s->rs, redirector_rs_finalize); if (s->indev) { s->chr_in = qemu_chr_find(s->indev); @@ -76,8 +76,6 @@ const char *host_net_devices[] = { NULL, }; -int default_net = 1; - /***********************************************************/ /* network device redirectors */ @@ -1415,18 +1413,6 @@ void net_check_clients(void) NetClientState *nc; int i; - /* Don't warn about the default network setup that you get if - * no command line -net or -netdev options are specified. There - * are two cases that we would otherwise complain about: - * (1) board doesn't support a NIC but the implicit "-net nic" - * requested one - * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic" - * sets up a nic that isn't connected to anything. - */ - if (default_net) { - return; - } - net_hub_check_clients(); QTAILQ_FOREACH(nc, &net_clients, next) { @@ -1483,14 +1469,6 @@ int net_init_clients(void) { QemuOptsList *net = qemu_find_opts("net"); - if (default_net) { - /* if no clients, we use a default config */ - qemu_opts_set(net, NULL, "type", "nic", &error_abort); -#ifdef CONFIG_SLIRP - qemu_opts_set(net, NULL, "type", "user", &error_abort); -#endif - } - net_change_state_entry = qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL); @@ -1521,7 +1499,6 @@ int net_client_parse(QemuOptsList *opts_list, const char *optarg) return -1; } - default_net = 0; return 0; } @@ -1573,3 +1550,73 @@ QemuOptsList qemu_net_opts = { { /* end of list */ } }, }; + +void net_socket_rs_init(SocketReadState *rs, + SocketReadStateFinalize *finalize) +{ + rs->state = 0; + rs->index = 0; + rs->packet_len = 0; + memset(rs->buf, 0, sizeof(rs->buf)); + rs->finalize = finalize; +} + +/* + * Returns + * 0: SocketReadState is not ready + * 1: SocketReadState is ready + * otherwise error occurs + */ +int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) +{ + unsigned int l; + + while (size > 0) { + /* reassemble a packet from the network */ + switch (rs->state) { /* 0 = getting length, 1 = getting data */ + case 0: + l = 4 - rs->index; + if (l > size) { + l = size; + } + memcpy(rs->buf + rs->index, buf, l); + buf += l; + size -= l; + rs->index += l; + if (rs->index == 4) { + /* got length */ + rs->packet_len = ntohl(*(uint32_t *)rs->buf); + rs->index = 0; + rs->state = 1; + } + break; + case 1: + l = rs->packet_len - rs->index; + if (l > size) { + l = size; + } + if (rs->index + l <= sizeof(rs->buf)) { + memcpy(rs->buf + rs->index, buf, l); + } else { + fprintf(stderr, "serious error: oversized packet received," + "connection terminated.\n"); + rs->index = rs->state = 0; + return -1; + } + + rs->index += l; + buf += l; + size -= l; + if (rs->index >= rs->packet_len) { + rs->index = 0; + rs->state = 0; + if (rs->finalize) { + rs->finalize(rs); + } + return 1; + } + break; + } + } + return 0; +} diff --git a/net/socket.c b/net/socket.c index 9fa2cd8d51..333fb9ecfa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -38,11 +38,8 @@ typedef struct NetSocketState { NetClientState nc; int listen_fd; int fd; - int state; /* 0 = getting length, 1 = getting data */ - unsigned int index; - unsigned int packet_len; + SocketReadState rs; unsigned int send_index; /* number of bytes sent (only SOCK_STREAM) */ - uint8_t buf[NET_BUFSIZE]; struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */ IOHandler *send_fn; /* differs between SOCK_STREAM/SOCK_DGRAM */ bool read_poll; /* waiting to receive data? */ @@ -143,11 +140,22 @@ static void net_socket_send_completed(NetClientState *nc, ssize_t len) } } +static void net_socket_rs_finalize(SocketReadState *rs) +{ + NetSocketState *s = container_of(rs, NetSocketState, rs); + + if (qemu_send_packet_async(&s->nc, rs->buf, + rs->packet_len, + net_socket_send_completed) == 0) { + net_socket_read_poll(s, false); + } +} + static void net_socket_send(void *opaque) { NetSocketState *s = opaque; int size; - unsigned l; + int ret; uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; @@ -166,61 +174,18 @@ static void net_socket_send(void *opaque) closesocket(s->fd); s->fd = -1; - s->state = 0; - s->index = 0; - s->packet_len = 0; + net_socket_rs_init(&s->rs, net_socket_rs_finalize); s->nc.link_down = true; - memset(s->buf, 0, sizeof(s->buf)); memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); return; } buf = buf1; - while (size > 0) { - /* reassemble a packet from the network */ - switch(s->state) { - case 0: - l = 4 - s->index; - if (l > size) - l = size; - memcpy(s->buf + s->index, buf, l); - buf += l; - size -= l; - s->index += l; - if (s->index == 4) { - /* got length */ - s->packet_len = ntohl(*(uint32_t *)s->buf); - s->index = 0; - s->state = 1; - } - break; - case 1: - l = s->packet_len - s->index; - if (l > size) - l = size; - if (s->index + l <= sizeof(s->buf)) { - memcpy(s->buf + s->index, buf, l); - } else { - fprintf(stderr, "serious error: oversized packet received," - "connection terminated.\n"); - s->state = 0; - goto eoc; - } - s->index += l; - buf += l; - size -= l; - if (s->index >= s->packet_len) { - s->index = 0; - s->state = 0; - if (qemu_send_packet_async(&s->nc, s->buf, s->packet_len, - net_socket_send_completed) == 0) { - net_socket_read_poll(s, false); - break; - } - } - break; - } + ret = net_fill_rstate(&s->rs, buf, size); + + if (ret == -1) { + goto eoc; } } @@ -229,7 +194,7 @@ static void net_socket_send_dgram(void *opaque) NetSocketState *s = opaque; int size; - size = qemu_recv(s->fd, s->buf, sizeof(s->buf), 0); + size = qemu_recv(s->fd, s->rs.buf, sizeof(s->rs.buf), 0); if (size < 0) return; if (size == 0) { @@ -238,7 +203,7 @@ static void net_socket_send_dgram(void *opaque) net_socket_write_poll(s, false); return; } - if (qemu_send_packet_async(&s->nc, s->buf, size, + if (qemu_send_packet_async(&s->nc, s->rs.buf, size, net_socket_send_completed) == 0) { net_socket_read_poll(s, false); } @@ -401,6 +366,7 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, s->fd = fd; s->listen_fd = -1; s->send_fn = net_socket_send_dgram; + net_socket_rs_init(&s->rs, net_socket_rs_finalize); net_socket_read_poll(s, true); /* mcast: save bound address as dst */ @@ -451,6 +417,7 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, s->fd = fd; s->listen_fd = -1; + net_socket_rs_init(&s->rs, net_socket_rs_finalize); /* Disable Nagle algorithm on TCP sockets to reduce latency */ socket_set_nodelay(fd); @@ -769,8 +769,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, return -1; } } else if (tap->has_fds) { - char *fds[MAX_TAP_QUEUES]; - char *vhost_fds[MAX_TAP_QUEUES]; + char **fds = g_new(char *, MAX_TAP_QUEUES); + char **vhost_fds = g_new(char *, MAX_TAP_QUEUES); int nfds, nvhosts; if (tap->has_ifname || tap->has_script || tap->has_downscript || @@ -818,6 +818,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, return -1; } } + g_free(fds); + g_free(vhost_fds); } else if (tap->has_helper) { if (tap->has_ifname || tap->has_script || tap->has_downscript || tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) { diff --git a/tests/Makefile b/tests/Makefile index 4bc041c8c7..a3e20e39ec 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -142,6 +142,8 @@ gcov-files-virtio-y += $(gcov-files-virtioserial-y) check-qtest-pci-y += tests/e1000-test$(EXESUF) gcov-files-pci-y += hw/net/e1000.c +check-qtest-pci-y += tests/e1000e-test$(EXESUF) +gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c check-qtest-pci-y += tests/rtl8139-test$(EXESUF) gcov-files-pci-y += hw/net/rtl8139.c check-qtest-pci-y += tests/pcnet-test$(EXESUF) @@ -198,8 +200,8 @@ check-qtest-i386-y += $(check-qtest-pci-y) gcov-files-i386-y += $(gcov-files-pci-y) check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) gcov-files-i386-y += hw/net/vmxnet3.c -gcov-files-i386-y += hw/net/vmxnet_rx_pkt.c -gcov-files-i386-y += hw/net/vmxnet_tx_pkt.c +gcov-files-i386-y += hw/net/net_rx_pkt.c +gcov-files-i386-y += hw/net/net_tx_pkt.c check-qtest-i386-y += tests/pvpanic-test$(EXESUF) gcov-files-i386-y += i386-softmmu/hw/misc/pvpanic.c check-qtest-i386-y += tests/i82801b11-test$(EXESUF) @@ -551,6 +553,7 @@ tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) tests/q35-test$(EXESUF): tests/q35-test.o $(libqos-pc-obj-y) tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) tests/e1000-test$(EXESUF): tests/e1000-test.o +tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) tests/pcnet-test$(EXESUF): tests/pcnet-test.o tests/eepro100-test$(EXESUF): tests/eepro100-test.o diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c new file mode 100644 index 0000000000..dbf4859f88 --- /dev/null +++ b/tests/e1000e-test.c @@ -0,0 +1,479 @@ + /* + * QTest testcase for e1000e NIC + * + * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman <dmitry@daynix.com> + * Leonid Bloch <leonid@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + + +#include "qemu/osdep.h" +#include <glib.h> +#include "libqtest.h" +#include "qemu-common.h" +#include "libqos/pci-pc.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/bitops.h" +#include "libqos/malloc.h" +#include "libqos/malloc-pc.h" +#include "libqos/malloc-generic.h" + +#define E1000E_IMS (0x00d0) + +#define E1000E_STATUS (0x0008) +#define E1000E_STATUS_LU BIT(1) +#define E1000E_STATUS_ASDV1000 BIT(9) + +#define E1000E_CTRL (0x0000) +#define E1000E_CTRL_RESET BIT(26) + +#define E1000E_RCTL (0x0100) +#define E1000E_RCTL_EN BIT(1) +#define E1000E_RCTL_UPE BIT(3) +#define E1000E_RCTL_MPE BIT(4) + +#define E1000E_RFCTL (0x5008) +#define E1000E_RFCTL_EXTEN BIT(15) + +#define E1000E_TCTL (0x0400) +#define E1000E_TCTL_EN BIT(1) + +#define E1000E_CTRL_EXT (0x0018) +#define E1000E_CTRL_EXT_DRV_LOAD BIT(28) +#define E1000E_CTRL_EXT_TXLSFLOW BIT(22) + +#define E1000E_RX0_MSG_ID (0) +#define E1000E_TX0_MSG_ID (1) +#define E1000E_OTHER_MSG_ID (2) + +#define E1000E_IVAR (0x00E4) +#define E1000E_IVAR_TEST_CFG ((E1000E_RX0_MSG_ID << 0) | BIT(3) | \ + (E1000E_TX0_MSG_ID << 8) | BIT(11) | \ + (E1000E_OTHER_MSG_ID << 16) | BIT(19) | \ + BIT(31)) + +#define E1000E_RING_LEN (0x1000) +#define E1000E_TXD_LEN (16) +#define E1000E_RXD_LEN (16) + +#define E1000E_TDBAL (0x3800) +#define E1000E_TDBAH (0x3804) +#define E1000E_TDLEN (0x3808) +#define E1000E_TDH (0x3810) +#define E1000E_TDT (0x3818) + +#define E1000E_RDBAL (0x2800) +#define E1000E_RDBAH (0x2804) +#define E1000E_RDLEN (0x2808) +#define E1000E_RDH (0x2810) +#define E1000E_RDT (0x2818) + +typedef struct e1000e_device { + QPCIDevice *pci_dev; + void *mac_regs; + + uint64_t tx_ring; + uint64_t rx_ring; +} e1000e_device; + +static int test_sockets[2]; +static QGuestAllocator *test_alloc; +static QPCIBus *test_bus; + +static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data) +{ + *(QPCIDevice **) data = dev; +} + +static QPCIDevice *e1000e_device_find(QPCIBus *bus) +{ + static const int e1000e_vendor_id = 0x8086; + static const int e1000e_dev_id = 0x10D3; + + QPCIDevice *e1000e_dev = NULL; + + qpci_device_foreach(bus, e1000e_vendor_id, e1000e_dev_id, + e1000e_pci_foreach_callback, &e1000e_dev); + + g_assert_nonnull(e1000e_dev); + + return e1000e_dev; +} + +static void e1000e_macreg_write(e1000e_device *d, uint32_t reg, uint32_t val) +{ + qpci_io_writel(d->pci_dev, d->mac_regs + reg, val); +} + +static uint32_t e1000e_macreg_read(e1000e_device *d, uint32_t reg) +{ + return qpci_io_readl(d->pci_dev, d->mac_regs + reg); +} + +static void e1000e_device_init(QPCIBus *bus, e1000e_device *d) +{ + uint32_t val; + + d->pci_dev = e1000e_device_find(bus); + + /* Enable the device */ + qpci_device_enable(d->pci_dev); + + /* Map BAR0 (mac registers) */ + d->mac_regs = qpci_iomap(d->pci_dev, 0, NULL); + g_assert_nonnull(d->mac_regs); + + /* Reset the device */ + val = e1000e_macreg_read(d, E1000E_CTRL); + e1000e_macreg_write(d, E1000E_CTRL, val | E1000E_CTRL_RESET); + + /* Enable and configure MSI-X */ + qpci_msix_enable(d->pci_dev); + e1000e_macreg_write(d, E1000E_IVAR, E1000E_IVAR_TEST_CFG); + + /* Check the device status - link and speed */ + val = e1000e_macreg_read(d, E1000E_STATUS); + g_assert_cmphex(val & (E1000E_STATUS_LU | E1000E_STATUS_ASDV1000), + ==, E1000E_STATUS_LU | E1000E_STATUS_ASDV1000); + + /* Initialize TX/RX logic */ + e1000e_macreg_write(d, E1000E_RCTL, 0); + e1000e_macreg_write(d, E1000E_TCTL, 0); + + /* Notify the device that the driver is ready */ + val = e1000e_macreg_read(d, E1000E_CTRL_EXT); + e1000e_macreg_write(d, E1000E_CTRL_EXT, + val | E1000E_CTRL_EXT_DRV_LOAD | E1000E_CTRL_EXT_TXLSFLOW); + + /* Allocate and setup TX ring */ + d->tx_ring = guest_alloc(test_alloc, E1000E_RING_LEN); + g_assert(d->tx_ring != 0); + + e1000e_macreg_write(d, E1000E_TDBAL, (uint32_t) d->tx_ring); + e1000e_macreg_write(d, E1000E_TDBAH, (uint32_t) (d->tx_ring >> 32)); + e1000e_macreg_write(d, E1000E_TDLEN, E1000E_RING_LEN); + e1000e_macreg_write(d, E1000E_TDT, 0); + e1000e_macreg_write(d, E1000E_TDH, 0); + + /* Enable transmit */ + e1000e_macreg_write(d, E1000E_TCTL, E1000E_TCTL_EN); + + /* Allocate and setup RX ring */ + d->rx_ring = guest_alloc(test_alloc, E1000E_RING_LEN); + g_assert(d->rx_ring != 0); + + e1000e_macreg_write(d, E1000E_RDBAL, (uint32_t)d->rx_ring); + e1000e_macreg_write(d, E1000E_RDBAH, (uint32_t)(d->rx_ring >> 32)); + e1000e_macreg_write(d, E1000E_RDLEN, E1000E_RING_LEN); + e1000e_macreg_write(d, E1000E_RDT, 0); + e1000e_macreg_write(d, E1000E_RDH, 0); + + /* Enable receive */ + e1000e_macreg_write(d, E1000E_RFCTL, E1000E_RFCTL_EXTEN); + e1000e_macreg_write(d, E1000E_RCTL, E1000E_RCTL_EN | + E1000E_RCTL_UPE | + E1000E_RCTL_MPE); + + /* Enable all interrupts */ + e1000e_macreg_write(d, E1000E_IMS, 0xFFFFFFFF); +} + +static void e1000e_tx_ring_push(e1000e_device *d, void *descr) +{ + uint32_t tail = e1000e_macreg_read(d, E1000E_TDT); + uint32_t len = e1000e_macreg_read(d, E1000E_TDLEN) / E1000E_TXD_LEN; + + memwrite(d->tx_ring + tail * E1000E_TXD_LEN, descr, E1000E_TXD_LEN); + e1000e_macreg_write(d, E1000E_TDT, (tail + 1) % len); + + /* Read WB data for the packet transmitted */ + memread(d->tx_ring + tail * E1000E_TXD_LEN, descr, E1000E_TXD_LEN); +} + +static void e1000e_rx_ring_push(e1000e_device *d, void *descr) +{ + uint32_t tail = e1000e_macreg_read(d, E1000E_RDT); + uint32_t len = e1000e_macreg_read(d, E1000E_RDLEN) / E1000E_RXD_LEN; + + memwrite(d->rx_ring + tail * E1000E_RXD_LEN, descr, E1000E_RXD_LEN); + e1000e_macreg_write(d, E1000E_RDT, (tail + 1) % len); + + /* Read WB data for the packet received */ + memread(d->rx_ring + tail * E1000E_RXD_LEN, descr, E1000E_RXD_LEN); +} + +static void e1000e_wait_isr(e1000e_device *d, uint16_t msg_id) +{ + guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; + + do { + if (qpci_msix_pending(d->pci_dev, msg_id)) { + return; + } + clock_step(10000); + } while (g_get_monotonic_time() < end_time); + + g_error("Timeout expired"); +} + +static void e1000e_send_verify(e1000e_device *d) +{ + struct { + uint64_t buffer_addr; + union { + uint32_t data; + struct { + uint16_t length; + uint8_t cso; + uint8_t cmd; + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; + uint8_t css; + uint16_t special; + } fields; + } upper; + } descr; + + static const uint32_t dtyp_data = BIT(20); + static const uint32_t dtyp_ext = BIT(29); + static const uint32_t dcmd_rs = BIT(27); + static const uint32_t dcmd_eop = BIT(24); + static const uint32_t dsta_dd = BIT(0); + static const int data_len = 64; + char buffer[64]; + int ret; + uint32_t recv_len; + + /* Prepare test data buffer */ + uint64_t data = guest_alloc(test_alloc, data_len); + memwrite(data, "TEST", 5); + + /* Prepare TX descriptor */ + memset(&descr, 0, sizeof(descr)); + descr.buffer_addr = cpu_to_le64(data); + descr.lower.data = cpu_to_le32(dcmd_rs | + dcmd_eop | + dtyp_ext | + dtyp_data | + data_len); + + /* Put descriptor to the ring */ + e1000e_tx_ring_push(d, &descr); + + /* Wait for TX WB interrupt */ + e1000e_wait_isr(d, E1000E_TX0_MSG_ID); + + /* Check DD bit */ + g_assert_cmphex(le32_to_cpu(descr.upper.data) & dsta_dd, ==, dsta_dd); + + /* Check data sent to the backend */ + ret = qemu_recv(test_sockets[0], &recv_len, sizeof(recv_len), 0); + g_assert_cmpint(ret, == , sizeof(recv_len)); + qemu_recv(test_sockets[0], buffer, 64, 0); + g_assert_cmpstr(buffer, == , "TEST"); + + /* Free test data buffer */ + guest_free(test_alloc, data); +} + +static void e1000e_receive_verify(e1000e_device *d) +{ + union { + struct { + uint64_t buffer_addr; + uint64_t reserved; + } read; + struct { + struct { + uint32_t mrq; + union { + uint32_t rss; + struct { + uint16_t ip_id; + uint16_t csum; + } csum_ip; + } hi_dword; + } lower; + struct { + uint32_t status_error; + uint16_t length; + uint16_t vlan; + } upper; + } wb; + } descr; + + static const uint32_t esta_dd = BIT(0); + + char test[] = "TEST"; + int len = htonl(sizeof(test)); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + },{ + .iov_base = test, + .iov_len = sizeof(test), + }, + }; + + static const int data_len = 64; + char buffer[64]; + int ret; + + /* Send a dummy packet to device's socket*/ + ret = iov_send(test_sockets[0], iov, 2, 0, sizeof(len) + sizeof(test)); + g_assert_cmpint(ret, == , sizeof(test) + sizeof(len)); + + /* Prepare test data buffer */ + uint64_t data = guest_alloc(test_alloc, data_len); + + /* Prepare RX descriptor */ + memset(&descr, 0, sizeof(descr)); + descr.read.buffer_addr = cpu_to_le64(data); + + /* Put descriptor to the ring */ + e1000e_rx_ring_push(d, &descr); + + /* Wait for TX WB interrupt */ + e1000e_wait_isr(d, E1000E_RX0_MSG_ID); + + /* Check DD bit */ + g_assert_cmphex(le32_to_cpu(descr.wb.upper.status_error) & + esta_dd, ==, esta_dd); + + /* Check data sent to the backend */ + memread(data, buffer, sizeof(buffer)); + g_assert_cmpstr(buffer, == , "TEST"); + + /* Free test data buffer */ + guest_free(test_alloc, data); +} + +static void e1000e_device_clear(QPCIBus *bus, e1000e_device *d) +{ + qpci_iounmap(d->pci_dev, d->mac_regs); + qpci_msix_disable(d->pci_dev); +} + +static void data_test_init(e1000e_device *d) +{ + char *cmdline; + + int ret = socketpair(PF_UNIX, SOCK_STREAM, 0, test_sockets); + g_assert_cmpint(ret, != , -1); + + cmdline = g_strdup_printf("-netdev socket,fd=%d,id=hs0 " + "-device e1000e,netdev=hs0", test_sockets[1]); + g_assert_nonnull(cmdline); + + qtest_start(cmdline); + g_free(cmdline); + + test_bus = qpci_init_pc(); + g_assert_nonnull(test_bus); + + test_alloc = pc_alloc_init(); + g_assert_nonnull(test_alloc); + + e1000e_device_init(test_bus, d); +} + +static void data_test_clear(e1000e_device *d) +{ + e1000e_device_clear(test_bus, d); + close(test_sockets[0]); + pc_alloc_uninit(test_alloc); + qpci_free_pc(test_bus); + qtest_end(); +} + +static void test_e1000e_init(gconstpointer data) +{ + e1000e_device d; + + data_test_init(&d); + data_test_clear(&d); +} + +static void test_e1000e_tx(gconstpointer data) +{ + e1000e_device d; + + data_test_init(&d); + e1000e_send_verify(&d); + data_test_clear(&d); +} + +static void test_e1000e_rx(gconstpointer data) +{ + e1000e_device d; + + data_test_init(&d); + e1000e_receive_verify(&d); + data_test_clear(&d); +} + +static void test_e1000e_multiple_transfers(gconstpointer data) +{ + static const long iterations = 4 * 1024; + long i; + + e1000e_device d; + + data_test_init(&d); + + for (i = 0; i < iterations; i++) { + e1000e_send_verify(&d); + e1000e_receive_verify(&d); + } + + data_test_clear(&d); +} + +static void test_e1000e_hotplug(gconstpointer data) +{ + static const uint8_t slot = 0x06; + + qtest_start("-device e1000e"); + + qpci_plug_device_test("e1000e", "e1000e_net", slot, NULL); + qpci_unplug_acpi_device_test("e1000e_net", slot); + + qtest_end(); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_data_func("e1000e/init", NULL, test_e1000e_init); + qtest_add_data_func("e1000e/tx", NULL, test_e1000e_tx); + qtest_add_data_func("e1000e/rx", NULL, test_e1000e_rx); + qtest_add_data_func("e1000e/multiple_transfers", NULL, + test_e1000e_multiple_transfers); + qtest_add_data_func("e1000e/hotplug", NULL, test_e1000e_hotplug); + + return g_test_run(); +} diff --git a/trace-events b/trace-events index b27d1dab65..68ebac9d84 100644 --- a/trace-events +++ b/trace-events @@ -1946,3 +1946,216 @@ gic_set_irq(int irq, int level, int cpumask, int target) "irq %d level %d cpumas gic_update_bestirq(int cpu, int irq, int prio, int priority_mask, int running_priority) "cpu %d irq %d priority %d cpu priority mask %d cpu running priority %d" gic_update_set_irq(int cpu, const char *name, int level) "cpu[%d]: %s = %d" gic_acknowledge_irq(int cpu, int irq) "cpu %d acknowledged irq %d" + +# hw/net/net_rx_pkt.c +net_rx_pkt_parsed(bool ip4, bool ip6, bool udp, bool tcp, size_t l3o, size_t l4o, size_t l5o) "RX packet parsed: ip4: %d, ip6: %d, udp: %d, tcp: %d, l3 offset: %zu, l4 offset: %zu, l5 offset: %zu" +net_rx_pkt_l4_csum_validate_entry(void) "Starting L4 checksum validation" +net_rx_pkt_l4_csum_validate_not_xxp(void) "Not a TCP/UDP packet" +net_rx_pkt_l4_csum_validate_udp_with_no_checksum(void) "UDP packet without checksum" +net_rx_pkt_l4_csum_validate_ip4_fragment(void) "IP4 fragment" +net_rx_pkt_l4_csum_validate_ip4_udp(void) "IP4/UDP packet" +net_rx_pkt_l4_csum_validate_ip4_tcp(void) "IP4/TCP packet" +net_rx_pkt_l4_csum_validate_ip6_udp(void) "IP6/UDP packet" +net_rx_pkt_l4_csum_validate_ip6_tcp(void) "IP6/TCP packet" +net_rx_pkt_l4_csum_validate_csum(bool csum_valid) "Checksum valid: %d" + +net_rx_pkt_l4_csum_calc_entry(void) "Starting L4 checksum calculation" +net_rx_pkt_l4_csum_calc_ip4_udp(void) "IP4/UDP packet" +net_rx_pkt_l4_csum_calc_ip4_tcp(void) "IP4/TCP packet" +net_rx_pkt_l4_csum_calc_ip6_udp(void) "IP6/UDP packet" +net_rx_pkt_l4_csum_calc_ip6_tcp(void) "IP6/TCP packet" +net_rx_pkt_l4_csum_calc_ph_csum(uint32_t cntr, uint16_t csl) "Pseudo-header: checksum counter %u, length %u" +net_rx_pkt_l4_csum_calc_csum(size_t l4hdr_off, uint16_t csl, uint32_t cntr, uint16_t csum) "L4 Checksum: L4 header offset: %zu, length: %u, counter: 0x%X, final checksum: 0x%X" + +net_rx_pkt_l4_csum_fix_entry(void) "Starting L4 checksum correction" +net_rx_pkt_l4_csum_fix_tcp(uint32_t l4_cso) "TCP packet, L4 cso: %u" +net_rx_pkt_l4_csum_fix_udp(uint32_t l4_cso) "UDP packet, L4 cso: %u" +net_rx_pkt_l4_csum_fix_not_xxp(void) "Not an IP4 packet" +net_rx_pkt_l4_csum_fix_ip4_fragment(void) "IP4 fragment" +net_rx_pkt_l4_csum_fix_udp_with_no_checksum(void) "UDP packet without checksum" +net_rx_pkt_l4_csum_fix_csum(uint32_t cso, uint16_t csum) "L4 Checksum: Offset: %u, value 0x%X" + +net_rx_pkt_l3_csum_validate_entry(void) "Starting L3 checksum validation" +net_rx_pkt_l3_csum_validate_not_ip4(void) "Not an IP4 packet" +net_rx_pkt_l3_csum_validate_csum(size_t l3hdr_off, uint32_t csl, uint32_t cntr, uint16_t csum, bool csum_valid) "L3 Checksum: L3 header offset: %zu, length: %u, counter: 0x%X, final checksum: 0x%X, valid: %d" + +net_rx_pkt_rss_ip4(void) "Calculating IPv4 RSS hash" +net_rx_pkt_rss_ip4_tcp(void) "Calculating IPv4/TCP RSS hash" +net_rx_pkt_rss_ip6_tcp(void) "Calculating IPv6/TCP RSS hash" +net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash" +net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash" +net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X" +net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes" + +# hw/net/e1000x_common.c +e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d" +e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X" +e1000x_rx_flt_ucast_match(uint32_t idx, uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x" +e1000x_rx_flt_ucast_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x" +e1000x_rx_flt_inexact_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5, uint32_t mo, uint32_t mta, uint32_t mta_val) "inexact mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x" +e1000x_rx_link_down(uint32_t status_reg) "Received packet dropped because the link is down STATUS = %u" +e1000x_rx_disabled(uint32_t rctl_reg) "Received packet dropped because receive is disabled RCTL = %u" +e1000x_rx_oversized(size_t size) "Received packet dropped because it was oversized (%zu bytes)" +e1000x_mac_indicate(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Indicating MAC to guest: %02x:%02x:%02x:%02x:%02x:%02x" +e1000x_link_negotiation_start(void) "Start link auto negotiation" +e1000x_link_negotiation_done(void) "Auto negotiation is completed" + +# hw/net/e1000e_core.c +e1000e_core_write(uint64_t index, uint32_t size, uint64_t val) "Write to register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64 +e1000e_core_read(uint64_t index, uint32_t size, uint64_t val) "Read from register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64 +e1000e_core_mdic_read(uint8_t page, uint32_t addr, uint32_t data) "MDIC READ: PHY[%u][%u] = 0x%x" +e1000e_core_mdic_read_unhandled(uint8_t page, uint32_t addr) "MDIC READ: PHY[%u][%u] UNHANDLED" +e1000e_core_mdic_write(uint8_t page, uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u][%u] = 0x%x" +e1000e_core_mdic_write_unhandled(uint8_t page, uint32_t addr) "MDIC WRITE: PHY[%u][%u] UNHANDLED" +e1000e_core_eeeprom_write(uint16_t bit_in, uint16_t bit_out, uint16_t reading) "eeprom bitnum in %d out %d, reading %d" +e1000e_core_ctrl_write(uint64_t index, uint32_t val) "Write CTRL register 0x%"PRIx64", value: 0x%X" +e1000e_core_ctrl_sw_reset(void) "Doing SW reset" +e1000e_core_ctrl_phy_reset(void) "Doing PHY reset" + +e1000e_link_autoneg_flowctl(bool enabled) "Auto-negotiated flow control state is %d" +e1000e_link_set_params(bool autodetect, uint32_t speed, bool force_spd, bool force_dplx, bool rx_fctl, bool tx_fctl) "Set link params: Autodetect: %d, Speed: %d, Force speed: %d, Force duplex: %d, RX flow control %d, TX flow control %d" +e1000e_link_read_params(bool autodetect, uint32_t speed, bool force_spd, bool force_dplx, bool rx_fctl, bool tx_fctl) "Get link params: Autodetect: %d, Speed: %d, Force speed: %d, Force duplex: %d, RX flow control %d, TX flow control %d" +e1000e_link_set_ext_params(bool asd_check, bool speed_select_bypass) "Set extended link params: ASD check: %d, Speed select bypass: %d" +e1000e_link_status(bool link_up, bool full_dplx, uint32_t speed, uint32_t asdv) "Link up: %d, Duplex: %d, Speed: %d, ASDV: %d" +e1000e_link_status_changed(bool status) "New link status: %d" + +e1000e_wrn_regs_write_ro(uint64_t index, uint32_t size, uint64_t val) "WARNING: Write to RO register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64 +e1000e_wrn_regs_write_unknown(uint64_t index, uint32_t size, uint64_t val) "WARNING: Write to unknown register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64 +e1000e_wrn_regs_read_unknown(uint64_t index, uint32_t size) "WARNING: Read from unknown register 0x%"PRIx64", %d byte(s)" +e1000e_wrn_regs_read_trivial(uint32_t index) "WARNING: Reading register at offset: 0x%05x. It is not fully implemented." +e1000e_wrn_regs_write_trivial(uint32_t index) "WARNING: Writing to register at offset: 0x%05x. It is not fully implemented." +e1000e_wrn_no_ts_support(void) "WARNING: Guest requested TX timestamping which is not supported" +e1000e_wrn_no_snap_support(void) "WARNING: Guest requested TX SNAP header update which is not supported" +e1000e_wrn_iscsi_filtering_not_supported(void) "WARNING: Guest requested iSCSI filtering which is not supported" +e1000e_wrn_nfsw_filtering_not_supported(void) "WARNING: Guest requested NFS write filtering which is not supported" +e1000e_wrn_nfsr_filtering_not_supported(void) "WARNING: Guest requested NFS read filtering which is not supported" + +e1000e_tx_disabled(void) "TX Disabled" +e1000e_tx_descr(void *addr, uint32_t lower, uint32_t upper) "%p : %x %x" + +e1000e_ring_free_space(int ridx, uint32_t rdlen, uint32_t rdh, uint32_t rdt) "ring #%d: LEN: %u, DH: %u, DT: %u" + +e1000e_rx_can_recv_rings_full(void) "Cannot receive: all rings are full" +e1000e_rx_can_recv(void) "Can receive" +e1000e_rx_has_buffers(int ridx, uint32_t free_desc, size_t total_size, uint32_t desc_buf_size) "ring #%d: free descr: %u, packet size %zu, descr buffer size %u" +e1000e_rx_null_descriptor(void) "Null RX descriptor!!" +e1000e_rx_flt_vlan_mismatch(uint16_t vid) "VID mismatch: 0x%X" +e1000e_rx_flt_vlan_match(uint16_t vid) "VID match: 0x%X" +e1000e_rx_desc_ps_read(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) "buffers: [0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64"]" +e1000e_rx_desc_ps_write(uint16_t a0, uint16_t a1, uint16_t a2, uint16_t a3) "bytes written: [%u, %u, %u, %u]" +e1000e_rx_desc_buff_sizes(uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3) "buffer sizes: [%u, %u, %u, %u]" +e1000e_rx_desc_len(uint8_t rx_desc_len) "RX descriptor length: %u" +e1000e_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" +e1000e_rx_descr(int ridx, uint64_t base, uint8_t len) "Next RX descriptor: ring #%d, PA: 0x%"PRIx64", length: %u" +e1000e_rx_set_rctl(uint32_t rctl) "RCTL = 0x%x" +e1000e_rx_receive_iov(int iovcnt) "Received vector of %d fragments" +e1000e_rx_packet_size(size_t full, size_t vhdr, size_t data) "Received packet of %zu bytes total, %zu virt header, %zu data" +e1000e_rx_flt_dropped(void) "Received packet dropped by RX filter" +e1000e_rx_written_to_guest(uint32_t causes) "Received packet written to guest (ICR causes %u)" +e1000e_rx_not_written_to_guest(uint32_t causes) "Received packet NOT written to guest (ICR causes %u)" +e1000e_rx_interrupt_set(uint32_t causes) "Receive interrupt set (ICR causes %u)" +e1000e_rx_interrupt_delayed(uint32_t causes) "Receive interrupt delayed (ICR causes %u)" +e1000e_rx_set_cso(int cso_state) "RX CSO state set to %d" +e1000e_rx_set_rdt(int queue_idx, uint32_t val) "Setting RDT[%d] = %u" +e1000e_rx_set_rfctl(uint32_t val) "Setting RFCTL = 0x%X" +e1000e_rx_start_recv(void) + +e1000e_rx_rss_started(void) "Starting RSS processing" +e1000e_rx_rss_disabled(void) "RSS is disabled" +e1000e_rx_rss_type(uint32_t type) "RSS type is %u" +e1000e_rx_rss_ip4(bool isfragment, bool istcp, uint32_t mrqc, bool tcpipv4_enabled, bool ipv4_enabled) "RSS IPv4: fragment %d, tcp %d, mrqc 0x%X, tcpipv4 enabled %d, ipv4 enabled %d" +e1000e_rx_rss_ip6(uint32_t rfctl, bool ex_dis, bool new_ex_dis, bool istcp, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: rfctl 0x%X, ex_dis: %d, new_ex_dis: %d, tcp %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6 enabled %d, ipv6ex enabled %d, ipv6 enabled %d" +e1000e_rx_rss_dispatched_to_queue(int queue_idx) "Packet being dispatched to queue %d" + +e1000e_rx_metadata_protocols(bool isip4, bool isip6, bool isudp, bool istcp) "protocols: ip4: %d, ip6: %d, udp: %d, tcp: %d" +e1000e_rx_metadata_vlan(uint16_t vlan_tag) "VLAN tag is 0x%X" +e1000e_rx_metadata_rss(uint32_t rss, uint32_t mrq) "RSS data: rss: 0x%X, mrq: 0x%X" +e1000e_rx_metadata_ip_id(uint16_t ip_id) "the IPv4 ID is 0x%X" +e1000e_rx_metadata_ack(void) "the packet is TCP ACK" +e1000e_rx_metadata_pkt_type(uint32_t pkt_type) "the packet type is %u" +e1000e_rx_metadata_no_virthdr(void) "the packet has no virt-header" +e1000e_rx_metadata_virthdr_no_csum_info(void) "virt-header does not contain checksum info" +e1000e_rx_metadata_l3_cso_disabled(void) "IP4 CSO is disabled" +e1000e_rx_metadata_l4_cso_disabled(void) "TCP/UDP CSO is disabled" +e1000e_rx_metadata_l3_csum_validation_failed(void) "Cannot validate L3 checksum" +e1000e_rx_metadata_l4_csum_validation_failed(void) "Cannot validate L4 checksum" +e1000e_rx_metadata_status_flags(uint32_t status_flags) "status_flags is 0x%X" +e1000e_rx_metadata_ipv6_sum_disabled(void) "IPv6 RX checksummimg disabled by RFCTL" +e1000e_rx_metadata_ipv6_filtering_disabled(void) "IPv6 RX filtering disabled by RFCTL" + +e1000e_vlan_vet(uint16_t vet) "Setting VLAN ethernet type 0x%X" + +e1000e_irq_set_cause(uint32_t cause) "IRQ cause set 0x%x" +e1000e_irq_msi_notify(uint32_t cause) "MSI notify 0x%x" +e1000e_irq_throttling_no_pending_interrupts(void) "No pending interrupts to notify" +e1000e_irq_msi_notify_postponed(void) "Sending MSI postponed by ITR" +e1000e_irq_legacy_notify_postponed(void) "Raising legacy IRQ postponed by ITR" +e1000e_irq_throttling_no_pending_vec(int idx) "No pending interrupts for vector %d" +e1000e_irq_msix_notify_postponed_vec(int idx) "Sending MSI-X postponed by EITR[%d]" +e1000e_irq_msix_notify(uint32_t cause) "MSI-X notify 0x%x" +e1000e_irq_legacy_notify(bool level) "IRQ line state: %d" +e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x" +e1000e_irq_postponed_by_xitr(uint32_t reg) "Interrupt postponed by [E]ITR register 0x%x" +e1000e_irq_clear_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Clearing IMS bits 0x%x: 0x%x --> 0x%x" +e1000e_irq_set_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Setting IMS bits 0x%x: 0x%x --> 0x%x" +e1000e_irq_fix_icr_asserted(uint32_t new_val) "ICR_ASSERTED bit fixed: 0x%x" +e1000e_irq_add_msi_other(uint32_t new_val) "ICR_OTHER bit added: 0x%x" +e1000e_irq_pending_interrupts(uint32_t pending, uint32_t icr, uint32_t ims) "ICR PENDING: 0x%x (ICR: 0x%x, IMS: 0x%x)" +e1000e_irq_set_cause_entry(uint32_t val, uint32_t icr) "Going to set IRQ cause 0x%x, ICR: 0x%x" +e1000e_irq_set_cause_exit(uint32_t val, uint32_t icr) "Set IRQ cause 0x%x, ICR: 0x%x" +e1000e_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x" +e1000e_irq_write_ics(uint32_t val) "Adding ICR bits 0x%x" +e1000e_irq_icr_process_iame(void) "Clearing IMS bits due to IAME" +e1000e_irq_read_ics(uint32_t ics) "Current ICS: 0x%x" +e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x" +e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x" +e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x" +e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" +e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME" +e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" +e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X" +e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x" +e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts" +e1000e_irq_rearm_timer(uint32_t reg, int64_t delay_ns) "Mitigation timer armed for register 0x%X, delay %"PRId64" ns" +e1000e_irq_throttling_timer(uint32_t reg) "Mitigation timer shot for register 0x%X" +e1000e_irq_rdtr_fpd_running(void) "FPD written while RDTR was running" +e1000e_irq_rdtr_fpd_not_running(void) "FPD written while RDTR was not running" +e1000e_irq_tidv_fpd_running(void) "FPD written while TIDV was running" +e1000e_irq_tidv_fpd_not_running(void) "FPD written while TIDV was not running" +e1000e_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = %u" +e1000e_irq_itr_set(uint32_t val) "ITR = %u" +e1000e_irq_fire_all_timers(uint32_t val) "Firing all delay/throttling timers on all interrupts enable (0x%X written to IMS)" +e1000e_irq_adding_delayed_causes(uint32_t val, uint32_t icr) "Merging delayed causes 0x%X to ICR 0x%X" +e1000e_irq_msix_pending_clearing(uint32_t cause, uint32_t int_cfg, uint32_t vec) "Clearing MSI-X pending bit for cause 0x%x, IVAR config 0x%x, vector %u" + +e1000e_wrn_msix_vec_wrong(uint32_t cause, uint32_t cfg) "Invalid configuration for cause 0x%x: 0x%x" +e1000e_wrn_msix_invalid(uint32_t cause, uint32_t cfg) "Invalid entry for cause 0x%x: 0x%x" + +e1000e_mac_set_permanent(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Set permanent MAC: %02x:%02x:%02x:%02x:%02x:%02x" +e1000e_mac_set_sw(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Set SW MAC: %02x:%02x:%02x:%02x:%02x:%02x" + +# hw/net/e1000e.c +e1000e_cb_pci_realize(void) "E1000E PCI realize entry" +e1000e_cb_pci_uninit(void) "E1000E PCI unit entry" +e1000e_cb_qdev_reset(void) "E1000E qdev reset entry" +e1000e_cb_pre_save(void) "E1000E pre save entry" +e1000e_cb_post_load(void) "E1000E post load entry" + +e1000e_io_write_addr(uint64_t addr) "IOADDR write 0x%"PRIx64 +e1000e_io_write_data(uint64_t addr, uint64_t val) "IODATA write 0x%"PRIx64", value: 0x%"PRIx64 +e1000e_io_read_addr(uint64_t addr) "IOADDR read 0x%"PRIx64 +e1000e_io_read_data(uint64_t addr, uint64_t val) "IODATA read 0x%"PRIx64", value: 0x%"PRIx64 +e1000e_wrn_io_write_unknown(uint64_t addr) "IO write unknown address 0x%"PRIx64 +e1000e_wrn_io_read_unknown(uint64_t addr) "IO read unknown address 0x%"PRIx64 +e1000e_wrn_io_addr_undefined(uint64_t addr) "IO undefined register 0x%"PRIx64 +e1000e_wrn_io_addr_flash(uint64_t addr) "IO flash access (0x%"PRIx64") not implemented" +e1000e_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64 + +e1000e_msi_init_fail(int32_t res) "Failed to initialize MSI, error %d" +e1000e_msix_init_fail(int32_t res) "Failed to initialize MSI-X, error %d" +e1000e_msix_use_vector_fail(uint32_t vec, int32_t res) "Failed to use MSI-X vector %d, error %d" + +e1000e_cfg_support_virtio(bool support) "Virtio header supported: %d" + +e1000e_vm_state_running(void) "VM state is running" +e1000e_vm_state_stopped(void) "VM state is stopped" @@ -207,6 +207,7 @@ static int default_floppy = 1; static int default_cdrom = 1; static int default_sdcard = 1; static int default_vga = 1; +static int default_net = 1; static struct { const char *driver; @@ -3267,11 +3268,13 @@ int main(int argc, char **argv, char **envp) fd_bootchk = 0; break; case QEMU_OPTION_netdev: + default_net = 0; if (net_client_parse(qemu_find_opts("netdev"), optarg) == -1) { exit(1); } break; case QEMU_OPTION_net: + default_net = 0; if (net_client_parse(qemu_find_opts("net"), optarg) == -1) { exit(1); } @@ -4361,6 +4364,14 @@ int main(int argc, char **argv, char **envp) /* clean up network at qemu process termination */ atexit(&net_cleanup); + if (default_net) { + QemuOptsList *net = qemu_find_opts("net"); + qemu_opts_set(net, NULL, "type", "nic", &error_abort); +#ifdef CONFIG_SLIRP + qemu_opts_set(net, NULL, "type", "user", &error_abort); +#endif + } + if (net_init_clients() < 0) { exit(1); } @@ -4509,7 +4520,18 @@ int main(int argc, char **argv, char **envp) /* Did we create any drives that we failed to create a device for? */ drive_check_orphaned(); - net_check_clients(); + /* Don't warn about the default network setup that you get if + * no command line -net or -netdev options are specified. There + * are two cases that we would otherwise complain about: + * (1) board doesn't support a NIC but the implicit "-net nic" + * requested one + * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic" + * sets up a nic that isn't connected to anything. + */ + if (!default_net) { + net_check_clients(); + } + if (boot_once) { qemu_boot_set(boot_once, &error_fatal); |