diff options
author | Dmitry Fleytman <dmitry.fleytman@ravellosystems.com> | 2016-06-01 11:23:45 +0300 |
---|---|---|
committer | Jason Wang <jasowang@redhat.com> | 2016-06-02 10:42:29 +0800 |
commit | 6f3fbe4ed06a54b14716e619f1053c073fddae49 (patch) | |
tree | 41d5cf1076370bd7c26d757e0fba5342b879560f /hw/net | |
parent | 093454e21d4f6799bb2e549394cd918da1530569 (diff) |
net: Introduce e1000e device emulation
This patch introduces emulation for the Intel 82574 adapter, AKA e1000e.
This implementation is derived from the e1000 emulation code, and
utilizes the TX/RX packet abstractions that were initially developed for
the vmxnet3 device. Although some parts of the introduced code may be
shared with e1000, the differences are substantial enough so that the
only shared resources for the two devices are the definitions in
hw/net/e1000_regs.h.
Similarly to vmxnet3, the new device uses virtio headers for task
offloads (for backends that support virtio extensions). Usage of
virtio headers may be forcibly disabled via a boolean device property
"vnet" (which is enabled by default). In such case task offloads
will be performed in software, in the same way it is done on
backends that do not support virtio headers.
The device code is split into two parts:
1. hw/net/e1000e.c: QEMU-specific code for a network device;
2. hw/net/e1000e_core.[hc]: Device emulation according to the spec.
The new device name is e1000e.
Intel specifications for the 82574 controller are available at:
http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
Throughput measurement results (iperf2):
Fedora 22 guest, TCP, RX
4 ++------------------------------------------+
| |
| X X X X X
3.5 ++ X X X X |
| X |
| |
3 ++ |
G | X |
b | |
/ 2.5 ++ |
s | |
| |
2 ++ |
| |
| |
1.5 X+ |
| |
+ + + + + + + + + + + +
1 ++--+---+---+---+---+---+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Fedora 22 guest, TCP, TX
18 ++-------------------------------------------+
| X |
16 ++ X X X X X
| X |
14 ++ |
| |
12 ++ |
G | X |
b 10 ++ |
/ | |
s 8 ++ |
| |
6 ++ X |
| |
4 ++ |
| X |
2 ++ X |
X + + + + + + + + + + +
0 ++--+---+---+---+---+----+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Fedora 22 guest, UDP, RX
3 ++------------------------------------------+
| X
| |
2.5 ++ |
| |
| |
2 ++ X |
G | |
b | |
/ 1.5 ++ |
s | X |
| |
1 ++ |
| |
| X |
0.5 ++ |
| X |
X + + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Fedora 22 guest, UDP, TX
1 ++------------------------------------------+
| X
0.9 ++ |
| |
0.8 ++ |
0.7 ++ |
| |
G 0.6 ++ |
b | |
/ 0.5 ++ |
s | X |
0.4 ++ |
| |
0.3 ++ |
0.2 ++ X |
| |
0.1 ++ X |
X X + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Windows 2012R2 guest, TCP, RX
3.2 ++------------------------------------------+
| X |
3 ++ |
| |
2.8 ++ |
| |
2.6 ++ X |
G | X X X X X
b 2.4 ++ X X |
/ | |
s 2.2 ++ |
| |
2 ++ |
| X X |
1.8 ++ |
| |
1.6 X+ |
+ + + + + + + + + + + +
1.4 ++--+---+---+---+---+---+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Windows 2012R2 guest, TCP, TX
14 ++-------------------------------------------+
| |
| X X
12 ++ |
| |
10 ++ |
| |
G | |
b 8 ++ |
/ | X |
s 6 ++ |
| |
| |
4 ++ X |
| |
2 ++ |
| X X X |
+ X X + + X X + + + + +
0 X+--+---+---+---+---+----+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Windows 2012R2 guest, UDP, RX
1.6 ++------------------------------------------X
| |
1.4 ++ |
| |
1.2 ++ |
| X |
| |
G 1 ++ |
b | |
/ 0.8 ++ |
s | |
0.6 ++ X |
| |
0.4 ++ |
| X |
| |
0.2 ++ X |
X + + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Windows 2012R2 guest, UDP, TX
0.6 ++------------------------------------------+
| X
| |
0.5 ++ |
| |
| |
0.4 ++ |
G | |
b | |
/ 0.3 ++ X |
s | |
| |
0.2 ++ |
| |
| X |
0.1 ++ |
| X |
X X + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Diffstat (limited to 'hw/net')
-rw-r--r-- | hw/net/Makefile.objs | 1 | ||||
-rw-r--r-- | hw/net/e1000_regs.h | 4 | ||||
-rw-r--r-- | hw/net/e1000e.c | 739 | ||||
-rw-r--r-- | hw/net/e1000e_core.c | 3476 | ||||
-rw-r--r-- | hw/net/e1000e_core.h | 146 |
5 files changed, 4366 insertions, 0 deletions
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index bc69948fed..fe61e9fb2b 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -7,6 +7,7 @@ common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o common-obj-$(CONFIG_E1000_PCI) += e1000.o e1000x_common.o +common-obj-$(CONFIG_E1000E_PCI) += e1000e.o e1000e_core.o e1000x_common.o common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index d62b3fa818..c1acd458f2 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -417,6 +417,10 @@ #define E1000_ICR_ASSERTED BIT(31) #define E1000_EIAC_MASK 0x01F00000 +/* [TR]DBAL and [TR]DLEN masks */ +#define E1000_XDBAL_MASK (~(BIT(4) - 1)) +#define E1000_XDLEN_MASK ((BIT(20) - 1) & (~(BIT(7) - 1))) + /* IVAR register parsing helpers */ #define E1000_IVAR_INT_ALLOC_VALID (0x8) diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c new file mode 100644 index 0000000000..61bcbb6083 --- /dev/null +++ b/hw/net/e1000e.c @@ -0,0 +1,739 @@ +/* +* QEMU INTEL 82574 GbE NIC emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman <dmitry@daynix.com> +* Leonid Bloch <leonid@daynix.com> +* Yan Vugenfirer <yan@daynix.com> +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "qemu/osdep.h" +#include "net/net.h" +#include "net/tap.h" +#include "qemu/range.h" +#include "sysemu/sysemu.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#include "hw/net/e1000_regs.h" + +#include "e1000x_common.h" +#include "e1000e_core.h" + +#include "trace.h" + +#define TYPE_E1000E "e1000e" +#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E) + +typedef struct E1000EState { + PCIDevice parent_obj; + NICState *nic; + NICConf conf; + + MemoryRegion mmio; + MemoryRegion flash; + MemoryRegion io; + MemoryRegion msix; + + uint32_t ioaddr; + + uint16_t subsys_ven; + uint16_t subsys; + + uint16_t subsys_ven_used; + uint16_t subsys_used; + + uint32_t intr_state; + bool disable_vnet; + + E1000ECore core; + +} E1000EState; + +#define E1000E_MMIO_IDX 0 +#define E1000E_FLASH_IDX 1 +#define E1000E_IO_IDX 2 +#define E1000E_MSIX_IDX 3 + +#define E1000E_MMIO_SIZE (128 * 1024) +#define E1000E_FLASH_SIZE (128 * 1024) +#define E1000E_IO_SIZE (32) +#define E1000E_MSIX_SIZE (16 * 1024) + +#define E1000E_MSIX_TABLE (0x0000) +#define E1000E_MSIX_PBA (0x2000) + +#define E1000E_USE_MSI BIT(0) +#define E1000E_USE_MSIX BIT(1) + +static uint64_t +e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000EState *s = opaque; + return e1000e_core_read(&s->core, addr, size); +} + +static void +e1000e_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000EState *s = opaque; + e1000e_core_write(&s->core, addr, val, size); +} + +static bool +e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx) +{ + if (s->ioaddr < 0x1FFFF) { + *idx = s->ioaddr; + return true; + } + + if (s->ioaddr < 0x7FFFF) { + trace_e1000e_wrn_io_addr_undefined(s->ioaddr); + return false; + } + + if (s->ioaddr < 0xFFFFF) { + trace_e1000e_wrn_io_addr_flash(s->ioaddr); + return false; + } + + trace_e1000e_wrn_io_addr_unknown(s->ioaddr); + return false; +} + +static uint64_t +e1000e_io_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000EState *s = opaque; + uint32_t idx; + uint64_t val; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_read_addr(s->ioaddr); + return s->ioaddr; + case E1000_IODATA: + if (e1000e_io_get_reg_index(s, &idx)) { + val = e1000e_core_read(&s->core, idx, sizeof(val)); + trace_e1000e_io_read_data(idx, val); + return val; + } + return 0; + default: + trace_e1000e_wrn_io_read_unknown(addr); + return 0; + } +} + +static void +e1000e_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000EState *s = opaque; + uint32_t idx; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_write_addr(val); + s->ioaddr = (uint32_t) val; + return; + case E1000_IODATA: + if (e1000e_io_get_reg_index(s, &idx)) { + trace_e1000e_io_write_data(idx, val); + e1000e_core_write(&s->core, idx, val, sizeof(val)); + } + return; + default: + trace_e1000e_wrn_io_write_unknown(addr); + return; + } +} + +static const MemoryRegionOps mmio_ops = { + .read = e1000e_mmio_read, + .write = e1000e_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps io_ops = { + .read = e1000e_io_read, + .write = e1000e_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static int +e1000e_nc_can_receive(NetClientState *nc) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_can_receive(&s->core); +} + +static ssize_t +e1000e_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_receive_iov(&s->core, iov, iovcnt); +} + +static ssize_t +e1000e_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_receive(&s->core, buf, size); +} + +static void +e1000e_set_link_status(NetClientState *nc) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + e1000e_core_set_link_status(&s->core); +} + +static NetClientInfo net_e1000e_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = e1000e_nc_can_receive, + .receive = e1000e_nc_receive, + .receive_iov = e1000e_nc_receive_iov, + .link_status_changed = e1000e_set_link_status, +}; + +/* +* EEPROM (NVM) contents documented in Table 36, section 6.1 +* and generally 6.1.2 Software accessed words. +*/ +static const uint16_t e1000e_eeprom_template[64] = { + /* Address | Compat. | ImVer | Compat. */ + 0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff, + /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */ + 0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058, + /* NVM words 1,2,3 |-------------------------------|PCI-EID*/ + 0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704, + /* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */ + 0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706, + /* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/ + 0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff, + /* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */ + 0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff, + /* SW Section */ + 0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff, + /* SW Section |CHKSUM */ + 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000, +}; + +static void e1000e_core_realize(E1000EState *s) +{ + s->core.owner = &s->parent_obj; + s->core.owner_nic = s->nic; +} + +static void +e1000e_init_msi(E1000EState *s) +{ + int res; + + res = msi_init(PCI_DEVICE(s), + 0xD0, /* MSI capability offset */ + 1, /* MAC MSI interrupts */ + true, /* 64-bit message addresses supported */ + false); /* Per vector mask supported */ + + if (res > 0) { + s->intr_state |= E1000E_USE_MSI; + } else { + trace_e1000e_msi_init_fail(res); + } +} + +static void +e1000e_cleanup_msi(E1000EState *s) +{ + if (s->intr_state & E1000E_USE_MSI) { + msi_uninit(PCI_DEVICE(s)); + } +} + +static void +e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors) +{ + int i; + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(PCI_DEVICE(s), i); + } +} + +static bool +e1000e_use_msix_vectors(E1000EState *s, int num_vectors) +{ + int i; + for (i = 0; i < num_vectors; i++) { + int res = msix_vector_use(PCI_DEVICE(s), i); + if (res < 0) { + trace_e1000e_msix_use_vector_fail(i, res); + e1000e_unuse_msix_vectors(s, i); + return false; + } + } + return true; +} + +static void +e1000e_init_msix(E1000EState *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM, + &s->msix, + E1000E_MSIX_IDX, E1000E_MSIX_TABLE, + &s->msix, + E1000E_MSIX_IDX, E1000E_MSIX_PBA, + 0xA0); + + if (res < 0) { + trace_e1000e_msix_init_fail(res); + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); + } else { + s->intr_state |= E1000E_USE_MSIX; + } + } +} + +static void +e1000e_cleanup_msix(E1000EState *s) +{ + if (s->intr_state & E1000E_USE_MSIX) { + e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM); + msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix); + } +} + +static void +e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) +{ + DeviceState *dev = DEVICE(pci_dev); + NetClientState *nc; + int i; + + s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + + s->core.max_queue_num = s->conf.peers.queues - 1; + + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr); + + /* Setup virtio headers */ + if (s->disable_vnet) { + s->core.has_vnet = false; + trace_e1000e_cfg_support_virtio(false); + return; + } else { + s->core.has_vnet = true; + } + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) { + s->core.has_vnet = false; + trace_e1000e_cfg_support_virtio(false); + return; + } + } + + trace_e1000e_cfg_support_virtio(true); + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr)); + qemu_using_vnet_hdr(nc->peer, true); + } +} + +static inline uint64_t +e1000e_gen_dsn(uint8_t *mac) +{ + return (uint64_t)(mac[5]) | + (uint64_t)(mac[4]) << 8 | + (uint64_t)(mac[3]) << 16 | + (uint64_t)(0x00FF) << 24 | + (uint64_t)(0x00FF) << 32 | + (uint64_t)(mac[2]) << 40 | + (uint64_t)(mac[1]) << 48 | + (uint64_t)(mac[0]) << 56; +} + +static int +e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) +{ + int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF); + + if (ret >= 0) { + pci_set_word(pdev->config + offset + PCI_PM_PMC, + PCI_PM_CAP_VER_1_1 | + pmc); + + pci_set_word(pdev->wmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK | + PCI_PM_CTRL_PME_ENABLE | + PCI_PM_CTRL_DATA_SEL_MASK); + + pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_PME_STATUS); + } + + return ret; +} + +static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) +{ + E1000EState *s = E1000E(pci_dev); + + pci_default_write_config(pci_dev, address, val, len); + + if (range_covers_byte(address, len, PCI_COMMAND) && + (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } +} + +static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +{ + static const uint16_t e1000e_pmrb_offset = 0x0C8; + static const uint16_t e1000e_pcie_offset = 0x0E0; + static const uint16_t e1000e_aer_offset = 0x100; + static const uint16_t e1000e_dsn_offset = 0x140; + E1000EState *s = E1000E(pci_dev); + uint8_t *macaddr; + + trace_e1000e_cb_pci_realize(); + + pci_dev->config_write = e1000e_write_config; + + pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10; + pci_dev->config[PCI_INTERRUPT_PIN] = 1; + + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys); + + s->subsys_ven_used = s->subsys_ven; + s->subsys_used = s->subsys; + + /* Define IO/MMIO regions */ + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s, + "e1000e-mmio", E1000E_MMIO_SIZE); + pci_register_bar(pci_dev, E1000E_MMIO_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio); + + /* + * We provide a dummy implementation for the flash BAR + * for drivers that may theoretically probe for its presence. + */ + memory_region_init(&s->flash, OBJECT(s), + "e1000e-flash", E1000E_FLASH_SIZE); + pci_register_bar(pci_dev, E1000E_FLASH_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash); + + memory_region_init_io(&s->io, OBJECT(s), &io_ops, s, + "e1000e-io", E1000E_IO_SIZE); + pci_register_bar(pci_dev, E1000E_IO_IDX, + PCI_BASE_ADDRESS_SPACE_IO, &s->io); + + memory_region_init(&s->msix, OBJECT(s), "e1000e-msix", + E1000E_MSIX_SIZE); + pci_register_bar(pci_dev, E1000E_MSIX_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix); + + /* Create networking backend */ + qemu_macaddr_default_if_unset(&s->conf.macaddr); + macaddr = s->conf.macaddr.a; + + e1000e_init_msix(s); + + if (pcie_endpoint_cap_v1_init(pci_dev, e1000e_pcie_offset) < 0) { + hw_error("Failed to initialize PCIe capability"); + } + + e1000e_init_msi(s); + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, + PCI_PM_CAP_DSI) < 0) { + hw_error("Failed to initialize PM capability"); + } + + if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF) < 0) { + hw_error("Failed to initialize AER capability"); + } + + pcie_dev_ser_num_init(pci_dev, e1000e_dsn_offset, + e1000e_gen_dsn(macaddr)); + + e1000e_init_net_peer(s, pci_dev, macaddr); + + /* Initialize core */ + e1000e_core_realize(s); + + e1000e_core_pci_realize(&s->core, + e1000e_eeprom_template, + sizeof(e1000e_eeprom_template), + macaddr); +} + +static void e1000e_pci_uninit(PCIDevice *pci_dev) +{ + E1000EState *s = E1000E(pci_dev); + + trace_e1000e_cb_pci_uninit(); + + e1000e_core_pci_uninit(&s->core); + + pcie_aer_exit(pci_dev); + pcie_cap_exit(pci_dev); + + qemu_del_nic(s->nic); + + e1000e_cleanup_msix(s); + e1000e_cleanup_msi(s); +} + +static void e1000e_qdev_reset(DeviceState *dev) +{ + E1000EState *s = E1000E(dev); + + trace_e1000e_cb_qdev_reset(); + + e1000e_core_reset(&s->core); +} + +static void e1000e_pre_save(void *opaque) +{ + E1000EState *s = opaque; + + trace_e1000e_cb_pre_save(); + + e1000e_core_pre_save(&s->core); +} + +static int e1000e_post_load(void *opaque, int version_id) +{ + E1000EState *s = opaque; + + trace_e1000e_cb_post_load(); + + if ((s->subsys != s->subsys_used) || + (s->subsys_ven != s->subsys_ven_used)) { + fprintf(stderr, + "ERROR: Cannot migrate while device properties " + "(subsys/subsys_ven) differ"); + return -1; + } + + return e1000e_core_post_load(&s->core); +} + +static const VMStateDescription e1000e_vmstate_tx = { + .name = "e1000e-tx", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(props.sum_needed, struct e1000e_tx), + VMSTATE_UINT8(props.ipcss, struct e1000e_tx), + VMSTATE_UINT8(props.ipcso, struct e1000e_tx), + VMSTATE_UINT16(props.ipcse, struct e1000e_tx), + VMSTATE_UINT8(props.tucss, struct e1000e_tx), + VMSTATE_UINT8(props.tucso, struct e1000e_tx), + VMSTATE_UINT16(props.tucse, struct e1000e_tx), + VMSTATE_UINT8(props.hdr_len, struct e1000e_tx), + VMSTATE_UINT16(props.mss, struct e1000e_tx), + VMSTATE_UINT32(props.paylen, struct e1000e_tx), + VMSTATE_INT8(props.ip, struct e1000e_tx), + VMSTATE_INT8(props.tcp, struct e1000e_tx), + VMSTATE_BOOL(props.tse, struct e1000e_tx), + VMSTATE_BOOL(props.cptse, struct e1000e_tx), + VMSTATE_BOOL(skip_cp, struct e1000e_tx), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription e1000e_vmstate_intr_timer = { + .name = "e1000e-intr-timer", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(timer, E1000IntrDelayTimer), + VMSTATE_BOOL(running, E1000IntrDelayTimer), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_E1000E_INTR_DELAY_TIMER(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +#define VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \ + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = e1000e_pre_save, + .post_load = e1000e_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCIE_DEVICE(parent_obj, E1000EState), + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), + VMSTATE_UINT32(intr_state, E1000EState), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, + E1000_PSRCTL_BUFFS_PER_DESC), + VMSTATE_UINT32(core.rx_desc_buf_size, E1000EState), + VMSTATE_UINT16_ARRAY(core.eeprom, E1000EState, E1000E_EEPROM_SIZE), + VMSTATE_UINT16_2DARRAY(core.phy, E1000EState, + E1000E_PHY_PAGES, E1000E_PHY_PAGE_SIZE), + VMSTATE_UINT32_ARRAY(core.mac, E1000EState, E1000E_MAC_SIZE), + VMSTATE_UINT8_ARRAY(core.permanent_mac, E1000EState, ETH_ALEN), + + VMSTATE_UINT32(core.delayed_causes, E1000EState), + + VMSTATE_UINT16(subsys, E1000EState), + VMSTATE_UINT16(subsys_ven, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER(core.rdtr, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.radv, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.raid, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.tadv, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.tidv, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER(core.itr, E1000EState), + VMSTATE_BOOL(core.itr_intr_pending, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(core.eitr, E1000EState, + E1000E_MSIX_VEC_NUM), + VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState, + E1000E_MSIX_VEC_NUM), + + VMSTATE_UINT32(core.itr_guest_value, E1000EState), + VMSTATE_UINT32_ARRAY(core.eitr_guest_value, E1000EState, + E1000E_MSIX_VEC_NUM), + + VMSTATE_UINT16(core.vet, E1000EState), + + VMSTATE_STRUCT_ARRAY(core.tx, E1000EState, E1000E_NUM_QUEUES, 0, + e1000e_vmstate_tx, struct e1000e_tx), + VMSTATE_END_OF_LIST() + } +}; + +static PropertyInfo e1000e_prop_disable_vnet, + e1000e_prop_subsys_ven, + e1000e_prop_subsys; + +static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), + DEFINE_PROP_DEFAULT("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_DEFAULT("subsys_ven", E1000EState, subsys_ven, + PCI_VENDOR_ID_INTEL, + e1000e_prop_subsys_ven, uint16_t), + DEFINE_PROP_DEFAULT("subsys", E1000EState, subsys, 0, + e1000e_prop_subsys, uint16_t), + DEFINE_PROP_END_OF_LIST(), +}; + +static void e1000e_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIDeviceClass *c = PCI_DEVICE_CLASS(class); + + c->realize = e1000e_pci_realize; + c->exit = e1000e_pci_uninit; + c->vendor_id = PCI_VENDOR_ID_INTEL; + c->device_id = E1000_DEV_ID_82574L; + c->revision = 0; + c->class_id = PCI_CLASS_NETWORK_ETHERNET; + c->is_express = 1; + + dc->desc = "Intel 82574L GbE Controller"; + dc->reset = e1000e_qdev_reset; + dc->vmsd = &e1000e_vmstate; + dc->props = e1000e_properties; + + e1000e_prop_disable_vnet = qdev_prop_uint8; + e1000e_prop_disable_vnet.description = "Do not use virtio headers, " + "perform SW offloads emulation " + "instead"; + + e1000e_prop_subsys_ven = qdev_prop_uint16; + e1000e_prop_subsys_ven.description = "PCI device Subsystem Vendor ID"; + + e1000e_prop_subsys = qdev_prop_uint16; + e1000e_prop_subsys.description = "PCI device Subsystem ID"; + + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static void e1000e_instance_init(Object *obj) +{ + E1000EState *s = E1000E(obj); + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static const TypeInfo e1000e_info = { + .name = TYPE_E1000E, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(E1000EState), + .class_init = e1000e_class_init, + .instance_init = e1000e_instance_init, +}; + +static void e1000e_register_types(void) +{ + type_register_static(&e1000e_info); +} + +type_init(e1000e_register_types) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c new file mode 100644 index 0000000000..6a44ea1c3f --- /dev/null +++ b/hw/net/e1000e_core.c @@ -0,0 +1,3476 @@ +/* +* Core code for QEMU e1000e emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman <dmitry@daynix.com> +* Leonid Bloch <leonid@daynix.com> +* Yan Vugenfirer <yan@daynix.com> +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "net/tap.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#include "net_tx_pkt.h" +#include "net_rx_pkt.h" + +#include "e1000x_common.h" +#include "e1000e_core.h" + +#include "trace.h" + +#define E1000E_MIN_XITR (500) /* No more then 7813 interrupts per + second according to spec 10.2.4.2 */ +#define E1000E_MAX_TX_FRAGS (64) + +static void +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val); + +static inline void +e1000e_process_ts_option(E1000ECore *core, struct e1000_tx_desc *dp) +{ + if (le32_to_cpu(dp->upper.data) & E1000_TXD_EXTCMD_TSTAMP) { + trace_e1000e_wrn_no_ts_support(); + } +} + +static inline void +e1000e_process_snap_option(E1000ECore *core, uint32_t cmd_and_length) +{ + if (cmd_and_length & E1000_TXD_CMD_SNAP) { + trace_e1000e_wrn_no_snap_support(); + } +} + +static inline void +e1000e_raise_legacy_irq(E1000ECore *core) +{ + trace_e1000e_irq_legacy_notify(true); + e1000x_inc_reg_if_not_full(core->mac, IAC); + pci_set_irq(core->owner, 1); +} + +static inline void +e1000e_lower_legacy_irq(E1000ECore *core) +{ + trace_e1000e_irq_legacy_notify(false); + pci_set_irq(core->owner, 0); +} + +static inline void +e1000e_intrmgr_rearm_timer(E1000IntrDelayTimer *timer) +{ + int64_t delay_ns = (int64_t) timer->core->mac[timer->delay_reg] * + timer->delay_resolution_ns; + + trace_e1000e_irq_rearm_timer(timer->delay_reg << 2, delay_ns); + + timer_mod(timer->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns); + + timer->running = true; +} + +static void +e1000e_intmgr_timer_resume(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + e1000e_intrmgr_rearm_timer(timer); + } +} + +static void +e1000e_intmgr_timer_pause(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + timer_del(timer->timer); + } +} + +static inline void +e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + timer_del(timer->timer); + timer->running = false; + } +} + +static inline void +e1000e_intrmgr_fire_delayed_interrupts(E1000ECore *core) +{ + trace_e1000e_irq_fire_delayed_interrupts(); + e1000e_set_interrupt_cause(core, 0); +} + +static void +e1000e_intrmgr_on_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + + trace_e1000e_irq_throttling_timer(timer->delay_reg << 2); + + timer->running = false; + e1000e_intrmgr_fire_delayed_interrupts(timer->core); +} + +static void +e1000e_intrmgr_on_throttling_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + + assert(!msix_enabled(timer->core->owner)); + + timer->running = false; + + if (!timer->core->itr_intr_pending) { + trace_e1000e_irq_throttling_no_pending_interrupts(); + return; + } + + if (msi_enabled(timer->core->owner)) { + trace_e1000e_irq_msi_notify_postponed(); + e1000e_set_interrupt_cause(timer->core, 0); + } else { + trace_e1000e_irq_legacy_notify_postponed(); + e1000e_set_interrupt_cause(timer->core, 0); + } +} + +static void +e1000e_intrmgr_on_msix_throttling_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + int idx = timer - &timer->core->eitr[0]; + + assert(msix_enabled(timer->core->owner)); + + timer->running = false; + + if (!timer->core->eitr_intr_pending[idx]) { + trace_e1000e_irq_throttling_no_pending_vec(idx); + return; + } + + trace_e1000e_irq_msix_notify_postponed_vec(idx); + msix_notify(timer->core->owner, idx); +} + +static void +e1000e_intrmgr_initialize_all_timers(E1000ECore *core, bool create) +{ + int i; + + core->radv.delay_reg = RADV; + core->rdtr.delay_reg = RDTR; + core->raid.delay_reg = RAID; + core->tadv.delay_reg = TADV; + core->tidv.delay_reg = TIDV; + + core->radv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->rdtr.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->raid.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->tadv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->tidv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + + core->radv.core = core; + core->rdtr.core = core; + core->raid.core = core; + core->tadv.core = core; + core->tidv.core = core; + + core->itr.core = core; + core->itr.delay_reg = ITR; + core->itr.delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES; + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + core->eitr[i].core = core; + core->eitr[i].delay_reg = EITR + i; + core->eitr[i].delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES; + } + + if (!create) { + return; + } + + core->radv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->radv); + core->rdtr.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->rdtr); + core->raid.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->raid); + + core->tadv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tadv); + core->tidv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tidv); + + core->itr.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + e1000e_intrmgr_on_throttling_timer, + &core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + core->eitr[i].timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + e1000e_intrmgr_on_msix_throttling_timer, + &core->eitr[i]); + } +} + +static inline void +e1000e_intrmgr_stop_delay_timers(E1000ECore *core) +{ + e1000e_intrmgr_stop_timer(&core->radv); + e1000e_intrmgr_stop_timer(&core->rdtr); + e1000e_intrmgr_stop_timer(&core->raid); + e1000e_intrmgr_stop_timer(&core->tidv); + e1000e_intrmgr_stop_timer(&core->tadv); +} + +static bool +e1000e_intrmgr_delay_rx_causes(E1000ECore *core, uint32_t *causes) +{ + uint32_t delayable_causes; + uint32_t rdtr = core->mac[RDTR]; + uint32_t radv = core->mac[RADV]; + uint32_t raid = core->mac[RAID]; + + if (msix_enabled(core->owner)) { + return false; + } + + delayable_causes = E1000_ICR_RXQ0 | + E1000_ICR_RXQ1 | + E1000_ICR_RXT0; + + if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS)) { + delayable_causes |= E1000_ICR_ACK; + } + + /* Clean up all causes that may be delayed */ + core->delayed_causes |= *causes & delayable_causes; + *causes &= ~delayable_causes; + + /* Check if delayed RX interrupts disabled by client + or if there are causes that cannot be delayed */ + if ((rdtr == 0) || (causes != 0)) { + return false; + } + + /* Check if delayed RX ACK interrupts disabled by client + and there is an ACK packet received */ + if ((raid == 0) && (core->delayed_causes & E1000_ICR_ACK)) { + return false; + } + + /* All causes delayed */ + e1000e_intrmgr_rearm_timer(&core->rdtr); + + if (!core->radv.running && (radv != 0)) { + e1000e_intrmgr_rearm_timer(&core->radv); + } + + if (!core->raid.running && (core->delayed_causes & E1000_ICR_ACK)) { + e1000e_intrmgr_rearm_timer(&core->raid); + } + + return true; +} + +static bool +e1000e_intrmgr_delay_tx_causes(E1000ECore *core, uint32_t *causes) +{ + static const uint32_t delayable_causes = E1000_ICR_TXQ0 | + E1000_ICR_TXQ1 | + E1000_ICR_TXQE | + E1000_ICR_TXDW; + + if (msix_enabled(core->owner)) { + return false; + } + + /* Clean up all causes that may be delayed */ + core->delayed_causes |= *causes & delayable_causes; + *causes &= ~delayable_causes; + + /* If there are causes that cannot be delayed */ + if (causes != 0) { + return false; + } + + /* All causes delayed */ + e1000e_intrmgr_rearm_timer(&core->tidv); + + if (!core->tadv.running && (core->mac[TADV] != 0)) { + e1000e_intrmgr_rearm_timer(&core->tadv); + } + + return true; +} + +static uint32_t +e1000e_intmgr_collect_delayed_causes(E1000ECore *core) +{ + uint32_t res; + + if (msix_enabled(core->owner)) { + assert(core->delayed_causes == 0); + return 0; + } + + res = core->delayed_causes; + core->delayed_causes = 0; + + e1000e_intrmgr_stop_delay_timers(core); + + return res; +} + +static void +e1000e_intrmgr_fire_all_timers(E1000ECore *core) +{ + int i; + uint32_t val = e1000e_intmgr_collect_delayed_causes(core); + + trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]); + core->mac[ICR] |= val; + + if (core->itr.running) { + timer_del(core->itr.timer); + e1000e_intrmgr_on_throttling_timer(&core->itr); + } + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + if (core->eitr[i].running) { + timer_del(core->eitr[i].timer); + e1000e_intrmgr_on_msix_throttling_timer(&core->eitr[i]); + } + } +} + +static void +e1000e_intrmgr_resume(E1000ECore *core) +{ + int i; + + e1000e_intmgr_timer_resume(&core->radv); + e1000e_intmgr_timer_resume(&core->rdtr); + e1000e_intmgr_timer_resume(&core->raid); + e1000e_intmgr_timer_resume(&core->tidv); + e1000e_intmgr_timer_resume(&core->tadv); + + e1000e_intmgr_timer_resume(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intmgr_timer_resume(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_pause(E1000ECore *core) +{ + int i; + + e1000e_intmgr_timer_pause(&core->radv); + e1000e_intmgr_timer_pause(&core->rdtr); + e1000e_intmgr_timer_pause(&core->raid); + e1000e_intmgr_timer_pause(&core->tidv); + e1000e_intmgr_timer_pause(&core->tadv); + + e1000e_intmgr_timer_pause(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intmgr_timer_pause(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_reset(E1000ECore *core) +{ + int i; + + core->delayed_causes = 0; + + e1000e_intrmgr_stop_delay_timers(core); + + e1000e_intrmgr_stop_timer(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intrmgr_stop_timer(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_pci_unint(E1000ECore *core) +{ + int i; + + timer_del(core->radv.timer); + timer_free(core->radv.timer); + timer_del(core->rdtr.timer); + timer_free(core->rdtr.timer); + timer_del(core->raid.timer); + timer_free(core->raid.timer); + + timer_del(core->tadv.timer); + timer_free(core->tadv.timer); + timer_del(core->tidv.timer); + timer_free(core->tidv.timer); + + timer_del(core->itr.timer); + timer_free(core->itr.timer); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + timer_del(core->eitr[i].timer); + timer_free(core->eitr[i].timer); + } +} + +static void +e1000e_intrmgr_pci_realize(E1000ECore *core) +{ + e1000e_intrmgr_initialize_all_timers(core, true); +} + +static inline bool +e1000e_rx_csum_enabled(E1000ECore *core) +{ + return (core->mac[RXCSUM] & E1000_RXCSUM_PCSD) ? false : true; +} + +static inline bool +e1000e_rx_use_legacy_descriptor(E1000ECore *core) +{ + return (core->mac[RFCTL] & E1000_RFCTL_EXTEN) ? false : true; +} + +static inline bool +e1000e_rx_use_ps_descriptor(E1000ECore *core) +{ + return !e1000e_rx_use_legacy_descriptor(core) && + (core->mac[RCTL] & E1000_RCTL_DTYP_PS); +} + +static inline bool +e1000e_rss_enabled(E1000ECore *core) +{ + return E1000_MRQC_ENABLED(core->mac[MRQC]) && + !e1000e_rx_csum_enabled(core) && + !e1000e_rx_use_legacy_descriptor(core); +} + +typedef struct E1000E_RSSInfo_st { + bool enabled; + uint32_t hash; + uint32_t queue; + uint32_t type; +} E1000E_RSSInfo; + +static uint32_t +e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt) +{ + bool isip4, isip6, isudp, istcp; + + assert(e1000e_rss_enabled(core)); + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + + if (isip4) { + bool fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + + trace_e1000e_rx_rss_ip4(fragment, istcp, core->mac[MRQC], + E1000_MRQC_EN_TCPIPV4(core->mac[MRQC]), + E1000_MRQC_EN_IPV4(core->mac[MRQC])); + + if (!fragment && istcp && E1000_MRQC_EN_TCPIPV4(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV4TCP; + } + + if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV4; + } + } else if (isip6) { + eth_ip6_hdr_info *ip6info = net_rx_pkt_get_ip6_info(pkt); + + bool ex_dis = core->mac[RFCTL] & E1000_RFCTL_IPV6_EX_DIS; + bool new_ex_dis = core->mac[RFCTL] & E1000_RFCTL_NEW_IPV6_EXT_DIS; + + trace_e1000e_rx_rss_ip6(core->mac[RFCTL], + ex_dis, new_ex_dis, istcp, + ip6info->has_ext_hdrs, + ip6info->rss_ex_dst_valid, + ip6info->rss_ex_src_valid, + core->mac[MRQC], + E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]), + E1000_MRQC_EN_IPV6EX(core->mac[MRQC]), + E1000_MRQC_EN_IPV6(core->mac[MRQC])); + + if ((!ex_dis || !ip6info->has_ext_hdrs) && + (!new_ex_dis || !(ip6info->rss_ex_dst_valid || + ip6info->rss_ex_src_valid))) { + + if (istcp && !ip6info->fragment && + E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6TCP; + } + + if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6EX; + } + + } + + if (E1000_MRQC_EN_IPV6(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6; + } + + } + + return E1000_MRQ_RSS_TYPE_NONE; +} + +static uint32_t +e1000e_rss_calc_hash(E1000ECore *core, + struct NetRxPkt *pkt, + E1000E_RSSInfo *info) +{ + NetRxPktRssType type; + + assert(e1000e_rss_enabled(core)); + + switch (info->type) { + case E1000_MRQ_RSS_TYPE_IPV4: + type = NetPktRssIpV4; + break; + case E1000_MRQ_RSS_TYPE_IPV4TCP: + type = NetPktRssIpV4Tcp; + break; + case E1000_MRQ_RSS_TYPE_IPV6TCP: + type = NetPktRssIpV6Tcp; + break; + case E1000_MRQ_RSS_TYPE_IPV6: + type = NetPktRssIpV6; + break; + case E1000_MRQ_RSS_TYPE_IPV6EX: + type = NetPktRssIpV6Ex; + break; + default: + assert(false); + return 0; + } + + return net_rx_pkt_calc_rss_hash(pkt, type, (uint8_t *) &core->mac[RSSRK]); +} + +static void +e1000e_rss_parse_packet(E1000ECore *core, + struct NetRxPkt *pkt, + E1000E_RSSInfo *info) +{ + trace_e1000e_rx_rss_started(); + + if (!e1000e_rss_enabled(core)) { + info->enabled = false; + info->hash = 0; + info->queue = 0; + info->type = 0; + trace_e1000e_rx_rss_disabled(); + return; + } + + info->enabled = true; + + info->type = e1000e_rss_get_hash_type(core, pkt); + + trace_e1000e_rx_rss_type(info->type); + + if (info->type == E1000_MRQ_RSS_TYPE_NONE) { + info->hash = 0; + info->queue = 0; + return; + } + + info->hash = e1000e_rss_calc_hash(core, pkt, info); + info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash); +} + +static void +e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx) +{ + if (tx->props.tse && tx->props.cptse) { + net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->props.mss); + net_tx_pkt_update_ip_checksums(tx->tx_pkt); + e1000x_inc_reg_if_not_full(core->mac, TSCTC); + return; + } + + if (tx->props.sum_needed & E1000_TXD_POPTS_TXSM) { + net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0); + } + + if (tx->props.sum_needed & E1000_TXD_POPTS_IXSM) { + net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt); + } +} + +static bool +e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index) +{ + int target_queue = MIN(core->max_queue_num, queue_index); + NetClientState *queue = qemu_get_subqueue(core->owner_nic, target_queue); + + e1000e_setup_tx_offloads(core, tx); + + net_tx_pkt_dump(tx->tx_pkt); + + if ((core->phy[0][PHY_CTRL] & MII_CR_LOOPBACK) || + ((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) { + return net_tx_pkt_send_loopback(tx->tx_pkt, queue); + } else { + return net_tx_pkt_send(tx->tx_pkt, queue); + } +} + +static void +e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt) +{ + static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511, + PTC1023, PTC1522 }; + + size_t tot_len = net_tx_pkt_get_total_len(tx_pkt); + + e1000x_increase_size_stats(core->mac, PTCregs, tot_len); + e1000x_inc_reg_if_not_full(core->mac, TPT); + e1000x_grow_8reg_if_not_full(core->mac, TOTL, tot_len); + + switch (net_tx_pkt_get_packet_type(tx_pkt)) { + case ETH_PKT_BCAST: + e1000x_inc_reg_if_not_full(core->mac, BPTC); + break; + case ETH_PKT_MCAST: + e1000x_inc_reg_if_not_full(core->mac, MPTC); + break; + case ETH_PKT_UCAST: + break; + default: + g_assert_not_reached(); + } + + core->mac[GPTC] = core->mac[TPT]; + core->mac[GOTCL] = core->mac[TOTL]; + core->mac[GOTCH] = core->mac[TOTH]; +} + +static void +e1000e_process_tx_desc(E1000ECore *core, + struct e1000e_tx *tx, + struct e1000_tx_desc *dp, + int queue_index) +{ + uint32_t txd_lower = le32_to_cpu(dp->lower.data); + uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); + unsigned int split_size = txd_lower & 0xffff; + uint64_t addr; + struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; + bool eop = txd_lower & E1000_TXD_CMD_EOP; + + if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */ + e1000x_read_tx_ctx_descr(xp, &tx->props); + e1000e_process_snap_option(core, le32_to_cpu(xp->cmd_and_length)); + return; + } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { + /* data descriptor */ + tx->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8; + tx->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0; + e1000e_process_ts_option(core, dp); + } else { + /* legacy descriptor */ + e1000e_process_ts_option(core, dp); + tx->props.cptse = 0; + } + + addr = le64_to_cpu(dp->buffer_addr); + + if (!tx->skip_cp) { + if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) { + tx->skip_cp = true; + } + } + + if (eop) { + if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) { + if (e1000x_vlan_enabled(core->mac) && + e1000x_is_vlan_txd(txd_lower)) { + net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, + le16_to_cpu(dp->upper.fields.special), core->vet); + } + if (e1000e_tx_pkt_send(core, tx, queue_index)) { + e1000e_on_tx_done_update_stats(core, tx->tx_pkt); + } + } + + tx->skip_cp = false; + net_tx_pkt_reset(tx->tx_pkt); + + tx->props.sum_needed = 0; + tx->props.cptse = 0; + } +} + +static inline uint32_t +e1000e_tx_wb_interrupt_cause(E1000ECore *core, int queue_idx) +{ + if (!msix_enabled(core->owner)) { + return E1000_ICR_TXDW; + } + + return (queue_idx == 0) ? E1000_ICR_TXQ0 : E1000_ICR_TXQ1; +} + +static inline uint32_t +e1000e_rx_wb_interrupt_cause(E1000ECore *core, int queue_idx, + bool min_threshold_hit) +{ + if (!msix_enabled(core->owner)) { + return E1000_ICS_RXT0 | (min_threshold_hit ? E1000_ICS_RXDMT0 : 0); + } + + return (queue_idx == 0) ? E1000_ICR_RXQ0 : E1000_ICR_RXQ1; +} + +static uint32_t +e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base, + struct e1000_tx_desc *dp, bool *ide, int queue_idx) +{ + uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data); + + if (!(txd_lower & E1000_TXD_CMD_RS) && + !(core->mac[IVAR] & E1000_IVAR_TX_INT_EVERY_WB)) { + return 0; + } + + *ide = (txd_lower & E1000_TXD_CMD_IDE) ? true : false; + + txd_upper = le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD; + + dp->upper.data = cpu_to_le32(txd_upper); + pci_dma_write(core->owner, base + ((char *)&dp->upper - (char *)dp), + &dp->upper, sizeof(dp->upper)); + return e1000e_tx_wb_interrupt_cause(core, queue_idx); +} + +typedef struct E1000E_RingInfo_st { + int dbah; + int dbal; + int dlen; + int dh; + int dt; + int idx; +} E1000E_RingInfo; + +static inline bool +e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dh] == core->mac[r->dt]; +} + +static inline uint64_t +e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) +{ + uint64_t bah = core->mac[r->dbah]; + uint64_t bal = core->mac[r->dbal]; + + return (bah << 32) + bal; +} + +static inline uint64_t +e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r) +{ + return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; +} + +static inline void +e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) +{ + core->mac[r->dh] += count; + + if (core->mac[r->dh] * E1000_RING_DESC_LEN >= core->mac[r->dlen]) { + core->mac[r->dh] = 0; + } +} + +static inline uint32_t +e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) +{ + trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], + core->mac[r->dh], core->mac[r->dt]); + + if (core->mac[r->dh] <= core->mac[r->dt]) { + return core->mac[r->dt] - core->mac[r->dh]; + } + + if (core->mac[r->dh] > core->mac[r->dt]) { + return core->mac[r->dlen] / E1000_RING_DESC_LEN + + core->mac[r->dt] - core->mac[r->dh]; + } + + g_assert_not_reached(); + return 0; +} + +static inline bool +e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dlen] > 0; +} + +static inline uint32_t +e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dlen]; +} + +typedef struct E1000E_TxRing_st { + const E1000E_RingInfo *i; + struct e1000e_tx *tx; +} E1000E_TxRing; + +static inline int +e1000e_mq_queue_idx(int base_reg_idx, int reg_idx) +{ + return (reg_idx - base_reg_idx) / (0x100 >> 2); +} + +static inline void +e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) +{ + static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + { TDBAH, TDBAL, TDLEN, TDH, TDT, 0 }, + { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 } + }; + + assert(idx < ARRAY_SIZE(i)); + + txr->i = &i[idx]; + txr->tx = &core->tx[idx]; +} + +typedef struct E1000E_RxRing_st { + const E1000E_RingInfo *i; +} E1000E_RxRing; + +static inline void +e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx) +{ + static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, + { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 } + }; + + assert(idx < ARRAY_SIZE(i)); + + rxr->i = &i[idx]; +} + +static void +e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) +{ + dma_addr_t base; + struct e1000_tx_desc desc; + bool ide = false; + const E1000E_RingInfo *txi = txr->i; + uint32_t cause = E1000_ICS_TXQE; + + if (!(core->mac[TCTL] & E1000_TCTL_EN)) { + trace_e1000e_tx_disabled(); + return; + } + + while (!e1000e_ring_empty(core, txi)) { + base = e1000e_ring_head_descr(core, txi); + + pci_dma_read(core->owner, base, &desc, sizeof(desc)); + + trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr, + desc.lower.data, desc.upper.data); + + e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx); + cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx); + + e1000e_ring_advance(core, txi, 1); + } + + if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) { + e1000e_set_interrupt_cause(core, cause); + } +} + +static bool +e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, + size_t total_size) +{ + uint32_t bufs = e1000e_ring_free_descr_num(core, r); + + trace_e1000e_rx_has_buffers(r->idx, bufs, total_size, + core->rx_desc_buf_size); + + return total_size <= bufs / (core->rx_desc_len / E1000_MIN_RX_DESC_LEN) * + core->rx_desc_buf_size; +} + +static inline void +e1000e_start_recv(E1000ECore *core) +{ + int i; + + trace_e1000e_rx_start_recv(); + + for (i = 0; i <= core->max_queue_num; i++) { + qemu_flush_queued_packets(qemu_get_subqueue(core->owner_nic, i)); + } +} + +int +e1000e_can_receive(E1000ECore *core) +{ + int i; + + if (!e1000x_rx_ready(core->owner, core->mac)) { + return false; + } + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + E1000E_RxRing rxr; + + e1000e_rx_ring_init(core, &rxr, i); + if (e1000e_ring_enabled(core, rxr.i) && + e1000e_has_rxbufs(core, rxr.i, 1)) { + trace_e1000e_rx_can_recv(); + return true; + } + } + + trace_e1000e_rx_can_recv_rings_full(); + return false; +} + +ssize_t +e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return e1000e_receive_iov(core, &iov, 1); +} + +static inline bool +e1000e_rx_l3_cso_enabled(E1000ECore *core) +{ + return !!(core->mac[RXCSUM] & E1000_RXCSUM_IPOFLD); +} + +static inline bool +e1000e_rx_l4_cso_enabled(E1000ECore *core) +{ + return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD); +} + +static bool +e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size) +{ + uint32_t rctl = core->mac[RCTL]; + + if (e1000x_is_vlan_packet(buf, core->vet) && + e1000x_vlan_rx_filter_enabled(core->mac)) { + uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); + uint32_t vfta = le32_to_cpup((uint32_t *)(core->mac + VFTA) + + ((vid >> 5) & 0x7f)); + if ((vfta & (1 << (vid & 0x1f))) == 0) { + trace_e1000e_rx_flt_vlan_mismatch(vid); + return false; + } else { + trace_e1000e_rx_flt_vlan_match(vid); + } + } + + switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { + case ETH_PKT_UCAST: + if (rctl & E1000_RCTL_UPE) { + return true; /* promiscuous ucast */ + } + break; + + case ETH_PKT_BCAST: + if (rctl & E1000_RCTL_BAM) { + return true; /* broadcast enabled */ + } + break; + + case ETH_PKT_MCAST: + if (rctl & E1000_RCTL_MPE) { + return true; /* promiscuous mcast */ + } + break; + + default: + g_assert_not_reached(); + } + + return e1000x_rx_group_filter(core->mac, buf); +} + +static inline void +e1000e_read_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr) +{ + struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; + *buff_addr = le64_to_cpu(d->buffer_addr); +} + +static inline void +e1000e_read_ext_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr) +{ + union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; + *buff_addr = le64_to_cpu(d->read.buffer_addr); +} + +static inline void +e1000e_read_ps_rx_descr(E1000ECore *core, uint8_t *desc, + hwaddr (*buff_addr)[MAX_PS_BUFFERS]) +{ + int i; + union e1000_rx_desc_packet_split *d = + (union e1000_rx_desc_packet_split *) desc; + + for (i = 0; i < MAX_PS_BUFFERS; i++) { + (*buff_addr)[i] = le64_to_cpu(d->read.buffer_addr[i]); + } + + trace_e1000e_rx_desc_ps_read((*buff_addr)[0], (*buff_addr)[1], + (*buff_addr)[2], (*buff_addr)[3]); +} + +static inline void +e1000e_read_rx_descr(E1000ECore *core, uint8_t *desc, + hwaddr (*buff_addr)[MAX_PS_BUFFERS]) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + e1000e_read_lgcy_rx_descr(core, desc, &(*buff_addr)[0]); + (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0; + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + e1000e_read_ps_rx_descr(core, desc, buff_addr); + } else { + e1000e_read_ext_rx_descr(core, desc, &(*buff_addr)[0]); + (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0; + } + } +} + +static void +e1000e_verify_csum_in_sw(E1000ECore *core, + struct NetRxPkt *pkt, + uint32_t *status_flags, + bool istcp, bool isudp) +{ + bool csum_valid; + uint32_t csum_error; + + if (e1000e_rx_l3_cso_enabled(core)) { + if (!net_rx_pkt_validate_l3_csum(pkt, &csum_valid)) { + trace_e1000e_rx_metadata_l3_csum_validation_failed(); + } else { + csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_IPE; + *status_flags |= E1000_RXD_STAT_IPCS | csum_error; + } + } else { + trace_e1000e_rx_metadata_l3_cso_disabled(); + } + + if (!e1000e_rx_l4_cso_enabled(core)) { + trace_e1000e_rx_metadata_l4_cso_disabled(); + return; + } + + if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) { + trace_e1000e_rx_metadata_l4_csum_validation_failed(); + return; + } + + csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_TCPE; + + if (istcp) { + *status_flags |= E1000_RXD_STAT_TCPCS | + csum_error; + } else if (isudp) { + *status_flags |= E1000_RXD_STAT_TCPCS | + E1000_RXD_STAT_UDPCS | + csum_error; + } +} + +static inline bool +e1000e_is_tcp_ack(E1000ECore *core, struct NetRxPkt *rx_pkt) +{ + if (!net_rx_pkt_is_tcp_ack(rx_pkt)) { + return false; + } + + if (core->mac[RFCTL] & E1000_RFCTL_ACK_DATA_DIS) { + return !net_rx_pkt_has_tcp_data(rx_pkt); + } + + return true; +} + +static void +e1000e_build_rx_metadata(E1000ECore *core, + struct NetRxPkt *pkt, + bool is_eop, + const E1000E_RSSInfo *rss_info, + uint32_t *rss, uint32_t *mrq, + uint32_t *status_flags, + uint16_t *ip_id, + uint16_t *vlan_tag) +{ + struct virtio_net_hdr *vhdr; + bool isip4, isip6, istcp, isudp; + uint32_t pkt_type; + + *status_flags = E1000_RXD_STAT_DD; + + /* No additional metadata needed for non-EOP descriptors */ + if (!is_eop) { + goto func_exit; + } + + *status_flags |= E1000_RXD_STAT_EOP; + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + trace_e1000e_rx_metadata_protocols(isip4, isip6, isudp, istcp); + + /* VLAN state */ + if (net_rx_pkt_is_vlan_stripped(pkt)) { + *status_flags |= E1000_RXD_STAT_VP; + *vlan_tag = cpu_to_le16(net_rx_pkt_get_vlan_tag(pkt)); + trace_e1000e_rx_metadata_vlan(*vlan_tag); + } + + /* Packet parsing results */ + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { + if (rss_info->enabled) { + *rss = cpu_to_le32(rss_info->hash); + *mrq = cpu_to_le32(rss_info->type | (rss_info->queue << 8)); + trace_e1000e_rx_metadata_rss(*rss, *mrq); + } + } else if (isip4) { + *status_flags |= E1000_RXD_STAT_IPIDV; + *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id(*ip_id); + } + + if (istcp && e1000e_is_tcp_ack(core, pkt)) { + *status_flags |= E1000_RXD_STAT_ACK; + trace_e1000e_rx_metadata_ack(); + } + + if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + trace_e1000e_rx_metadata_ipv6_filtering_disabled(); + pkt_type = E1000_RXD_PKT_MAC; + } else if (istcp || isudp) { + pkt_type = isip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP; + } else if (isip4 || isip6) { + pkt_type = isip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6; + } else { + pkt_type = E1000_RXD_PKT_MAC; + } + + *status_flags |= E1000_RXD_PKT_TYPE(pkt_type); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + + /* RX CSO information */ + if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { + trace_e1000e_rx_metadata_ipv6_sum_disabled(); + goto func_exit; + } + + if (!net_rx_pkt_has_virt_hdr(pkt)) { + trace_e1000e_rx_metadata_no_virthdr(); + e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp); + goto func_exit; + } + + vhdr = net_rx_pkt_get_vhdr(pkt); + + if (!(vhdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) && + !(vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { + trace_e1000e_rx_metadata_virthdr_no_csum_info(); + e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp); + goto func_exit; + } + + if (e1000e_rx_l3_cso_enabled(core)) { + *status_flags |= isip4 ? E1000_RXD_STAT_IPCS : 0; + } else { + trace_e1000e_rx_metadata_l3_cso_disabled(); + } + + if (e1000e_rx_l4_cso_enabled(core)) { + if (istcp) { + *status_flags |= E1000_RXD_STAT_TCPCS; + } else if (isudp) { + *status_flags |= E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS; + } + } else { + trace_e1000e_rx_metadata_l4_cso_disabled(); + } + + trace_e1000e_rx_metadata_status_flags(*status_flags); + +func_exit: + *status_flags = cpu_to_le32(*status_flags); +} + +static inline void +e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t length) +{ + uint32_t status_flags, rss, mrq; + uint16_t ip_id; + + struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; + + memset(d, 0, sizeof(*d)); + + assert(!rss_info->enabled); + + d->length = cpu_to_le16(length); + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &rss, &mrq, + &status_flags, &ip_id, + &d->special); + d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); + d->status = (uint8_t) le32_to_cpu(status_flags); +} + +static inline void +e1000e_write_ext_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t length) +{ + union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; + + memset(d, 0, sizeof(*d)); + + d->wb.upper.length = cpu_to_le16(length); + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &d->wb.lower.hi_dword.rss, + &d->wb.lower.mrq, + &d->wb.upper.status_error, + &d->wb.lower.hi_dword.csum_ip.ip_id, + &d->wb.upper.vlan); +} + +static inline void +e1000e_write_ps_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + size_t ps_hdr_len, + uint16_t(*written)[MAX_PS_BUFFERS]) +{ + int i; + union e1000_rx_desc_packet_split *d = + (union e1000_rx_desc_packet_split *) desc; + + memset(d, 0, sizeof(*d)); + + d->wb.middle.length0 = cpu_to_le16((*written)[0]); + + for (i = 0; i < PS_PAGE_BUFFERS; i++) { + d->wb.upper.length[i] = cpu_to_le16((*written)[i + 1]); + } + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &d->wb.lower.hi_dword.rss, + &d->wb.lower.mrq, + &d->wb.middle.status_error, + &d->wb.lower.hi_dword.csum_ip.ip_id, + &d->wb.middle.vlan); + + d->wb.upper.header_status = + cpu_to_le16(ps_hdr_len | (ps_hdr_len ? E1000_RXDPS_HDRSTAT_HDRSP : 0)); + + trace_e1000e_rx_desc_ps_write((*written)[0], (*written)[1], + (*written)[2], (*written)[3]); +} + +static inline void +e1000e_write_rx_descr(E1000ECore *core, uint8_t *desc, +struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, + size_t ps_hdr_len, uint16_t(*written)[MAX_PS_BUFFERS]) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + assert(ps_hdr_len == 0); + e1000e_write_lgcy_rx_descr(core, desc, pkt, rss_info, (*written)[0]); + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + e1000e_write_ps_rx_descr(core, desc, pkt, rss_info, + ps_hdr_len, written); + } else { + assert(ps_hdr_len == 0); + e1000e_write_ext_rx_descr(core, desc, pkt, rss_info, + (*written)[0]); + } + } +} + +typedef struct e1000e_ba_state_st { + uint16_t written[MAX_PS_BUFFERS]; + uint8_t cur_idx; +} e1000e_ba_state; + +static inline void +e1000e_write_hdr_to_rx_buffers(E1000ECore *core, + hwaddr (*ba)[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]); + + pci_dma_write(core->owner, (*ba)[0] + bastate->written[0], data, data_len); + bastate->written[0] += data_len; + + bastate->cur_idx = 1; +} + +static void +e1000e_write_to_rx_buffers(E1000ECore *core, + hwaddr (*ba)[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) +{ + while (data_len > 0) { + uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; + uint32_t cur_buf_bytes_left = cur_buf_len - + bastate->written[bastate->cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_e1000e_rx_desc_buff_write(bastate->cur_idx, + (*ba)[bastate->cur_idx], + bastate->written[bastate->cur_idx], + data, + bytes_to_write); + + pci_dma_write(core->owner, + (*ba)[bastate->cur_idx] + bastate->written[bastate->cur_idx], + data, bytes_to_write); + + bastate->written[bastate->cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (bastate->written[bastate->cur_idx] == cur_buf_len) { + bastate->cur_idx++; + } + + assert(bastate->cur_idx < MAX_PS_BUFFERS); + } +} + +static void +e1000e_update_rx_stats(E1000ECore *core, + size_t data_size, + size_t data_fcs_size) +{ + e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); + + switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { + case ETH_PKT_BCAST: + e1000x_inc_reg_if_not_full(core->mac, BPRC); + break; + + case ETH_PKT_MCAST: + e1000x_inc_reg_if_not_full(core->mac, MPRC); + break; + + default: + break; + } +} + +static inline bool +e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi) +{ + return e1000e_ring_free_descr_num(core, rxi) == + e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift; +} + +static bool +e1000e_do_ps(E1000ECore *core, struct NetRxPkt *pkt, size_t *hdr_len) +{ + bool isip4, isip6, isudp, istcp; + bool fragment; + + if (!e1000e_rx_use_ps_descriptor(core)) { + return false; + } + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + + if (isip4) { + fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (isip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + return false; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + return false; + } + + if (!fragment && (isudp || istcp)) { + *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + if ((*hdr_len > core->rxbuf_sizes[0]) || + (*hdr_len > net_rx_pkt_get_total_len(pkt))) { + return false; + } + + return true; +} + +static void +e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, + const E1000E_RxRing *rxr, + const E1000E_RSSInfo *rss_info) +{ + PCIDevice *d = core->owner; + dma_addr_t base; + uint8_t desc[E1000_MAX_RX_DESC_LEN]; + size_t desc_size; + size_t desc_offset = 0; + size_t iov_ofs = 0; + + struct iovec *iov = net_rx_pkt_get_iovec(pkt); + size_t size = net_rx_pkt_get_total_len(pkt); + size_t total_size = size + e1000x_fcs_len(core->mac); + const E1000E_RingInfo *rxi; + size_t ps_hdr_len = 0; + bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len); + + rxi = rxr->i; + + do { + hwaddr ba[MAX_PS_BUFFERS]; + e1000e_ba_state bastate = { { 0 } }; + bool is_last = false; + bool is_first = true; + + desc_size = total_size - desc_offset; + + if (desc_size > core->rx_desc_buf_size) { + desc_size = core->rx_desc_buf_size; + } + + base = e1000e_ring_head_descr(core, rxi); + + pci_dma_read(d, base, &desc, core->rx_desc_len); + + trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); + + e1000e_read_rx_descr(core, desc, &ba); + + if (ba[0]) { + if (desc_offset < size) { + static const uint32_t fcs_pad; + size_t iov_copy; + size_t copy_size = size - desc_offset; + if (copy_size > core->rx_desc_buf_size) { + copy_size = core->rx_desc_buf_size; + } + + /* For PS mode copy the packet header first */ + if (do_ps) { + if (is_first) { + size_t ps_hdr_copied = 0; + do { + iov_copy = MIN(ps_hdr_len - ps_hdr_copied, + iov->iov_len - iov_ofs); + + e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate, + iov->iov_base, iov_copy); + + copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } while (ps_hdr_copied < ps_hdr_len); + + is_first = false; + } else { + /* Leave buffer 0 of each descriptor except first */ + /* empty as per spec 7.1.5.1 */ + e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate, + NULL, 0); + } + } + + /* Copy packet payload */ + while (copy_size) { + iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); + + e1000e_write_to_rx_buffers(core, &ba, &bastate, + iov->iov_base + iov_ofs, iov_copy); + + copy_size -= iov_copy; + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } + + if (desc_offset + desc_size >= total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + e1000e_write_to_rx_buffers(core, &ba, &bastate, + (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); + } + } + desc_offset += desc_size; + if (desc_offset >= total_size) { + is_last = true; + } + } else { /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + } + + e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, + rss_info, do_ps ? ps_hdr_len : 0, &bastate.written); + pci_dma_write(d, base, &desc, core->rx_desc_len); + + e1000e_ring_advance(core, rxi, + core->rx_desc_len / E1000_MIN_RX_DESC_LEN); + + } while (desc_offset < total_size); + + e1000e_update_rx_stats(core, size, total_size); +} + +static inline void +e1000e_rx_fix_l4_csum(E1000ECore *core, struct NetRxPkt *pkt) +{ + if (net_rx_pkt_has_virt_hdr(pkt)) { + struct virtio_net_hdr *vhdr = net_rx_pkt_get_vhdr(pkt); + + if (vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + net_rx_pkt_fix_l4_csum(pkt); + } + } +} + +ssize_t +e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) +{ + static const int maximum_ethernet_hdr_len = (14 + 4); + /* Min. octets in an ethernet frame sans FCS */ + static const int min_buf_size = 60; + + uint32_t n = 0; + uint8_t min_buf[min_buf_size]; + struct iovec min_iov; + uint8_t *filter_buf; + size_t size, orig_size; + size_t iov_ofs = 0; + E1000E_RxRing rxr; + E1000E_RSSInfo rss_info; + size_t total_size; + ssize_t retval; + bool rdmts_hit; + + trace_e1000e_rx_receive_iov(iovcnt); + + if (!e1000x_hw_rx_enabled(core->mac)) { + return -1; + } + + /* Pull virtio header in */ + if (core->has_vnet) { + net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt); + iov_ofs = sizeof(struct virtio_net_hdr); + } + + filter_buf = iov->iov_base + iov_ofs; + orig_size = iov_size(iov, iovcnt); + size = orig_size - iov_ofs; + + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + e1000x_inc_reg_if_not_full(core->mac, RUC); + min_iov.iov_base = filter_buf = min_buf; + min_iov.iov_len = size = sizeof(min_buf); + iovcnt = 1; + iov = &min_iov; + iov_ofs = 0; + } else if (iov->iov_len < maximum_ethernet_hdr_len) { + /* This is very unlikely, but may happen. */ + iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len); + filter_buf = min_buf; + } + + /* Discard oversized packets if !LPE and !SBP. */ + if (e1000x_is_oversized(core->mac, size)) { + return orig_size; + } + + net_rx_pkt_set_packet_type(core->rx_pkt, + get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf))); + + if (!e1000e_receive_filter(core, filter_buf, size)) { + trace_e1000e_rx_flt_dropped(); + return orig_size; + } + + net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, + e1000x_vlan_enabled(core->mac), core->vet); + + e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info); + e1000e_rx_ring_init(core, &rxr, rss_info.queue); + + trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx); + + total_size = net_rx_pkt_get_total_len(core->rx_pkt) + + e1000x_fcs_len(core->mac); + + if (e1000e_has_rxbufs(core, rxr.i, total_size)) { + e1000e_rx_fix_l4_csum(core, core->rx_pkt); + + e1000e_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); + + retval = orig_size; + + /* Perform small receive detection (RSRPD) */ + if (total_size < core->mac[RSRPD]) { + n |= E1000_ICS_SRPD; + } + + /* Perform ACK receive detection */ + if (e1000e_is_tcp_ack(core, core->rx_pkt)) { + n |= E1000_ICS_ACK; + } + + /* Check if receive descriptor minimum threshold hit */ + rdmts_hit = e1000e_rx_descr_threshold_hit(core, rxr.i); + n |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit); + + trace_e1000e_rx_written_to_guest(n); + } else { + n |= E1000_ICS_RXO; + retval = 0; + + trace_e1000e_rx_not_written_to_guest(n); + } + + if (!e1000e_intrmgr_delay_rx_causes(core, &n)) { + trace_e1000e_rx_interrupt_set(n); + e1000e_set_interrupt_cause(core, n); + } else { + trace_e1000e_rx_interrupt_delayed(n); + } + + return retval; +} + +static inline bool +e1000e_have_autoneg(E1000ECore *core) +{ + return core->phy[0][PHY_CTRL] & MII_CR_AUTO_NEG_EN; +} + +static void e1000e_update_flowctl_status(E1000ECore *core) +{ + if (e1000e_have_autoneg(core) && + core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE) { + trace_e1000e_link_autoneg_flowctl(true); + core->mac[CTRL] |= E1000_CTRL_TFCE | E1000_CTRL_RFCE; + } else { + trace_e1000e_link_autoneg_flowctl(false); + } +} + +static inline void +e1000e_link_down(E1000ECore *core) +{ + e1000x_update_regs_on_link_down(core->mac, core->phy[0]); + e1000e_update_flowctl_status(core); +} + +static inline void +e1000e_set_phy_ctrl(E1000ECore *core, int index, uint16_t val) +{ + /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */ + core->phy[0][PHY_CTRL] = val & ~(0x3f | + MII_CR_RESET | + MII_CR_RESTART_AUTO_NEG); + + if ((val & MII_CR_RESTART_AUTO_NEG) && + e1000e_have_autoneg(core)) { + e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer); + } +} + +static void +e1000e_set_phy_oem_bits(E1000ECore *core, int index, uint16_t val) +{ + core->phy[0][PHY_OEM_BITS] = val & ~BIT(10); + + if (val & BIT(10)) { + e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer); + } +} + +static void +e1000e_set_phy_page(E1000ECore *core, int index, uint16_t val) +{ + core->phy[0][PHY_PAGE] = val & PHY_PAGE_RW_MASK; +} + +void +e1000e_core_set_link_status(E1000ECore *core) +{ + NetClientState *nc = qemu_get_queue(core->owner_nic); + uint32_t old_status = core->mac[STATUS]; + + trace_e1000e_link_status_changed(nc->link_down ? false : true); + + if (nc->link_down) { + e1000x_update_regs_on_link_down(core->mac, core->phy[0]); + } else { + if (e1000e_have_autoneg(core) && + !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + e1000x_restart_autoneg(core->mac, core->phy[0], + core->autoneg_timer); + } else { + e1000x_update_regs_on_link_up(core->mac, core->phy[0]); + } + } + + if (core->mac[STATUS] != old_status) { + e1000e_set_interrupt_cause(core, E1000_ICR_LSC); + } +} + +static void +e1000e_set_ctrl(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_core_ctrl_write(index, val); + + /* RST is self clearing */ + core->mac[CTRL] = val & ~E1000_CTRL_RST; + core->mac[CTRL_DUP] = core->mac[CTRL]; + + trace_e1000e_link_set_params( + !!(val & E1000_CTRL_ASDE), + (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT, + !!(val & E1000_CTRL_FRCSPD), + !!(val & E1000_CTRL_FRCDPX), + !!(val & E1000_CTRL_RFCE), + !!(val & E1000_CTRL_TFCE)); + + if (val & E1000_CTRL_RST) { + trace_e1000e_core_ctrl_sw_reset(); + e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); + } + + if (val & E1000_CTRL_PHY_RST) { + trace_e1000e_core_ctrl_phy_reset(); + core->mac[STATUS] |= E1000_STATUS_PHYRA; + } +} + +static void +e1000e_set_rfctl(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_rx_set_rfctl(val); + + if (!(val & E1000_RFCTL_ISCSI_DIS)) { + trace_e1000e_wrn_iscsi_filtering_not_supported(); + } + + if (!(val & E1000_RFCTL_NFSW_DIS)) { + trace_e1000e_wrn_nfsw_filtering_not_supported(); + } + + if (!(val & E1000_RFCTL_NFSR_DIS)) { + trace_e1000e_wrn_nfsr_filtering_not_supported(); + } + + core->mac[RFCTL] = val; +} + +static void +e1000e_calc_per_desc_buf_size(E1000ECore *core) +{ + int i; + core->rx_desc_buf_size = 0; + + for (i = 0; i < ARRAY_SIZE(core->rxbuf_sizes); i++) { + core->rx_desc_buf_size += core->rxbuf_sizes[i]; + } +} + +static void +e1000e_parse_rxbufsize(E1000ECore *core) +{ + uint32_t rctl = core->mac[RCTL]; + + memset(core->rxbuf_sizes, 0, sizeof(core->rxbuf_sizes)); + + if (rctl & E1000_RCTL_DTYP_MASK) { + uint32_t bsize; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE0_MASK; + core->rxbuf_sizes[0] = (bsize >> E1000_PSRCTL_BSIZE0_SHIFT) * 128; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE1_MASK; + core->rxbuf_sizes[1] = (bsize >> E1000_PSRCTL_BSIZE1_SHIFT) * 1024; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE2_MASK; + core->rxbuf_sizes[2] = (bsize >> E1000_PSRCTL_BSIZE2_SHIFT) * 1024; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE3_MASK; + core->rxbuf_sizes[3] = (bsize >> E1000_PSRCTL_BSIZE3_SHIFT) * 1024; + } else if (rctl & E1000_RCTL_FLXBUF_MASK) { + int flxbuf = rctl & E1000_RCTL_FLXBUF_MASK; + core->rxbuf_sizes[0] = (flxbuf >> E1000_RCTL_FLXBUF_SHIFT) * 1024; + } else { + core->rxbuf_sizes[0] = e1000x_rxbufsize(rctl); + } + + trace_e1000e_rx_desc_buff_sizes(core->rxbuf_sizes[0], core->rxbuf_sizes[1], + core->rxbuf_sizes[2], core->rxbuf_sizes[3]); + + e1000e_calc_per_desc_buf_size(core); +} + +static void +e1000e_calc_rxdesclen(E1000ECore *core) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + core->rx_desc_len = sizeof(struct e1000_rx_desc); + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + core->rx_desc_len = sizeof(union e1000_rx_desc_packet_split); + } else { + core->rx_desc_len = sizeof(union e1000_rx_desc_extended); + } + } + trace_e1000e_rx_desc_len(core->rx_desc_len); +} + +static void +e1000e_set_rx_control(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RCTL] = val; + trace_e1000e_rx_set_rctl(core->mac[RCTL]); + + if (val & E1000_RCTL_EN) { + e1000e_parse_rxbufsize(core); + e1000e_calc_rxdesclen(core); + core->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1 + + E1000_RING_DESC_LEN_SHIFT; + + e1000e_start_recv(core); + } +} + +static +void(*e1000e_phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]) +(E1000ECore *, int, uint16_t) = { + [0] = { + [PHY_CTRL] = e1000e_set_phy_ctrl, + [PHY_PAGE] = e1000e_set_phy_page, + [PHY_OEM_BITS] = e1000e_set_phy_oem_bits + } +}; + +static inline void +e1000e_clear_ims_bits(E1000ECore *core, uint32_t bits) +{ + trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits); + core->mac[IMS] &= ~bits; +} + +static inline bool +e1000e_postpone_interrupt(bool *interrupt_pending, + E1000IntrDelayTimer *timer) +{ + if (timer->running) { + trace_e1000e_irq_postponed_by_xitr(timer->delay_reg << 2); + + *interrupt_pending = true; + return true; + } + + if (timer->core->mac[timer->delay_reg] != 0) { + e1000e_intrmgr_rearm_timer(timer); + } + + return false; +} + +static inline bool +e1000e_itr_should_postpone(E1000ECore *core) +{ + return e1000e_postpone_interrupt(&core->itr_intr_pending, &core->itr); +} + +static inline bool +e1000e_eitr_should_postpone(E1000ECore *core, int idx) +{ + return e1000e_postpone_interrupt(&core->eitr_intr_pending[idx], + &core->eitr[idx]); +} + +static void +e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) +{ + uint32_t effective_eiac; + + if (E1000_IVAR_ENTRY_VALID(int_cfg)) { + uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg); + if (vec < E1000E_MSIX_VEC_NUM) { + if (!e1000e_eitr_should_postpone(core, vec)) { + trace_e1000e_irq_msix_notify_vec(vec); + msix_notify(core->owner, vec); + } + } else { + trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg); + } + } else { + trace_e1000e_wrn_msix_invalid(cause, int_cfg); + } + + if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { + trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause); + e1000e_clear_ims_bits(core, core->mac[IAM] & cause); + } + + trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); + + if (core->mac[EIAC] & E1000_ICR_OTHER) { + effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) | + E1000_ICR_OTHER_CAUSES; + } else { + effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK; + } + core->mac[ICR] &= ~effective_eiac; +} + +static void +e1000e_msix_notify(E1000ECore *core, uint32_t causes) +{ + if (causes & E1000_ICR_RXQ0) { + e1000e_msix_notify_one(core, E1000_ICR_RXQ0, + E1000_IVAR_RXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_RXQ1) { + e1000e_msix_notify_one(core, E1000_ICR_RXQ1, + E1000_IVAR_RXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ0) { + e1000e_msix_notify_one(core, E1000_ICR_TXQ0, + E1000_IVAR_TXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ1) { + e1000e_msix_notify_one(core, E1000_ICR_TXQ1, + E1000_IVAR_TXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_OTHER) { + e1000e_msix_notify_one(core, E1000_ICR_OTHER, + E1000_IVAR_OTHER(core->mac[IVAR])); + } +} + +static void +e1000e_msix_clear_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) +{ + if (E1000_IVAR_ENTRY_VALID(int_cfg)) { + uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg); + if (vec < E1000E_MSIX_VEC_NUM) { + trace_e1000e_irq_msix_pending_clearing(cause, int_cfg, vec); + msix_clr_pending(core->owner, vec); + } else { + trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg); + } + } else { + trace_e1000e_wrn_msix_invalid(cause, int_cfg); + } +} + +static void +e1000e_msix_clear(E1000ECore *core, uint32_t causes) +{ + if (causes & E1000_ICR_RXQ0) { + e1000e_msix_clear_one(core, E1000_ICR_RXQ0, + E1000_IVAR_RXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_RXQ1) { + e1000e_msix_clear_one(core, E1000_ICR_RXQ1, + E1000_IVAR_RXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ0) { + e1000e_msix_clear_one(core, E1000_ICR_TXQ0, + E1000_IVAR_TXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ1) { + e1000e_msix_clear_one(core, E1000_ICR_TXQ1, + E1000_IVAR_TXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_OTHER) { + e1000e_msix_clear_one(core, E1000_ICR_OTHER, + E1000_IVAR_OTHER(core->mac[IVAR])); + } +} + +static inline void +e1000e_fix_icr_asserted(E1000ECore *core) +{ + core->mac[ICR] &= ~E1000_ICR_ASSERTED; + if (core->mac[ICR]) { + core->mac[ICR] |= E1000_ICR_ASSERTED; + } + + trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]); +} + +static void +e1000e_send_msi(E1000ECore *core, bool msix) +{ + uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED; + + if (msix) { + e1000e_msix_notify(core, causes); + } else { + if (!e1000e_itr_should_postpone(core)) { + trace_e1000e_irq_msi_notify(causes); + msi_notify(core->owner, 0); + } + } +} + +static void +e1000e_update_interrupt_state(E1000ECore *core) +{ + bool interrupts_pending; + bool is_msix = msix_enabled(core->owner); + + /* Set ICR[OTHER] for MSI-X */ + if (is_msix) { + if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) { + core->mac[ICR] |= E1000_ICR_OTHER; + trace_e1000e_irq_add_msi_other(core->mac[ICR]); + } + } + + e1000e_fix_icr_asserted(core); + + /* + * Make sure ICR and ICS registers have the same value. + * The spec says that the ICS register is write-only. However in practice, + * on real hardware ICS is readable, and for reads it has the same value as + * ICR (except that ICS does not have the clear on read behaviour of ICR). + * + * The VxWorks PRO/1000 driver uses this behaviour. + */ + core->mac[ICS] = core->mac[ICR]; + + interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false; + + trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], + core->mac[ICR], core->mac[IMS]); + + if (is_msix || msi_enabled(core->owner)) { + if (interrupts_pending) { + e1000e_send_msi(core, is_msix); + } + } else { + if (interrupts_pending) { + if (!e1000e_itr_should_postpone(core)) { + e1000e_raise_legacy_irq(core); + } + } else { + e1000e_lower_legacy_irq(core); + } + } +} + +static inline void +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) +{ + trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); + + val |= e1000e_intmgr_collect_delayed_causes(core); + core->mac[ICR] |= val; + + trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]); + + e1000e_update_interrupt_state(core); +} + +static inline void +e1000e_autoneg_timer(void *opaque) +{ + E1000ECore *core = opaque; + if (!qemu_get_queue(core->owner_nic)->link_down) { + e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); + e1000e_update_flowctl_status(core); + /* signal link status change to the guest */ + e1000e_set_interrupt_cause(core, E1000_ICR_LSC); + } +} + +static inline uint16_t +e1000e_get_reg_index_with_offset(const uint16_t *mac_reg_access, hwaddr addr) +{ + uint16_t index = (addr & 0x1ffff) >> 2; + return index + (mac_reg_access[index] & 0xfffe); +} + +static const char e1000e_phy_regcap[E1000E_PHY_PAGES][0x20] = { + [0] = { + [PHY_CTRL] = PHY_ANYPAGE | PHY_RW, + [PHY_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_ID1] = PHY_ANYPAGE | PHY_R, + [PHY_ID2] = PHY_ANYPAGE | PHY_R, + [PHY_AUTONEG_ADV] = PHY_ANYPAGE | PHY_RW, + [PHY_LP_ABILITY] = PHY_ANYPAGE | PHY_R, + [PHY_AUTONEG_EXP] = PHY_ANYPAGE | PHY_R, + [PHY_NEXT_PAGE_TX] = PHY_ANYPAGE | PHY_RW, + [PHY_LP_NEXT_PAGE] = PHY_ANYPAGE | PHY_R, + [PHY_1000T_CTRL] = PHY_ANYPAGE | PHY_RW, + [PHY_1000T_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_EXT_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_PAGE] = PHY_ANYPAGE | PHY_RW, + + [PHY_COPPER_CTRL1] = PHY_RW, + [PHY_COPPER_STAT1] = PHY_R, + [PHY_COPPER_CTRL3] = PHY_RW, + [PHY_RX_ERR_CNTR] = PHY_R, + [PHY_OEM_BITS] = PHY_RW, + [PHY_BIAS_1] = PHY_RW, + [PHY_BIAS_2] = PHY_RW, + [PHY_COPPER_INT_ENABLE] = PHY_RW, + [PHY_COPPER_STAT2] = PHY_R, + [PHY_COPPER_CTRL2] = PHY_RW + }, + [2] = { + [PHY_MAC_CTRL1] = PHY_RW, + [PHY_MAC_INT_ENABLE] = PHY_RW, + [PHY_MAC_STAT] = PHY_R, + [PHY_MAC_CTRL2] = PHY_RW + }, + [3] = { + [PHY_LED_03_FUNC_CTRL1] = PHY_RW, + [PHY_LED_03_POL_CTRL] = PHY_RW, + [PHY_LED_TIMER_CTRL] = PHY_RW, + [PHY_LED_45_CTRL] = PHY_RW + }, + [5] = { + [PHY_1000T_SKEW] = PHY_R, + [PHY_1000T_SWAP] = PHY_R + }, + [6] = { + [PHY_CRC_COUNTERS] = PHY_R + } +}; + +static bool +e1000e_phy_reg_check_cap(E1000ECore *core, uint32_t addr, + char cap, uint8_t *page) +{ + *page = + (e1000e_phy_regcap[0][addr] & PHY_ANYPAGE) ? 0 + : core->phy[0][PHY_PAGE]; + + if (*page >= E1000E_PHY_PAGES) { + return false; + } + + return e1000e_phy_regcap[*page][addr] & cap; +} + +static void +e1000e_phy_reg_write(E1000ECore *core, uint8_t page, + uint32_t addr, uint16_t data) +{ + assert(page < E1000E_PHY_PAGES); + assert(addr < E1000E_PHY_PAGE_SIZE); + + if (e1000e_phyreg_writeops[page][addr]) { + e1000e_phyreg_writeops[page][addr](core, addr, data); + } else { + core->phy[page][addr] = data; + } +} + +static void +e1000e_set_mdic(E1000ECore *core, int index, uint32_t val) +{ + uint32_t data = val & E1000_MDIC_DATA_MASK; + uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + uint8_t page; + + if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) { /* phy # */ + val = core->mac[MDIC] | E1000_MDIC_ERROR; + } else if (val & E1000_MDIC_OP_READ) { + if (!e1000e_phy_reg_check_cap(core, addr, PHY_R, &page)) { + trace_e1000e_core_mdic_read_unhandled(page, addr); + val |= E1000_MDIC_ERROR; + } else { + val = (val ^ data) | core->phy[page][addr]; + trace_e1000e_core_mdic_read(page, addr, val); + } + } else if (val & E1000_MDIC_OP_WRITE) { + if (!e1000e_phy_reg_check_cap(core, addr, PHY_W, &page)) { + trace_e1000e_core_mdic_write_unhandled(page, addr); + val |= E1000_MDIC_ERROR; + } else { + trace_e1000e_core_mdic_write(page, addr, data); + e1000e_phy_reg_write(core, page, addr, data); + } + } + core->mac[MDIC] = val | E1000_MDIC_READY; + + if (val & E1000_MDIC_INT_EN) { + e1000e_set_interrupt_cause(core, E1000_ICR_MDAC); + } +} + +static void +e1000e_set_rdt(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xffff; + trace_e1000e_rx_set_rdt(e1000e_mq_queue_idx(RDT0, index), val); + e1000e_start_recv(core); +} + +static void +e1000e_set_status(E1000ECore *core, int index, uint32_t val) +{ + if ((val & E1000_STATUS_PHYRA) == 0) { + core->mac[index] &= ~E1000_STATUS_PHYRA; + } +} + +static void +e1000e_set_ctrlext(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), + !!(val & E1000_CTRL_EXT_SPD_BYPS)); + + /* Zero self-clearing bits */ + val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST); + core->mac[CTRL_EXT] = val; +} + +static void +e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val) +{ + int i; + + core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; + + if (msix_enabled(core->owner)) { + return; + } + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + if (core->mac[PBACLR] & BIT(i)) { + msix_clr_pending(core->owner, i); + } + } +} + +static void +e1000e_set_fcrth(E1000ECore *core, int index, uint32_t val) +{ + core->mac[FCRTH] = val & 0xFFF8; +} + +static void +e1000e_set_fcrtl(E1000ECore *core, int index, uint32_t val) +{ + core->mac[FCRTL] = val & 0x8000FFF8; +} + +static inline void +e1000e_set_16bit(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xffff; +} + +static void +e1000e_set_12bit(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xfff; +} + +static void +e1000e_set_vet(E1000ECore *core, int index, uint32_t val) +{ + core->mac[VET] = val & 0xffff; + core->vet = le16_to_cpu(core->mac[VET]); + trace_e1000e_vlan_vet(core->vet); +} + +static void +e1000e_set_dlen(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & E1000_XDLEN_MASK; +} + +static void +e1000e_set_dbal(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & E1000_XDBAL_MASK; +} + +static void +e1000e_set_tctl(E1000ECore *core, int index, uint32_t val) +{ + E1000E_TxRing txr; + core->mac[index] = val; + + if (core->mac[TARC0] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, 0); + e1000e_start_xmit(core, &txr); + } + + if (core->mac[TARC1] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, 1); + e1000e_start_xmit(core, &txr); + } +} + +static void +e1000e_set_tdt(E1000ECore *core, int index, uint32_t val) +{ + E1000E_TxRing txr; + int qidx = e1000e_mq_queue_idx(TDT, index); + uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1; + + core->mac[index] = val & 0xffff; + + if (core->mac[tarc_reg] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, qidx); + e1000e_start_xmit(core, &txr); + } +} + +static void +e1000e_set_ics(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_irq_write_ics(val); + e1000e_set_interrupt_cause(core, val); +} + +static void +e1000e_set_icr(E1000ECore *core, int index, uint32_t val) +{ + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + trace_e1000e_irq_icr_process_iame(); + e1000e_clear_ims_bits(core, core->mac[IAM]); + } + + trace_e1000e_irq_icr_write(val, core->mac[ICR], core->mac[ICR] & ~val); + core->mac[ICR] &= ~val; + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_imc(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_irq_ims_clear_set_imc(val); + e1000e_clear_ims_bits(core, val); + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_ims(E1000ECore *core, int index, uint32_t val) +{ + static const uint32_t ims_ext_mask = + E1000_IMS_RXQ0 | E1000_IMS_RXQ1 | + E1000_IMS_TXQ0 | E1000_IMS_TXQ1 | + E1000_IMS_OTHER; + + static const uint32_t ims_valid_mask = + E1000_IMS_TXDW | E1000_IMS_TXQE | E1000_IMS_LSC | + E1000_IMS_RXDMT0 | E1000_IMS_RXO | E1000_IMS_RXT0 | + E1000_IMS_MDAC | E1000_IMS_TXD_LOW | E1000_IMS_SRPD | + E1000_IMS_ACK | E1000_IMS_MNG | E1000_IMS_RXQ0 | + E1000_IMS_RXQ1 | E1000_IMS_TXQ0 | E1000_IMS_TXQ1 | + E1000_IMS_OTHER; + + uint32_t valid_val = val & ims_valid_mask; + + trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val); + core->mac[IMS] |= valid_val; + + if ((valid_val & ims_ext_mask) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) && + msix_enabled(core->owner)) { + e1000e_msix_clear(core, valid_val); + } + + if ((valid_val == ims_valid_mask) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA)) { + trace_e1000e_irq_fire_all_timers(val); + e1000e_intrmgr_fire_all_timers(core); + } + + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_rdtr(E1000ECore *core, int index, uint32_t val) +{ + e1000e_set_16bit(core, index, val); + + if ((val & E1000_RDTR_FPD) && (core->rdtr.running)) { + trace_e1000e_irq_rdtr_fpd_running(); + e1000e_intrmgr_fire_delayed_interrupts(core); + } else { + trace_e1000e_irq_rdtr_fpd_not_running(); + } +} + +static void +e1000e_set_tidv(E1000ECore *core, int index, uint32_t val) +{ + e1000e_set_16bit(core, index, val); + + if ((val & E1000_TIDV_FPD) && (core->tidv.running)) { + trace_e1000e_irq_tidv_fpd_running(); + e1000e_intrmgr_fire_delayed_interrupts(core); + } else { + trace_e1000e_irq_tidv_fpd_not_running(); + } +} + +static uint32_t +e1000e_mac_readreg(E1000ECore *core, int index) +{ + return core->mac[index]; +} + +static uint32_t +e1000e_mac_ics_read(E1000ECore *core, int index) +{ + trace_e1000e_irq_read_ics(core->mac[ICS]); + return core->mac[ICS]; +} + +static uint32_t +e1000e_mac_ims_read(E1000ECore *core, int index) +{ + trace_e1000e_irq_read_ims(core->mac[IMS]); + return core->mac[IMS]; +} + +#define E1000E_LOW_BITS_READ_FUNC(num) \ + static uint32_t \ + e1000e_mac_low##num##_read(E1000ECore *core, int index) \ + { \ + return core->mac[index] & (BIT(num) - 1); \ + } \ + +#define E1000E_LOW_BITS_READ(num) \ + e1000e_mac_low##num##_read + +E1000E_LOW_BITS_READ_FUNC(4); +E1000E_LOW_BITS_READ_FUNC(6); +E1000E_LOW_BITS_READ_FUNC(11); +E1000E_LOW_BITS_READ_FUNC(13); +E1000E_LOW_BITS_READ_FUNC(16); + +static uint32_t +e1000e_mac_swsm_read(E1000ECore *core, int index) +{ + uint32_t val = core->mac[SWSM]; + core->mac[SWSM] = val | 1; + return val; +} + +static uint32_t +e1000e_mac_itr_read(E1000ECore *core, int index) +{ + return core->itr_guest_value; +} + +static uint32_t +e1000e_mac_eitr_read(E1000ECore *core, int index) +{ + return core->eitr_guest_value[index - EITR]; +} + +static uint32_t +e1000e_mac_icr_read(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[ICR]; + trace_e1000e_irq_icr_read_entry(ret); + + if (core->mac[IMS] == 0) { + trace_e1000e_irq_icr_clear_zero_ims(); + core->mac[ICR] = 0; + } + + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + trace_e1000e_irq_icr_clear_iame(); + core->mac[ICR] = 0; + trace_e1000e_irq_icr_process_iame(); + e1000e_clear_ims_bits(core, core->mac[IAM]); + } + + trace_e1000e_irq_icr_read_exit(core->mac[ICR]); + e1000e_update_interrupt_state(core); + return ret; +} + +static uint32_t +e1000e_mac_read_clr4(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[index]; + + core->mac[index] = 0; + return ret; +} + +static uint32_t +e1000e_mac_read_clr8(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[index]; + + core->mac[index] = 0; + core->mac[index - 1] = 0; + return ret; +} + +static uint32_t +e1000e_get_ctrl(E1000ECore *core, int index) +{ + uint32_t val = core->mac[CTRL]; + + trace_e1000e_link_read_params( + !!(val & E1000_CTRL_ASDE), + (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT, + !!(val & E1000_CTRL_FRCSPD), + !!(val & E1000_CTRL_FRCDPX), + !!(val & E1000_CTRL_RFCE), + !!(val & E1000_CTRL_TFCE)); + + return val; +} + +static uint32_t +e1000e_get_status(E1000ECore *core, int index) +{ + uint32_t res = core->mac[STATUS]; + + if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) { + res |= E1000_STATUS_GIO_MASTER_ENABLE; + } + + if (core->mac[CTRL] & E1000_CTRL_FRCDPX) { + res |= (core->mac[CTRL] & E1000_CTRL_FD) ? E1000_STATUS_FD : 0; + } else { + res |= E1000_STATUS_FD; + } + + if ((core->mac[CTRL] & E1000_CTRL_FRCSPD) || + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_SPD_BYPS)) { + switch (core->mac[CTRL] & E1000_CTRL_SPD_SEL) { + case E1000_CTRL_SPD_10: + res |= E1000_STATUS_SPEED_10; + break; + case E1000_CTRL_SPD_100: + res |= E1000_STATUS_SPEED_100; + break; + case E1000_CTRL_SPD_1000: + default: + res |= E1000_STATUS_SPEED_1000; + break; + } + } else { + res |= E1000_STATUS_SPEED_1000; + } + + trace_e1000e_link_status( + !!(res & E1000_STATUS_LU), + !!(res & E1000_STATUS_FD), + (res & E1000_STATUS_SPEED_MASK) >> E1000_STATUS_SPEED_SHIFT, + (res & E1000_STATUS_ASDV) >> E1000_STATUS_ASDV_SHIFT); + + return res; +} + +static uint32_t +e1000e_get_tarc(E1000ECore *core, int index) +{ + return core->mac[index] & ((BIT(11) - 1) | + BIT(27) | + BIT(28) | + BIT(29) | + BIT(30)); +} + +static void +e1000e_mac_writereg(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val; +} + +static void +e1000e_mac_setmacaddr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t macaddr[2]; + + core->mac[index] = val; + + macaddr[0] = cpu_to_le32(core->mac[RA]); + macaddr[1] = cpu_to_le32(core->mac[RA + 1]); + qemu_format_nic_info_str(qemu_get_queue(core->owner_nic), + (uint8_t *) macaddr); + + trace_e1000e_mac_set_sw(MAC_ARG(macaddr)); +} + +static void +e1000e_set_eecd(E1000ECore *core, int index, uint32_t val) +{ + static const uint32_t ro_bits = E1000_EECD_PRES | + E1000_EECD_AUTO_RD | + E1000_EECD_SIZE_EX_MASK; + + core->mac[EECD] = (core->mac[EECD] & ro_bits) | (val & ~ro_bits); +} + +static void +e1000e_set_eerd(E1000ECore *core, int index, uint32_t val) +{ + uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK; + uint32_t flags = 0; + uint32_t data = 0; + + if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) { + data = core->eeprom[addr]; + flags = E1000_EERW_DONE; + } + + core->mac[EERD] = flags | + (addr << E1000_EERW_ADDR_SHIFT) | + (data << E1000_EERW_DATA_SHIFT); +} + +static void +e1000e_set_eewr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK; + uint32_t data = (val >> E1000_EERW_DATA_SHIFT) & E1000_EERW_DATA_MASK; + uint32_t flags = 0; + + if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) { + core->eeprom[addr] = data; + flags = E1000_EERW_DONE; + } + + core->mac[EERD] = flags | + (addr << E1000_EERW_ADDR_SHIFT) | + (data << E1000_EERW_DATA_SHIFT); +} + +static void +e1000e_set_rxdctl(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RXDCTL] = core->mac[RXDCTL1] = val; +} + +static void +e1000e_set_itr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t interval = val & 0xffff; + + trace_e1000e_irq_itr_set(val); + + core->itr_guest_value = interval; + core->mac[index] = MAX(interval, E1000E_MIN_XITR); +} + +static void +e1000e_set_eitr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t interval = val & 0xffff; + uint32_t eitr_num = index - EITR; + + trace_e1000e_irq_eitr_set(eitr_num, val); + + core->eitr_guest_value[eitr_num] = interval; + core->mac[index] = MAX(interval, E1000E_MIN_XITR); +} + +static void +e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val) +{ + if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) { + hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero"); + } + + if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) { + hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero"); + } + + core->mac[PSRCTL] = val; +} + +static void +e1000e_update_rx_offloads(E1000ECore *core) +{ + int cso_state = e1000e_rx_l4_cso_enabled(core); + + trace_e1000e_rx_set_cso(cso_state); + + if (core->has_vnet) { + qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, + cso_state, 0, 0, 0, 0); + } +} + +static void +e1000e_set_rxcsum(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RXCSUM] = val; + e1000e_update_rx_offloads(core); +} + +static void +e1000e_set_gcr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t ro_bits = core->mac[GCR] & E1000_GCR_RO_BITS; + core->mac[GCR] = (val & ~E1000_GCR_RO_BITS) | ro_bits; +} + +#define e1000e_getreg(x) [x] = e1000e_mac_readreg +static uint32_t (*e1000e_macreg_readops[])(E1000ECore *, int) = { + e1000e_getreg(PBA), + e1000e_getreg(WUFC), + e1000e_getreg(MANC), + e1000e_getreg(TOTL), + e1000e_getreg(RDT0), + e1000e_getreg(RDBAH0), + e1000e_getreg(TDBAL1), + e1000e_getreg(RDLEN0), + e1000e_getreg(RDH1), + e1000e_getreg(LATECOL), + e1000e_getreg(SEC), + e1000e_getreg(XONTXC), + e1000e_getreg(WUS), + e1000e_getreg(GORCL), + e1000e_getreg(MGTPRC), + e1000e_getreg(EERD), + e1000e_getreg(EIAC), + e1000e_getreg(PSRCTL), + e1000e_getreg(MANC2H), + e1000e_getreg(RXCSUM), + e1000e_getreg(GSCL_3), + e1000e_getreg(GSCN_2), + e1000e_getreg(RSRPD), + e1000e_getreg(RDBAL1), + e1000e_getreg(FCAH), + e1000e_getreg(FCRTH), + e1000e_getreg(FLOP), + e1000e_getreg(FLASHT), + e1000e_getreg(RXSTMPH), + e1000e_getreg(TXSTMPL), + e1000e_getreg(TIMADJL), + e1000e_getreg(TXDCTL), + e1000e_getreg(RDH0), + e1000e_getreg(TDT1), + e1000e_getreg(TNCRS), + e1000e_getreg(RJC), + e1000e_getreg(IAM), + e1000e_getreg(GSCL_2), + e1000e_getreg(RDBAH1), + e1000e_getreg(FLSWDATA), + e1000e_getreg(RXSATRH), + e1000e_getreg(TIPG), + e1000e_getreg(FLMNGCTL), + e1000e_getreg(FLMNGCNT), + e1000e_getreg(TSYNCTXCTL), + e1000e_getreg(EXTCNF_SIZE), + e1000e_getreg(EXTCNF_CTRL), + e1000e_getreg(EEMNGDATA), + e1000e_getreg(CTRL_EXT), + e1000e_getreg(SYSTIMH), + e1000e_getreg(EEMNGCTL), + e1000e_getreg(FLMNGDATA), + e1000e_getreg(TSYNCRXCTL), + e1000e_getreg(TDH), + e1000e_getreg(LEDCTL), + e1000e_getreg(STATUS), + e1000e_getreg(TCTL), + e1000e_getreg(TDBAL), + e1000e_getreg(TDLEN), + e1000e_getreg(TDH1), + e1000e_getreg(RADV), + e1000e_getreg(ECOL), + e1000e_getreg(DC), + e1000e_getreg(RLEC), + e1000e_getreg(XOFFTXC), + e1000e_getreg(RFC), + e1000e_getreg(RNBC), + e1000e_getreg(MGTPTC), + e1000e_getreg(TIMINCA), + e1000e_getreg(RXCFGL), + e1000e_getreg(MFUTP01), + e1000e_getreg(FACTPS), + e1000e_getreg(GSCL_1), + e1000e_getreg(GSCN_0), + e1000e_getreg(GCR2), + e1000e_getreg(RDT1), + e1000e_getreg(PBACLR), + e1000e_getreg(FCTTV), + e1000e_getreg(EEWR), + e1000e_getreg(FLSWCTL), + e1000e_getreg(RXDCTL1), + e1000e_getreg(RXSATRL), + e1000e_getreg(SYSTIML), + e1000e_getreg(RXUDP), + e1000e_getreg(TORL), + e1000e_getreg(TDLEN1), + e1000e_getreg(MCC), + e1000e_getreg(WUC), + e1000e_getreg(EECD), + e1000e_getreg(MFUTP23), + e1000e_getreg(RAID), + e1000e_getreg(FCRTV), + e1000e_getreg(TXDCTL1), + e1000e_getreg(RCTL), + e1000e_getreg(TDT), + e1000e_getreg(MDIC), + e1000e_getreg(FCRUC), + e1000e_getreg(VET), + e1000e_getreg(RDBAL0), + e1000e_getreg(TDBAH1), + e1000e_getreg(RDTR), + e1000e_getreg(SCC), + e1000e_getreg(COLC), + e1000e_getreg(CEXTERR), + e1000e_getreg(XOFFRXC), + e1000e_getreg(IPAV), + e1000e_getreg(GOTCL), + e1000e_getreg(MGTPDC), + e1000e_getreg(GCR), + e1000e_getreg(IVAR), + e1000e_getreg(POEMB), + e1000e_getreg(MFVAL), + e1000e_getreg(FUNCTAG), + e1000e_getreg(GSCL_4), + e1000e_getreg(GSCN_3), + e1000e_getreg(MRQC), + e1000e_getreg(RDLEN1), + e1000e_getreg(FCT), + e1000e_getreg(FLA), + e1000e_getreg(FLOL), + e1000e_getreg(RXDCTL), + e1000e_getreg(RXSTMPL), + e1000e_getreg(TXSTMPH), + e1000e_getreg(TIMADJH), + e1000e_getreg(FCRTL), + e1000e_getreg(TDBAH), + e1000e_getreg(TADV), + e1000e_getreg(XONRXC), + e1000e_getreg(TSCTFC), + e1000e_getreg(RFCTL), + e1000e_getreg(GSCN_1), + e1000e_getreg(FCAL), + e1000e_getreg(FLSWCNT), + + [TOTH] = e1000e_mac_read_clr8, + [GOTCH] = e1000e_mac_read_clr8, + [PRC64] = e1000e_mac_read_clr4, + [PRC255] = e1000e_mac_read_clr4, + [PRC1023] = e1000e_mac_read_clr4, + [PTC64] = e1000e_mac_read_clr4, + [PTC255] = e1000e_mac_read_clr4, + [PTC1023] = e1000e_mac_read_clr4, + [GPRC] = e1000e_mac_read_clr4, + [TPT] = e1000e_mac_read_clr4, + [RUC] = e1000e_mac_read_clr4, + [BPRC] = e1000e_mac_read_clr4, + [MPTC] = e1000e_mac_read_clr4, + [IAC] = e1000e_mac_read_clr4, + [ICR] = e1000e_mac_icr_read, + [RDFH] = E1000E_LOW_BITS_READ(13), + [RDFHS] = E1000E_LOW_BITS_READ(13), + [RDFPC] = E1000E_LOW_BITS_READ(13), + [TDFH] = E1000E_LOW_BITS_READ(13), + [TDFHS] = E1000E_LOW_BITS_READ(13), + [STATUS] = e1000e_get_status, + [TARC0] = e1000e_get_tarc, + [PBS] = E1000E_LOW_BITS_READ(6), + [ICS] = e1000e_mac_ics_read, + [AIT] = E1000E_LOW_BITS_READ(16), + [TORH] = e1000e_mac_read_clr8, + [GORCH] = e1000e_mac_read_clr8, + [PRC127] = e1000e_mac_read_clr4, + [PRC511] = e1000e_mac_read_clr4, + [PRC1522] = e1000e_mac_read_clr4, + [PTC127] = e1000e_mac_read_clr4, + [PTC511] = e1000e_mac_read_clr4, + [PTC1522] = e1000e_mac_read_clr4, + [GPTC] = e1000e_mac_read_clr4, + [TPR] = e1000e_mac_read_clr4, + [ROC] = e1000e_mac_read_clr4, + [MPRC] = e1000e_mac_read_clr4, + [BPTC] = e1000e_mac_read_clr4, + [TSCTC] = e1000e_mac_read_clr4, + [ITR] = e1000e_mac_itr_read, + [RDFT] = E1000E_LOW_BITS_READ(13), + [RDFTS] = E1000E_LOW_BITS_READ(13), + [TDFPC] = E1000E_LOW_BITS_READ(13), + [TDFT] = E1000E_LOW_BITS_READ(13), + [TDFTS] = E1000E_LOW_BITS_READ(13), + [CTRL] = e1000e_get_ctrl, + [TARC1] = e1000e_get_tarc, + [SWSM] = e1000e_mac_swsm_read, + [IMS] = e1000e_mac_ims_read, + + [CRCERRS ... MPC] = e1000e_mac_readreg, + [IP6AT ... IP6AT + 3] = e1000e_mac_readreg, + [IP4AT ... IP4AT + 6] = e1000e_mac_readreg, + [RA ... RA + 31] = e1000e_mac_readreg, + [WUPM ... WUPM + 31] = e1000e_mac_readreg, + [MTA ... MTA + 127] = e1000e_mac_readreg, + [VFTA ... VFTA + 127] = e1000e_mac_readreg, + [FFMT ... FFMT + 254] = E1000E_LOW_BITS_READ(4), + [FFVT ... FFVT + 254] = e1000e_mac_readreg, + [MDEF ... MDEF + 7] = e1000e_mac_readreg, + [FFLT ... FFLT + 10] = E1000E_LOW_BITS_READ(11), + [FTFT ... FTFT + 254] = e1000e_mac_readreg, + [PBM ... PBM + 10239] = e1000e_mac_readreg, + [RETA ... RETA + 31] = e1000e_mac_readreg, + [RSSRK ... RSSRK + 31] = e1000e_mac_readreg, + [MAVTV0 ... MAVTV3] = e1000e_mac_readreg, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_mac_eitr_read +}; +enum { E1000E_NREADOPS = ARRAY_SIZE(e1000e_macreg_readops) }; + +#define e1000e_putreg(x) [x] = e1000e_mac_writereg +static void (*e1000e_macreg_writeops[])(E1000ECore *, int, uint32_t) = { + e1000e_putreg(PBA), + e1000e_putreg(SWSM), + e1000e_putreg(WUFC), + e1000e_putreg(RDBAH1), + e1000e_putreg(TDBAH), + e1000e_putreg(TXDCTL), + e1000e_putreg(RDBAH0), + e1000e_putreg(LEDCTL), + e1000e_putreg(FCAL), + e1000e_putreg(FCRUC), + e1000e_putreg(AIT), + e1000e_putreg(TDFH), + e1000e_putreg(TDFT), + e1000e_putreg(TDFHS), + e1000e_putreg(TDFTS), + e1000e_putreg(TDFPC), + e1000e_putreg(WUC), + e1000e_putreg(WUS), + e1000e_putreg(RDFH), + e1000e_putreg(RDFT), + e1000e_putreg(RDFHS), + e1000e_putreg(RDFTS), + e1000e_putreg(RDFPC), + e1000e_putreg(IPAV), + e1000e_putreg(TDBAH1), + e1000e_putreg(TIMINCA), + e1000e_putreg(IAM), + e1000e_putreg(EIAC), + e1000e_putreg(IVAR), + e1000e_putreg(TARC0), + e1000e_putreg(TARC1), + e1000e_putreg(FLSWDATA), + e1000e_putreg(POEMB), + e1000e_putreg(PBS), + e1000e_putreg(MFUTP01), + e1000e_putreg(MFUTP23), + e1000e_putreg(MANC), + e1000e_putreg(MANC2H), + e1000e_putreg(MFVAL), + e1000e_putreg(EXTCNF_CTRL), + e1000e_putreg(FACTPS), + e1000e_putreg(FUNCTAG), + e1000e_putreg(GSCL_1), + e1000e_putreg(GSCL_2), + e1000e_putreg(GSCL_3), + e1000e_putreg(GSCL_4), + e1000e_putreg(GSCN_0), + e1000e_putreg(GSCN_1), + e1000e_putreg(GSCN_2), + e1000e_putreg(GSCN_3), + e1000e_putreg(GCR2), + e1000e_putreg(MRQC), + e1000e_putreg(FLOP), + e1000e_putreg(FLOL), + e1000e_putreg(FLSWCTL), + e1000e_putreg(FLSWCNT), + e1000e_putreg(FLA), + e1000e_putreg(RXDCTL1), + e1000e_putreg(TXDCTL1), + e1000e_putreg(TIPG), + e1000e_putreg(RXSTMPH), + e1000e_putreg(RXSTMPL), + e1000e_putreg(RXSATRL), + e1000e_putreg(RXSATRH), + e1000e_putreg(TXSTMPL), + e1000e_putreg(TXSTMPH), + e1000e_putreg(SYSTIML), + e1000e_putreg(SYSTIMH), + e1000e_putreg(TIMADJL), + e1000e_putreg(TIMADJH), + e1000e_putreg(RXUDP), + e1000e_putreg(RXCFGL), + e1000e_putreg(TSYNCRXCTL), + e1000e_putreg(TSYNCTXCTL), + e1000e_putreg(FLSWDATA), + e1000e_putreg(EXTCNF_SIZE), + e1000e_putreg(EEMNGCTL), + e1000e_putreg(RA), + + [TDH1] = e1000e_set_16bit, + [TDT1] = e1000e_set_tdt, + [TCTL] = e1000e_set_tctl, + [TDT] = e1000e_set_tdt, + [MDIC] = e1000e_set_mdic, + [ICS] = e1000e_set_ics, + [TDH] = e1000e_set_16bit, + [RDH0] = e1000e_set_16bit, + [RDT0] = e1000e_set_rdt, + [IMC] = e1000e_set_imc, + [IMS] = e1000e_set_ims, + [ICR] = e1000e_set_icr, + [EECD] = e1000e_set_eecd, + [RCTL] = e1000e_set_rx_control, + [CTRL] = e1000e_set_ctrl, + [RDTR] = e1000e_set_rdtr, + [RADV] = e1000e_set_16bit, + [TADV] = e1000e_set_16bit, + [ITR] = e1000e_set_itr, + [EERD] = e1000e_set_eerd, + [GCR] = e1000e_set_gcr, + [PSRCTL] = e1000e_set_psrctl, + [RXCSUM] = e1000e_set_rxcsum, + [RAID] = e1000e_set_16bit, + [RSRPD] = e1000e_set_12bit, + [TIDV] = e1000e_set_tidv, + [TDLEN1] = e1000e_set_dlen, + [TDLEN] = e1000e_set_dlen, + [RDLEN0] = e1000e_set_dlen, + [RDLEN1] = e1000e_set_dlen, + [TDBAL] = e1000e_set_dbal, + [TDBAL1] = e1000e_set_dbal, + [RDBAL0] = e1000e_set_dbal, + [RDBAL1] = e1000e_set_dbal, + [RDH1] = e1000e_set_16bit, + [RDT1] = e1000e_set_rdt, + [STATUS] = e1000e_set_status, + [PBACLR] = e1000e_set_pbaclr, + [CTRL_EXT] = e1000e_set_ctrlext, + [FCAH] = e1000e_set_16bit, + [FCT] = e1000e_set_16bit, + [FCTTV] = e1000e_set_16bit, + [FCRTV] = e1000e_set_16bit, + [FCRTH] = e1000e_set_fcrth, + [FCRTL] = e1000e_set_fcrtl, + [VET] = e1000e_set_vet, + [RXDCTL] = e1000e_set_rxdctl, + [FLASHT] = e1000e_set_16bit, + [EEWR] = e1000e_set_eewr, + [CTRL_DUP] = e1000e_set_ctrl, + [RFCTL] = e1000e_set_rfctl, + [RA + 1] = e1000e_mac_setmacaddr, + + [IP6AT ... IP6AT + 3] = e1000e_mac_writereg, + [IP4AT ... IP4AT + 6] = e1000e_mac_writereg, + [RA + 2 ... RA + 31] = e1000e_mac_writereg, + [WUPM ... WUPM + 31] = e1000e_mac_writereg, + [MTA ... MTA + 127] = e1000e_mac_writereg, + [VFTA ... VFTA + 127] = e1000e_mac_writereg, + [FFMT ... FFMT + 254] = e1000e_mac_writereg, + [FFVT ... FFVT + 254] = e1000e_mac_writereg, + [PBM ... PBM + 10239] = e1000e_mac_writereg, + [MDEF ... MDEF + 7] = e1000e_mac_writereg, + [FFLT ... FFLT + 10] = e1000e_mac_writereg, + [FTFT ... FTFT + 254] = e1000e_mac_writereg, + [RETA ... RETA + 31] = e1000e_mac_writereg, + [RSSRK ... RSSRK + 31] = e1000e_mac_writereg, + [MAVTV0 ... MAVTV3] = e1000e_mac_writereg, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_set_eitr +}; +enum { E1000E_NWRITEOPS = ARRAY_SIZE(e1000e_macreg_writeops) }; + +enum { MAC_ACCESS_PARTIAL = 1 }; + +/* The array below combines alias offsets of the index values for the + * MAC registers that have aliases, with the indication of not fully + * implemented registers (lowest bit). This combination is possible + * because all of the offsets are even. */ +static const uint16_t mac_reg_access[E1000E_MAC_SIZE] = { + /* Alias index offsets */ + [FCRTL_A] = 0x07fe, [FCRTH_A] = 0x0802, + [RDH0_A] = 0x09bc, [RDT0_A] = 0x09bc, [RDTR_A] = 0x09c6, + [RDFH_A] = 0xe904, [RDFT_A] = 0xe904, + [TDH_A] = 0x0cf8, [TDT_A] = 0x0cf8, [TIDV_A] = 0x0cf8, + [TDFH_A] = 0xed00, [TDFT_A] = 0xed00, + [RA_A ... RA_A + 31] = 0x14f0, + [VFTA_A ... VFTA_A + 127] = 0x1400, + [RDBAL0_A ... RDLEN0_A] = 0x09bc, + [TDBAL_A ... TDLEN_A] = 0x0cf8, + /* Access options */ + [RDFH] = MAC_ACCESS_PARTIAL, [RDFT] = MAC_ACCESS_PARTIAL, + [RDFHS] = MAC_ACCESS_PARTIAL, [RDFTS] = MAC_ACCESS_PARTIAL, + [RDFPC] = MAC_ACCESS_PARTIAL, + [TDFH] = MAC_ACCESS_PARTIAL, [TDFT] = MAC_ACCESS_PARTIAL, + [TDFHS] = MAC_ACCESS_PARTIAL, [TDFTS] = MAC_ACCESS_PARTIAL, + [TDFPC] = MAC_ACCESS_PARTIAL, [EECD] = MAC_ACCESS_PARTIAL, + [PBM] = MAC_ACCESS_PARTIAL, [FLA] = MAC_ACCESS_PARTIAL, + [FCAL] = MAC_ACCESS_PARTIAL, [FCAH] = MAC_ACCESS_PARTIAL, + [FCT] = MAC_ACCESS_PARTIAL, [FCTTV] = MAC_ACCESS_PARTIAL, + [FCRTV] = MAC_ACCESS_PARTIAL, [FCRTL] = MAC_ACCESS_PARTIAL, + [FCRTH] = MAC_ACCESS_PARTIAL, [TXDCTL] = MAC_ACCESS_PARTIAL, + [TXDCTL1] = MAC_ACCESS_PARTIAL, + [MAVTV0 ... MAVTV3] = MAC_ACCESS_PARTIAL +}; + +void +e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size) +{ + uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr); + + if (index < E1000E_NWRITEOPS && e1000e_macreg_writeops[index]) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + trace_e1000e_wrn_regs_write_trivial(index << 2); + } + trace_e1000e_core_write(index << 2, size, val); + e1000e_macreg_writeops[index](core, index, val); + } else if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) { + trace_e1000e_wrn_regs_write_ro(index << 2, size, val); + } else { + trace_e1000e_wrn_regs_write_unknown(index << 2, size, val); + } +} + +uint64_t +e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size) +{ + uint64_t val; + uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr); + + if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + trace_e1000e_wrn_regs_read_trivial(index << 2); + } + val = e1000e_macreg_readops[index](core, index); + trace_e1000e_core_read(index << 2, size, val); + return val; + } else { + trace_e1000e_wrn_regs_read_unknown(index << 2, size); + } + return 0; +} + +static inline void +e1000e_autoneg_pause(E1000ECore *core) +{ + timer_del(core->autoneg_timer); +} + +static void +e1000e_autoneg_resume(E1000ECore *core) +{ + if (e1000e_have_autoneg(core) && + !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + qemu_get_queue(core->owner_nic)->link_down = false; + timer_mod(core->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void +e1000e_vm_state_change(void *opaque, int running, RunState state) +{ + E1000ECore *core = opaque; + + if (running) { + trace_e1000e_vm_state_running(); + e1000e_intrmgr_resume(core); + e1000e_autoneg_resume(core); + } else { + trace_e1000e_vm_state_stopped(); + e1000e_autoneg_pause(core); + e1000e_intrmgr_pause(core); + } +} + +void +e1000e_core_pci_realize(E1000ECore *core, + const uint16_t *eeprom_templ, + uint32_t eeprom_size, + const uint8_t *macaddr) +{ + int i; + + core->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + e1000e_autoneg_timer, core); + e1000e_intrmgr_pci_realize(core); + + core->vmstate = + qemu_add_vm_change_state_handler(e1000e_vm_state_change, core); + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, + E1000E_MAX_TX_FRAGS, core->has_vnet); + } + + net_rx_pkt_init(&core->rx_pkt, core->has_vnet); + + e1000x_core_prepare_eeprom(core->eeprom, + eeprom_templ, + eeprom_size, + PCI_DEVICE_GET_CLASS(core->owner)->device_id, + macaddr); + e1000e_update_rx_offloads(core); +} + +void +e1000e_core_pci_uninit(E1000ECore *core) +{ + int i; + + timer_del(core->autoneg_timer); + timer_free(core->autoneg_timer); + + e1000e_intrmgr_pci_unint(core); + + qemu_del_vm_change_state_handler(core->vmstate); + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + net_tx_pkt_reset(core->tx[i].tx_pkt); + net_tx_pkt_uninit(core->tx[i].tx_pkt); + } + + net_rx_pkt_uninit(core->rx_pkt); +} + +static const uint16_t +e1000e_phy_reg_init[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE] = { + [0] = { + [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB | + MII_CR_FULL_DUPLEX | + MII_CR_AUTO_NEG_EN, + + [PHY_STATUS] = MII_SR_EXTENDED_CAPS | + MII_SR_LINK_STATUS | + MII_SR_AUTONEG_CAPS | + MII_SR_PREAMBLE_SUPPRESS | + MII_SR_EXTENDED_STATUS | + MII_SR_10T_HD_CAPS | + MII_SR_10T_FD_CAPS | + MII_SR_100X_HD_CAPS | + MII_SR_100X_FD_CAPS, + + [PHY_ID1] = 0x141, + [PHY_ID2] = E1000_PHY_ID2_82574x, + [PHY_AUTONEG_ADV] = 0xde1, + [PHY_LP_ABILITY] = 0x7e0, + [PHY_AUTONEG_EXP] = BIT(2), + [PHY_NEXT_PAGE_TX] = BIT(0) | BIT(13), + [PHY_1000T_CTRL] = BIT(8) | BIT(9) | BIT(10) | BIT(11), + [PHY_1000T_STATUS] = 0x3c00, + [PHY_EXT_STATUS] = BIT(12) | BIT(13), + + [PHY_COPPER_CTRL1] = BIT(5) | BIT(6) | BIT(8) | BIT(9) | + BIT(12) | BIT(13), + [PHY_COPPER_STAT1] = BIT(3) | BIT(10) | BIT(11) | BIT(13) | BIT(15) + }, + [2] = { + [PHY_MAC_CTRL1] = BIT(3) | BIT(7), + [PHY_MAC_CTRL2] = BIT(1) | BIT(2) | BIT(6) | BIT(12) + }, + [3] = { + [PHY_LED_TIMER_CTRL] = BIT(0) | BIT(2) | BIT(14) + } +}; + +static const uint32_t e1000e_mac_reg_init[] = { + [PBA] = 0x00140014, + [LEDCTL] = BIT(1) | BIT(8) | BIT(9) | BIT(15) | BIT(17) | BIT(18), + [EXTCNF_CTRL] = BIT(3), + [EEMNGCTL] = BIT(31), + [FLASHT] = 0x2, + [FLSWCTL] = BIT(30) | BIT(31), + [FLOL] = BIT(0), + [RXDCTL] = BIT(16), + [RXDCTL1] = BIT(16), + [TIPG] = 0x8 | (0x8 << 10) | (0x6 << 20), + [RXCFGL] = 0x88F7, + [RXUDP] = 0x319, + [CTRL] = E1000_CTRL_FD | E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | + E1000_CTRL_SPD_1000 | E1000_CTRL_SLU | + E1000_CTRL_ADVD3WUC, + [STATUS] = E1000_STATUS_ASDV_1000 | E1000_STATUS_LU, + [PSRCTL] = (2 << E1000_PSRCTL_BSIZE0_SHIFT) | + (4 << E1000_PSRCTL_BSIZE1_SHIFT) | + (4 << E1000_PSRCTL_BSIZE2_SHIFT), + [TARC0] = 0x3 | E1000_TARC_ENABLE, + [TARC1] = 0x3 | E1000_TARC_ENABLE, + [EECD] = E1000_EECD_AUTO_RD | E1000_EECD_PRES, + [EERD] = E1000_EERW_DONE, + [EEWR] = E1000_EERW_DONE, + [GCR] = E1000_L0S_ADJUST | + E1000_L1_ENTRY_LATENCY_MSB | + E1000_L1_ENTRY_LATENCY_LSB, + [TDFH] = 0x600, + [TDFT] = 0x600, + [TDFHS] = 0x600, + [TDFTS] = 0x600, + [POEMB] = 0x30D, + [PBS] = 0x028, + [MANC] = E1000_MANC_DIS_IP_CHK_ARP, + [FACTPS] = E1000_FACTPS_LAN0_ON | 0x20000000, + [SWSM] = 1, + [RXCSUM] = E1000_RXCSUM_IPOFLD | E1000_RXCSUM_TUOFLD, + [ITR] = E1000E_MIN_XITR, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = E1000E_MIN_XITR, +}; + +void +e1000e_core_reset(E1000ECore *core) +{ + int i; + + timer_del(core->autoneg_timer); + + e1000e_intrmgr_reset(core); + + memset(core->phy, 0, sizeof core->phy); + memmove(core->phy, e1000e_phy_reg_init, sizeof e1000e_phy_reg_init); + memset(core->mac, 0, sizeof core->mac); + memmove(core->mac, e1000e_mac_reg_init, sizeof e1000e_mac_reg_init); + + core->rxbuf_min_shift = 1 + E1000_RING_DESC_LEN_SHIFT; + + if (qemu_get_queue(core->owner_nic)->link_down) { + e1000e_link_down(core); + } + + e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); + + for (i = 0; i < ARRAY_SIZE(core->tx); i++) { + net_tx_pkt_reset(core->tx[i].tx_pkt); + memset(&core->tx[i].props, 0, sizeof(core->tx[i].props)); + core->tx[i].skip_cp = false; + } +} + +void e1000e_core_pre_save(E1000ECore *core) +{ + int i; + NetClientState *nc = qemu_get_queue(core->owner_nic); + + /* + * If link is down and auto-negotiation is supported and ongoing, + * complete auto-negotiation immediately. This allows us to look + * at MII_SR_AUTONEG_COMPLETE to infer link status on load. + */ + if (nc->link_down && e1000e_have_autoneg(core)) { + core->phy[0][PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + e1000e_update_flowctl_status(core); + } + + for (i = 0; i < ARRAY_SIZE(core->tx); i++) { + if (net_tx_pkt_has_fragments(core->tx[i].tx_pkt)) { + core->tx[i].skip_cp = true; + } + } +} + +int +e1000e_core_post_load(E1000ECore *core) +{ + NetClientState *nc = qemu_get_queue(core->owner_nic); + + /* nc.link_down can't be migrated, so infer link_down according + * to link status bit in core.mac[STATUS]. + */ + nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0; + + return 0; +} diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h new file mode 100644 index 0000000000..5f413a9e08 --- /dev/null +++ b/hw/net/e1000e_core.h @@ -0,0 +1,146 @@ +/* +* Core code for QEMU e1000e emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman <dmitry@daynix.com> +* Leonid Bloch <leonid@daynix.com> +* Yan Vugenfirer <yan@daynix.com> +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#define E1000E_PHY_PAGE_SIZE (0x20) +#define E1000E_PHY_PAGES (0x07) +#define E1000E_MAC_SIZE (0x8000) +#define E1000E_EEPROM_SIZE (64) +#define E1000E_MSIX_VEC_NUM (5) +#define E1000E_NUM_QUEUES (2) + +typedef struct E1000Core E1000ECore; + +enum { PHY_R = BIT(0), + PHY_W = BIT(1), + PHY_RW = PHY_R | PHY_W, + PHY_ANYPAGE = BIT(2) }; + +typedef struct E1000IntrDelayTimer_st { + QEMUTimer *timer; + bool running; + uint32_t delay_reg; + uint32_t delay_resolution_ns; + E1000ECore *core; +} E1000IntrDelayTimer; + +struct E1000Core { + uint32_t mac[E1000E_MAC_SIZE]; + uint16_t phy[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]; + uint16_t eeprom[E1000E_EEPROM_SIZE]; + + uint32_t rxbuf_sizes[E1000_PSRCTL_BUFFS_PER_DESC]; + uint32_t rx_desc_buf_size; + uint32_t rxbuf_min_shift; + uint8_t rx_desc_len; + + QEMUTimer *autoneg_timer; + + struct e1000e_tx { + e1000x_txd_props props; + + bool skip_cp; + struct NetTxPkt *tx_pkt; + } tx[E1000E_NUM_QUEUES]; + + struct NetRxPkt *rx_pkt; + + bool has_vnet; + int max_queue_num; + + /* Interrupt moderation management */ + uint32_t delayed_causes; + + E1000IntrDelayTimer radv; + E1000IntrDelayTimer rdtr; + E1000IntrDelayTimer raid; + + E1000IntrDelayTimer tadv; + E1000IntrDelayTimer tidv; + + E1000IntrDelayTimer itr; + bool itr_intr_pending; + + E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM]; + bool eitr_intr_pending[E1000E_MSIX_VEC_NUM]; + + VMChangeStateEntry *vmstate; + + uint32_t itr_guest_value; + uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM]; + + uint16_t vet; + + uint8_t permanent_mac[ETH_ALEN]; + + NICState *owner_nic; + PCIDevice *owner; + void (*owner_start_recv)(PCIDevice *d); +}; + +void +e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size); + +uint64_t +e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size); + +void +e1000e_core_pci_realize(E1000ECore *regs, + const uint16_t *eeprom_templ, + uint32_t eeprom_size, + const uint8_t *macaddr); + +void +e1000e_core_reset(E1000ECore *core); + +void +e1000e_core_pre_save(E1000ECore *core); + +int +e1000e_core_post_load(E1000ECore *core); + +void +e1000e_core_set_link_status(E1000ECore *core); + +void +e1000e_core_pci_uninit(E1000ECore *core); + +int +e1000e_can_receive(E1000ECore *core); + +ssize_t +e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size); + +ssize_t +e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); |