diff options
author | Scott Feldman <sfeldma@gmail.com> | 2015-03-13 21:09:30 -0700 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2015-05-11 14:49:04 +0100 |
commit | dc488f888060afdc129e0cc8812cf50c4c083423 (patch) | |
tree | e0f534912a160080fa7cbb726894b6a02c1ff2b2 /hw/net/rocker/rocker_desc.c | |
parent | dc407ae8a75d03cf48e114d3812d077fa29a8bd9 (diff) |
rocker: add new rocker switch device
Rocker is a simulated ethernet switch device. The device supports up to 62
front-panel ports and supports L2 switching and L3 routing functions, as well
as L2/L3/L4 ACLs. The device presents a single PCI device for each switch,
with a memory-mapped register space for device driver access.
Rocker device is invoked with -device, for example a 4-port switch:
-device rocker,name=sw1,len-ports=4,ports[0]=dev0,ports[1]=dev1, \
ports[2]=dev2,ports[3]=dev3
Each port is a netdev and can be paired with using -netdev id=<port name>.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David Ahern <dsahern@gmail.com>
Message-id: 1426306173-24884-7-git-send-email-sfeldma@gmail.com
rocker: fix clang compiler errors
Consolidate all forward typedef declarations to rocker.h.
Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
rocker: add support for flow modification
We had support for flow add/del. This adds support for flow mod. I needed
this for L3 support where an existing route is modified using NLM_F_REPLACE.
For example:
ip route add 12.0.0.0/30 nexthop via 11.0.0.1 dev swp1
ip route change 12.0.0.0/30 nexthop via 11.0.0.9 dev swp2
The first cmd adds the route. The second cmd changes the existing route by
changing its nexthop info.
In the device, a mod operation results in the matching flow enty being modified
with the new settings. This is atomic to the device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/net/rocker/rocker_desc.c')
-rw-r--r-- | hw/net/rocker/rocker_desc.c | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/hw/net/rocker/rocker_desc.c b/hw/net/rocker/rocker_desc.c new file mode 100644 index 0000000000..9d896fe470 --- /dev/null +++ b/hw/net/rocker/rocker_desc.c @@ -0,0 +1,377 @@ +/* + * QEMU rocker switch emulation - Descriptor ring support + * + * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "net/net.h" +#include "hw/hw.h" +#include "hw/pci/pci.h" + +#include "rocker.h" +#include "rocker_hw.h" +#include "rocker_desc.h" + +struct desc_ring { + hwaddr base_addr; + uint32_t size; + uint32_t head; + uint32_t tail; + uint32_t ctrl; + uint32_t credits; + Rocker *r; + DescInfo *info; + int index; + desc_ring_consume *consume; + unsigned msix_vector; +}; + +struct desc_info { + DescRing *ring; + RockerDesc desc; + char *buf; + size_t buf_size; +}; + +uint16_t desc_buf_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.buf_size); +} + +uint16_t desc_tlv_size(DescInfo *info) +{ + return le16_to_cpu(info->desc.tlv_size); +} + +char *desc_get_buf(DescInfo *info, bool read_only) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + size_t size = read_only ? le16_to_cpu(info->desc.tlv_size) : + le16_to_cpu(info->desc.buf_size); + + if (size > info->buf_size) { + info->buf = g_realloc(info->buf, size); + info->buf_size = size; + } + + if (!info->buf) { + return NULL; + } + + if (pci_dma_read(dev, le64_to_cpu(info->desc.buf_addr), info->buf, size)) { + return NULL; + } + + return info->buf; +} + +int desc_set_buf(DescInfo *info, size_t tlv_size) +{ + PCIDevice *dev = PCI_DEVICE(info->ring->r); + + if (tlv_size > info->buf_size) { + DPRINTF("ERROR: trying to write more to desc buf than it " + "can hold buf_size %zu tlv_size %zu\n", + info->buf_size, tlv_size); + return -ROCKER_EMSGSIZE; + } + + info->desc.tlv_size = cpu_to_le16(tlv_size); + pci_dma_write(dev, le64_to_cpu(info->desc.buf_addr), info->buf, tlv_size); + + return ROCKER_OK; +} + +DescRing *desc_get_ring(DescInfo *info) +{ + return info->ring; +} + +int desc_ring_index(DescRing *ring) +{ + return ring->index; +} + +static bool desc_ring_empty(DescRing *ring) +{ + return ring->head == ring->tail; +} + +bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr) +{ + if (base_addr & 0x7) { + DPRINTF("ERROR: ring[%d] desc base addr (0x" TARGET_FMT_plx + ") not 8-byte aligned\n", ring->index, base_addr); + return false; + } + + ring->base_addr = base_addr; + + return true; +} + +uint64_t desc_ring_get_base_addr(DescRing *ring) +{ + return ring->base_addr; +} + +bool desc_ring_set_size(DescRing *ring, uint32_t size) +{ + int i; + + if (size < 2 || size > 0x10000 || (size & (size - 1))) { + DPRINTF("ERROR: ring[%d] size (%d) not a power of 2 " + "or in range [2, 64K]\n", ring->index, size); + return false; + } + + for (i = 0; i < ring->size; i++) { + if (ring->info[i].buf) { + g_free(ring->info[i].buf); + } + } + + ring->size = size; + ring->head = ring->tail = 0; + + ring->info = g_realloc(ring->info, size * sizeof(DescInfo)); + if (!ring->info) { + return false; + } + + memset(ring->info, 0, size * sizeof(DescInfo)); + + for (i = 0; i < size; i++) { + ring->info[i].ring = ring; + } + + return true; +} + +uint32_t desc_ring_get_size(DescRing *ring) +{ + return ring->size; +} + +static DescInfo *desc_read(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_read(dev, addr, &info->desc, sizeof(info->desc)); + + return info; +} + +static void desc_write(DescRing *ring, uint32_t index) +{ + PCIDevice *dev = PCI_DEVICE(ring->r); + DescInfo *info = &ring->info[index]; + hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index); + + pci_dma_write(dev, addr, &info->desc, sizeof(info->desc)); +} + +static bool desc_ring_base_addr_check(DescRing *ring) +{ + if (!ring->base_addr) { + DPRINTF("ERROR: ring[%d] not-initialized desc base address!\n", + ring->index); + return false; + } + return true; +} + +static DescInfo *__desc_ring_fetch_desc(DescRing *ring) +{ + return desc_read(ring, ring->tail); +} + +DescInfo *desc_ring_fetch_desc(DescRing *ring) +{ + if (desc_ring_empty(ring) || !desc_ring_base_addr_check(ring)) { + return NULL; + } + + return desc_read(ring, ring->tail); +} + +static bool __desc_ring_post_desc(DescRing *ring, int err) +{ + uint16_t comp_err = 0x8000 | (uint16_t)-err; + DescInfo *info = &ring->info[ring->tail]; + + info->desc.comp_err = cpu_to_le16(comp_err); + desc_write(ring, ring->tail); + ring->tail = (ring->tail + 1) % ring->size; + + /* return true if starting credit count */ + + return ring->credits++ == 0; +} + +bool desc_ring_post_desc(DescRing *ring, int err) +{ + if (desc_ring_empty(ring)) { + DPRINTF("ERROR: ring[%d] trying to post desc to empty ring\n", + ring->index); + return false; + } + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + return __desc_ring_post_desc(ring, err); +} + +static bool ring_pump(DescRing *ring) +{ + DescInfo *info; + bool primed = false; + int err; + + /* If the ring has a consumer, call consumer for each + * desc starting at tail and stopping when tail reaches + * head (the empty ring condition). + */ + + if (ring->consume) { + while (ring->head != ring->tail) { + info = __desc_ring_fetch_desc(ring); + err = ring->consume(ring->r, info); + if (__desc_ring_post_desc(ring, err)) { + primed = true; + } + } + } + + return primed; +} + +bool desc_ring_set_head(DescRing *ring, uint32_t new) +{ + uint32_t tail = ring->tail; + uint32_t head = ring->head; + + if (!desc_ring_base_addr_check(ring)) { + return false; + } + + if (new >= ring->size) { + DPRINTF("ERROR: trying to set head (%d) past ring[%d] size (%d)\n", + new, ring->index, ring->size); + return false; + } + + if (((head < tail) && ((new >= tail) || (new < head))) || + ((head > tail) && ((new >= tail) && (new < head)))) { + DPRINTF("ERROR: trying to wrap ring[%d] " + "(head %d, tail %d, new head %d)\n", + ring->index, head, tail, new); + return false; + } + + if (new == ring->head) { + DPRINTF("WARNING: setting head (%d) to current head position\n", new); + } + + ring->head = new; + + return ring_pump(ring); +} + +uint32_t desc_ring_get_head(DescRing *ring) +{ + return ring->head; +} + +uint32_t desc_ring_get_tail(DescRing *ring) +{ + return ring->tail; +} + +void desc_ring_set_ctrl(DescRing *ring, uint32_t val) +{ + if (val & ROCKER_DMA_DESC_CTRL_RESET) { + DPRINTF("ring[%d] resetting\n", ring->index); + desc_ring_reset(ring); + } +} + +bool desc_ring_ret_credits(DescRing *ring, uint32_t credits) +{ + if (credits > ring->credits) { + DPRINTF("ERROR: trying to return more credits (%d) " + "than are outstanding (%d)\n", credits, ring->credits); + ring->credits = 0; + return false; + } + + ring->credits -= credits; + + /* return true if credits are still outstanding */ + + return ring->credits > 0; +} + +uint32_t desc_ring_get_credits(DescRing *ring) +{ + return ring->credits; +} + +void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume, + unsigned vector) +{ + ring->consume = consume; + ring->msix_vector = vector; +} + +unsigned desc_ring_get_msix_vector(DescRing *ring) +{ + return ring->msix_vector; +} + +DescRing *desc_ring_alloc(Rocker *r, int index) +{ + DescRing *ring; + + ring = g_malloc0(sizeof(DescRing)); + if (!ring) { + return NULL; + } + + ring->r = r; + ring->index = index; + + return ring; +} + +void desc_ring_free(DescRing *ring) +{ + if (ring->info) { + g_free(ring->info); + } + g_free(ring); +} + +void desc_ring_reset(DescRing *ring) +{ + ring->base_addr = 0; + ring->size = 0; + ring->head = 0; + ring->tail = 0; + ring->ctrl = 0; + ring->credits = 0; +} |