aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-05-12 10:40:31 +0100
committerPeter Maydell <peter.maydell@linaro.org>2015-05-12 10:40:31 +0100
commit19fbe5084c1da6af95177c86e4cab64241d479a8 (patch)
tree876b9c0cb523ca69238a48f9e36b09506c9853b8
parent704eb1c09963149db4a3407d5ba173ba2a9244bb (diff)
parent7db161f6dd144760b2912026d992837ef80ca7e7 (diff)
Merge remote-tracking branch 'remotes/stefanha/tags/net-pull-request' into staging
# gpg: Signature made Mon May 11 16:25:58 2015 BST using RSA key ID 81AB73C8 # gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" # gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" * remotes/stefanha/tags/net-pull-request: rocker: timestamp on the debug logs helps correlate with events in the VM MAINTAINERS: add rocker rocker: add tests rocker: add new rocker switch device pci: add network device class 'other' for network switches pci: add rocker device ID rocker: add register programming guide virtio-net: use qemu_mac_strdup_printf net: add MAC address string printer Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS6
-rw-r--r--default-configs/pci.mak1
-rw-r--r--docs/specs/pci-ids.txt1
-rw-r--r--docs/specs/rocker.txt1009
-rw-r--r--hw/net/Makefile.objs4
-rw-r--r--hw/net/rocker/rocker.c1480
-rw-r--r--hw/net/rocker/rocker.h84
-rw-r--r--hw/net/rocker/rocker_desc.c377
-rw-r--r--hw/net/rocker/rocker_desc.h53
-rw-r--r--hw/net/rocker/rocker_fp.c234
-rw-r--r--hw/net/rocker/rocker_fp.h51
-rw-r--r--hw/net/rocker/rocker_hw.h491
-rw-r--r--hw/net/rocker/rocker_of_dpa.c2315
-rw-r--r--hw/net/rocker/rocker_of_dpa.h22
-rw-r--r--hw/net/rocker/rocker_tlv.h244
-rw-r--r--hw/net/rocker/rocker_world.c106
-rw-r--r--hw/net/rocker/rocker_world.h60
-rw-r--r--hw/net/virtio-net.c12
-rw-r--r--include/hw/pci/pci.h1
-rw-r--r--include/hw/pci/pci_ids.h1
-rw-r--r--include/net/net.h1
-rw-r--r--net/net.c7
-rw-r--r--tests/rocker/README5
-rwxr-xr-xtests/rocker/all19
-rwxr-xr-xtests/rocker/bridge48
-rwxr-xr-xtests/rocker/bridge-stp57
-rwxr-xr-xtests/rocker/bridge-vlan57
-rwxr-xr-xtests/rocker/bridge-vlan-stp69
-rwxr-xr-xtests/rocker/port22
-rw-r--r--tests/rocker/tut.dot8
30 files changed, 6836 insertions, 9 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d858c49566..b3552b2c19 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -760,6 +760,12 @@ S: Maintained
F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi*
+Rocker
+M: Scott Feldman <sfeldma@gmail.com>
+M: Jiri Pirko <jiri@resnulli.us>
+S: Maintained
+F: hw/net/rocker/
+
Subsystems
----------
Audio
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 58a2c0ace1..7e10903baa 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -36,3 +36,4 @@ CONFIG_EDU=y
CONFIG_VGA=y
CONFIG_VGA_PCI=y
CONFIG_IVSHMEM=$(CONFIG_KVM)
+CONFIG_ROCKER=y
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index c6732fe003..e4a44908cb 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -45,6 +45,7 @@ PCI devices (other than virtio):
1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
1b36:0005 PCI test device (docs/specs/pci-testdev.txt)
+1b36:0006 PCI Rocker Ethernet switch device
1b36:0007 PCI SD Card Host Controller Interface (SDHCI)
All these devices are documented in docs/specs.
diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt
new file mode 100644
index 0000000000..1e7e1e1859
--- /dev/null
+++ b/docs/specs/rocker.txt
@@ -0,0 +1,1009 @@
+Rocker Network Switch Register Programming Guide
+Copyright (c) Scott Feldman <sfeldma@gmail.com>
+Copyright (c) Neil Horman <nhorman@tuxdriver.com>
+Version 0.11, 12/29/2014
+
+LICENSE
+=======
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+SECTION 1: Introduction
+=======================
+
+Overview
+--------
+
+This document describes the hardware/software interface for the Rocker switch
+device. The intended audience is authors of OS drivers and device emulation
+software.
+
+Notations and Conventions
+-------------------------
+
+o In register descriptions, [n:m] indicates a range from bit n to bit m,
+inclusive.
+o Use of leading 0x indicates a hexadecimal number.
+o Use of leading 0b indicates a binary number.
+o The use of RSVD or Reserved indicates that a bit or field is reserved for
+future use.
+o Field width is in bytes, unless otherwise noted.
+o Register are (R) read-only, (R/W) read/write, (W) write-only, or (COR) clear
+on read
+o TLV values in network-byte-order are designated with (N).
+
+
+SECTION 2: PCI Configuration Registers
+======================================
+
+PCI Configuration Space
+-----------------------
+
+Each switch instance registers as a PCI device with PCI configuration space:
+
+ offset width description value
+ ---------------------------------------------
+ 0x0 2 Vendor ID 0x1b36
+ 0x2 2 Device ID 0x0006
+ 0x4 4 Command/Status
+ 0x8 1 Revision ID 0x01
+ 0x9 3 Class code 0x2800
+ 0xC 1 Cache line size
+ 0xD 1 Latency timer
+ 0xE 1 Header type
+ 0xF 1 Built-in self test
+ 0x10 4 Base address low
+ 0x14 4 Base address high
+ 0x18-28 Reserved
+ 0x2C 2 Subsystem vendor ID *
+ 0x2E 2 Subsystem ID *
+ 0x30-38 Reserved
+ 0x3C 1 Interrupt line
+ 0x3D 1 Interrupt pin 0x00
+ 0x3E 1 Min grant 0x00
+ 0x3D 1 Max latency 0x00
+ 0x40 1 TRDY timeout
+ 0x41 1 Retry count
+ 0x42 2 Reserved
+
+
+* Assigned by sub-system implementation
+
+SECTION 3: Memory-Mapped Register Space
+=======================================
+
+There are two memory-mapped BARs. BAR0 maps device register space and is
+0x2000 in size. BAR1 maps MSI-X vector and PBA tables and is also 0x2000 in
+size, allowing for 256 MSI-X vectors.
+
+All registers are 4 or 8 bytes long. It is assumed host software will access 4
+byte registers with one 4-byte access, and 8 byte registers with either two
+4-byte accesses or a single 8-byte access. In the case of two 4-byte accesses,
+access must be lower and then upper 4-bytes, in that order.
+
+BAR0 device register space is organized as follows:
+
+ offset description
+ ------------------------------------------------------
+ 0x0000-0x000f Bogus registers to catch misbehaving
+ drivers. Writes do nothing. Reads
+ back as 0xDEADBABE.
+ 0x0010-0x00ff Test registers
+ 0x0300-0x03ff General purpose registers
+ 0x1000-0x1fff Descriptor control
+
+Holes in register space are reserved. Writes to reserved registers do nothing.
+Reads to reserved registers read back as 0.
+
+No fancy stuff like write-combining is enabled on any of the registers.
+
+BAR1 MSI-X register space is organized as follows:
+
+ offset description
+ ------------------------------------------------------
+ 0x0000-0x0fff MSI-X vector table (256 vectors total)
+ 0x1000-0x1fff MSI-X PBA table
+
+
+SECTION 4: Interrupts, DMA, and Endianness
+==========================================
+
+PCI Interrupts
+--------------
+
+The device supports only MSI-X interrupts. BAR1 memory-mapped region contains
+the MSI-X vector and PBA tables, with support for up to 256 MSI-X vectors.
+
+The vector assignment is:
+
+ vector description
+ -----------------------------------------------------
+ 0 Command descriptor ring completion
+ 1 Event descriptor ring completion
+ 2 Test operation completion
+ 3 RSVD
+ 4-255 Tx and Rx descriptor ring completion
+ Tx vector is even
+ Rx vector is odd
+
+A MSI-X vector table entry is 16 bytes:
+
+ field offset width description
+ -------------------------------------------------------------
+ lower_addr 0x0 4 [31:2] message address[31:2]
+ [1:0] Rsvd (4 byte alignment
+ required)
+ upper_addr 0x4 4 [31:19] Rsvd
+ [14:0] message address[46:32]
+ data 0x8 4 message data[31:0]
+ control 0xc 4 [31:1] Rsvd
+ [0] mask (0 = enable,
+ 1 = masked)
+
+Software should install the Interrupt Service Routine (ISR) before any ports
+are enabled or any commands are issued on the command ring.
+
+DMA Operations
+--------------
+
+DMA operations are used for packet DMA to/from the CPU, command and event
+processing. Command processing includes statistical counters and table dumps,
+table insertion/deletion, and more. Event processing provides an async
+notification method for device-originating events. Each DMA operation has a
+set of control registers to manage a descriptor ring. The descriptor rings are
+allocated from contiguous host DMA-able memory and registers specify the rings
+base address, size and current head and tail indices. Software always writes
+the head, and hardware always writes the tail.
+
+The higher-order bit of DMA_DESC_COMP_ERR is used to mark hardware completion
+of a descriptor. Software will clear this bit when posting a descriptor to the
+ring, and hardware will set this bit when the descriptor is complete.
+
+Descriptor ring sizes must be a power of 2 and range from 2 to 64K entries.
+Descriptor rings' base address must be 8-byte aligned. Descriptors must be
+packed within ring. Each descriptor in each ring must also be aligned on an 8
+byte boundary. Each descriptor ring will have these registers:
+
+ DMA_DESC_xxx_BASE_ADDR, offset 0x1000 + (x * 32), 64-bit, (R/W)
+ DMA_DESC_xxx_SIZE, offset 0x1008 + (x * 32), 32-bit, (R/W)
+ DMA_DESC_xxx_HEAD, offset 0x100c + (x * 32), 32-bit, (R/W)
+ DMA_DESC_xxx_TAIL, offset 0x1010 + (x * 32), 32-bit, (R)
+ DMA_DESC_xxx_CTRL, offset 0x1014 + (x * 32), 32-bit, (W)
+ DMA_DESC_xxx_CREDITS, offset 0x1018 + (x * 32), 32-bit, (R/W)
+ DMA_DESC_xxx_RSVD1, offset 0x101c + (x * 32), 32-bit, (R/W)
+
+Where x is descriptor ring index:
+
+ index ring
+ --------------------
+ 0 CMD
+ 1 EVENT
+ 2 TX (port 0)
+ 3 RX (port 0)
+ 4 TX (port 1)
+ 5 RX (port 1)
+ .
+ .
+ .
+ 124 TX (port 61)
+ 125 RX (port 61)
+ 126 Resv
+ 127 Resv
+
+Writing BASE_ADDR or SIZE will reset HEAD and TAIL to zero. HEAD cannot be
+written past TAIL. To do so would wrap the ring. An empty ring is when HEAD
+== TAIL. A full ring is when HEAD is one position behind TAIL. Both HEAD and
+TAIL increment and modulo wrap at the ring size.
+
+CTRL register bits:
+
+ bit name description
+ ------------------------------------------------------------------------
+ [0] CTRL_RESET Reset the descriptor ring
+ [1:31] Reserved
+
+All descriptor types share some common fields:
+
+ field width description
+ -------------------------------------------------------------------
+ DMA_DESC_BUF_ADDR 8 Phys addr of desc payload, 8-byte
+ aligned
+ DMA_DESC_COOKIE 8 Desc cookie for completion matching,
+ upper-most bit is reserved
+ DMA_DESC_BUF_SIZE 2 Desc payload size in bytes
+ DMA_DESC_TLV_SIZE 2 Desc payload total size in bytes
+ used for TLVs. Must be <=
+ DMA_DESC_BUF_SIZE.
+ DMA_DESC_COMP_ERR 2 Completion status of associated
+ desc payload. High order bit is
+ clear on new descs, toggled by
+ hw for completed items.
+
+To support forward- and backward-compatibility, descriptor and completion
+payloads are specified in TLV format. Fields are packed with Type=field name,
+Length=field length, and Value=field value. Software will ignore unknown fields
+filled in by the switch. Likewise, the switch will ignore unknown fields
+filled in by software.
+
+Descriptor payload buffer is 8-byte aligned and TLVs are 8-byte aligned. The
+value within a TLV is also 8-byte aligned. The (packed, 8 byte) TLV header is:
+
+ field width description
+ -----------------------------
+ type 4 TLV type
+ len 2 TLV value length
+ pad 2 Reserved
+
+The alignment requirements for descriptors and TLVs are to avoid unaligned
+access exceptions in software. Note that the payload for each TLV is also
+8 byte aligned.
+
+Figure 1 shows an example descriptor buffer with two TLVs.
+
+ <------- 8 bytes ------->
+
+ 8-byte +––––+ +–––––––––––+–––––+–––––+ +–+
+ align | type | len | pad | TLV#1 hdr |
+ +–––––––––––+–––––+–––––+ (len=22) |
+ | | |
+ | value | TVL#1 value |
+ | | (padded to 8-byte |
+ | +–––––+ alignment) |
+ | |/////| |
+ 8-byte +––––+ +–––––––––––+–––––––––––+ |
+ align | type | len | pad | TLV#2 hdr DESC_BUF_SIZE
+ +–––––+–––––+–––––+–––––+ (len=2) |
+ |value|/////////////////| TLV#2 value |
+ +–––––+/////////////////| |
+ |///////////////////////| |
+ |///////////////////////| |
+ |///////////////////////| |
+ |////////unused/////////| |
+ |////////space//////////| |
+ |///////////////////////| |
+ |///////////////////////| |
+ |///////////////////////| |
+ +–––––––––––––––––––––––+ +–+
+
+ fig. 1
+
+TLVs can be nested within the NEST TLV type.
+
+Interrupt credits
+^^^^^^^^^^^^^^^^^
+
+MSI-X vectors used for descriptor ring completions use a credit mechanism for
+efficient device, PCIe bus, OS and driver operations. Each descriptor ring has
+a credit count which represents the number of outstanding descriptors to be
+processed by the driver. As the device marks descriptors complete, the credit
+count is incremented. As the driver processes those outstanding descriptors,
+it returns credits back to the device. This way, the device knows the driver's
+progress and can make decisions about when to fire the next interrupt or not.
+When the credit count is zero, and the first descriptors are posted for the
+driver, a single interrupt is fired. Once the interrupt is fired, the
+interrupt is disabled (auto-masked*). In response to the interrupt, the driver
+will process descriptors and PIO write a returned credit value for that
+descriptor ring. If the driver returns all credits (the driver caught up with
+the device and there is no outstanding work), then the interrupt is unmasked,
+but not fired. If only partial credits are returned, the interrupt remains
+masked but the device generates an interrupt, signaling the driver that more
+outstanding work is available.
+
+(* this masking is unrelated to to the MSI-X interrupt mask register)
+
+Endianness
+----------
+
+Device registers are hard-coded to little-endian (LE). The driver should
+convert to/from host endianess to LE for device register accesses.
+
+Descriptors are LE. Descriptor buffer TLVs will have LE type and length
+fields, but the value field can either be LE or network-byte-order, depending
+on context. TLV values containing network packet data will be in network-byte
+order. A TLV value containing a field or mask used to compare against network
+packet data is network-byte order. For example, flow match fields (and masks)
+are network-byte-order since they're matched directly, byte-by-byte, against
+network packet data. All non-network-packet TLV multi-byte values will be LE.
+
+TLV values in network-byte-order are designated with (N).
+
+
+SECTION 5: Test Registers
+=========================
+
+Rocker has several test registers to support troubleshooting register access,
+interrupt generation, and DMA operations:
+
+ TEST_REG, offset 0x0010, 32-bit (R/W)
+ TEST_REG64, offset 0x0018, 64-bit (R/W)
+ TEST_IRQ, offset 0x0020, 32-bit (R/W)
+ TEST_DMA_ADDR, offset 0x0028, 64-bit (R/W)
+ TEST_DMA_SIZE, offset 0x0030, 32-bit (R/W)
+ TEST_DMA_CTRL, offset 0x0034, 32-bit (R/W)
+
+Reads to TEST_REG and TEST_REG64 will read a value equal to twice the last
+value written to the register. The 32-bit and 64-bit versions are for testing
+32-bit and 64-bit host accesses.
+
+A vector can be written to TEST_IRQ and the device will generate an interrupt
+for that vector.
+
+To test basic DMA operations, allocate a DMA-able host buffer and put the
+buffer address into TEST_DMA_ADDR and size into TEST_DMA_SIZE. Then, write to
+TEST_DMA_CTRL to manipulate the buffer contents. TEST_DMA_CTRL operations are:
+
+ operation value description
+ -----------------------------------------------------------
+ TEST_DMA_CTRL_CLEAR 1 clear buffer
+ TEST_DMA_CTRL_FILL 2 fill buffer bytes with 0x96
+ TEST_DMA_CTRL_INVERT 4 invert bytes in buffer
+
+Various buffer address and sizes should be tested to verify no address boundary
+issue exists. In particular, buffers that start on odd-8-byte boundary and/or
+span multiple PAGE sizes should be tested.
+
+
+SECTION 6: Ports
+================
+
+Physical and Logical Ports
+------------------------------------
+
+The switch supports up to 62 physical (front-panel) ports. Register
+PORT_PHYS_COUNT returns the actual number of physical ports available:
+
+ PORT_PHYS_COUNT, offset 0x0304, 32-bit, (R)
+
+In addition to front-panel ports, the switch supports logical ports for
+tunnels.
+
+Front-panel ports and logical tunnel ports are mapped into a single 32-bit port
+space. A special CPU port is assigned port 0. The front-panel ports are
+mapped to ports 1-62. A special loopback port is assigned port 63. Logical
+tunnel ports are assigned ports 0x0001000-0x0001ffff.
+To summarize the port assignments:
+
+ port mapping
+ -------------------------------------------------------
+ 0 CPU port (for packets to/from host CPU)
+ 1-62 front-panel physical ports
+ 63 loopback port
+ 64-0x0000ffff RSVD
+ 0x00010000-0x0001ffff logical tunnel ports
+ 0x00020000-0xffffffff RSVD
+
+Physical Port Mode
+------------------
+
+Switch front-panel ports operate in a mode. Currently, the only mode is
+OF-DPA. OF-DPA[1] mode is based on OpenFlow Data Plane Abstraction (OF-DPA)
+Abstract Switch Specification, Version 1.0, from Broadcom Corporation. To
+set/get the mode for front-panel ports, see port settings, below.
+
+Port Settings
+-------------
+
+Link status for all front-panel ports is available via PORT_PHYS_LINK_STATUS:
+
+ PORT_PHYS_LINK_STATUS, offset 0x0310, 64-bit, (R)
+
+ Value is port bitmap. Bits 0 and 63 always read 0. Bits 1-62
+ read 1 for link UP and 0 for link DOWN for respective front-panel ports.
+
+Other properties for front-panel ports are available via DMA CMD descriptors:
+
+ Get PORT_SETTINGS descriptor:
+
+ field width description
+ ----------------------------------------------
+ PORT_SETTINGS 2 CMD_GET
+ PPORT 4 Physical port #
+
+ Get PORT_SETTINGS completion:
+
+ field width description
+ ----------------------------------------------
+ PPORT 4 Physical port #
+ SPEED 4 Current port interface speed, in Mbps
+ DUPLEX 1 1 = Full, 0 = Half
+ AUTONEG 1 1 = enabled, 0 = disabled
+ MACADDR 6 Port MAC address
+ MODE 1 0 = OF-DPA
+ LEARNING 1 MAC address learning on port
+ 1 = enabled
+ 0 = disabled
+
+ Set PORT_SETTINGS descriptor:
+
+ field width description
+ ----------------------------------------------
+ PORT_SETTINGS 2 CMD_SET
+ PPORT 4 Physical port #
+ SPEED 4 Port interface speed, in Mbps
+ DUPLEX 1 1 = Full, 0 = Half
+ AUTONEG 1 1 = enabled, 0 = disabled
+ MACADDR 6 Port MAC address
+ MODE 1 0 = OF-DPA
+
+Port Enable
+-----------
+
+Front-panel ports are initially disabled, which means port ingress and egress
+packets will be dropped. To enable or disable a port, use PORT_PHYS_ENABLE:
+
+ PORT_PHYS_ENABLE: offset 0x0318, 64-bit, (R/W)
+
+ Value is bitmap of first 64 ports. Bits 0 and 63 are ignored
+ and always read as 0. Write 1 to enable port; write 0 to disable it.
+ Default is 0.
+
+
+SECTION 7: Switch Control
+=========================
+
+This section covers switch-wide register settings.
+
+Control
+-------
+
+This register is used for low level control of the switch.
+
+ CONTROL: offset 0x0300, 32-bit, (W)
+
+ bit name description
+ ------------------------------------------------------------------------
+ [0] CONTROL_RESET If set, device will perform reset
+ [1:31] Reserved
+
+Switch ID
+---------
+
+The switch has a SWITCH_ID to be used by software to uniquely identify the
+switch:
+
+ SWITCH_ID: offset 0x0320, 64-bit, (R)
+
+ Value is opaque to switch software and no special encoding is implied.
+
+
+SECTION 8: Events
+=================
+
+Non-I/O asynchronous events from the device are notified to the host using the
+event ring. The TLV structure for events is:
+
+ field width description
+ ---------------------------------------------------
+ TYPE 4 Event type, one of:
+ 1: LINK_CHANGED
+ 2: MAC_VLAN_SEEN
+ INFO <nest> Event info (details below)
+
+Link Changed Event
+------------------
+
+When link status changes on a physical port, this event is generated.
+
+ field width description
+ ---------------------------------------------------
+ INFO <nest>
+ PPORT 4 Physical port
+ LINKUP 1 Link status:
+ 0: down
+ 1: up
+
+MAC VLAN Seen Event
+-------------------
+
+When a packet ingresses on a port and the source MAC/VLAN isn't known to the
+device, the device will generate this event. In response to the event, the
+driver should install to the device the MAC/VLAN on the port into the bridge
+table. Once installed, the MAC/VLAN is known on the port and this event will
+no longer be generated.
+
+ field width description
+ ---------------------------------------------------
+ INFO <nest>
+ PPORT 4 Physical port
+ MAC 6 MAC address
+ VLAN 2 VLAN ID
+
+
+SECTION 9: CPU Packet Processing
+================================
+
+Ingress packets directed to the host CPU for further processing are delivered
+in the DMA RX ring. Likewise, host CPU originating packets destined to egress
+on switch ports are scheduled by software using the DMA TX ring.
+
+Tx Packet Processing
+--------------------
+
+Software schedules packets for egress on switch ports using the DMA TX ring. A
+TX descriptor buffer describes the packet location and size in host DMA-able
+memory, the destination port, and any hardware-offload functions (such as L3
+payload checksum offload). Software then bumps the descriptor head to signal
+hardware of new Tx work. In response, hardware will DMA read Tx descriptors up
+to head, DMA read descriptor buffer and packet data, perform offloading
+functions, and finally frame packet on wire (network). Once packet processing
+is complete, hardware will writeback status to descriptor(s) to signal to
+software that Tx is complete and software resources (e.g. skb) backing packet
+can be released.
+
+Figure 2 shows an example 3-fragment packet queued with one Tx descriptor. A
+TLV is used for each packet fragment.
+
+ pkt frag 1
+ +–––––––+ +–+
+ +–––+ | |
+ desc buf | | | |
+ +––––––––+ | | | |
+ Tx ring +–––+ +–––––+ | | |
+ +–––––––––+ | | TLVs | +–––––––+ |
+ | +–––+ +––––––––+ pkt frag 2 |
+ | desc 0 | | +–––––+ +–––––––+ |
+ +–––––––––+ | TLVs | +–––+ | |
+ head+–+ | +––––––––+ | | |
+ | desc 1 | | +–––––+ +–––––––+ |pkt
+ +–––––––––+ | TLVs | | |
+ | | +––––––––+ | pkt frag 3 |
+ | | | +–––––––+ |
+ +–––––––––+ +–––+ | |
+ | | | | |
+ | | | | |
+ +–––––––––+ | | |
+ | | | | |
+ | | | | |
+ +–––––––––+ | | |
+ | | +–––––––+ +–+
+ | |
+ +–––––––––+
+
+ fig 2.
+
+The TLVs for Tx descriptor buffer are:
+
+ field width description
+ ---------------------------------------------------------------------
+ PPORT 4 Destination physical port #
+ TX_OFFLOAD 1 Hardware offload modes:
+ 0: no offload
+ 1: insert IP csum (ipv4 only)
+ 2: insert TCP/UDP csum
+ 3: L3 csum calc and insert
+ into csum offset (TX_L3_CSUM_OFF)
+ 16-bit 1's complement csum value.
+ IPv4 pseudo-header and IP
+ already calculated by OS
+ and inserted.
+ 4: TSO (TCP Segmentation Offload)
+ TX_L3_CSUM_OFF 2 For L3 csum offload mode, the offset,
+ from the beginning of the packet,
+ of the csum field in the L3 header
+ TX_TSO_MSS 2 For TSO offload mode, the
+ Maximum Segment Size in bytes
+ TX_TSO_HDR_LEN 2 For TSO offload mode, the
+ length of ethernet, IP, and
+ TCP/UDP headers, including IP
+ and TCP options.
+ TX_FRAGS <array> Packet fragments
+ TX_FRAG <nest> Packet fragment
+ TX_FRAG_ADDR 8 DMA address of packet fragment
+ TX_FRAG_LEN 2 Packet fragment length
+
+Possible status return codes in descriptor on completion are:
+
+ DESC_COMP_ERR reason
+ --------------------------------------------------------------------
+ 0 OK
+ -ROCKER_ENXIO address or data read err on desc buf or packet
+ fragment
+ -ROCKER_EINVAL bad pport or TSO or csum offloading error
+ -ROCKER_ENOMEM no memory for internal staging tx fragment
+
+Rx Packet Processing
+--------------------
+
+For packets ingressing on switch ports that are not forwarded by the switch but
+rather directed to the host CPU for further processing are delivered in the DMA
+RX ring. Rx descriptor buffers are allocated by software and placed on the
+ring. Hardware will fill Rx descriptor buffers with packet data, write the
+completion, and signal to software that a new packet is ready. Since Rx packet
+size is not known a-priori, the Rx descriptor buffer must be allocated for
+worst-case packet size. A single Rx descriptor will contain the entire Rx
+packet data in one RX_FRAG. Other Rx TLVs describe and hardware offloads
+performed on the packet, such as checksum validation.
+
+The TLVs for Rx descriptor buffer are:
+
+ field width description
+ ---------------------------------------------------
+ PPORT 4 Source physical port #
+ RX_FLAGS 2 Packet parsing flags:
+ (1 << 0): IPv4 packet
+ (1 << 1): IPv6 packet
+ (1 << 2): csum calculated
+ (1 << 3): IPv4 csum good
+ (1 << 4): IP fragment
+ (1 << 5): TCP packet
+ (1 << 6): UDP packet
+ (1 << 7): TCP/UDP csum good
+ RX_CSUM 2 IP calculated checksum:
+ IPv4: IP payload csum
+ IPv6: header and payload csum
+ (Only valid is RX_FLAGS:csum calc is set)
+ RX_FRAG_ADDR 8 DMA address of packet fragment
+ RX_FRAG_MAX_LEN 2 Packet maximum fragment length
+ RX_FRAG_LEN 2 Actual packet fragment length after receive
+
+Possible status return codes in descriptor on completion are:
+
+ DESC_COMP_ERR reason
+ --------------------------------------------------------------------
+ 0 OK
+ -ROCKER_ENXIO address or data read err on desc buf
+ -ROCKER_ENOMEM no memory for internal staging desc buf
+ -ROCKER_EMSGSIZE Rx descriptor buffer wasn't big enough to contain
+ packet data TLV and other TLVs.
+
+
+SECTION 10: OF-DPA Mode
+======================
+
+OF-DPA mode allows the switch to offload flow packet processing functions to
+hardware. An OpenFlow controller would communicate with an OpenFlow agent
+installed on the switch. The OpenFlow agent would (directly or indirectly)
+communicate with the Rocker switch driver, which in turn would program switch
+hardware with flow functionality, as defined in OF-DPA. The block diagram is:
+
+ +–––––––––––––––----–––+
+ | OF |
+ | Remote Controller |
+ +––––––––+––----–––––––+
+ |
+ |
+ +––––––––+–––––––––+
+ | OF |
+ | Local Agent |
+ +––––––––––––––––––+
+ | |
+ | Rocker Driver |
+ +––––––––––––––––––+
+ <this spec>
+ +––––––––––––––––––+
+ | |
+ | Rocker Switch |
+ +––––––––––––––––––+
+
+To participate in flow functions, ports must be configure for OF-DPA mode
+during switch initialization.
+
+OF-DPA Flow Table Interface
+---------------------------
+
+There are commands to add, modify, delete, and get stats of flow table entries.
+The commands are issued using the DMA CMD descriptor ring. The following
+commands are defined:
+
+ CMD_ADD: add an entry to flow table
+ CMD_MOD: modify an entry in flow table
+ CMD_DEL: delete an entry from flow table
+ CMD_GET_STATS: get stats for flow entry
+
+TLVs for add and modify commands are:
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_CMD 2 CMD_[ADD|MOD]
+ OF_DPA_TBL 2 Flow table ID
+ 0: ingress port
+ 10: vlan
+ 20: termination mac
+ 30: unicast routing
+ 40: multicast routing
+ 50: bridging
+ 60: ACL policy
+ OF_DPA_PRIORITY 4 Flow priority
+ OF_DPA_HARDTIME 4 Hard timeout for flow
+ OF_DPA_IDLETIME 4 Idle timeout for flow
+ OF_DPA_COOKIE 8 Cookie
+
+Additional TLVs based on flow table ID:
+
+Table ID 0: ingress port
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_IN_PPORT 4 ingress physical port number
+ OF_DPA_GOTO_TBL 2 goto table ID; zero to drop
+
+Table ID 10: vlan
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_IN_PPORT 4 ingress physical port number
+ OF_DPA_VLAN_ID 2 (N) vlan ID
+ OF_DPA_VLAN_ID_MASK 2 (N) vlan ID mask
+ OF_DPA_GOTO_TBL 2 goto table ID; zero to drop
+ OF_DPA_NEW_VLAN_ID 2 (N) new vlan ID
+
+Table ID 20: termination mac
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_IN_PPORT 4 ingress physical port number
+ OF_DPA_IN_PPORT_MASK 4 ingress physical port number mask
+ OF_DPA_ETHERTYPE 2 (N) must be either 0x0800 or 0x86dd
+ OF_DPA_DST_MAC 6 (N) destination MAC
+ OF_DPA_DST_MAC_MASK 6 (N) destination MAC mask
+ OF_DPA_VLAN_ID 2 (N) vlan ID
+ OF_DPA_VLAN_ID_MASK 2 (N) vlan ID mask
+ OF_DPA_GOTO_TBL 2 only acceptable values are
+ unicast or multicast routing
+ table IDs
+ OF_DPA_OUT_PPORT 2 if specified, must be
+ controller, set zero otherwise
+
+Table ID 30: unicast routing
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_ETHERTYPE 2 (N) must be either 0x0800 or 0x86dd
+ OF_DPA_DST_IP 4 (N) destination IPv4 address.
+ Must be unicast address
+ OF_DPA_DST_IP_MASK 4 (N) IP mask. Must be prefix mask
+ OF_DPA_DST_IPV6 16 (N) destination IPv6 address.
+ Must be unicast address
+ OF_DPA_DST_IPV6_MASK 16 (N) IPv6 mask. Must be prefix mask
+ OF_DPA_GOTO_TBL 2 goto table ID; zero to drop
+ OF_DPA_GROUP_ID 4 data for GROUP action must
+ be an L3 Unicast group entry
+
+Table ID 40: multicast routing
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_ETHERTYPE 2 (N) must be either 0x0800 or 0x86dd
+ OF_DPA_VLAN_ID 2 (N) vlan ID
+ OF_DPA_SRC_IP 4 (N) source IPv4. Optional,
+ can contain IPv4 address,
+ must be completely masked
+ if not used
+ OF_DPA_SRC_IP_MASK 4 (N) IP Mask
+ OF_DPA_DST_IP 4 (N) destination IPv4 address.
+ Must be multicast address
+ OF_DPA_SRC_IPV6 16 (N) source IPv6 Address. Optional.
+ Can contain IPv6 address,
+ must be completely masked
+ if not used
+ OF_DPA_SRC_IPV6_MASK 16 (N) IPv6 mask.
+ OF_DPA_DST_IPV6 16 (N) destination IPv6 Address. Must
+ be multicast address
+ Must be multicast address
+ OF_DPA_GOTO_TBL 2 goto table ID; zero to drop
+ OF_DPA_GROUP_ID 4 data for GROUP action must
+ be an L3 multicast group entry
+
+Table ID 50: bridging
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_VLAN_ID 2 (N) vlan ID
+ OF_DPA_TUNNEL_ID 4 tunnel ID
+ OF_DPA_DST_MAC 6 (N) destination MAC
+ OF_DPA_DST_MAC_MASK 6 (N) destination MAC mask
+ OF_DPA_GOTO_TBL 2 goto table ID; zero to drop
+ OF_DPA_GROUP_ID 4 data for GROUP action must
+ be a L2 Interface, L2
+ Multicast, L2 Flood,
+ or L2 Overlay group entry
+ as appropriate
+ OF_DPA_TUNNEL_LPORT 4 unicast Tenant Bridging
+ flows specify a tunnel
+ logical port ID
+ OF_DPA_OUT_PPORT 2 data for OUTPUT action,
+ restricted to CONTROLLER,
+ set to 0 otherwise
+
+Table ID 60: acl policy
+
+ field width description
+ ----------------------------------------------------
+ OF_DPA_IN_PPORT 4 ingress physical port number
+ OF_DPA_IN_PPORT_MASK 4 ingress physical port number mask
+ OF_DPA_ETHERTYPE 2 (N) ethertype
+ OF_DPA_VLAN_ID 2 (N) vlan ID
+ OF_DPA_VLAN_ID_MASK 2 (N) vlan ID mask
+ OF_DPA_VLAN_PCP 2 (N) vlan Priority Code Point
+ OF_DPA_VLAN_PCP_MASK 2 (N) vlan Priority Code Point mask
+ OF_DPA_SRC_MAC 6 (N) source MAC
+ OF_DPA_SRC_MAC_MASK 6 (N) source MAC mask
+ OF_DPA_DST_MAC 6 (N) destination MAC
+ OF_DPA_DST_MAC_MASK 6 (N) destination MAC mask
+ OF_DPA_TUNNEL_ID 4 tunnel ID
+ OF_DPA_SRC_IP 4 (N) source IPv4. Optional,
+ can contain IPv4 address,
+ must be completely masked
+ if not used
+ OF_DPA_SRC_IP_MASK 4 (N) IP Mask
+ OF_DPA_DST_IP 4 (N) destination IPv4 address.
+ Must be multicast address
+ OF_DPA_DST_IP_MASK 4 (N) IP Mask
+ OF_DPA_SRC_IPV6 16 (N) source IPv6 Address. Optional.
+ Can contain IPv6 address,
+ must be completely masked
+ if not used
+ OF_DPA_SRC_IPV6_MASK 16 (N) IPv6 mask
+ OF_DPA_DST_IPV6 16 (N) destination IPv6 Address. Must
+ be multicast address.
+ OF_DPA_DST_IPV6_MASK 16 (N) IPv6 mask
+ OF_DPA_SRC_ARP_IP 4 (N) source IPv4 address in the ARP
+ payload. Only used if ethertype
+ == 0x0806.
+ OF_DPA_SRC_ARP_IP_MASK 4 (N) IP Mask
+ OF_DPA_IP_PROTO 1 IP protocol
+ OF_DPA_IP_PROTO_MASK 1 IP protocol mask
+ OF_DPA_IP_DSCP 1 DSCP
+ OF_DPA_IP_DSCP_MASK 1 DSCP mask
+ OF_DPA_IP_ECN 1 ECN
+ OF_DPA_IP_ECN_MASK 1 ECN mask
+ OF_DPA_L4_SRC_PORT 2 (N) L4 source port, only for
+ TCP, UDP, or SCTP
+ OF_DPA_L4_SRC_PORT_MASK 2 (N) L4 source port mask
+ OF_DPA_L4_DST_PORT 2 (N) L4 source port, only for
+ TCP, UDP, or SCTP
+ OF_DPA_L4_DST_PORT_MASK 2 (N) L4 source port mask
+ OF_DPA_ICMP_TYPE 1 ICMP type, only if IP
+ protocol is 1
+ OF_DPA_ICMP_TYPE_MASK 1 ICMP type mask
+ OF_DPA_ICMP_CODE 1 ICMP code
+ OF_DPA_ICMP_CODE_MASK 1 ICMP code mask
+ OF_DPA_IPV6_LABEL 4 (N) IPv6 flow label
+ OF_DPA_IPV6_LABEL_MASK 4 (N) IPv6 flow label mask
+ OF_DPA_GROUP_ID 4 data for GROUP action
+ OF_DPA_QUEUE_ID_ACTION 1 write the queue ID
+ OF_DPA_NEW_QUEUE_ID 1 queue ID
+ OF_DPA_VLAN_PCP_ACTION 1 write the VLAN priority
+ OF_DPA_NEW_VLAN_PCP 1 VLAN priority
+ OF_DPA_IP_DSCP_ACTION 1 write the DSCP
+ OF_DPA_NEW_IP_DSCP 1 new DSCP
+ OF_DPA_TUNNEL_LPORT 4 restrct to valid tunnel
+ logical port, set to 0
+ otherwise.
+ OF_DPA_OUT_PPORT 2 data for OUTPUT action,
+ restricted to CONTROLLER,
+ set to 0 otherwise
+ OF_DPA_CLEAR_ACTIONS 4 if 1 packets matching flow are
+ dropped (all other instructions
+ ignored)
+
+TLVs for flow delete and get stats command are:
+
+ field width description
+ ---------------------------------------------------
+ OF_DPA_CMD 2 CMD_[DEL|GET_STATS]
+ OF_DPA_COOKIE 8 Cookie
+
+On completion of get stats command, the descriptor buffer is written back with
+the following TLVs:
+
+ field width description
+ ---------------------------------------------------
+ OF_DPA_STAT_DURATION 4 Flow duration
+ OF_DPA_STAT_RX_PKTS 8 Received packets
+ OF_DPA_STAT_TX_PKTS 8 Transmit packets
+
+Possible status return codes in descriptor on completion are:
+
+ DESC_COMP_ERR command reason
+ --------------------------------------------------------------------
+ 0 all OK
+ -ROCKER_EFAULT all head or tail index outside
+ of ring
+ -ROCKER_ENXIO all address or data read err on
+ desc buf
+ -ROCKER_EMSGSIZE GET_STATS cmd descriptor buffer wasn't
+ big enough to contain write-back
+ TLVs
+ -ROCKER_EINVAL all invalid parameters passed in
+ -ROCKER_EEXIST ADD entry already exists
+ -ROCKER_ENOSPC ADD no space left in flow table
+ -ROCKER_ENOENT MOD|DEL|GET_STATS cookie invalid
+
+Group Table Interface
+---------------------
+
+There are commands to add, modify, delete, and get stats of group table
+entries. The commands are issued using the DMA CMD descriptor ring. The
+following commands are defined:
+
+ CMD_ADD: add an entry to group table
+ CMD_MOD: modify an entry in group table
+ CMD_DEL: delete an entry from group table
+ CMD_GET_STATS: get stats for group entry
+
+TLVs for add and modify commands are:
+
+ field width description
+ -----------------------------------------------------------
+ FLOW_GROUP_CMD 2 CMD_[ADD|MOD]
+ FLOW_GROUP_ID 2 Flow group ID
+ FLOW_GROUP_TYPE 1 Group type:
+ 0: L2 interface
+ 1: L2 rewrite
+ 2: L3 unicast
+ 3: L2 multicast
+ 4: L2 flood
+ 5: L3 interface
+ 6: L3 multicast
+ 7: L3 ECMP
+ 8: L2 overlay
+ FLOW_VLAN_ID 2 Vlan ID (types 0, 3, 4, 6)
+ FLOW_L2_PORT 2 Port (types 0)
+ FLOW_INDEX 4 Index (all types but 0)
+ FLOW_OVERLAY_TYPE 1 Overlay sub-type (type 8):
+ 0: Flood unicast tunnel
+ 1: Flood multicast tunnel
+ 2: Multicast unicast tunnel
+ 3: Multicast multicast tunnel
+ FLOW_GROUP_ACTION nest
+ FLOW_GROUP_ID 2 next group ID in chain (all
+ types except 0)
+ FLOW_OUT_PORT 4 egress port (types 0, 8)
+ FLOW_POP_VLAN_TAG 1 strip outer VLAN tag (type 1
+ only)
+ FLOW_VLAN_ID 2 (types 1, 5)
+ FLOW_SRC_MAC 6 (types 1, 2, 5)
+ FLOW_DST_MAC 6 (types 1, 2)
+
+TLVs for flow delete and get stats command are:
+
+ field width description
+ -----------------------------------------------------------
+ FLOW_GROUP_CMD 2 CMD_[DEL|GET_STATS]
+ FLOW_GROUP_ID 2 Flow group ID
+
+On completion of get stats command, the descriptor buffer is written back with
+the following TLVs:
+
+ field width description
+ ---------------------------------------------------
+ FLOW_GROUP_ID 2 Flow group ID
+ FLOW_STAT_DURATION 4 Flow duration
+ FLOW_STAT_REF_COUNT 4 Flow reference count
+ FLOW_STAT_BUCKET_COUNT 4 Flow bucket count
+
+Possible status return codes in descriptor on completion are:
+
+ DESC_COMP_ERR command reason
+ --------------------------------------------------------------------
+ 0 all OK
+ -ROCKER_EFAULT all head or tail index outside
+ of ring
+ -ROCKER_ENXIO all address or data read err on
+ desc buf
+ -ROCKER_ENOSPC GET_STATS cmd descriptor buffer wasn't
+ big enough to contain write-back
+ TLVs
+ -ROCKER_EINVAL ADD|MOD invalid parameters passed in
+ -ROCKER_EEXIST ADD entry already exists
+ -ROCKER_ENOSPC ADD no space left in flow table
+ -ROCKER_ENOENT MOD|DEL|GET_STATS group ID invalid
+ -ROCKER_EBUSY DEL group reference count non-zero
+ -ROCKER_ENODEV ADD next group ID doesn't exist
+
+
+
+References
+==========
+
+[1] OpenFlow Data Plane Abstraction (OF-DPA) Abstract Switch Specification,
+Version 1.0, from Broadcom Corporation, February 21, 2014.
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index ea93293122..7b91c4e51d 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -35,3 +35,7 @@ obj-y += vhost_net.o
obj-$(CONFIG_ETSEC) += fsl_etsec/etsec.o fsl_etsec/registers.o \
fsl_etsec/rings.o fsl_etsec/miim.o
+
+common-obj-$(CONFIG_ROCKER) += rocker/rocker.o rocker/rocker_fp.o \
+ rocker/rocker_desc.o rocker/rocker_world.o \
+ rocker/rocker_of_dpa.o
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
new file mode 100644
index 0000000000..55b6c46157
--- /dev/null
+++ b/hw/net/rocker/rocker.c
@@ -0,0 +1,1480 @@
+/*
+ * QEMU rocker switch emulation - PCI device
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/msix.h"
+#include "net/net.h"
+#include "net/eth.h"
+#include "qemu/iov.h"
+#include "qemu/bitops.h"
+#include "qmp-commands.h"
+
+#include "rocker.h"
+#include "rocker_hw.h"
+#include "rocker_fp.h"
+#include "rocker_desc.h"
+#include "rocker_tlv.h"
+#include "rocker_world.h"
+#include "rocker_of_dpa.h"
+
+struct rocker {
+ /* private */
+ PCIDevice parent_obj;
+ /* public */
+
+ MemoryRegion mmio;
+ MemoryRegion msix_bar;
+
+ /* switch configuration */
+ char *name; /* switch name */
+ uint32_t fp_ports; /* front-panel port count */
+ NICPeers *fp_ports_peers;
+ MACAddr fp_start_macaddr; /* front-panel port 0 mac addr */
+ uint64_t switch_id; /* switch id */
+
+ /* front-panel ports */
+ FpPort *fp_port[ROCKER_FP_PORTS_MAX];
+
+ /* register backings */
+ uint32_t test_reg;
+ uint64_t test_reg64;
+ dma_addr_t test_dma_addr;
+ uint32_t test_dma_size;
+ uint64_t lower32; /* lower 32-bit val in 2-part 64-bit access */
+
+ /* desc rings */
+ DescRing **rings;
+
+ /* switch worlds */
+ World *worlds[ROCKER_WORLD_TYPE_MAX];
+ World *world_dflt;
+
+ QLIST_ENTRY(rocker) next;
+};
+
+#define ROCKER "rocker"
+
+#define to_rocker(obj) \
+ OBJECT_CHECK(Rocker, (obj), ROCKER)
+
+static QLIST_HEAD(, rocker) rockers;
+
+Rocker *rocker_find(const char *name)
+{
+ Rocker *r;
+
+ QLIST_FOREACH(r, &rockers, next)
+ if (strcmp(r->name, name) == 0) {
+ return r;
+ }
+
+ return NULL;
+}
+
+World *rocker_get_world(Rocker *r, enum rocker_world_type type)
+{
+ if (type < ROCKER_WORLD_TYPE_MAX) {
+ return r->worlds[type];
+ }
+ return NULL;
+}
+
+uint32_t rocker_fp_ports(Rocker *r)
+{
+ return r->fp_ports;
+}
+
+static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
+ DescRing *ring)
+{
+ return (desc_ring_index(ring) - 2) / 2 + 1;
+}
+
+static int tx_consume(Rocker *r, DescInfo *info)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+ char *buf = desc_get_buf(info, true);
+ RockerTlv *tlv_frag;
+ RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
+ struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
+ uint32_t pport;
+ uint32_t port;
+ uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
+ uint16_t tx_l3_csum_off = 0;
+ uint16_t tx_tso_mss = 0;
+ uint16_t tx_tso_hdr_len = 0;
+ int iovcnt = 0;
+ int err = ROCKER_OK;
+ int rem;
+ int i;
+
+ if (!buf) {
+ return -ROCKER_ENXIO;
+ }
+
+ rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
+
+ if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
+ return -ROCKER_EINVAL;
+ }
+
+ pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
+ if (!fp_port_from_pport(pport, &port)) {
+ return -ROCKER_EINVAL;
+ }
+
+ if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
+ tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
+ }
+
+ switch (tx_offload) {
+ case ROCKER_TX_OFFLOAD_L3_CSUM:
+ if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
+ return -ROCKER_EINVAL;
+ }
+ case ROCKER_TX_OFFLOAD_TSO:
+ if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
+ !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
+ tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
+ }
+
+ if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
+ tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
+ }
+
+ if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
+ tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
+ }
+
+ rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
+ hwaddr frag_addr;
+ uint16_t frag_len;
+
+ if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
+ err = -ROCKER_EINVAL;
+ goto err_bad_attr;
+ }
+
+ rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
+
+ if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
+ !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
+ err = -ROCKER_EINVAL;
+ goto err_bad_attr;
+ }
+
+ frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
+ frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
+
+ iov[iovcnt].iov_len = frag_len;
+ iov[iovcnt].iov_base = g_malloc(frag_len);
+ if (!iov[iovcnt].iov_base) {
+ err = -ROCKER_ENOMEM;
+ goto err_no_mem;
+ }
+
+ if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
+ iov[iovcnt].iov_len)) {
+ err = -ROCKER_ENXIO;
+ goto err_bad_io;
+ }
+
+ if (++iovcnt > ROCKER_TX_FRAGS_MAX) {
+ goto err_too_many_frags;
+ }
+ }
+
+ if (iovcnt) {
+ /* XXX perform Tx offloads */
+ /* XXX silence compiler for now */
+ tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
+ }
+
+ err = fp_port_eg(r->fp_port[port], iov, iovcnt);
+
+err_too_many_frags:
+err_bad_io:
+err_no_mem:
+err_bad_attr:
+ for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
+ if (iov[i].iov_base) {
+ g_free(iov[i].iov_base);
+ }
+ }
+
+ return err;
+}
+
+static int cmd_get_port_settings(Rocker *r,
+ DescInfo *info, char *buf,
+ RockerTlv *cmd_info_tlv)
+{
+ RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ RockerTlv *nest;
+ FpPort *fp_port;
+ uint32_t pport;
+ uint32_t port;
+ uint32_t speed;
+ uint8_t duplex;
+ uint8_t autoneg;
+ uint8_t learning;
+ MACAddr macaddr;
+ enum rocker_world_type mode;
+ size_t tlv_size;
+ int pos;
+ int err;
+
+ rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ cmd_info_tlv);
+
+ if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
+ return -ROCKER_EINVAL;
+ }
+
+ pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
+ if (!fp_port_from_pport(pport, &port)) {
+ return -ROCKER_EINVAL;
+ }
+ fp_port = r->fp_port[port];
+
+ err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
+ if (err) {
+ return err;
+ }
+
+ fp_port_get_macaddr(fp_port, &macaddr);
+ mode = world_type(fp_port_get_world(fp_port));
+ learning = fp_port_get_learning(fp_port);
+
+ tlv_size = rocker_tlv_total_size(0) + /* nest */
+ rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */
+ rocker_tlv_total_size(sizeof(uint32_t)) + /* speed */
+ rocker_tlv_total_size(sizeof(uint8_t)) + /* duplex */
+ rocker_tlv_total_size(sizeof(uint8_t)) + /* autoneg */
+ rocker_tlv_total_size(sizeof(macaddr.a)) + /* macaddr */
+ rocker_tlv_total_size(sizeof(uint8_t)) + /* mode */
+ rocker_tlv_total_size(sizeof(uint8_t)); /* learning */
+
+ if (tlv_size > desc_buf_size(info)) {
+ return -ROCKER_EMSGSIZE;
+ }
+
+ pos = 0;
+ nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
+ rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
+ rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
+ rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
+ sizeof(macaddr.a), macaddr.a);
+ rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
+ rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
+ learning);
+ rocker_tlv_nest_end(buf, &pos, nest);
+
+ return desc_set_buf(info, tlv_size);
+}
+
+static int cmd_set_port_settings(Rocker *r,
+ RockerTlv *cmd_info_tlv)
+{
+ RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ FpPort *fp_port;
+ uint32_t pport;
+ uint32_t port;
+ uint32_t speed;
+ uint8_t duplex;
+ uint8_t autoneg;
+ uint8_t learning;
+ MACAddr macaddr;
+ enum rocker_world_type mode;
+ int err;
+
+ rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ cmd_info_tlv);
+
+ if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
+ return -ROCKER_EINVAL;
+ }
+
+ pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
+ if (!fp_port_from_pport(pport, &port)) {
+ return -ROCKER_EINVAL;
+ }
+ fp_port = r->fp_port[port];
+
+ if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
+ tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
+ tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
+
+ speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
+ duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
+ autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
+
+ err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
+ if (err) {
+ return err;
+ }
+ }
+
+ if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
+ if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
+ sizeof(macaddr.a)) {
+ return -ROCKER_EINVAL;
+ }
+ memcpy(macaddr.a,
+ rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
+ sizeof(macaddr.a));
+ fp_port_set_macaddr(fp_port, &macaddr);
+ }
+
+ if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
+ mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
+ fp_port_set_world(fp_port, r->worlds[mode]);
+ }
+
+ if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
+ learning =
+ rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
+ fp_port_set_learning(fp_port, learning);
+ }
+
+ return ROCKER_OK;
+}
+
+static int cmd_consume(Rocker *r, DescInfo *info)
+{
+ char *buf = desc_get_buf(info, false);
+ RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
+ RockerTlv *info_tlv;
+ World *world;
+ uint16_t cmd;
+ int err;
+
+ if (!buf) {
+ return -ROCKER_ENXIO;
+ }
+
+ rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
+
+ if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
+ return -ROCKER_EINVAL;
+ }
+
+ cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
+ info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
+
+ /* This might be reworked to something like this:
+ * Every world will have an array of command handlers from
+ * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
+ * up to each world to implement whatever command it want.
+ * It can reference "generic" commands as cmd_set_port_settings or
+ * cmd_get_port_settings
+ */
+
+ switch (cmd) {
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
+ world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
+ err = world_do_cmd(world, info, buf, cmd, info_tlv);
+ break;
+ case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
+ err = cmd_get_port_settings(r, info, buf, info_tlv);
+ break;
+ case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
+ err = cmd_set_port_settings(r, info_tlv);
+ break;
+ default:
+ err = -ROCKER_EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static void rocker_msix_irq(Rocker *r, unsigned vector)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+
+ DPRINTF("MSI-X notify request for vector %d\n", vector);
+ if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
+ DPRINTF("incorrect vector %d\n", vector);
+ return;
+ }
+ msix_notify(dev, vector);
+}
+
+int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
+{
+ DescRing *ring = r->rings[ROCKER_RING_EVENT];
+ DescInfo *info = desc_ring_fetch_desc(ring);
+ RockerTlv *nest;
+ char *buf;
+ size_t tlv_size;
+ int pos;
+ int err;
+
+ if (!info) {
+ return -ROCKER_ENOBUFS;
+ }
+
+ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */
+ rocker_tlv_total_size(0) + /* nest */
+ rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */
+ rocker_tlv_total_size(sizeof(uint8_t)); /* link up */
+
+ if (tlv_size > desc_buf_size(info)) {
+ err = -ROCKER_EMSGSIZE;
+ goto err_too_big;
+ }
+
+ buf = desc_get_buf(info, false);
+ if (!buf) {
+ err = -ROCKER_ENOMEM;
+ goto err_no_mem;
+ }
+
+ pos = 0;
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
+ ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
+ nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
+ rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
+ link_up ? 1 : 0);
+ rocker_tlv_nest_end(buf, &pos, nest);
+
+ err = desc_set_buf(info, tlv_size);
+
+err_too_big:
+err_no_mem:
+ if (desc_ring_post_desc(ring, err)) {
+ rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
+ }
+
+ return err;
+}
+
+int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
+ uint16_t vlan_id)
+{
+ DescRing *ring = r->rings[ROCKER_RING_EVENT];
+ DescInfo *info;
+ FpPort *fp_port;
+ uint32_t port;
+ RockerTlv *nest;
+ char *buf;
+ size_t tlv_size;
+ int pos;
+ int err;
+
+ if (!fp_port_from_pport(pport, &port)) {
+ return -ROCKER_EINVAL;
+ }
+ fp_port = r->fp_port[port];
+ if (!fp_port_get_learning(fp_port)) {
+ return ROCKER_OK;
+ }
+
+ info = desc_ring_fetch_desc(ring);
+ if (!info) {
+ return -ROCKER_ENOBUFS;
+ }
+
+ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */
+ rocker_tlv_total_size(0) + /* nest */
+ rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */
+ rocker_tlv_total_size(ETH_ALEN) + /* mac addr */
+ rocker_tlv_total_size(sizeof(uint16_t)); /* vlan_id */
+
+ if (tlv_size > desc_buf_size(info)) {
+ err = -ROCKER_EMSGSIZE;
+ goto err_too_big;
+ }
+
+ buf = desc_get_buf(info, false);
+ if (!buf) {
+ err = -ROCKER_ENOMEM;
+ goto err_no_mem;
+ }
+
+ pos = 0;
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
+ ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
+ nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
+ rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
+ rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
+ rocker_tlv_nest_end(buf, &pos, nest);
+
+ err = desc_set_buf(info, tlv_size);
+
+err_too_big:
+err_no_mem:
+ if (desc_ring_post_desc(ring, err)) {
+ rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
+ }
+
+ return err;
+}
+
+static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
+ uint32_t pport)
+{
+ return r->rings[(pport - 1) * 2 + 3];
+}
+
+int rx_produce(World *world, uint32_t pport,
+ const struct iovec *iov, int iovcnt)
+{
+ Rocker *r = world_rocker(world);
+ PCIDevice *dev = (PCIDevice *)r;
+ DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
+ DescInfo *info = desc_ring_fetch_desc(ring);
+ char *data;
+ size_t data_size = iov_size(iov, iovcnt);
+ char *buf;
+ uint16_t rx_flags = 0;
+ uint16_t rx_csum = 0;
+ size_t tlv_size;
+ RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
+ hwaddr frag_addr;
+ uint16_t frag_max_len;
+ int pos;
+ int err;
+
+ if (!info) {
+ return -ROCKER_ENOBUFS;
+ }
+
+ buf = desc_get_buf(info, false);
+ if (!buf) {
+ err = -ROCKER_ENXIO;
+ goto out;
+ }
+ rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
+
+ if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
+ !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
+ err = -ROCKER_EINVAL;
+ goto out;
+ }
+
+ frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
+ frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
+
+ if (data_size > frag_max_len) {
+ err = -ROCKER_EMSGSIZE;
+ goto out;
+ }
+
+ /* XXX calc rx flags/csum */
+
+ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
+ rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
+ rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
+ rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
+ rocker_tlv_total_size(sizeof(uint16_t)); /* frag len */
+
+ if (tlv_size > desc_buf_size(info)) {
+ err = -ROCKER_EMSGSIZE;
+ goto out;
+ }
+
+ /* TODO:
+ * iov dma write can be optimized in similar way e1000 does it in
+ * e1000_receive_iov. But maybe if would make sense to introduce
+ * generic helper iov_dma_write.
+ */
+
+ data = g_malloc(data_size);
+ if (!data) {
+ err = -ROCKER_ENOMEM;
+ goto out;
+ }
+ iov_to_buf(iov, iovcnt, 0, data, data_size);
+ pci_dma_write(dev, frag_addr, data, data_size);
+ g_free(data);
+
+ pos = 0;
+ rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
+ rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
+ rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
+ rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
+ rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
+
+ err = desc_set_buf(info, tlv_size);
+
+out:
+ if (desc_ring_post_desc(ring, err)) {
+ rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
+ }
+
+ return err;
+}
+
+int rocker_port_eg(Rocker *r, uint32_t pport,
+ const struct iovec *iov, int iovcnt)
+{
+ FpPort *fp_port;
+ uint32_t port;
+
+ if (!fp_port_from_pport(pport, &port)) {
+ return -ROCKER_EINVAL;
+ }
+
+ fp_port = r->fp_port[port];
+
+ return fp_port_eg(fp_port, iov, iovcnt);
+}
+
+static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+ char *buf;
+ int i;
+
+ buf = g_malloc(r->test_dma_size);
+
+ if (!buf) {
+ DPRINTF("test dma buffer alloc failed");
+ return;
+ }
+
+ switch (val) {
+ case ROCKER_TEST_DMA_CTRL_CLEAR:
+ memset(buf, 0, r->test_dma_size);
+ break;
+ case ROCKER_TEST_DMA_CTRL_FILL:
+ memset(buf, 0x96, r->test_dma_size);
+ break;
+ case ROCKER_TEST_DMA_CTRL_INVERT:
+ pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
+ for (i = 0; i < r->test_dma_size; i++) {
+ buf[i] = ~buf[i];
+ }
+ break;
+ default:
+ DPRINTF("not test dma control val=0x%08x\n", val);
+ goto err_out;
+ }
+ pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
+
+ rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
+
+err_out:
+ g_free(buf);
+}
+
+static void rocker_reset(DeviceState *dev);
+
+static void rocker_control(Rocker *r, uint32_t val)
+{
+ if (val & ROCKER_CONTROL_RESET) {
+ rocker_reset(DEVICE(r));
+ }
+}
+
+static int rocker_pci_ring_count(Rocker *r)
+{
+ /* There are:
+ * - command ring
+ * - event ring
+ * - tx and rx ring per each port
+ */
+ return 2 + (2 * r->fp_ports);
+}
+
+static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
+{
+ hwaddr start = ROCKER_DMA_DESC_BASE;
+ hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
+
+ return addr >= start && addr < end;
+}
+
+static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
+{
+ int i;
+ bool old_enabled;
+ bool new_enabled;
+ FpPort *fp_port;
+
+ for (i = 0; i < r->fp_ports; i++) {
+ fp_port = r->fp_port[i];
+ old_enabled = fp_port_enabled(fp_port);
+ new_enabled = (new >> (i + 1)) & 0x1;
+ if (new_enabled == old_enabled) {
+ continue;
+ }
+ if (new_enabled) {
+ fp_port_enable(r->fp_port[i]);
+ } else {
+ fp_port_disable(r->fp_port[i]);
+ }
+ }
+}
+
+static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
+{
+ Rocker *r = opaque;
+
+ if (rocker_addr_is_desc_reg(r, addr)) {
+ unsigned index = ROCKER_RING_INDEX(addr);
+ unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+
+ switch (offset) {
+ case ROCKER_DMA_DESC_ADDR_OFFSET:
+ r->lower32 = (uint64_t)val;
+ break;
+ case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
+ desc_ring_set_base_addr(r->rings[index],
+ ((uint64_t)val) << 32 | r->lower32);
+ r->lower32 = 0;
+ break;
+ case ROCKER_DMA_DESC_SIZE_OFFSET:
+ desc_ring_set_size(r->rings[index], val);
+ break;
+ case ROCKER_DMA_DESC_HEAD_OFFSET:
+ if (desc_ring_set_head(r->rings[index], val)) {
+ rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
+ }
+ break;
+ case ROCKER_DMA_DESC_CTRL_OFFSET:
+ desc_ring_set_ctrl(r->rings[index], val);
+ break;
+ case ROCKER_DMA_DESC_CREDITS_OFFSET:
+ if (desc_ring_ret_credits(r->rings[index], val)) {
+ rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
+ }
+ break;
+ default:
+ DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
+ " val=0x%08x (ring %d, addr=0x%02x)\n",
+ addr, val, index, offset);
+ break;
+ }
+ return;
+ }
+
+ switch (addr) {
+ case ROCKER_TEST_REG:
+ r->test_reg = val;
+ break;
+ case ROCKER_TEST_REG64:
+ case ROCKER_TEST_DMA_ADDR:
+ case ROCKER_PORT_PHYS_ENABLE:
+ r->lower32 = (uint64_t)val;
+ break;
+ case ROCKER_TEST_REG64 + 4:
+ r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
+ r->lower32 = 0;
+ break;
+ case ROCKER_TEST_IRQ:
+ rocker_msix_irq(r, val);
+ break;
+ case ROCKER_TEST_DMA_SIZE:
+ r->test_dma_size = val;
+ break;
+ case ROCKER_TEST_DMA_ADDR + 4:
+ r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
+ r->lower32 = 0;
+ break;
+ case ROCKER_TEST_DMA_CTRL:
+ rocker_test_dma_ctrl(r, val);
+ break;
+ case ROCKER_CONTROL:
+ rocker_control(r, val);
+ break;
+ case ROCKER_PORT_PHYS_ENABLE + 4:
+ rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
+ r->lower32 = 0;
+ break;
+ default:
+ DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
+ " val=0x%08x\n", addr, val);
+ break;
+ }
+}
+
+static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
+{
+ Rocker *r = opaque;
+
+ if (rocker_addr_is_desc_reg(r, addr)) {
+ unsigned index = ROCKER_RING_INDEX(addr);
+ unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+
+ switch (offset) {
+ case ROCKER_DMA_DESC_ADDR_OFFSET:
+ desc_ring_set_base_addr(r->rings[index], val);
+ break;
+ default:
+ DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
+ " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
+ addr, val, index, offset);
+ break;
+ }
+ return;
+ }
+
+ switch (addr) {
+ case ROCKER_TEST_REG64:
+ r->test_reg64 = val;
+ break;
+ case ROCKER_TEST_DMA_ADDR:
+ r->test_dma_addr = val;
+ break;
+ case ROCKER_PORT_PHYS_ENABLE:
+ rocker_port_phys_enable_write(r, val);
+ break;
+ default:
+ DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
+ " val=0x" TARGET_FMT_plx "\n", addr, val);
+ break;
+ }
+}
+
+#ifdef DEBUG_ROCKER
+#define regname(reg) case (reg): return #reg
+static const char *rocker_reg_name(void *opaque, hwaddr addr)
+{
+ Rocker *r = opaque;
+
+ if (rocker_addr_is_desc_reg(r, addr)) {
+ unsigned index = ROCKER_RING_INDEX(addr);
+ unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+ static char buf[100];
+ char ring_name[10];
+
+ switch (index) {
+ case 0:
+ sprintf(ring_name, "cmd");
+ break;
+ case 1:
+ sprintf(ring_name, "event");
+ break;
+ default:
+ sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
+ (index - 2) / 2);
+ }
+
+ switch (offset) {
+ case ROCKER_DMA_DESC_ADDR_OFFSET:
+ sprintf(buf, "Ring[%s] ADDR", ring_name);
+ return buf;
+ case ROCKER_DMA_DESC_ADDR_OFFSET+4:
+ sprintf(buf, "Ring[%s] ADDR+4", ring_name);
+ return buf;
+ case ROCKER_DMA_DESC_SIZE_OFFSET:
+ sprintf(buf, "Ring[%s] SIZE", ring_name);
+ return buf;
+ case ROCKER_DMA_DESC_HEAD_OFFSET:
+ sprintf(buf, "Ring[%s] HEAD", ring_name);
+ return buf;
+ case ROCKER_DMA_DESC_TAIL_OFFSET:
+ sprintf(buf, "Ring[%s] TAIL", ring_name);
+ return buf;
+ case ROCKER_DMA_DESC_CTRL_OFFSET:
+ sprintf(buf, "Ring[%s] CTRL", ring_name);
+ return buf;
+ case ROCKER_DMA_DESC_CREDITS_OFFSET:
+ sprintf(buf, "Ring[%s] CREDITS", ring_name);
+ return buf;
+ default:
+ sprintf(buf, "Ring[%s] ???", ring_name);
+ return buf;
+ }
+ } else {
+ switch (addr) {
+ regname(ROCKER_BOGUS_REG0);
+ regname(ROCKER_BOGUS_REG1);
+ regname(ROCKER_BOGUS_REG2);
+ regname(ROCKER_BOGUS_REG3);
+ regname(ROCKER_TEST_REG);
+ regname(ROCKER_TEST_REG64);
+ regname(ROCKER_TEST_REG64+4);
+ regname(ROCKER_TEST_IRQ);
+ regname(ROCKER_TEST_DMA_ADDR);
+ regname(ROCKER_TEST_DMA_ADDR+4);
+ regname(ROCKER_TEST_DMA_SIZE);
+ regname(ROCKER_TEST_DMA_CTRL);
+ regname(ROCKER_CONTROL);
+ regname(ROCKER_PORT_PHYS_COUNT);
+ regname(ROCKER_PORT_PHYS_LINK_STATUS);
+ regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
+ regname(ROCKER_PORT_PHYS_ENABLE);
+ regname(ROCKER_PORT_PHYS_ENABLE+4);
+ regname(ROCKER_SWITCH_ID);
+ regname(ROCKER_SWITCH_ID+4);
+ }
+ }
+ return "???";
+}
+#else
+static const char *rocker_reg_name(void *opaque, hwaddr addr)
+{
+ return NULL;
+}
+#endif
+
+static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned size)
+{
+ DPRINTF("Write %s addr " TARGET_FMT_plx
+ ", size %u, val " TARGET_FMT_plx "\n",
+ rocker_reg_name(opaque, addr), addr, size, val);
+
+ switch (size) {
+ case 4:
+ rocker_io_writel(opaque, addr, val);
+ break;
+ case 8:
+ rocker_io_writeq(opaque, addr, val);
+ break;
+ }
+}
+
+static uint64_t rocker_port_phys_link_status(Rocker *r)
+{
+ int i;
+ uint64_t status = 0;
+
+ for (i = 0; i < r->fp_ports; i++) {
+ FpPort *port = r->fp_port[i];
+
+ if (fp_port_get_link_up(port)) {
+ status |= 1 << (i + 1);
+ }
+ }
+ return status;
+}
+
+static uint64_t rocker_port_phys_enable_read(Rocker *r)
+{
+ int i;
+ uint64_t ret = 0;
+
+ for (i = 0; i < r->fp_ports; i++) {
+ FpPort *port = r->fp_port[i];
+
+ if (fp_port_enabled(port)) {
+ ret |= 1 << (i + 1);
+ }
+ }
+ return ret;
+}
+
+static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
+{
+ Rocker *r = opaque;
+ uint32_t ret;
+
+ if (rocker_addr_is_desc_reg(r, addr)) {
+ unsigned index = ROCKER_RING_INDEX(addr);
+ unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+
+ switch (offset) {
+ case ROCKER_DMA_DESC_ADDR_OFFSET:
+ ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
+ break;
+ case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
+ ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
+ break;
+ case ROCKER_DMA_DESC_SIZE_OFFSET:
+ ret = desc_ring_get_size(r->rings[index]);
+ break;
+ case ROCKER_DMA_DESC_HEAD_OFFSET:
+ ret = desc_ring_get_head(r->rings[index]);
+ break;
+ case ROCKER_DMA_DESC_TAIL_OFFSET:
+ ret = desc_ring_get_tail(r->rings[index]);
+ break;
+ case ROCKER_DMA_DESC_CREDITS_OFFSET:
+ ret = desc_ring_get_credits(r->rings[index]);
+ break;
+ default:
+ DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
+ " (ring %d, addr=0x%02x)\n", addr, index, offset);
+ ret = 0;
+ break;
+ }
+ return ret;
+ }
+
+ switch (addr) {
+ case ROCKER_BOGUS_REG0:
+ case ROCKER_BOGUS_REG1:
+ case ROCKER_BOGUS_REG2:
+ case ROCKER_BOGUS_REG3:
+ ret = 0xDEADBABE;
+ break;
+ case ROCKER_TEST_REG:
+ ret = r->test_reg * 2;
+ break;
+ case ROCKER_TEST_REG64:
+ ret = (uint32_t)(r->test_reg64 * 2);
+ break;
+ case ROCKER_TEST_REG64 + 4:
+ ret = (uint32_t)((r->test_reg64 * 2) >> 32);
+ break;
+ case ROCKER_TEST_DMA_SIZE:
+ ret = r->test_dma_size;
+ break;
+ case ROCKER_TEST_DMA_ADDR:
+ ret = (uint32_t)r->test_dma_addr;
+ break;
+ case ROCKER_TEST_DMA_ADDR + 4:
+ ret = (uint32_t)(r->test_dma_addr >> 32);
+ break;
+ case ROCKER_PORT_PHYS_COUNT:
+ ret = r->fp_ports;
+ break;
+ case ROCKER_PORT_PHYS_LINK_STATUS:
+ ret = (uint32_t)rocker_port_phys_link_status(r);
+ break;
+ case ROCKER_PORT_PHYS_LINK_STATUS + 4:
+ ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
+ break;
+ case ROCKER_PORT_PHYS_ENABLE:
+ ret = (uint32_t)rocker_port_phys_enable_read(r);
+ break;
+ case ROCKER_PORT_PHYS_ENABLE + 4:
+ ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
+ break;
+ case ROCKER_SWITCH_ID:
+ ret = (uint32_t)r->switch_id;
+ break;
+ case ROCKER_SWITCH_ID + 4:
+ ret = (uint32_t)(r->switch_id >> 32);
+ break;
+ default:
+ DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
+ ret = 0;
+ break;
+ }
+ return ret;
+}
+
+static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
+{
+ Rocker *r = opaque;
+ uint64_t ret;
+
+ if (rocker_addr_is_desc_reg(r, addr)) {
+ unsigned index = ROCKER_RING_INDEX(addr);
+ unsigned offset = addr & ROCKER_DMA_DESC_MASK;
+
+ switch (addr & ROCKER_DMA_DESC_MASK) {
+ case ROCKER_DMA_DESC_ADDR_OFFSET:
+ ret = desc_ring_get_base_addr(r->rings[index]);
+ break;
+ default:
+ DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
+ " (ring %d, addr=0x%02x)\n", addr, index, offset);
+ ret = 0;
+ break;
+ }
+ return ret;
+ }
+
+ switch (addr) {
+ case ROCKER_BOGUS_REG0:
+ case ROCKER_BOGUS_REG2:
+ ret = 0xDEADBABEDEADBABEULL;
+ break;
+ case ROCKER_TEST_REG64:
+ ret = r->test_reg64 * 2;
+ break;
+ case ROCKER_TEST_DMA_ADDR:
+ ret = r->test_dma_addr;
+ break;
+ case ROCKER_PORT_PHYS_LINK_STATUS:
+ ret = rocker_port_phys_link_status(r);
+ break;
+ case ROCKER_PORT_PHYS_ENABLE:
+ ret = rocker_port_phys_enable_read(r);
+ break;
+ case ROCKER_SWITCH_ID:
+ ret = r->switch_id;
+ break;
+ default:
+ DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
+ ret = 0;
+ break;
+ }
+ return ret;
+}
+
+static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+ DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
+ rocker_reg_name(opaque, addr), addr, size);
+
+ switch (size) {
+ case 4:
+ return rocker_io_readl(opaque, addr);
+ case 8:
+ return rocker_io_readq(opaque, addr);
+ }
+
+ return -1;
+}
+
+static const MemoryRegionOps rocker_mmio_ops = {
+ .read = rocker_mmio_read,
+ .write = rocker_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+};
+
+static void rocker_msix_vectors_unuse(Rocker *r,
+ unsigned int num_vectors)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+ int i;
+
+ for (i = 0; i < num_vectors; i++) {
+ msix_vector_unuse(dev, i);
+ }
+}
+
+static int rocker_msix_vectors_use(Rocker *r,
+ unsigned int num_vectors)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+ int err;
+ int i;
+
+ for (i = 0; i < num_vectors; i++) {
+ err = msix_vector_use(dev, i);
+ if (err) {
+ goto rollback;
+ }
+ }
+ return 0;
+
+rollback:
+ rocker_msix_vectors_unuse(r, i);
+ return err;
+}
+
+static int rocker_msix_init(Rocker *r)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+ int err;
+
+ err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
+ &r->msix_bar,
+ ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
+ &r->msix_bar,
+ ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
+ 0);
+ if (err) {
+ return err;
+ }
+
+ err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
+ if (err) {
+ goto err_msix_vectors_use;
+ }
+
+ return 0;
+
+err_msix_vectors_use:
+ msix_uninit(dev, &r->msix_bar, &r->msix_bar);
+ return err;
+}
+
+static void rocker_msix_uninit(Rocker *r)
+{
+ PCIDevice *dev = PCI_DEVICE(r);
+
+ msix_uninit(dev, &r->msix_bar, &r->msix_bar);
+ rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
+}
+
+static int pci_rocker_init(PCIDevice *dev)
+{
+ Rocker *r = to_rocker(dev);
+ const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
+ const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
+ static int sw_index;
+ int i, err = 0;
+
+ /* allocate worlds */
+
+ r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
+ r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
+
+ for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
+ if (!r->worlds[i]) {
+ goto err_world_alloc;
+ }
+ }
+
+ /* set up memory-mapped region at BAR0 */
+
+ memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
+ "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
+ pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
+
+ /* set up memory-mapped region for MSI-X */
+
+ memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
+ ROCKER_PCI_MSIX_BAR_SIZE);
+ pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
+
+ /* MSI-X init */
+
+ err = rocker_msix_init(r);
+ if (err) {
+ goto err_msix_init;
+ }
+
+ /* validate switch properties */
+
+ if (!r->name) {
+ r->name = g_strdup(ROCKER);
+ }
+
+ if (rocker_find(r->name)) {
+ err = -EEXIST;
+ goto err_duplicate;
+ }
+
+ if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
+ memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
+ r->fp_start_macaddr.a[4] += (sw_index++);
+ }
+
+ if (!r->switch_id) {
+ memcpy(&r->switch_id, &r->fp_start_macaddr,
+ sizeof(r->fp_start_macaddr));
+ }
+
+ if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
+ r->fp_ports = ROCKER_FP_PORTS_MAX;
+ }
+
+ r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r));
+ if (!r->rings) {
+ goto err_rings_alloc;
+ }
+
+ /* Rings are ordered like this:
+ * - command ring
+ * - event ring
+ * - port0 tx ring
+ * - port0 rx ring
+ * - port1 tx ring
+ * - port1 rx ring
+ * .....
+ */
+
+ err = -ENOMEM;
+ for (i = 0; i < rocker_pci_ring_count(r); i++) {
+ DescRing *ring = desc_ring_alloc(r, i);
+
+ if (!ring) {
+ goto err_ring_alloc;
+ }
+
+ if (i == ROCKER_RING_CMD) {
+ desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
+ } else if (i == ROCKER_RING_EVENT) {
+ desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
+ } else if (i % 2 == 0) {
+ desc_ring_set_consume(ring, tx_consume,
+ ROCKER_MSIX_VEC_TX((i - 2) / 2));
+ } else if (i % 2 == 1) {
+ desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
+ }
+
+ r->rings[i] = ring;
+ }
+
+ for (i = 0; i < r->fp_ports; i++) {
+ FpPort *port =
+ fp_port_alloc(r, r->name, &r->fp_start_macaddr,
+ i, &r->fp_ports_peers[i]);
+
+ if (!port) {
+ goto err_port_alloc;
+ }
+
+ r->fp_port[i] = port;
+ fp_port_set_world(port, r->world_dflt);
+ }
+
+ QLIST_INSERT_HEAD(&rockers, r, next);
+
+ return 0;
+
+err_port_alloc:
+ for (--i; i >= 0; i--) {
+ FpPort *port = r->fp_port[i];
+ fp_port_free(port);
+ }
+ i = rocker_pci_ring_count(r);
+err_ring_alloc:
+ for (--i; i >= 0; i--) {
+ desc_ring_free(r->rings[i]);
+ }
+ g_free(r->rings);
+err_rings_alloc:
+err_duplicate:
+ rocker_msix_uninit(r);
+err_msix_init:
+ object_unparent(OBJECT(&r->msix_bar));
+ object_unparent(OBJECT(&r->mmio));
+err_world_alloc:
+ for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
+ if (r->worlds[i]) {
+ world_free(r->worlds[i]);
+ }
+ }
+ return err;
+}
+
+static void pci_rocker_uninit(PCIDevice *dev)
+{
+ Rocker *r = to_rocker(dev);
+ int i;
+
+ QLIST_REMOVE(r, next);
+
+ for (i = 0; i < r->fp_ports; i++) {
+ FpPort *port = r->fp_port[i];
+
+ fp_port_free(port);
+ r->fp_port[i] = NULL;
+ }
+
+ for (i = 0; i < rocker_pci_ring_count(r); i++) {
+ if (r->rings[i]) {
+ desc_ring_free(r->rings[i]);
+ }
+ }
+ g_free(r->rings);
+
+ rocker_msix_uninit(r);
+ object_unparent(OBJECT(&r->msix_bar));
+ object_unparent(OBJECT(&r->mmio));
+
+ for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
+ if (r->worlds[i]) {
+ world_free(r->worlds[i]);
+ }
+ }
+ g_free(r->fp_ports_peers);
+}
+
+static void rocker_reset(DeviceState *dev)
+{
+ Rocker *r = to_rocker(dev);
+ int i;
+
+ for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
+ if (r->worlds[i]) {
+ world_reset(r->worlds[i]);
+ }
+ }
+ for (i = 0; i < r->fp_ports; i++) {
+ fp_port_reset(r->fp_port[i]);
+ fp_port_set_world(r->fp_port[i], r->world_dflt);
+ }
+
+ r->test_reg = 0;
+ r->test_reg64 = 0;
+ r->test_dma_addr = 0;
+ r->test_dma_size = 0;
+
+ for (i = 0; i < rocker_pci_ring_count(r); i++) {
+ desc_ring_reset(r->rings[i]);
+ }
+
+ DPRINTF("Reset done\n");
+}
+
+static Property rocker_properties[] = {
+ DEFINE_PROP_STRING("name", Rocker, name),
+ DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
+ fp_start_macaddr),
+ DEFINE_PROP_UINT64("switch_id", Rocker,
+ switch_id, 0),
+ DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
+ fp_ports_peers, qdev_prop_netdev, NICPeers),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static const VMStateDescription rocker_vmsd = {
+ .name = ROCKER,
+ .unmigratable = 1,
+};
+
+static void rocker_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+ k->init = pci_rocker_init;
+ k->exit = pci_rocker_uninit;
+ k->vendor_id = PCI_VENDOR_ID_REDHAT;
+ k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
+ k->revision = ROCKER_PCI_REVISION;
+ k->class_id = PCI_CLASS_NETWORK_OTHER;
+ set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+ dc->desc = "Rocker Switch";
+ dc->reset = rocker_reset;
+ dc->props = rocker_properties;
+ dc->vmsd = &rocker_vmsd;
+}
+
+static const TypeInfo rocker_info = {
+ .name = ROCKER,
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(Rocker),
+ .class_init = rocker_class_init,
+};
+
+static void rocker_register_types(void)
+{
+ type_register_static(&rocker_info);
+}
+
+type_init(rocker_register_types)
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
new file mode 100644
index 0000000000..b3310b61eb
--- /dev/null
+++ b/hw/net/rocker/rocker.h
@@ -0,0 +1,84 @@
+/*
+ * QEMU rocker switch emulation
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Neil Horman <nhorman@tuxdriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ROCKER_H_
+#define _ROCKER_H_
+
+#include "qemu/sockets.h"
+
+#if defined(DEBUG_ROCKER)
+# define DPRINTF(fmt, ...) \
+ do { \
+ struct timeval tv; \
+ char timestr[64]; \
+ time_t now; \
+ gettimeofday(&tv, NULL); \
+ now = tv.tv_sec; \
+ strftime(timestr, sizeof(timestr), "%T", localtime(&now)); \
+ fprintf(stderr, "%s.%06ld ", timestr, tv.tv_usec); \
+ fprintf(stderr, "ROCKER: " fmt, ## __VA_ARGS__); \
+ } while (0)
+#else
+static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...)
+{
+ return 0;
+}
+#endif
+
+#define __le16 uint16_t
+#define __le32 uint32_t
+#define __le64 uint64_t
+
+#define __be16 uint16_t
+#define __be32 uint32_t
+#define __be64 uint64_t
+
+static inline bool ipv4_addr_is_multicast(__be32 addr)
+{
+ return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
+}
+
+typedef struct ipv6_addr {
+ union {
+ uint8_t addr8[16];
+ __be16 addr16[8];
+ __be32 addr32[4];
+ };
+} Ipv6Addr;
+
+static inline bool ipv6_addr_is_multicast(const Ipv6Addr *addr)
+{
+ return (addr->addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000);
+}
+
+typedef struct rocker Rocker;
+typedef struct world World;
+typedef struct desc_info DescInfo;
+typedef struct desc_ring DescRing;
+
+Rocker *rocker_find(const char *name);
+uint32_t rocker_fp_ports(Rocker *r);
+int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up);
+int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
+ uint16_t vlan_id);
+int rx_produce(World *world, uint32_t pport,
+ const struct iovec *iov, int iovcnt);
+int rocker_port_eg(Rocker *r, uint32_t pport,
+ const struct iovec *iov, int iovcnt);
+
+#endif /* _ROCKER_H_ */
diff --git a/hw/net/rocker/rocker_desc.c b/hw/net/rocker/rocker_desc.c
new file mode 100644
index 0000000000..9d896fe470
--- /dev/null
+++ b/hw/net/rocker/rocker_desc.c
@@ -0,0 +1,377 @@
+/*
+ * QEMU rocker switch emulation - Descriptor ring support
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "net/net.h"
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+
+#include "rocker.h"
+#include "rocker_hw.h"
+#include "rocker_desc.h"
+
+struct desc_ring {
+ hwaddr base_addr;
+ uint32_t size;
+ uint32_t head;
+ uint32_t tail;
+ uint32_t ctrl;
+ uint32_t credits;
+ Rocker *r;
+ DescInfo *info;
+ int index;
+ desc_ring_consume *consume;
+ unsigned msix_vector;
+};
+
+struct desc_info {
+ DescRing *ring;
+ RockerDesc desc;
+ char *buf;
+ size_t buf_size;
+};
+
+uint16_t desc_buf_size(DescInfo *info)
+{
+ return le16_to_cpu(info->desc.buf_size);
+}
+
+uint16_t desc_tlv_size(DescInfo *info)
+{
+ return le16_to_cpu(info->desc.tlv_size);
+}
+
+char *desc_get_buf(DescInfo *info, bool read_only)
+{
+ PCIDevice *dev = PCI_DEVICE(info->ring->r);
+ size_t size = read_only ? le16_to_cpu(info->desc.tlv_size) :
+ le16_to_cpu(info->desc.buf_size);
+
+ if (size > info->buf_size) {
+ info->buf = g_realloc(info->buf, size);
+ info->buf_size = size;
+ }
+
+ if (!info->buf) {
+ return NULL;
+ }
+
+ if (pci_dma_read(dev, le64_to_cpu(info->desc.buf_addr), info->buf, size)) {
+ return NULL;
+ }
+
+ return info->buf;
+}
+
+int desc_set_buf(DescInfo *info, size_t tlv_size)
+{
+ PCIDevice *dev = PCI_DEVICE(info->ring->r);
+
+ if (tlv_size > info->buf_size) {
+ DPRINTF("ERROR: trying to write more to desc buf than it "
+ "can hold buf_size %zu tlv_size %zu\n",
+ info->buf_size, tlv_size);
+ return -ROCKER_EMSGSIZE;
+ }
+
+ info->desc.tlv_size = cpu_to_le16(tlv_size);
+ pci_dma_write(dev, le64_to_cpu(info->desc.buf_addr), info->buf, tlv_size);
+
+ return ROCKER_OK;
+}
+
+DescRing *desc_get_ring(DescInfo *info)
+{
+ return info->ring;
+}
+
+int desc_ring_index(DescRing *ring)
+{
+ return ring->index;
+}
+
+static bool desc_ring_empty(DescRing *ring)
+{
+ return ring->head == ring->tail;
+}
+
+bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr)
+{
+ if (base_addr & 0x7) {
+ DPRINTF("ERROR: ring[%d] desc base addr (0x" TARGET_FMT_plx
+ ") not 8-byte aligned\n", ring->index, base_addr);
+ return false;
+ }
+
+ ring->base_addr = base_addr;
+
+ return true;
+}
+
+uint64_t desc_ring_get_base_addr(DescRing *ring)
+{
+ return ring->base_addr;
+}
+
+bool desc_ring_set_size(DescRing *ring, uint32_t size)
+{
+ int i;
+
+ if (size < 2 || size > 0x10000 || (size & (size - 1))) {
+ DPRINTF("ERROR: ring[%d] size (%d) not a power of 2 "
+ "or in range [2, 64K]\n", ring->index, size);
+ return false;
+ }
+
+ for (i = 0; i < ring->size; i++) {
+ if (ring->info[i].buf) {
+ g_free(ring->info[i].buf);
+ }
+ }
+
+ ring->size = size;
+ ring->head = ring->tail = 0;
+
+ ring->info = g_realloc(ring->info, size * sizeof(DescInfo));
+ if (!ring->info) {
+ return false;
+ }
+
+ memset(ring->info, 0, size * sizeof(DescInfo));
+
+ for (i = 0; i < size; i++) {
+ ring->info[i].ring = ring;
+ }
+
+ return true;
+}
+
+uint32_t desc_ring_get_size(DescRing *ring)
+{
+ return ring->size;
+}
+
+static DescInfo *desc_read(DescRing *ring, uint32_t index)
+{
+ PCIDevice *dev = PCI_DEVICE(ring->r);
+ DescInfo *info = &ring->info[index];
+ hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index);
+
+ pci_dma_read(dev, addr, &info->desc, sizeof(info->desc));
+
+ return info;
+}
+
+static void desc_write(DescRing *ring, uint32_t index)
+{
+ PCIDevice *dev = PCI_DEVICE(ring->r);
+ DescInfo *info = &ring->info[index];
+ hwaddr addr = ring->base_addr + (sizeof(RockerDesc) * index);
+
+ pci_dma_write(dev, addr, &info->desc, sizeof(info->desc));
+}
+
+static bool desc_ring_base_addr_check(DescRing *ring)
+{
+ if (!ring->base_addr) {
+ DPRINTF("ERROR: ring[%d] not-initialized desc base address!\n",
+ ring->index);
+ return false;
+ }
+ return true;
+}
+
+static DescInfo *__desc_ring_fetch_desc(DescRing *ring)
+{
+ return desc_read(ring, ring->tail);
+}
+
+DescInfo *desc_ring_fetch_desc(DescRing *ring)
+{
+ if (desc_ring_empty(ring) || !desc_ring_base_addr_check(ring)) {
+ return NULL;
+ }
+
+ return desc_read(ring, ring->tail);
+}
+
+static bool __desc_ring_post_desc(DescRing *ring, int err)
+{
+ uint16_t comp_err = 0x8000 | (uint16_t)-err;
+ DescInfo *info = &ring->info[ring->tail];
+
+ info->desc.comp_err = cpu_to_le16(comp_err);
+ desc_write(ring, ring->tail);
+ ring->tail = (ring->tail + 1) % ring->size;
+
+ /* return true if starting credit count */
+
+ return ring->credits++ == 0;
+}
+
+bool desc_ring_post_desc(DescRing *ring, int err)
+{
+ if (desc_ring_empty(ring)) {
+ DPRINTF("ERROR: ring[%d] trying to post desc to empty ring\n",
+ ring->index);
+ return false;
+ }
+
+ if (!desc_ring_base_addr_check(ring)) {
+ return false;
+ }
+
+ return __desc_ring_post_desc(ring, err);
+}
+
+static bool ring_pump(DescRing *ring)
+{
+ DescInfo *info;
+ bool primed = false;
+ int err;
+
+ /* If the ring has a consumer, call consumer for each
+ * desc starting at tail and stopping when tail reaches
+ * head (the empty ring condition).
+ */
+
+ if (ring->consume) {
+ while (ring->head != ring->tail) {
+ info = __desc_ring_fetch_desc(ring);
+ err = ring->consume(ring->r, info);
+ if (__desc_ring_post_desc(ring, err)) {
+ primed = true;
+ }
+ }
+ }
+
+ return primed;
+}
+
+bool desc_ring_set_head(DescRing *ring, uint32_t new)
+{
+ uint32_t tail = ring->tail;
+ uint32_t head = ring->head;
+
+ if (!desc_ring_base_addr_check(ring)) {
+ return false;
+ }
+
+ if (new >= ring->size) {
+ DPRINTF("ERROR: trying to set head (%d) past ring[%d] size (%d)\n",
+ new, ring->index, ring->size);
+ return false;
+ }
+
+ if (((head < tail) && ((new >= tail) || (new < head))) ||
+ ((head > tail) && ((new >= tail) && (new < head)))) {
+ DPRINTF("ERROR: trying to wrap ring[%d] "
+ "(head %d, tail %d, new head %d)\n",
+ ring->index, head, tail, new);
+ return false;
+ }
+
+ if (new == ring->head) {
+ DPRINTF("WARNING: setting head (%d) to current head position\n", new);
+ }
+
+ ring->head = new;
+
+ return ring_pump(ring);
+}
+
+uint32_t desc_ring_get_head(DescRing *ring)
+{
+ return ring->head;
+}
+
+uint32_t desc_ring_get_tail(DescRing *ring)
+{
+ return ring->tail;
+}
+
+void desc_ring_set_ctrl(DescRing *ring, uint32_t val)
+{
+ if (val & ROCKER_DMA_DESC_CTRL_RESET) {
+ DPRINTF("ring[%d] resetting\n", ring->index);
+ desc_ring_reset(ring);
+ }
+}
+
+bool desc_ring_ret_credits(DescRing *ring, uint32_t credits)
+{
+ if (credits > ring->credits) {
+ DPRINTF("ERROR: trying to return more credits (%d) "
+ "than are outstanding (%d)\n", credits, ring->credits);
+ ring->credits = 0;
+ return false;
+ }
+
+ ring->credits -= credits;
+
+ /* return true if credits are still outstanding */
+
+ return ring->credits > 0;
+}
+
+uint32_t desc_ring_get_credits(DescRing *ring)
+{
+ return ring->credits;
+}
+
+void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume,
+ unsigned vector)
+{
+ ring->consume = consume;
+ ring->msix_vector = vector;
+}
+
+unsigned desc_ring_get_msix_vector(DescRing *ring)
+{
+ return ring->msix_vector;
+}
+
+DescRing *desc_ring_alloc(Rocker *r, int index)
+{
+ DescRing *ring;
+
+ ring = g_malloc0(sizeof(DescRing));
+ if (!ring) {
+ return NULL;
+ }
+
+ ring->r = r;
+ ring->index = index;
+
+ return ring;
+}
+
+void desc_ring_free(DescRing *ring)
+{
+ if (ring->info) {
+ g_free(ring->info);
+ }
+ g_free(ring);
+}
+
+void desc_ring_reset(DescRing *ring)
+{
+ ring->base_addr = 0;
+ ring->size = 0;
+ ring->head = 0;
+ ring->tail = 0;
+ ring->ctrl = 0;
+ ring->credits = 0;
+}
diff --git a/hw/net/rocker/rocker_desc.h b/hw/net/rocker/rocker_desc.h
new file mode 100644
index 0000000000..d4041f5c4c
--- /dev/null
+++ b/hw/net/rocker/rocker_desc.h
@@ -0,0 +1,53 @@
+/*
+ * QEMU rocker switch emulation - Descriptor ring support
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#ifndef _ROCKER_DESC_H_
+#define _ROCKER_DESC_H_
+
+#include "rocker_hw.h"
+
+typedef int (desc_ring_consume)(Rocker *r, DescInfo *info);
+
+uint16_t desc_buf_size(DescInfo *info);
+uint16_t desc_tlv_size(DescInfo *info);
+char *desc_get_buf(DescInfo *info, bool read_only);
+int desc_set_buf(DescInfo *info, size_t tlv_size);
+DescRing *desc_get_ring(DescInfo *info);
+
+int desc_ring_index(DescRing *ring);
+bool desc_ring_set_base_addr(DescRing *ring, uint64_t base_addr);
+uint64_t desc_ring_get_base_addr(DescRing *ring);
+bool desc_ring_set_size(DescRing *ring, uint32_t size);
+uint32_t desc_ring_get_size(DescRing *ring);
+bool desc_ring_set_head(DescRing *ring, uint32_t new);
+uint32_t desc_ring_get_head(DescRing *ring);
+uint32_t desc_ring_get_tail(DescRing *ring);
+void desc_ring_set_ctrl(DescRing *ring, uint32_t val);
+bool desc_ring_ret_credits(DescRing *ring, uint32_t credits);
+uint32_t desc_ring_get_credits(DescRing *ring);
+
+DescInfo *desc_ring_fetch_desc(DescRing *ring);
+bool desc_ring_post_desc(DescRing *ring, int status);
+
+void desc_ring_set_consume(DescRing *ring, desc_ring_consume *consume,
+ unsigned vector);
+unsigned desc_ring_get_msix_vector(DescRing *ring);
+DescRing *desc_ring_alloc(Rocker *r, int index);
+void desc_ring_free(DescRing *ring);
+void desc_ring_reset(DescRing *ring);
+
+#endif
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
new file mode 100644
index 0000000000..2f1e3b348a
--- /dev/null
+++ b/hw/net/rocker/rocker_fp.c
@@ -0,0 +1,234 @@
+/*
+ * QEMU rocker switch emulation - front-panel ports
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "net/clients.h"
+
+#include "rocker.h"
+#include "rocker_hw.h"
+#include "rocker_fp.h"
+#include "rocker_world.h"
+
+enum duplex {
+ DUPLEX_HALF = 0,
+ DUPLEX_FULL
+};
+
+struct fp_port {
+ Rocker *r;
+ World *world;
+ unsigned int index;
+ char *name;
+ uint32_t pport;
+ bool enabled;
+ uint32_t speed;
+ uint8_t duplex;
+ uint8_t autoneg;
+ uint8_t learning;
+ NICState *nic;
+ NICConf conf;
+};
+
+bool fp_port_get_link_up(FpPort *port)
+{
+ return !qemu_get_queue(port->nic)->link_down;
+}
+
+void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr)
+{
+ memcpy(macaddr->a, port->conf.macaddr.a, sizeof(macaddr->a));
+}
+
+void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr)
+{
+/* XXX TODO implement and test setting mac addr
+ * XXX memcpy(port->conf.macaddr.a, macaddr.a, sizeof(port->conf.macaddr.a));
+ */
+}
+
+uint8_t fp_port_get_learning(FpPort *port)
+{
+ return port->learning;
+}
+
+void fp_port_set_learning(FpPort *port, uint8_t learning)
+{
+ port->learning = learning;
+}
+
+int fp_port_get_settings(FpPort *port, uint32_t *speed,
+ uint8_t *duplex, uint8_t *autoneg)
+{
+ *speed = port->speed;
+ *duplex = port->duplex;
+ *autoneg = port->autoneg;
+
+ return ROCKER_OK;
+}
+
+int fp_port_set_settings(FpPort *port, uint32_t speed,
+ uint8_t duplex, uint8_t autoneg)
+{
+ /* XXX validate inputs */
+
+ port->speed = speed;
+ port->duplex = duplex;
+ port->autoneg = autoneg;
+
+ return ROCKER_OK;
+}
+
+bool fp_port_from_pport(uint32_t pport, uint32_t *port)
+{
+ if (pport < 1 || pport > ROCKER_FP_PORTS_MAX) {
+ return false;
+ }
+ *port = pport - 1;
+ return true;
+}
+
+int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt)
+{
+ NetClientState *nc = qemu_get_queue(port->nic);
+
+ if (port->enabled) {
+ qemu_sendv_packet(nc, iov, iovcnt);
+ }
+
+ return ROCKER_OK;
+}
+
+static int fp_port_can_receive(NetClientState *nc)
+{
+ FpPort *port = qemu_get_nic_opaque(nc);
+
+ return port->enabled;
+}
+
+static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov,
+ int iovcnt)
+{
+ FpPort *port = qemu_get_nic_opaque(nc);
+
+ return world_ingress(port->world, port->pport, iov, iovcnt);
+}
+
+static ssize_t fp_port_receive(NetClientState *nc, const uint8_t *buf,
+ size_t size)
+{
+ const struct iovec iov = {
+ .iov_base = (uint8_t *)buf,
+ .iov_len = size
+ };
+
+ return fp_port_receive_iov(nc, &iov, 1);
+}
+
+static void fp_port_cleanup(NetClientState *nc)
+{
+}
+
+static void fp_port_set_link_status(NetClientState *nc)
+{
+ FpPort *port = qemu_get_nic_opaque(nc);
+
+ rocker_event_link_changed(port->r, port->pport, !nc->link_down);
+}
+
+static NetClientInfo fp_port_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_NIC,
+ .size = sizeof(NICState),
+ .can_receive = fp_port_can_receive,
+ .receive = fp_port_receive,
+ .receive_iov = fp_port_receive_iov,
+ .cleanup = fp_port_cleanup,
+ .link_status_changed = fp_port_set_link_status,
+};
+
+World *fp_port_get_world(FpPort *port)
+{
+ return port->world;
+}
+
+void fp_port_set_world(FpPort *port, World *world)
+{
+ DPRINTF("port %d setting world \"%s\"\n", port->index, world_name(world));
+ port->world = world;
+}
+
+bool fp_port_enabled(FpPort *port)
+{
+ return port->enabled;
+}
+
+void fp_port_enable(FpPort *port)
+{
+ port->enabled = true;
+ DPRINTF("port %d enabled\n", port->index);
+}
+
+void fp_port_disable(FpPort *port)
+{
+ port->enabled = false;
+ DPRINTF("port %d disabled\n", port->index);
+}
+
+FpPort *fp_port_alloc(Rocker *r, char *sw_name,
+ MACAddr *start_mac, unsigned int index,
+ NICPeers *peers)
+{
+ FpPort *port = g_malloc0(sizeof(FpPort));
+
+ if (!port) {
+ return NULL;
+ }
+
+ port->r = r;
+ port->index = index;
+ port->pport = index + 1;
+
+ /* front-panel switch port names are 1-based */
+
+ port->name = g_strdup_printf("%s.%d", sw_name, port->pport);
+
+ memcpy(port->conf.macaddr.a, start_mac, sizeof(port->conf.macaddr.a));
+ port->conf.macaddr.a[5] += index;
+ port->conf.bootindex = -1;
+ port->conf.peers = *peers;
+
+ port->nic = qemu_new_nic(&fp_port_info, &port->conf,
+ sw_name, NULL, port);
+ qemu_format_nic_info_str(qemu_get_queue(port->nic),
+ port->conf.macaddr.a);
+
+ fp_port_reset(port);
+
+ return port;
+}
+
+void fp_port_free(FpPort *port)
+{
+ qemu_del_nic(port->nic);
+ g_free(port->name);
+ g_free(port);
+}
+
+void fp_port_reset(FpPort *port)
+{
+ fp_port_disable(port);
+ port->speed = 10000; /* 10Gbps */
+ port->duplex = DUPLEX_FULL;
+ port->autoneg = 0;
+}
diff --git a/hw/net/rocker/rocker_fp.h b/hw/net/rocker/rocker_fp.h
new file mode 100644
index 0000000000..a5f28f120d
--- /dev/null
+++ b/hw/net/rocker/rocker_fp.h
@@ -0,0 +1,51 @@
+/*
+ * QEMU rocker switch emulation - front-panel ports
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ROCKER_FP_H_
+#define _ROCKER_FP_H_
+
+#include "net/net.h"
+#include "qemu/iov.h"
+
+#define ROCKER_FP_PORTS_MAX 62
+
+typedef struct fp_port FpPort;
+
+int fp_port_eg(FpPort *port, const struct iovec *iov, int iovcnt);
+
+bool fp_port_get_link_up(FpPort *port);
+void fp_port_get_macaddr(FpPort *port, MACAddr *macaddr);
+void fp_port_set_macaddr(FpPort *port, MACAddr *macaddr);
+uint8_t fp_port_get_learning(FpPort *port);
+void fp_port_set_learning(FpPort *port, uint8_t learning);
+int fp_port_get_settings(FpPort *port, uint32_t *speed,
+ uint8_t *duplex, uint8_t *autoneg);
+int fp_port_set_settings(FpPort *port, uint32_t speed,
+ uint8_t duplex, uint8_t autoneg);
+bool fp_port_from_pport(uint32_t pport, uint32_t *port);
+World *fp_port_get_world(FpPort *port);
+void fp_port_set_world(FpPort *port, World *world);
+bool fp_port_enabled(FpPort *port);
+void fp_port_enable(FpPort *port);
+void fp_port_disable(FpPort *port);
+
+FpPort *fp_port_alloc(Rocker *r, char *sw_name,
+ MACAddr *start_mac, unsigned int index,
+ NICPeers *peers);
+void fp_port_free(FpPort *port);
+void fp_port_reset(FpPort *port);
+
+#endif /* _ROCKER_FP_H_ */
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
new file mode 100644
index 0000000000..c9c85a75bd
--- /dev/null
+++ b/hw/net/rocker/rocker_hw.h
@@ -0,0 +1,491 @@
+/*
+ * Rocker switch hardware register and descriptor definitions.
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ */
+
+#ifndef _ROCKER_HW_
+#define _ROCKER_HW_
+
+#define __le16 uint16_t
+#define __le32 uint32_t
+#define __le64 uint64_t
+
+/*
+ * Return codes
+ */
+
+enum {
+ ROCKER_OK = 0,
+ ROCKER_ENOENT = 2,
+ ROCKER_ENXIO = 6,
+ ROCKER_ENOMEM = 12,
+ ROCKER_EEXIST = 17,
+ ROCKER_EINVAL = 22,
+ ROCKER_EMSGSIZE = 90,
+ ROCKER_ENOTSUP = 95,
+ ROCKER_ENOBUFS = 105,
+};
+
+/*
+ * PCI configuration space
+ */
+
+#define ROCKER_PCI_REVISION 0x1
+#define ROCKER_PCI_BAR0_IDX 0
+#define ROCKER_PCI_BAR0_SIZE 0x2000
+#define ROCKER_PCI_MSIX_BAR_IDX 1
+#define ROCKER_PCI_MSIX_BAR_SIZE 0x2000
+#define ROCKER_PCI_MSIX_TABLE_OFFSET 0x0000
+#define ROCKER_PCI_MSIX_PBA_OFFSET 0x1000
+
+/*
+ * MSI-X vectors
+ */
+
+enum {
+ ROCKER_MSIX_VEC_CMD,
+ ROCKER_MSIX_VEC_EVENT,
+ ROCKER_MSIX_VEC_TEST,
+ ROCKER_MSIX_VEC_RESERVED0,
+ __ROCKER_MSIX_VEC_TX,
+ __ROCKER_MSIX_VEC_RX,
+#define ROCKER_MSIX_VEC_TX(port) \
+ (__ROCKER_MSIX_VEC_TX + ((port) * 2))
+#define ROCKER_MSIX_VEC_RX(port) \
+ (__ROCKER_MSIX_VEC_RX + ((port) * 2))
+#define ROCKER_MSIX_VEC_COUNT(portcnt) \
+ (ROCKER_MSIX_VEC_RX((portcnt) - 1) + 1)
+};
+
+/*
+ * Rocker bogus registers
+ */
+#define ROCKER_BOGUS_REG0 0x0000
+#define ROCKER_BOGUS_REG1 0x0004
+#define ROCKER_BOGUS_REG2 0x0008
+#define ROCKER_BOGUS_REG3 0x000c
+
+/*
+ * Rocker test registers
+ */
+#define ROCKER_TEST_REG 0x0010
+#define ROCKER_TEST_REG64 0x0018 /* 8-byte */
+#define ROCKER_TEST_IRQ 0x0020
+#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */
+#define ROCKER_TEST_DMA_SIZE 0x0030
+#define ROCKER_TEST_DMA_CTRL 0x0034
+
+/*
+ * Rocker test register ctrl
+ */
+#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0)
+#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1)
+#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2)
+
+/*
+ * Rocker DMA ring register offsets
+ */
+#define ROCKER_DMA_DESC_BASE 0x1000
+#define ROCKER_DMA_DESC_SIZE 32
+#define ROCKER_DMA_DESC_MASK 0x1F
+#define ROCKER_DMA_DESC_TOTAL_SIZE \
+ (ROCKER_DMA_DESC_SIZE * 64) /* 62 ports + event + cmd */
+#define ROCKER_DMA_DESC_ADDR_OFFSET 0x00 /* 8-byte */
+#define ROCKER_DMA_DESC_SIZE_OFFSET 0x08
+#define ROCKER_DMA_DESC_HEAD_OFFSET 0x0c
+#define ROCKER_DMA_DESC_TAIL_OFFSET 0x10
+#define ROCKER_DMA_DESC_CTRL_OFFSET 0x14
+#define ROCKER_DMA_DESC_CREDITS_OFFSET 0x18
+#define ROCKER_DMA_DESC_RSVD_OFFSET 0x1c
+
+/*
+ * Rocker dma ctrl register bits
+ */
+#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0)
+
+/*
+ * Rocker ring indices
+ */
+#define ROCKER_RING_CMD 0
+#define ROCKER_RING_EVENT 1
+
+/*
+ * Helper macro to do convert a dma ring register
+ * to its index. Based on the fact that the register
+ * group stride is 32 bytes.
+ */
+#define ROCKER_RING_INDEX(reg) ((reg >> 5) & 0x7F)
+
+/*
+ * Rocker DMA Descriptor
+ */
+
+typedef struct rocker_desc {
+ __le64 buf_addr;
+ uint64_t cookie;
+ __le16 buf_size;
+ __le16 tlv_size;
+ __le16 rsvd[5]; /* pad to 32 bytes */
+ __le16 comp_err;
+} __attribute__((packed, aligned(8))) RockerDesc;
+
+/*
+ * Rocker TLV type fields
+ */
+
+typedef struct rocker_tlv {
+ __le32 type;
+ __le16 len;
+ __le16 rsvd;
+} __attribute__((packed, aligned(8))) RockerTlv;
+
+/* cmd msg */
+enum {
+ ROCKER_TLV_CMD_UNSPEC,
+ ROCKER_TLV_CMD_TYPE, /* u16 */
+ ROCKER_TLV_CMD_INFO, /* nest */
+
+ __ROCKER_TLV_CMD_MAX,
+ ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_CMD_TYPE_UNSPEC,
+ ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS,
+
+ __ROCKER_TLV_CMD_TYPE_MAX,
+ ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1,
+};
+
+/* cmd info nested for set/get port settings */
+enum {
+ ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC,
+ ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, /* u32 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, /* u8 */
+
+ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ ROCKER_TLV_CMD_PORT_SETTINGS_MAX = __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1,
+};
+
+enum {
+ ROCKER_PORT_MODE_OF_DPA,
+};
+
+/* event msg */
+enum {
+ ROCKER_TLV_EVENT_UNSPEC,
+ ROCKER_TLV_EVENT_TYPE, /* u16 */
+ ROCKER_TLV_EVENT_INFO, /* nest */
+
+ __ROCKER_TLV_EVENT_MAX,
+ ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_TYPE_UNSPEC,
+ ROCKER_TLV_EVENT_TYPE_LINK_CHANGED,
+ ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN,
+
+ __ROCKER_TLV_EVENT_TYPE_MAX,
+ ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1,
+};
+
+/* event info nested for link changed */
+enum {
+ ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC,
+ ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, /* u32 */
+ ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */
+
+ __ROCKER_TLV_EVENT_LINK_CHANGED_MAX,
+ ROCKER_TLV_EVENT_LINK_CHANGED_MAX = __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1,
+};
+
+/* event info nested for MAC/VLAN */
+enum {
+ ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC,
+ ROCKER_TLV_EVENT_MAC_VLAN_PPORT, /* u32 */
+ ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */
+ ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */
+
+ __ROCKER_TLV_EVENT_MAC_VLAN_MAX,
+ ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1,
+};
+
+/* Rx msg */
+enum {
+ ROCKER_TLV_RX_UNSPEC,
+ ROCKER_TLV_RX_FLAGS, /* u16, see RX_FLAGS_ */
+ ROCKER_TLV_RX_CSUM, /* u16 */
+ ROCKER_TLV_RX_FRAG_ADDR, /* u64 */
+ ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */
+ ROCKER_TLV_RX_FRAG_LEN, /* u16 */
+
+ __ROCKER_TLV_RX_MAX,
+ ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1,
+};
+
+#define ROCKER_RX_FLAGS_IPV4 (1 << 0)
+#define ROCKER_RX_FLAGS_IPV6 (1 << 1)
+#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2)
+#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3)
+#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4)
+#define ROCKER_RX_FLAGS_TCP (1 << 5)
+#define ROCKER_RX_FLAGS_UDP (1 << 6)
+#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7)
+
+/* Tx msg */
+enum {
+ ROCKER_TLV_TX_UNSPEC,
+ ROCKER_TLV_TX_OFFLOAD, /* u8, see TX_OFFLOAD_ */
+ ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */
+ ROCKER_TLV_TX_TSO_MSS, /* u16 */
+ ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */
+ ROCKER_TLV_TX_FRAGS, /* array */
+
+ __ROCKER_TLV_TX_MAX,
+ ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1,
+};
+
+#define ROCKER_TX_OFFLOAD_NONE 0
+#define ROCKER_TX_OFFLOAD_IP_CSUM 1
+#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2
+#define ROCKER_TX_OFFLOAD_L3_CSUM 3
+#define ROCKER_TX_OFFLOAD_TSO 4
+
+#define ROCKER_TX_FRAGS_MAX 16
+
+enum {
+ ROCKER_TLV_TX_FRAG_UNSPEC,
+ ROCKER_TLV_TX_FRAG, /* nest */
+
+ __ROCKER_TLV_TX_FRAG_MAX,
+ ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_TX_FRAG_ATTR_UNSPEC,
+ ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */
+ ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */
+
+ __ROCKER_TLV_TX_FRAG_ATTR_MAX,
+ ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1,
+};
+
+/*
+ * cmd info nested for OF-DPA msgs
+ */
+
+enum {
+ ROCKER_TLV_OF_DPA_UNSPEC,
+ ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */
+ ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */
+ ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */
+ ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */
+ ROCKER_TLV_OF_DPA_COOKIE, /* u64 */
+ ROCKER_TLV_OF_DPA_IN_PPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_IN_PPORT_MASK, /* u32 */
+ ROCKER_TLV_OF_DPA_OUT_PPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */
+ ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */
+ ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */
+ ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */
+ ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */
+ ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */
+ ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */
+ ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */
+ ROCKER_TLV_OF_DPA_TUNNEL_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */
+ ROCKER_TLV_OF_DPA_DST_MAC, /* binary */
+ ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */
+ ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */
+ ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */
+ ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */
+ ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */
+ ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */
+ ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */
+
+ __ROCKER_TLV_OF_DPA_MAX,
+ ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1,
+};
+
+/*
+ * OF-DPA table IDs
+ */
+
+enum rocker_of_dpa_table_id {
+ ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0,
+ ROCKER_OF_DPA_TABLE_ID_VLAN = 10,
+ ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20,
+ ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30,
+ ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40,
+ ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50,
+ ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60,
+};
+
+/*
+ * OF-DPA flow stats
+ */
+
+enum {
+ ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC,
+ ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */
+ ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */
+ ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */
+
+ __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX,
+ ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1,
+};
+
+/*
+ * OF-DPA group types
+ */
+
+enum rocker_of_dpa_group_type {
+ ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY,
+};
+
+/*
+ * OF-DPA group L2 overlay types
+ */
+
+enum rocker_of_dpa_overlay_type {
+ ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0,
+ ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST,
+ ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST,
+ ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST,
+};
+
+/*
+ * OF-DPA group ID encoding
+ */
+
+#define ROCKER_GROUP_TYPE_SHIFT 28
+#define ROCKER_GROUP_TYPE_MASK 0xf0000000
+#define ROCKER_GROUP_VLAN_ID_SHIFT 16
+#define ROCKER_GROUP_VLAN_ID_MASK 0x0fff0000
+#define ROCKER_GROUP_PORT_SHIFT 0
+#define ROCKER_GROUP_PORT_MASK 0x0000ffff
+#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12
+#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000
+#define ROCKER_GROUP_SUBTYPE_SHIFT 10
+#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00
+#define ROCKER_GROUP_INDEX_SHIFT 0
+#define ROCKER_GROUP_INDEX_MASK 0x0000ffff
+#define ROCKER_GROUP_INDEX_LONG_SHIFT 0
+#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff
+
+#define ROCKER_GROUP_TYPE_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT)
+#define ROCKER_GROUP_TYPE_SET(type) \
+ (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK)
+#define ROCKER_GROUP_VLAN_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT)
+#define ROCKER_GROUP_VLAN_SET(vlan_id) \
+ (((vlan_id) << ROCKER_GROUP_VLAN_ID_SHIFT) & ROCKER_GROUP_VLAN_ID_MASK)
+#define ROCKER_GROUP_PORT_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT)
+#define ROCKER_GROUP_PORT_SET(port) \
+ (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK)
+#define ROCKER_GROUP_INDEX_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT)
+#define ROCKER_GROUP_INDEX_SET(index) \
+ (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK)
+#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \
+ ROCKER_GROUP_INDEX_LONG_SHIFT)
+#define ROCKER_GROUP_INDEX_LONG_SET(index) \
+ (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \
+ ROCKER_GROUP_INDEX_LONG_MASK)
+
+#define ROCKER_GROUP_NONE 0
+#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port))
+#define ROCKER_GROUP_L2_REWRITE(index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\
+ ROCKER_GROUP_INDEX_LONG_SET(index))
+#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index))
+#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index))
+#define ROCKER_GROUP_L3_UNICAST(index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\
+ ROCKER_GROUP_INDEX_LONG_SET(index))
+
+/*
+ * Rocker general purpose registers
+ */
+#define ROCKER_CONTROL 0x0300
+#define ROCKER_PORT_PHYS_COUNT 0x0304
+#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */
+#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */
+#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */
+
+/*
+ * Rocker control bits
+ */
+#define ROCKER_CONTROL_RESET (1 << 0)
+
+#endif /* _ROCKER_HW_ */
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
new file mode 100644
index 0000000000..1bcb7af5ef
--- /dev/null
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -0,0 +1,2315 @@
+/*
+ * QEMU rocker switch emulation - OF-DPA flow processing support
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "net/eth.h"
+#include "qemu/iov.h"
+#include "qemu/timer.h"
+#include "qmp-commands.h"
+
+#include "rocker.h"
+#include "rocker_hw.h"
+#include "rocker_fp.h"
+#include "rocker_tlv.h"
+#include "rocker_world.h"
+#include "rocker_desc.h"
+#include "rocker_of_dpa.h"
+
+static const MACAddr zero_mac = { .a = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } };
+static const MACAddr ff_mac = { .a = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } };
+
+typedef struct of_dpa {
+ World *world;
+ GHashTable *flow_tbl;
+ GHashTable *group_tbl;
+ unsigned int flow_tbl_max_size;
+ unsigned int group_tbl_max_size;
+} OfDpa;
+
+/* flow_key stolen mostly from OVS
+ *
+ * Note: fields that compare with network packet header fields
+ * are stored in network order (BE) to avoid per-packet field
+ * byte-swaps.
+ */
+
+typedef struct of_dpa_flow_key {
+ uint32_t in_pport; /* ingress port */
+ uint32_t tunnel_id; /* overlay tunnel id */
+ uint32_t tbl_id; /* table id */
+ struct {
+ __be16 vlan_id; /* 0 if no VLAN */
+ MACAddr src; /* ethernet source address */
+ MACAddr dst; /* ethernet destination address */
+ __be16 type; /* ethernet frame type */
+ } eth;
+ struct {
+ uint8_t proto; /* IP protocol or ARP opcode */
+ uint8_t tos; /* IP ToS */
+ uint8_t ttl; /* IP TTL/hop limit */
+ uint8_t frag; /* one of FRAG_TYPE_* */
+ } ip;
+ union {
+ struct {
+ struct {
+ __be32 src; /* IP source address */
+ __be32 dst; /* IP destination address */
+ } addr;
+ union {
+ struct {
+ __be16 src; /* TCP/UDP/SCTP source port */
+ __be16 dst; /* TCP/UDP/SCTP destination port */
+ __be16 flags; /* TCP flags */
+ } tp;
+ struct {
+ MACAddr sha; /* ARP source hardware address */
+ MACAddr tha; /* ARP target hardware address */
+ } arp;
+ };
+ } ipv4;
+ struct {
+ struct {
+ Ipv6Addr src; /* IPv6 source address */
+ Ipv6Addr dst; /* IPv6 destination address */
+ } addr;
+ __be32 label; /* IPv6 flow label */
+ struct {
+ __be16 src; /* TCP/UDP/SCTP source port */
+ __be16 dst; /* TCP/UDP/SCTP destination port */
+ __be16 flags; /* TCP flags */
+ } tp;
+ struct {
+ Ipv6Addr target; /* ND target address */
+ MACAddr sll; /* ND source link layer address */
+ MACAddr tll; /* ND target link layer address */
+ } nd;
+ } ipv6;
+ };
+ int width; /* how many uint64_t's in key? */
+} OfDpaFlowKey;
+
+/* Width of key which includes field 'f' in u64s, rounded up */
+#define FLOW_KEY_WIDTH(f) \
+ ((offsetof(OfDpaFlowKey, f) + \
+ sizeof(((OfDpaFlowKey *)0)->f) + \
+ sizeof(uint64_t) - 1) / sizeof(uint64_t))
+
+typedef struct of_dpa_flow_action {
+ uint32_t goto_tbl;
+ struct {
+ uint32_t group_id;
+ uint32_t tun_log_lport;
+ __be16 vlan_id;
+ } write;
+ struct {
+ __be16 new_vlan_id;
+ uint32_t out_pport;
+ uint8_t copy_to_cpu;
+ __be16 vlan_id;
+ } apply;
+} OfDpaFlowAction;
+
+typedef struct of_dpa_flow {
+ uint32_t lpm;
+ uint32_t priority;
+ uint32_t hardtime;
+ uint32_t idletime;
+ uint64_t cookie;
+ OfDpaFlowKey key;
+ OfDpaFlowKey mask;
+ OfDpaFlowAction action;
+ struct {
+ uint64_t hits;
+ int64_t install_time;
+ int64_t refresh_time;
+ uint64_t rx_pkts;
+ uint64_t tx_pkts;
+ } stats;
+} OfDpaFlow;
+
+typedef struct of_dpa_flow_pkt_fields {
+ uint32_t tunnel_id;
+ struct eth_header *ethhdr;
+ __be16 *h_proto;
+ struct vlan_header *vlanhdr;
+ struct ip_header *ipv4hdr;
+ struct ip6_header *ipv6hdr;
+ Ipv6Addr *ipv6_src_addr;
+ Ipv6Addr *ipv6_dst_addr;
+} OfDpaFlowPktFields;
+
+typedef struct of_dpa_flow_context {
+ uint32_t in_pport;
+ uint32_t tunnel_id;
+ struct iovec *iov;
+ int iovcnt;
+ struct eth_header ethhdr_rewrite;
+ struct vlan_header vlanhdr_rewrite;
+ struct vlan_header vlanhdr;
+ OfDpa *of_dpa;
+ OfDpaFlowPktFields fields;
+ OfDpaFlowAction action_set;
+} OfDpaFlowContext;
+
+typedef struct of_dpa_flow_match {
+ OfDpaFlowKey value;
+ OfDpaFlow *best;
+} OfDpaFlowMatch;
+
+typedef struct of_dpa_group {
+ uint32_t id;
+ union {
+ struct {
+ uint32_t out_pport;
+ uint8_t pop_vlan;
+ } l2_interface;
+ struct {
+ uint32_t group_id;
+ MACAddr src_mac;
+ MACAddr dst_mac;
+ __be16 vlan_id;
+ } l2_rewrite;
+ struct {
+ uint16_t group_count;
+ uint32_t *group_ids;
+ } l2_flood;
+ struct {
+ uint32_t group_id;
+ MACAddr src_mac;
+ MACAddr dst_mac;
+ __be16 vlan_id;
+ uint8_t ttl_check;
+ } l3_unicast;
+ };
+} OfDpaGroup;
+
+static int of_dpa_mask2prefix(__be32 mask)
+{
+ int i;
+ int count = 32;
+
+ for (i = 0; i < 32; i++) {
+ if (!(ntohl(mask) & ((2 << i) - 1))) {
+ count--;
+ }
+ }
+
+ return count;
+}
+
+#if defined(DEBUG_ROCKER)
+static void of_dpa_flow_key_dump(OfDpaFlowKey *key, OfDpaFlowKey *mask)
+{
+ char buf[512], *b = buf, *mac;
+
+ b += sprintf(b, " tbl %2d", key->tbl_id);
+
+ if (key->in_pport || (mask && mask->in_pport)) {
+ b += sprintf(b, " in_pport %2d", key->in_pport);
+ if (mask && mask->in_pport != 0xffffffff) {
+ b += sprintf(b, "/0x%08x", key->in_pport);
+ }
+ }
+
+ if (key->tunnel_id || (mask && mask->tunnel_id)) {
+ b += sprintf(b, " tun %8d", key->tunnel_id);
+ if (mask && mask->tunnel_id != 0xffffffff) {
+ b += sprintf(b, "/0x%08x", key->tunnel_id);
+ }
+ }
+
+ if (key->eth.vlan_id || (mask && mask->eth.vlan_id)) {
+ b += sprintf(b, " vlan %4d", ntohs(key->eth.vlan_id));
+ if (mask && mask->eth.vlan_id != 0xffff) {
+ b += sprintf(b, "/0x%04x", ntohs(key->eth.vlan_id));
+ }
+ }
+
+ if (memcmp(key->eth.src.a, zero_mac.a, ETH_ALEN) ||
+ (mask && memcmp(mask->eth.src.a, zero_mac.a, ETH_ALEN))) {
+ mac = qemu_mac_strdup_printf(key->eth.src.a);
+ b += sprintf(b, " src %s", mac);
+ g_free(mac);
+ if (mask && memcmp(mask->eth.src.a, ff_mac.a, ETH_ALEN)) {
+ mac = qemu_mac_strdup_printf(mask->eth.src.a);
+ b += sprintf(b, "/%s", mac);
+ g_free(mac);
+ }
+ }
+
+ if (memcmp(key->eth.dst.a, zero_mac.a, ETH_ALEN) ||
+ (mask && memcmp(mask->eth.dst.a, zero_mac.a, ETH_ALEN))) {
+ mac = qemu_mac_strdup_printf(key->eth.dst.a);
+ b += sprintf(b, " dst %s", mac);
+ g_free(mac);
+ if (mask && memcmp(mask->eth.dst.a, ff_mac.a, ETH_ALEN)) {
+ mac = qemu_mac_strdup_printf(mask->eth.dst.a);
+ b += sprintf(b, "/%s", mac);
+ g_free(mac);
+ }
+ }
+
+ if (key->eth.type || (mask && mask->eth.type)) {
+ b += sprintf(b, " type 0x%04x", ntohs(key->eth.type));
+ if (mask && mask->eth.type != 0xffff) {
+ b += sprintf(b, "/0x%04x", ntohs(mask->eth.type));
+ }
+ switch (ntohs(key->eth.type)) {
+ case 0x0800:
+ case 0x86dd:
+ if (key->ip.proto || (mask && mask->ip.proto)) {
+ b += sprintf(b, " ip proto %2d", key->ip.proto);
+ if (mask && mask->ip.proto != 0xff) {
+ b += sprintf(b, "/0x%02x", mask->ip.proto);
+ }
+ }
+ if (key->ip.tos || (mask && mask->ip.tos)) {
+ b += sprintf(b, " ip tos %2d", key->ip.tos);
+ if (mask && mask->ip.tos != 0xff) {
+ b += sprintf(b, "/0x%02x", mask->ip.tos);
+ }
+ }
+ break;
+ }
+ switch (ntohs(key->eth.type)) {
+ case 0x0800:
+ if (key->ipv4.addr.dst || (mask && mask->ipv4.addr.dst)) {
+ b += sprintf(b, " dst %s",
+ inet_ntoa(*(struct in_addr *)&key->ipv4.addr.dst));
+ if (mask) {
+ b += sprintf(b, "/%d",
+ of_dpa_mask2prefix(mask->ipv4.addr.dst));
+ }
+ }
+ break;
+ }
+ }
+
+ DPRINTF("%s\n", buf);
+}
+#else
+#define of_dpa_flow_key_dump(k, m)
+#endif
+
+static void _of_dpa_flow_match(void *key, void *value, void *user_data)
+{
+ OfDpaFlow *flow = value;
+ OfDpaFlowMatch *match = user_data;
+ uint64_t *k = (uint64_t *)&flow->key;
+ uint64_t *m = (uint64_t *)&flow->mask;
+ uint64_t *v = (uint64_t *)&match->value;
+ int i;
+
+ if (flow->key.tbl_id == match->value.tbl_id) {
+ of_dpa_flow_key_dump(&flow->key, &flow->mask);
+ }
+
+ if (flow->key.width > match->value.width) {
+ return;
+ }
+
+ for (i = 0; i < flow->key.width; i++, k++, m++, v++) {
+ if ((~*k & *m & *v) | (*k & *m & ~*v)) {
+ return;
+ }
+ }
+
+ DPRINTF("match\n");
+
+ if (!match->best ||
+ flow->priority > match->best->priority ||
+ flow->lpm > match->best->lpm) {
+ match->best = flow;
+ }
+}
+
+static OfDpaFlow *of_dpa_flow_match(OfDpa *of_dpa, OfDpaFlowMatch *match)
+{
+ DPRINTF("\nnew search\n");
+ of_dpa_flow_key_dump(&match->value, NULL);
+
+ g_hash_table_foreach(of_dpa->flow_tbl, _of_dpa_flow_match, match);
+
+ return match->best;
+}
+
+static OfDpaFlow *of_dpa_flow_find(OfDpa *of_dpa, uint64_t cookie)
+{
+ return g_hash_table_lookup(of_dpa->flow_tbl, &cookie);
+}
+
+static int of_dpa_flow_add(OfDpa *of_dpa, OfDpaFlow *flow)
+{
+ g_hash_table_insert(of_dpa->flow_tbl, &flow->cookie, flow);
+
+ return ROCKER_OK;
+}
+
+static void of_dpa_flow_del(OfDpa *of_dpa, OfDpaFlow *flow)
+{
+ g_hash_table_remove(of_dpa->flow_tbl, &flow->cookie);
+}
+
+static OfDpaFlow *of_dpa_flow_alloc(uint64_t cookie)
+{
+ OfDpaFlow *flow;
+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000;
+
+ flow = g_malloc0(sizeof(OfDpaFlow));
+ if (!flow) {
+ return NULL;
+ }
+
+ flow->cookie = cookie;
+ flow->mask.tbl_id = 0xffffffff;
+
+ flow->stats.install_time = flow->stats.refresh_time = now;
+
+ return flow;
+}
+
+static void of_dpa_flow_pkt_hdr_reset(OfDpaFlowContext *fc)
+{
+ OfDpaFlowPktFields *fields = &fc->fields;
+
+ fc->iov[0].iov_base = fields->ethhdr;
+ fc->iov[0].iov_len = sizeof(struct eth_header);
+ fc->iov[1].iov_base = fields->vlanhdr;
+ fc->iov[1].iov_len = fields->vlanhdr ? sizeof(struct vlan_header) : 0;
+}
+
+static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc,
+ const struct iovec *iov, int iovcnt)
+{
+ OfDpaFlowPktFields *fields = &fc->fields;
+ size_t sofar = 0;
+ int i;
+
+ sofar += sizeof(struct eth_header);
+ if (iov->iov_len < sofar) {
+ DPRINTF("flow_pkt_parse underrun on eth_header\n");
+ return;
+ }
+
+ fields->ethhdr = iov->iov_base;
+ fields->h_proto = &fields->ethhdr->h_proto;
+
+ if (ntohs(*fields->h_proto) == ETH_P_VLAN) {
+ sofar += sizeof(struct vlan_header);
+ if (iov->iov_len < sofar) {
+ DPRINTF("flow_pkt_parse underrun on vlan_header\n");
+ return;
+ }
+ fields->vlanhdr = (struct vlan_header *)(fields->ethhdr + 1);
+ fields->h_proto = &fields->vlanhdr->h_proto;
+ }
+
+ switch (ntohs(*fields->h_proto)) {
+ case ETH_P_IP:
+ sofar += sizeof(struct ip_header);
+ if (iov->iov_len < sofar) {
+ DPRINTF("flow_pkt_parse underrun on ip_header\n");
+ return;
+ }
+ fields->ipv4hdr = (struct ip_header *)(fields->h_proto + 1);
+ break;
+ case ETH_P_IPV6:
+ sofar += sizeof(struct ip6_header);
+ if (iov->iov_len < sofar) {
+ DPRINTF("flow_pkt_parse underrun on ip6_header\n");
+ return;
+ }
+ fields->ipv6hdr = (struct ip6_header *)(fields->h_proto + 1);
+ break;
+ }
+
+ /* To facilitate (potential) VLAN tag insertion, Make a
+ * copy of the iov and insert two new vectors at the
+ * beginning for eth hdr and vlan hdr. No data is copied,
+ * just the vectors.
+ */
+
+ of_dpa_flow_pkt_hdr_reset(fc);
+
+ fc->iov[2].iov_base = fields->h_proto + 1;
+ fc->iov[2].iov_len = iov->iov_len - fc->iov[0].iov_len - fc->iov[1].iov_len;
+
+ for (i = 1; i < iovcnt; i++) {
+ fc->iov[i+2] = iov[i];
+ }
+
+ fc->iovcnt = iovcnt + 2;
+}
+
+static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id)
+{
+ OfDpaFlowPktFields *fields = &fc->fields;
+ uint16_t h_proto = fields->ethhdr->h_proto;
+
+ if (fields->vlanhdr) {
+ DPRINTF("flow_pkt_insert_vlan packet already has vlan\n");
+ return;
+ }
+
+ fields->ethhdr->h_proto = htons(ETH_P_VLAN);
+ fields->vlanhdr = &fc->vlanhdr;
+ fields->vlanhdr->h_tci = vlan_id;
+ fields->vlanhdr->h_proto = h_proto;
+ fields->h_proto = &fields->vlanhdr->h_proto;
+
+ fc->iov[1].iov_base = fields->vlanhdr;
+ fc->iov[1].iov_len = sizeof(struct vlan_header);
+}
+
+static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc)
+{
+ OfDpaFlowPktFields *fields = &fc->fields;
+
+ if (!fields->vlanhdr) {
+ return;
+ }
+
+ fc->iov[0].iov_len -= sizeof(fields->ethhdr->h_proto);
+ fc->iov[1].iov_base = fields->h_proto;
+ fc->iov[1].iov_len = sizeof(fields->ethhdr->h_proto);
+}
+
+static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc,
+ uint8_t *src_mac, uint8_t *dst_mac,
+ __be16 vlan_id)
+{
+ OfDpaFlowPktFields *fields = &fc->fields;
+
+ if (src_mac || dst_mac) {
+ memcpy(&fc->ethhdr_rewrite, fields->ethhdr, sizeof(struct eth_header));
+ if (src_mac && memcmp(src_mac, zero_mac.a, ETH_ALEN)) {
+ memcpy(fc->ethhdr_rewrite.h_source, src_mac, ETH_ALEN);
+ }
+ if (dst_mac && memcmp(dst_mac, zero_mac.a, ETH_ALEN)) {
+ memcpy(fc->ethhdr_rewrite.h_dest, dst_mac, ETH_ALEN);
+ }
+ fc->iov[0].iov_base = &fc->ethhdr_rewrite;
+ }
+
+ if (vlan_id && fields->vlanhdr) {
+ fc->vlanhdr_rewrite = fc->vlanhdr;
+ fc->vlanhdr_rewrite.h_tci = vlan_id;
+ fc->iov[1].iov_base = &fc->vlanhdr_rewrite;
+ }
+}
+
+static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id);
+
+static void of_dpa_ig_port_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT;
+ match->value.in_pport = fc->in_pport;
+ match->value.width = FLOW_KEY_WIDTH(tbl_id);
+}
+
+static void of_dpa_ig_port_miss(OfDpaFlowContext *fc)
+{
+ uint32_t port;
+
+ /* The default on miss is for packets from physical ports
+ * to go to the VLAN Flow Table. There is no default rule
+ * for packets from logical ports, which are dropped on miss.
+ */
+
+ if (fp_port_from_pport(fc->in_pport, &port)) {
+ of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_VLAN);
+ }
+}
+
+static void of_dpa_vlan_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN;
+ match->value.in_pport = fc->in_pport;
+ if (fc->fields.vlanhdr) {
+ match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci;
+ }
+ match->value.width = FLOW_KEY_WIDTH(eth.vlan_id);
+}
+
+static void of_dpa_vlan_insert(OfDpaFlowContext *fc,
+ OfDpaFlow *flow)
+{
+ if (flow->action.apply.new_vlan_id) {
+ of_dpa_flow_pkt_insert_vlan(fc, flow->action.apply.new_vlan_id);
+ }
+}
+
+static void of_dpa_term_mac_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC;
+ match->value.in_pport = fc->in_pport;
+ match->value.eth.type = *fc->fields.h_proto;
+ match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci;
+ memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest,
+ sizeof(match->value.eth.dst.a));
+ match->value.width = FLOW_KEY_WIDTH(eth.type);
+}
+
+static void of_dpa_term_mac_miss(OfDpaFlowContext *fc)
+{
+ of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_BRIDGING);
+}
+
+static void of_dpa_apply_actions(OfDpaFlowContext *fc,
+ OfDpaFlow *flow)
+{
+ fc->action_set.apply.copy_to_cpu = flow->action.apply.copy_to_cpu;
+ fc->action_set.apply.vlan_id = flow->key.eth.vlan_id;
+}
+
+static void of_dpa_bridging_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING;
+ if (fc->fields.vlanhdr) {
+ match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci;
+ } else if (fc->tunnel_id) {
+ match->value.tunnel_id = fc->tunnel_id;
+ }
+ memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest,
+ sizeof(match->value.eth.dst.a));
+ match->value.width = FLOW_KEY_WIDTH(eth.dst);
+}
+
+static void of_dpa_bridging_learn(OfDpaFlowContext *fc,
+ OfDpaFlow *dst_flow)
+{
+ OfDpaFlowMatch match = { { 0, }, };
+ OfDpaFlow *flow;
+ uint8_t *addr;
+ uint16_t vlan_id;
+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000;
+ int64_t refresh_delay = 1;
+
+ /* Do a lookup in bridge table by src_mac/vlan */
+
+ addr = fc->fields.ethhdr->h_source;
+ vlan_id = fc->fields.vlanhdr->h_tci;
+
+ match.value.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING;
+ match.value.eth.vlan_id = vlan_id;
+ memcpy(match.value.eth.dst.a, addr, sizeof(match.value.eth.dst.a));
+ match.value.width = FLOW_KEY_WIDTH(eth.dst);
+
+ flow = of_dpa_flow_match(fc->of_dpa, &match);
+ if (flow) {
+ if (!memcmp(flow->mask.eth.dst.a, ff_mac.a,
+ sizeof(flow->mask.eth.dst.a))) {
+ /* src_mac/vlan already learned; if in_port and out_port
+ * don't match, the end station has moved and the port
+ * needs updating */
+ /* XXX implement the in_port/out_port check */
+ if (now - flow->stats.refresh_time < refresh_delay) {
+ return;
+ }
+ flow->stats.refresh_time = now;
+ }
+ }
+
+ /* Let driver know about mac/vlan. This may be a new mac/vlan
+ * or a refresh of existing mac/vlan that's been hit after the
+ * refresh_delay.
+ */
+
+ rocker_event_mac_vlan_seen(world_rocker(fc->of_dpa->world),
+ fc->in_pport, addr, vlan_id);
+}
+
+static void of_dpa_bridging_miss(OfDpaFlowContext *fc)
+{
+ of_dpa_bridging_learn(fc, NULL);
+ of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY);
+}
+
+static void of_dpa_bridging_action_write(OfDpaFlowContext *fc,
+ OfDpaFlow *flow)
+{
+ if (flow->action.write.group_id != ROCKER_GROUP_NONE) {
+ fc->action_set.write.group_id = flow->action.write.group_id;
+ }
+ fc->action_set.write.tun_log_lport = flow->action.write.tun_log_lport;
+}
+
+static void of_dpa_unicast_routing_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING;
+ match->value.eth.type = *fc->fields.h_proto;
+ if (fc->fields.ipv4hdr) {
+ match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst;
+ }
+ if (fc->fields.ipv6_dst_addr) {
+ memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr,
+ sizeof(match->value.ipv6.addr.dst));
+ }
+ match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst);
+}
+
+static void of_dpa_unicast_routing_miss(OfDpaFlowContext *fc)
+{
+ of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY);
+}
+
+static void of_dpa_unicast_routing_action_write(OfDpaFlowContext *fc,
+ OfDpaFlow *flow)
+{
+ if (flow->action.write.group_id != ROCKER_GROUP_NONE) {
+ fc->action_set.write.group_id = flow->action.write.group_id;
+ }
+}
+
+static void
+of_dpa_multicast_routing_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING;
+ match->value.eth.type = *fc->fields.h_proto;
+ match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci;
+ if (fc->fields.ipv4hdr) {
+ match->value.ipv4.addr.src = fc->fields.ipv4hdr->ip_src;
+ match->value.ipv4.addr.dst = fc->fields.ipv4hdr->ip_dst;
+ }
+ if (fc->fields.ipv6_src_addr) {
+ memcpy(&match->value.ipv6.addr.src, fc->fields.ipv6_src_addr,
+ sizeof(match->value.ipv6.addr.src));
+ }
+ if (fc->fields.ipv6_dst_addr) {
+ memcpy(&match->value.ipv6.addr.dst, fc->fields.ipv6_dst_addr,
+ sizeof(match->value.ipv6.addr.dst));
+ }
+ match->value.width = FLOW_KEY_WIDTH(ipv6.addr.dst);
+}
+
+static void of_dpa_multicast_routing_miss(OfDpaFlowContext *fc)
+{
+ of_dpa_flow_ig_tbl(fc, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY);
+}
+
+static void
+of_dpa_multicast_routing_action_write(OfDpaFlowContext *fc,
+ OfDpaFlow *flow)
+{
+ if (flow->action.write.group_id != ROCKER_GROUP_NONE) {
+ fc->action_set.write.group_id = flow->action.write.group_id;
+ }
+ fc->action_set.write.vlan_id = flow->action.write.vlan_id;
+}
+
+static void of_dpa_acl_build_match(OfDpaFlowContext *fc,
+ OfDpaFlowMatch *match)
+{
+ match->value.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+ match->value.in_pport = fc->in_pport;
+ memcpy(match->value.eth.src.a, fc->fields.ethhdr->h_source,
+ sizeof(match->value.eth.src.a));
+ memcpy(match->value.eth.dst.a, fc->fields.ethhdr->h_dest,
+ sizeof(match->value.eth.dst.a));
+ match->value.eth.type = *fc->fields.h_proto;
+ match->value.eth.vlan_id = fc->fields.vlanhdr->h_tci;
+ match->value.width = FLOW_KEY_WIDTH(eth.type);
+ if (fc->fields.ipv4hdr) {
+ match->value.ip.proto = fc->fields.ipv4hdr->ip_p;
+ match->value.ip.tos = fc->fields.ipv4hdr->ip_tos;
+ match->value.width = FLOW_KEY_WIDTH(ip.tos);
+ } else if (fc->fields.ipv6hdr) {
+ match->value.ip.proto =
+ fc->fields.ipv6hdr->ip6_ctlun.ip6_un1.ip6_un1_nxt;
+ match->value.ip.tos = 0; /* XXX what goes here? */
+ match->value.width = FLOW_KEY_WIDTH(ip.tos);
+ }
+}
+
+static void of_dpa_eg(OfDpaFlowContext *fc);
+static void of_dpa_acl_hit(OfDpaFlowContext *fc,
+ OfDpaFlow *dst_flow)
+{
+ of_dpa_eg(fc);
+}
+
+static void of_dpa_acl_action_write(OfDpaFlowContext *fc,
+ OfDpaFlow *flow)
+{
+ if (flow->action.write.group_id != ROCKER_GROUP_NONE) {
+ fc->action_set.write.group_id = flow->action.write.group_id;
+ }
+}
+
+static void of_dpa_drop(OfDpaFlowContext *fc)
+{
+ /* drop packet */
+}
+
+static OfDpaGroup *of_dpa_group_find(OfDpa *of_dpa,
+ uint32_t group_id)
+{
+ return g_hash_table_lookup(of_dpa->group_tbl, &group_id);
+}
+
+static int of_dpa_group_add(OfDpa *of_dpa, OfDpaGroup *group)
+{
+ g_hash_table_insert(of_dpa->group_tbl, &group->id, group);
+
+ return 0;
+}
+
+#if 0
+static int of_dpa_group_mod(OfDpa *of_dpa, OfDpaGroup *group)
+{
+ OfDpaGroup *old_group = of_dpa_group_find(of_dpa, group->id);
+
+ if (!old_group) {
+ return -ENOENT;
+ }
+
+ /* XXX */
+
+ return 0;
+}
+#endif
+
+static int of_dpa_group_del(OfDpa *of_dpa, OfDpaGroup *group)
+{
+ g_hash_table_remove(of_dpa->group_tbl, &group->id);
+
+ return 0;
+}
+
+#if 0
+static int of_dpa_group_get_stats(OfDpa *of_dpa, uint32_t id)
+{
+ OfDpaGroup *group = of_dpa_group_find(of_dpa, id);
+
+ if (!group) {
+ return -ENOENT;
+ }
+
+ /* XXX get/return stats */
+
+ return 0;
+}
+#endif
+
+static OfDpaGroup *of_dpa_group_alloc(uint32_t id)
+{
+ OfDpaGroup *group = g_malloc0(sizeof(OfDpaGroup));
+
+ if (!group) {
+ return NULL;
+ }
+
+ group->id = id;
+
+ return group;
+}
+
+static void of_dpa_output_l2_interface(OfDpaFlowContext *fc,
+ OfDpaGroup *group)
+{
+ if (group->l2_interface.pop_vlan) {
+ of_dpa_flow_pkt_strip_vlan(fc);
+ }
+
+ /* Note: By default, and as per the OpenFlow 1.3.1
+ * specification, a packet cannot be forwarded back
+ * to the IN_PORT from which it came in. An action
+ * bucket that specifies the particular packet's
+ * egress port is not evaluated.
+ */
+
+ if (group->l2_interface.out_pport == 0) {
+ rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt);
+ } else if (group->l2_interface.out_pport != fc->in_pport) {
+ rocker_port_eg(world_rocker(fc->of_dpa->world),
+ group->l2_interface.out_pport,
+ fc->iov, fc->iovcnt);
+ }
+}
+
+static void of_dpa_output_l2_rewrite(OfDpaFlowContext *fc,
+ OfDpaGroup *group)
+{
+ OfDpaGroup *l2_group =
+ of_dpa_group_find(fc->of_dpa, group->l2_rewrite.group_id);
+
+ if (!l2_group) {
+ return;
+ }
+
+ of_dpa_flow_pkt_hdr_rewrite(fc, group->l2_rewrite.src_mac.a,
+ group->l2_rewrite.dst_mac.a,
+ group->l2_rewrite.vlan_id);
+ of_dpa_output_l2_interface(fc, l2_group);
+}
+
+static void of_dpa_output_l2_flood(OfDpaFlowContext *fc,
+ OfDpaGroup *group)
+{
+ OfDpaGroup *l2_group;
+ int i;
+
+ for (i = 0; i < group->l2_flood.group_count; i++) {
+ of_dpa_flow_pkt_hdr_reset(fc);
+ l2_group = of_dpa_group_find(fc->of_dpa, group->l2_flood.group_ids[i]);
+ if (!l2_group) {
+ continue;
+ }
+ switch (ROCKER_GROUP_TYPE_GET(l2_group->id)) {
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE:
+ of_dpa_output_l2_interface(fc, l2_group);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE:
+ of_dpa_output_l2_rewrite(fc, l2_group);
+ break;
+ }
+ }
+}
+
+static void of_dpa_output_l3_unicast(OfDpaFlowContext *fc, OfDpaGroup *group)
+{
+ OfDpaGroup *l2_group =
+ of_dpa_group_find(fc->of_dpa, group->l3_unicast.group_id);
+
+ if (!l2_group) {
+ return;
+ }
+
+ of_dpa_flow_pkt_hdr_rewrite(fc, group->l3_unicast.src_mac.a,
+ group->l3_unicast.dst_mac.a,
+ group->l3_unicast.vlan_id);
+ /* XXX need ttl_check */
+ of_dpa_output_l2_interface(fc, l2_group);
+}
+
+static void of_dpa_eg(OfDpaFlowContext *fc)
+{
+ OfDpaFlowAction *set = &fc->action_set;
+ OfDpaGroup *group;
+ uint32_t group_id;
+
+ /* send a copy of pkt to CPU (controller)? */
+
+ if (set->apply.copy_to_cpu) {
+ group_id = ROCKER_GROUP_L2_INTERFACE(set->apply.vlan_id, 0);
+ group = of_dpa_group_find(fc->of_dpa, group_id);
+ if (group) {
+ of_dpa_output_l2_interface(fc, group);
+ of_dpa_flow_pkt_hdr_reset(fc);
+ }
+ }
+
+ /* process group write actions */
+
+ if (!set->write.group_id) {
+ return;
+ }
+
+ group = of_dpa_group_find(fc->of_dpa, set->write.group_id);
+ if (!group) {
+ return;
+ }
+
+ switch (ROCKER_GROUP_TYPE_GET(group->id)) {
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE:
+ of_dpa_output_l2_interface(fc, group);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE:
+ of_dpa_output_l2_rewrite(fc, group);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
+ of_dpa_output_l2_flood(fc, group);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST:
+ of_dpa_output_l3_unicast(fc, group);
+ break;
+ }
+}
+
+typedef struct of_dpa_flow_tbl_ops {
+ void (*build_match)(OfDpaFlowContext *fc, OfDpaFlowMatch *match);
+ void (*hit)(OfDpaFlowContext *fc, OfDpaFlow *flow);
+ void (*miss)(OfDpaFlowContext *fc);
+ void (*hit_no_goto)(OfDpaFlowContext *fc);
+ void (*action_apply)(OfDpaFlowContext *fc, OfDpaFlow *flow);
+ void (*action_write)(OfDpaFlowContext *fc, OfDpaFlow *flow);
+} OfDpaFlowTblOps;
+
+static OfDpaFlowTblOps of_dpa_tbl_ops[] = {
+ [ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT] = {
+ .build_match = of_dpa_ig_port_build_match,
+ .miss = of_dpa_ig_port_miss,
+ .hit_no_goto = of_dpa_drop,
+ },
+ [ROCKER_OF_DPA_TABLE_ID_VLAN] = {
+ .build_match = of_dpa_vlan_build_match,
+ .hit_no_goto = of_dpa_drop,
+ .action_apply = of_dpa_vlan_insert,
+ },
+ [ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC] = {
+ .build_match = of_dpa_term_mac_build_match,
+ .miss = of_dpa_term_mac_miss,
+ .hit_no_goto = of_dpa_drop,
+ .action_apply = of_dpa_apply_actions,
+ },
+ [ROCKER_OF_DPA_TABLE_ID_BRIDGING] = {
+ .build_match = of_dpa_bridging_build_match,
+ .hit = of_dpa_bridging_learn,
+ .miss = of_dpa_bridging_miss,
+ .hit_no_goto = of_dpa_drop,
+ .action_apply = of_dpa_apply_actions,
+ .action_write = of_dpa_bridging_action_write,
+ },
+ [ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING] = {
+ .build_match = of_dpa_unicast_routing_build_match,
+ .miss = of_dpa_unicast_routing_miss,
+ .hit_no_goto = of_dpa_drop,
+ .action_write = of_dpa_unicast_routing_action_write,
+ },
+ [ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING] = {
+ .build_match = of_dpa_multicast_routing_build_match,
+ .miss = of_dpa_multicast_routing_miss,
+ .hit_no_goto = of_dpa_drop,
+ .action_write = of_dpa_multicast_routing_action_write,
+ },
+ [ROCKER_OF_DPA_TABLE_ID_ACL_POLICY] = {
+ .build_match = of_dpa_acl_build_match,
+ .hit = of_dpa_acl_hit,
+ .miss = of_dpa_eg,
+ .action_apply = of_dpa_apply_actions,
+ .action_write = of_dpa_acl_action_write,
+ },
+};
+
+static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id)
+{
+ OfDpaFlowTblOps *ops = &of_dpa_tbl_ops[tbl_id];
+ OfDpaFlowMatch match = { { 0, }, };
+ OfDpaFlow *flow;
+
+ if (ops->build_match) {
+ ops->build_match(fc, &match);
+ } else {
+ return;
+ }
+
+ flow = of_dpa_flow_match(fc->of_dpa, &match);
+ if (!flow) {
+ if (ops->miss) {
+ ops->miss(fc);
+ }
+ return;
+ }
+
+ flow->stats.hits++;
+
+ if (ops->action_apply) {
+ ops->action_apply(fc, flow);
+ }
+
+ if (ops->action_write) {
+ ops->action_write(fc, flow);
+ }
+
+ if (ops->hit) {
+ ops->hit(fc, flow);
+ }
+
+ if (flow->action.goto_tbl) {
+ of_dpa_flow_ig_tbl(fc, flow->action.goto_tbl);
+ } else if (ops->hit_no_goto) {
+ ops->hit_no_goto(fc);
+ }
+
+ /* drop packet */
+}
+
+static ssize_t of_dpa_ig(World *world, uint32_t pport,
+ const struct iovec *iov, int iovcnt)
+{
+ struct iovec iov_copy[iovcnt + 2];
+ OfDpaFlowContext fc = {
+ .of_dpa = world_private(world),
+ .in_pport = pport,
+ .iov = iov_copy,
+ .iovcnt = iovcnt + 2,
+ };
+
+ of_dpa_flow_pkt_parse(&fc, iov, iovcnt);
+ of_dpa_flow_ig_tbl(&fc, ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT);
+
+ return iov_size(iov, iovcnt);
+}
+
+#define ROCKER_TUNNEL_LPORT 0x00010000
+
+static int of_dpa_cmd_add_ig_port(OfDpaFlow *flow, RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ bool overlay_tunnel;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT;
+ key->width = FLOW_KEY_WIDTH(tbl_id);
+
+ key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]);
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) {
+ mask->in_pport =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]);
+ }
+
+ overlay_tunnel = !!(key->in_pport & ROCKER_TUNNEL_LPORT);
+
+ action->goto_tbl =
+ rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+
+ if (!overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_VLAN) {
+ return -ROCKER_EINVAL;
+ }
+
+ if (overlay_tunnel && action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_BRIDGING) {
+ return -ROCKER_EINVAL;
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_vlan(OfDpaFlow *flow, RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ uint32_t port;
+ bool untagged;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+ DPRINTF("Must give in_pport and vlan_id to install VLAN tbl entry\n");
+ return -ROCKER_EINVAL;
+ }
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN;
+ key->width = FLOW_KEY_WIDTH(eth.vlan_id);
+
+ key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]);
+ if (!fp_port_from_pport(key->in_pport, &port)) {
+ DPRINTF("in_pport (%d) not a front-panel port\n", key->in_pport);
+ return -ROCKER_EINVAL;
+ }
+ mask->in_pport = 0xffffffff;
+
+ key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) {
+ mask->eth.vlan_id =
+ rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]);
+ }
+
+ if (key->eth.vlan_id) {
+ untagged = false; /* filtering */
+ } else {
+ untagged = true;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+ action->goto_tbl =
+ rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+ if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) {
+ DPRINTF("Goto tbl (%d) must be TERM_MAC\n", action->goto_tbl);
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (untagged) {
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]) {
+ DPRINTF("Must specify new vlan_id if untagged\n");
+ return -ROCKER_EINVAL;
+ }
+ action->apply.new_vlan_id =
+ rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_NEW_VLAN_ID]);
+ if (1 > ntohs(action->apply.new_vlan_id) ||
+ ntohs(action->apply.new_vlan_id) > 4095) {
+ DPRINTF("New vlan_id (%d) must be between 1 and 4095\n",
+ ntohs(action->apply.new_vlan_id));
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_term_mac(OfDpaFlow *flow, RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ const MACAddr ipv4_mcast = { .a = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 } };
+ const MACAddr ipv4_mask = { .a = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 } };
+ const MACAddr ipv6_mcast = { .a = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 } };
+ const MACAddr ipv6_mask = { .a = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 } };
+ uint32_t port;
+ bool unicast = false;
+ bool multicast = false;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC;
+ key->width = FLOW_KEY_WIDTH(eth.type);
+
+ key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]);
+ if (!fp_port_from_pport(key->in_pport, &port)) {
+ return -ROCKER_EINVAL;
+ }
+ mask->in_pport =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]);
+
+ key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]);
+ if (key->eth.type != htons(0x0800) && key->eth.type != htons(0x86dd)) {
+ return -ROCKER_EINVAL;
+ }
+ mask->eth.type = htons(0xffff);
+
+ memcpy(key->eth.dst.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+ sizeof(key->eth.dst.a));
+ memcpy(mask->eth.dst.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]),
+ sizeof(mask->eth.dst.a));
+
+ if ((key->eth.dst.a[0] & 0x01) == 0x00) {
+ unicast = true;
+ }
+
+ /* only two wildcard rules are acceptable for IPv4 and IPv6 multicast */
+ if (memcmp(key->eth.dst.a, ipv4_mcast.a, sizeof(key->eth.dst.a)) == 0 &&
+ memcmp(mask->eth.dst.a, ipv4_mask.a, sizeof(mask->eth.dst.a)) == 0) {
+ multicast = true;
+ }
+ if (memcmp(key->eth.dst.a, ipv6_mcast.a, sizeof(key->eth.dst.a)) == 0 &&
+ memcmp(mask->eth.dst.a, ipv6_mask.a, sizeof(mask->eth.dst.a)) == 0) {
+ multicast = true;
+ }
+
+ if (!unicast && !multicast) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+ mask->eth.vlan_id =
+ rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]);
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+ action->goto_tbl =
+ rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+
+ if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING &&
+ action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) {
+ return -ROCKER_EINVAL;
+ }
+
+ if (unicast &&
+ action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) {
+ return -ROCKER_EINVAL;
+ }
+
+ if (multicast &&
+ action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) {
+ action->apply.copy_to_cpu =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]);
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_bridging(OfDpaFlow *flow, RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ bool unicast = false;
+ bool dst_mac = false;
+ bool dst_mac_mask = false;
+ enum {
+ BRIDGING_MODE_UNKNOWN,
+ BRIDGING_MODE_VLAN_UCAST,
+ BRIDGING_MODE_VLAN_MCAST,
+ BRIDGING_MODE_VLAN_DFLT,
+ BRIDGING_MODE_TUNNEL_UCAST,
+ BRIDGING_MODE_TUNNEL_MCAST,
+ BRIDGING_MODE_TUNNEL_DFLT,
+ } mode = BRIDGING_MODE_UNKNOWN;
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING;
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+ key->eth.vlan_id =
+ rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+ mask->eth.vlan_id = 0xffff;
+ key->width = FLOW_KEY_WIDTH(eth.vlan_id);
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) {
+ key->tunnel_id =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]);
+ mask->tunnel_id = 0xffffffff;
+ key->width = FLOW_KEY_WIDTH(tunnel_id);
+ }
+
+ /* can't do VLAN bridging and tunnel bridging at same time */
+ if (key->eth.vlan_id && key->tunnel_id) {
+ DPRINTF("can't do VLAN bridging and tunnel bridging at same time\n");
+ return -ROCKER_EINVAL;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+ memcpy(key->eth.dst.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+ sizeof(key->eth.dst.a));
+ key->width = FLOW_KEY_WIDTH(eth.dst);
+ dst_mac = true;
+ unicast = (key->eth.dst.a[0] & 0x01) == 0x00;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) {
+ memcpy(mask->eth.dst.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]),
+ sizeof(mask->eth.dst.a));
+ key->width = FLOW_KEY_WIDTH(eth.dst);
+ dst_mac_mask = true;
+ } else if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+ memcpy(mask->eth.dst.a, ff_mac.a, sizeof(mask->eth.dst.a));
+ }
+
+ if (key->eth.vlan_id) {
+ if (dst_mac && !dst_mac_mask) {
+ mode = unicast ? BRIDGING_MODE_VLAN_UCAST :
+ BRIDGING_MODE_VLAN_MCAST;
+ } else if ((dst_mac && dst_mac_mask) || !dst_mac) {
+ mode = BRIDGING_MODE_VLAN_DFLT;
+ }
+ } else if (key->tunnel_id) {
+ if (dst_mac && !dst_mac_mask) {
+ mode = unicast ? BRIDGING_MODE_TUNNEL_UCAST :
+ BRIDGING_MODE_TUNNEL_MCAST;
+ } else if ((dst_mac && dst_mac_mask) || !dst_mac) {
+ mode = BRIDGING_MODE_TUNNEL_DFLT;
+ }
+ }
+
+ if (mode == BRIDGING_MODE_UNKNOWN) {
+ DPRINTF("Unknown bridging mode\n");
+ return -ROCKER_EINVAL;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+ action->goto_tbl =
+ rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+ if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) {
+ DPRINTF("Briding goto tbl must be ACL policy\n");
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+ action->write.group_id =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+ switch (mode) {
+ case BRIDGING_MODE_VLAN_UCAST:
+ if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) {
+ DPRINTF("Bridging mode vlan ucast needs L2 "
+ "interface group (0x%08x)\n",
+ action->write.group_id);
+ return -ROCKER_EINVAL;
+ }
+ break;
+ case BRIDGING_MODE_VLAN_MCAST:
+ if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) {
+ DPRINTF("Bridging mode vlan mcast needs L2 "
+ "mcast group (0x%08x)\n",
+ action->write.group_id);
+ return -ROCKER_EINVAL;
+ }
+ break;
+ case BRIDGING_MODE_VLAN_DFLT:
+ if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) {
+ DPRINTF("Bridging mode vlan dflt needs L2 "
+ "flood group (0x%08x)\n",
+ action->write.group_id);
+ return -ROCKER_EINVAL;
+ }
+ break;
+ case BRIDGING_MODE_TUNNEL_MCAST:
+ if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) {
+ DPRINTF("Bridging mode tunnel mcast needs L2 "
+ "overlay group (0x%08x)\n",
+ action->write.group_id);
+ return -ROCKER_EINVAL;
+ }
+ break;
+ case BRIDGING_MODE_TUNNEL_DFLT:
+ if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY) {
+ DPRINTF("Bridging mode tunnel dflt needs L2 "
+ "overlay group (0x%08x)\n",
+ action->write.group_id);
+ return -ROCKER_EINVAL;
+ }
+ break;
+ default:
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]) {
+ action->write.tun_log_lport =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_LPORT]);
+ if (mode != BRIDGING_MODE_TUNNEL_UCAST) {
+ DPRINTF("Have tunnel logical port but not "
+ "in bridging tunnel mode\n");
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) {
+ action->apply.copy_to_cpu =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]);
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_unicast_routing(OfDpaFlow *flow,
+ RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ enum {
+ UNICAST_ROUTING_MODE_UNKNOWN,
+ UNICAST_ROUTING_MODE_IPV4,
+ UNICAST_ROUTING_MODE_IPV6,
+ } mode = UNICAST_ROUTING_MODE_UNKNOWN;
+ uint8_t type;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING;
+ key->width = FLOW_KEY_WIDTH(ipv6.addr.dst);
+
+ key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]);
+ switch (ntohs(key->eth.type)) {
+ case 0x0800:
+ mode = UNICAST_ROUTING_MODE_IPV4;
+ break;
+ case 0x86dd:
+ mode = UNICAST_ROUTING_MODE_IPV6;
+ break;
+ default:
+ return -ROCKER_EINVAL;
+ }
+ mask->eth.type = htons(0xffff);
+
+ switch (mode) {
+ case UNICAST_ROUTING_MODE_IPV4:
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) {
+ return -ROCKER_EINVAL;
+ }
+ key->ipv4.addr.dst =
+ rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]);
+ if (ipv4_addr_is_multicast(key->ipv4.addr.dst)) {
+ return -ROCKER_EINVAL;
+ }
+ flow->lpm = of_dpa_mask2prefix(htonl(0xffffffff));
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]) {
+ mask->ipv4.addr.dst =
+ rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP_MASK]);
+ flow->lpm = of_dpa_mask2prefix(mask->ipv4.addr.dst);
+ }
+ break;
+ case UNICAST_ROUTING_MODE_IPV6:
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) {
+ return -ROCKER_EINVAL;
+ }
+ memcpy(&key->ipv6.addr.dst,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]),
+ sizeof(key->ipv6.addr.dst));
+ if (ipv6_addr_is_multicast(&key->ipv6.addr.dst)) {
+ return -ROCKER_EINVAL;
+ }
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]) {
+ memcpy(&mask->ipv6.addr.dst,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6_MASK]),
+ sizeof(mask->ipv6.addr.dst));
+ }
+ break;
+ default:
+ return -ROCKER_EINVAL;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+ action->goto_tbl =
+ rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+ if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+ action->write.group_id =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+ type = ROCKER_GROUP_TYPE_GET(action->write.group_id);
+ if (type != ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE &&
+ type != ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST &&
+ type != ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_multicast_routing(OfDpaFlow *flow,
+ RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ enum {
+ MULTICAST_ROUTING_MODE_UNKNOWN,
+ MULTICAST_ROUTING_MODE_IPV4,
+ MULTICAST_ROUTING_MODE_IPV6,
+ } mode = MULTICAST_ROUTING_MODE_UNKNOWN;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING;
+ key->width = FLOW_KEY_WIDTH(ipv6.addr.dst);
+
+ key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]);
+ switch (ntohs(key->eth.type)) {
+ case 0x0800:
+ mode = MULTICAST_ROUTING_MODE_IPV4;
+ break;
+ case 0x86dd:
+ mode = MULTICAST_ROUTING_MODE_IPV6;
+ break;
+ default:
+ return -ROCKER_EINVAL;
+ }
+
+ key->eth.vlan_id = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+
+ switch (mode) {
+ case MULTICAST_ROUTING_MODE_IPV4:
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) {
+ key->ipv4.addr.src =
+ rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]);
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]) {
+ mask->ipv4.addr.src =
+ rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP_MASK]);
+ }
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IP]) {
+ if (mask->ipv4.addr.src != 0) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->ipv4.addr.dst =
+ rocker_tlv_get_u32(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IP]);
+ if (!ipv4_addr_is_multicast(key->ipv4.addr.dst)) {
+ return -ROCKER_EINVAL;
+ }
+
+ break;
+
+ case MULTICAST_ROUTING_MODE_IPV6:
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) {
+ memcpy(&key->ipv6.addr.src,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]),
+ sizeof(key->ipv6.addr.src));
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]) {
+ memcpy(&mask->ipv6.addr.src,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6_MASK]),
+ sizeof(mask->ipv6.addr.src));
+ }
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_SRC_IPV6]) {
+ if (mask->ipv6.addr.src.addr32[0] != 0 &&
+ mask->ipv6.addr.src.addr32[1] != 0 &&
+ mask->ipv6.addr.src.addr32[2] != 0 &&
+ mask->ipv6.addr.src.addr32[3] != 0) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]) {
+ return -ROCKER_EINVAL;
+ }
+
+ memcpy(&key->ipv6.addr.dst,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_IPV6]),
+ sizeof(key->ipv6.addr.dst));
+ if (!ipv6_addr_is_multicast(&key->ipv6.addr.dst)) {
+ return -ROCKER_EINVAL;
+ }
+
+ break;
+
+ default:
+ return -ROCKER_EINVAL;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]) {
+ action->goto_tbl =
+ rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_GOTO_TABLE_ID]);
+ if (action->goto_tbl != ROCKER_OF_DPA_TABLE_ID_ACL_POLICY) {
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+ action->write.group_id =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+ if (ROCKER_GROUP_TYPE_GET(action->write.group_id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST) {
+ return -ROCKER_EINVAL;
+ }
+ action->write.vlan_id = key->eth.vlan_id;
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_acl_ip(OfDpaFlowKey *key, OfDpaFlowKey *mask,
+ RockerTlv **flow_tlvs)
+{
+ key->width = FLOW_KEY_WIDTH(ip.tos);
+
+ key->ip.proto = 0;
+ key->ip.tos = 0;
+ mask->ip.proto = 0;
+ mask->ip.tos = 0;
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]) {
+ key->ip.proto =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO]);
+ }
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]) {
+ mask->ip.proto =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_PROTO_MASK]);
+ }
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]) {
+ key->ip.tos =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP]);
+ }
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]) {
+ mask->ip.tos =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_DSCP_MASK]);
+ }
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) {
+ key->ip.tos |=
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN]) << 6;
+ }
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) {
+ mask->ip.tos |=
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_IP_ECN_MASK]) << 6;
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_acl(OfDpaFlow *flow, RockerTlv **flow_tlvs)
+{
+ OfDpaFlowKey *key = &flow->key;
+ OfDpaFlowKey *mask = &flow->mask;
+ OfDpaFlowAction *action = &flow->action;
+ enum {
+ ACL_MODE_UNKNOWN,
+ ACL_MODE_IPV4_VLAN,
+ ACL_MODE_IPV6_VLAN,
+ ACL_MODE_IPV4_TENANT,
+ ACL_MODE_IPV6_TENANT,
+ ACL_MODE_NON_IP_VLAN,
+ ACL_MODE_NON_IP_TENANT,
+ ACL_MODE_ANY_VLAN,
+ ACL_MODE_ANY_TENANT,
+ } mode = ACL_MODE_UNKNOWN;
+ int err = ROCKER_OK;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]) {
+ return -ROCKER_EINVAL;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID] &&
+ flow_tlvs[ROCKER_TLV_OF_DPA_TUNNEL_ID]) {
+ return -ROCKER_EINVAL;
+ }
+
+ key->tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+ key->width = FLOW_KEY_WIDTH(eth.type);
+
+ key->in_pport = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT]);
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]) {
+ mask->in_pport =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IN_PPORT_MASK]);
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) {
+ memcpy(key->eth.src.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]),
+ sizeof(key->eth.src.a));
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]) {
+ memcpy(mask->eth.src.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC_MASK]),
+ sizeof(mask->eth.src.a));
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+ memcpy(key->eth.dst.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+ sizeof(key->eth.dst.a));
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]) {
+ memcpy(mask->eth.dst.a,
+ rocker_tlv_data(flow_tlvs[ROCKER_TLV_OF_DPA_DST_MAC_MASK]),
+ sizeof(mask->eth.dst.a));
+ }
+
+ key->eth.type = rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_ETHERTYPE]);
+ if (key->eth.type) {
+ mask->eth.type = 0xffff;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+ key->eth.vlan_id =
+ rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]) {
+ mask->eth.vlan_id =
+ rocker_tlv_get_u16(flow_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID_MASK]);
+ }
+
+ switch (ntohs(key->eth.type)) {
+ case 0x0000:
+ mode = (key->eth.vlan_id) ? ACL_MODE_ANY_VLAN : ACL_MODE_ANY_TENANT;
+ break;
+ case 0x0800:
+ mode = (key->eth.vlan_id) ? ACL_MODE_IPV4_VLAN : ACL_MODE_IPV4_TENANT;
+ break;
+ case 0x86dd:
+ mode = (key->eth.vlan_id) ? ACL_MODE_IPV6_VLAN : ACL_MODE_IPV6_TENANT;
+ break;
+ default:
+ mode = (key->eth.vlan_id) ? ACL_MODE_NON_IP_VLAN :
+ ACL_MODE_NON_IP_TENANT;
+ break;
+ }
+
+ /* XXX only supporting VLAN modes for now */
+ if (mode != ACL_MODE_IPV4_VLAN &&
+ mode != ACL_MODE_IPV6_VLAN &&
+ mode != ACL_MODE_NON_IP_VLAN &&
+ mode != ACL_MODE_ANY_VLAN) {
+ return -ROCKER_EINVAL;
+ }
+
+ switch (ntohs(key->eth.type)) {
+ case 0x0800:
+ case 0x86dd:
+ err = of_dpa_cmd_add_acl_ip(key, mask, flow_tlvs);
+ break;
+ }
+
+ if (err) {
+ return err;
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+ action->write.group_id =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+ }
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]) {
+ action->apply.copy_to_cpu =
+ rocker_tlv_get_u8(flow_tlvs[ROCKER_TLV_OF_DPA_COPY_CPU_ACTION]);
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_flow_add_mod(OfDpa *of_dpa, OfDpaFlow *flow,
+ RockerTlv **flow_tlvs)
+{
+ enum rocker_of_dpa_table_id tbl;
+ int err = ROCKER_OK;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY] ||
+ !flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]) {
+ return -ROCKER_EINVAL;
+ }
+
+ tbl = rocker_tlv_get_le16(flow_tlvs[ROCKER_TLV_OF_DPA_TABLE_ID]);
+ flow->priority = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_PRIORITY]);
+ flow->hardtime = rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_HARDTIME]);
+
+ if (flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]) {
+ if (tbl == ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT ||
+ tbl == ROCKER_OF_DPA_TABLE_ID_VLAN ||
+ tbl == ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC) {
+ return -ROCKER_EINVAL;
+ }
+ flow->idletime =
+ rocker_tlv_get_le32(flow_tlvs[ROCKER_TLV_OF_DPA_IDLETIME]);
+ }
+
+ switch (tbl) {
+ case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT:
+ err = of_dpa_cmd_add_ig_port(flow, flow_tlvs);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_VLAN:
+ err = of_dpa_cmd_add_vlan(flow, flow_tlvs);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC:
+ err = of_dpa_cmd_add_term_mac(flow, flow_tlvs);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_BRIDGING:
+ err = of_dpa_cmd_add_bridging(flow, flow_tlvs);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING:
+ err = of_dpa_cmd_add_unicast_routing(flow, flow_tlvs);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING:
+ err = of_dpa_cmd_add_multicast_routing(flow, flow_tlvs);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY:
+ err = of_dpa_cmd_add_acl(flow, flow_tlvs);
+ break;
+ }
+
+ return err;
+}
+
+static int of_dpa_cmd_flow_add(OfDpa *of_dpa, uint64_t cookie,
+ RockerTlv **flow_tlvs)
+{
+ OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie);
+ int err = ROCKER_OK;
+
+ if (flow) {
+ return -ROCKER_EEXIST;
+ }
+
+ flow = of_dpa_flow_alloc(cookie);
+ if (!flow) {
+ return -ROCKER_ENOMEM;
+ }
+
+ err = of_dpa_cmd_flow_add_mod(of_dpa, flow, flow_tlvs);
+ if (err) {
+ g_free(flow);
+ return err;
+ }
+
+ return of_dpa_flow_add(of_dpa, flow);
+}
+
+static int of_dpa_cmd_flow_mod(OfDpa *of_dpa, uint64_t cookie,
+ RockerTlv **flow_tlvs)
+{
+ OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie);
+
+ if (!flow) {
+ return -ROCKER_ENOENT;
+ }
+
+ return of_dpa_cmd_flow_add_mod(of_dpa, flow, flow_tlvs);
+}
+
+static int of_dpa_cmd_flow_del(OfDpa *of_dpa, uint64_t cookie)
+{
+ OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie);
+
+ if (!flow) {
+ return -ROCKER_ENOENT;
+ }
+
+ of_dpa_flow_del(of_dpa, flow);
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_flow_get_stats(OfDpa *of_dpa, uint64_t cookie,
+ struct desc_info *info, char *buf)
+{
+ OfDpaFlow *flow = of_dpa_flow_find(of_dpa, cookie);
+ size_t tlv_size;
+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) / 1000;
+ int pos;
+
+ if (!flow) {
+ return -ROCKER_ENOENT;
+ }
+
+ tlv_size = rocker_tlv_total_size(sizeof(uint32_t)) + /* duration */
+ rocker_tlv_total_size(sizeof(uint64_t)) + /* rx_pkts */
+ rocker_tlv_total_size(sizeof(uint64_t)); /* tx_ptks */
+
+ if (tlv_size > desc_buf_size(info)) {
+ return -ROCKER_EMSGSIZE;
+ }
+
+ pos = 0;
+ rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION,
+ (int32_t)(now - flow->stats.install_time));
+ rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS,
+ flow->stats.rx_pkts);
+ rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS,
+ flow->stats.tx_pkts);
+
+ return desc_set_buf(info, tlv_size);
+}
+
+static int of_dpa_flow_cmd(OfDpa *of_dpa, struct desc_info *info,
+ char *buf, uint16_t cmd,
+ RockerTlv **flow_tlvs)
+{
+ uint64_t cookie;
+
+ if (!flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]) {
+ return -ROCKER_EINVAL;
+ }
+
+ cookie = rocker_tlv_get_le64(flow_tlvs[ROCKER_TLV_OF_DPA_COOKIE]);
+
+ switch (cmd) {
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
+ return of_dpa_cmd_flow_add(of_dpa, cookie, flow_tlvs);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
+ return of_dpa_cmd_flow_mod(of_dpa, cookie, flow_tlvs);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
+ return of_dpa_cmd_flow_del(of_dpa, cookie);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
+ return of_dpa_cmd_flow_get_stats(of_dpa, cookie, info, buf);
+ }
+
+ return -ROCKER_ENOTSUP;
+}
+
+static int of_dpa_cmd_add_l2_interface(OfDpaGroup *group,
+ RockerTlv **group_tlvs)
+{
+ if (!group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT] ||
+ !group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]) {
+ return -ROCKER_EINVAL;
+ }
+
+ group->l2_interface.out_pport =
+ rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_OUT_PPORT]);
+ group->l2_interface.pop_vlan =
+ rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_POP_VLAN]);
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_l2_rewrite(OfDpa *of_dpa, OfDpaGroup *group,
+ RockerTlv **group_tlvs)
+{
+ OfDpaGroup *l2_interface_group;
+
+ if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) {
+ return -ROCKER_EINVAL;
+ }
+
+ group->l2_rewrite.group_id =
+ rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]);
+
+ l2_interface_group = of_dpa_group_find(of_dpa, group->l2_rewrite.group_id);
+ if (!l2_interface_group ||
+ ROCKER_GROUP_TYPE_GET(l2_interface_group->id) !=
+ ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) {
+ DPRINTF("l2 rewrite group needs a valid l2 interface group\n");
+ return -ROCKER_EINVAL;
+ }
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) {
+ memcpy(group->l2_rewrite.src_mac.a,
+ rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]),
+ sizeof(group->l2_rewrite.src_mac.a));
+ }
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+ memcpy(group->l2_rewrite.dst_mac.a,
+ rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+ sizeof(group->l2_rewrite.dst_mac.a));
+ }
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+ group->l2_rewrite.vlan_id =
+ rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+ if (ROCKER_GROUP_VLAN_GET(l2_interface_group->id) !=
+ (ntohs(group->l2_rewrite.vlan_id) & VLAN_VID_MASK)) {
+ DPRINTF("Set VLAN ID must be same as L2 interface group\n");
+ return -ROCKER_EINVAL;
+ }
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group,
+ RockerTlv **group_tlvs)
+{
+ OfDpaGroup *l2_group;
+ RockerTlv **tlvs;
+ int err;
+ int i;
+
+ if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT] ||
+ !group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]) {
+ return -ROCKER_EINVAL;
+ }
+
+ group->l2_flood.group_count =
+ rocker_tlv_get_le16(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_COUNT]);
+
+ tlvs = g_malloc0((group->l2_flood.group_count + 1) *
+ sizeof(RockerTlv *));
+ if (!tlvs) {
+ return -ROCKER_ENOMEM;
+ }
+
+ g_free(group->l2_flood.group_ids);
+ group->l2_flood.group_ids =
+ g_malloc0(group->l2_flood.group_count * sizeof(uint32_t));
+ if (!group->l2_flood.group_ids) {
+ err = -ROCKER_ENOMEM;
+ goto err_out;
+ }
+
+ rocker_tlv_parse_nested(tlvs, group->l2_flood.group_count,
+ group_tlvs[ROCKER_TLV_OF_DPA_GROUP_IDS]);
+
+ for (i = 0; i < group->l2_flood.group_count; i++) {
+ group->l2_flood.group_ids[i] = rocker_tlv_get_le32(tlvs[i + 1]);
+ }
+
+ /* All of the L2 interface groups referenced by the L2 flood
+ * must have same VLAN
+ */
+
+ for (i = 0; i < group->l2_flood.group_count; i++) {
+ l2_group = of_dpa_group_find(of_dpa, group->l2_flood.group_ids[i]);
+ if (!l2_group) {
+ continue;
+ }
+ if ((ROCKER_GROUP_TYPE_GET(l2_group->id) ==
+ ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) &&
+ (ROCKER_GROUP_VLAN_GET(l2_group->id) !=
+ ROCKER_GROUP_VLAN_GET(group->id))) {
+ DPRINTF("l2 interface group 0x%08x VLAN doesn't match l2 "
+ "flood group 0x%08x\n",
+ group->l2_flood.group_ids[i], group->id);
+ err = -ROCKER_EINVAL;
+ goto err_out;
+ }
+ }
+
+ g_free(tlvs);
+ return ROCKER_OK;
+
+err_out:
+ group->l2_flood.group_count = 0;
+ g_free(group->l2_flood.group_ids);
+ g_free(tlvs);
+
+ return err;
+}
+
+static int of_dpa_cmd_add_l3_unicast(OfDpaGroup *group, RockerTlv **group_tlvs)
+{
+ if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]) {
+ return -ROCKER_EINVAL;
+ }
+
+ group->l3_unicast.group_id =
+ rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID_LOWER]);
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]) {
+ memcpy(group->l3_unicast.src_mac.a,
+ rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_SRC_MAC]),
+ sizeof(group->l3_unicast.src_mac.a));
+ }
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]) {
+ memcpy(group->l3_unicast.dst_mac.a,
+ rocker_tlv_data(group_tlvs[ROCKER_TLV_OF_DPA_DST_MAC]),
+ sizeof(group->l3_unicast.dst_mac.a));
+ }
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]) {
+ group->l3_unicast.vlan_id =
+ rocker_tlv_get_u16(group_tlvs[ROCKER_TLV_OF_DPA_VLAN_ID]);
+ }
+
+ if (group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]) {
+ group->l3_unicast.ttl_check =
+ rocker_tlv_get_u8(group_tlvs[ROCKER_TLV_OF_DPA_TTL_CHECK]);
+ }
+
+ return ROCKER_OK;
+}
+
+static int of_dpa_cmd_group_do(OfDpa *of_dpa, uint32_t group_id,
+ OfDpaGroup *group, RockerTlv **group_tlvs)
+{
+ uint8_t type = ROCKER_GROUP_TYPE_GET(group_id);
+
+ switch (type) {
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE:
+ return of_dpa_cmd_add_l2_interface(group, group_tlvs);
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE:
+ return of_dpa_cmd_add_l2_rewrite(of_dpa, group, group_tlvs);
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
+ /* Treat L2 multicast group same as a L2 flood group */
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
+ return of_dpa_cmd_add_l2_flood(of_dpa, group, group_tlvs);
+ case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST:
+ return of_dpa_cmd_add_l3_unicast(group, group_tlvs);
+ }
+
+ return -ROCKER_ENOTSUP;
+}
+
+static int of_dpa_cmd_group_add(OfDpa *of_dpa, uint32_t group_id,
+ RockerTlv **group_tlvs)
+{
+ OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id);
+ int err;
+
+ if (group) {
+ return -ROCKER_EEXIST;
+ }
+
+ group = of_dpa_group_alloc(group_id);
+ if (!group) {
+ return -ROCKER_ENOMEM;
+ }
+
+ err = of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs);
+ if (err) {
+ goto err_cmd_add;
+ }
+
+ err = of_dpa_group_add(of_dpa, group);
+ if (err) {
+ goto err_cmd_add;
+ }
+
+ return ROCKER_OK;
+
+err_cmd_add:
+ g_free(group);
+ return err;
+}
+
+static int of_dpa_cmd_group_mod(OfDpa *of_dpa, uint32_t group_id,
+ RockerTlv **group_tlvs)
+{
+ OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id);
+
+ if (!group) {
+ return -ROCKER_ENOENT;
+ }
+
+ return of_dpa_cmd_group_do(of_dpa, group_id, group, group_tlvs);
+}
+
+static int of_dpa_cmd_group_del(OfDpa *of_dpa, uint32_t group_id)
+{
+ OfDpaGroup *group = of_dpa_group_find(of_dpa, group_id);
+
+ if (!group) {
+ return -ROCKER_ENOENT;
+ }
+
+ return of_dpa_group_del(of_dpa, group);
+}
+
+static int of_dpa_cmd_group_get_stats(OfDpa *of_dpa, uint32_t group_id,
+ struct desc_info *info, char *buf)
+{
+ return -ROCKER_ENOTSUP;
+}
+
+static int of_dpa_group_cmd(OfDpa *of_dpa, struct desc_info *info,
+ char *buf, uint16_t cmd, RockerTlv **group_tlvs)
+{
+ uint32_t group_id;
+
+ if (!group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]) {
+ return -ROCKER_EINVAL;
+ }
+
+ group_id = rocker_tlv_get_le32(group_tlvs[ROCKER_TLV_OF_DPA_GROUP_ID]);
+
+ switch (cmd) {
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
+ return of_dpa_cmd_group_add(of_dpa, group_id, group_tlvs);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
+ return of_dpa_cmd_group_mod(of_dpa, group_id, group_tlvs);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
+ return of_dpa_cmd_group_del(of_dpa, group_id);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
+ return of_dpa_cmd_group_get_stats(of_dpa, group_id, info, buf);
+ }
+
+ return -ROCKER_ENOTSUP;
+}
+
+static int of_dpa_cmd(World *world, struct desc_info *info,
+ char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv)
+{
+ OfDpa *of_dpa = world_private(world);
+ RockerTlv *tlvs[ROCKER_TLV_OF_DPA_MAX + 1];
+
+ rocker_tlv_parse_nested(tlvs, ROCKER_TLV_OF_DPA_MAX, cmd_info_tlv);
+
+ switch (cmd) {
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
+ return of_dpa_flow_cmd(of_dpa, info, buf, cmd, tlvs);
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
+ case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
+ return of_dpa_group_cmd(of_dpa, info, buf, cmd, tlvs);
+ }
+
+ return -ROCKER_ENOTSUP;
+}
+
+static gboolean rocker_int64_equal(gconstpointer v1, gconstpointer v2)
+{
+ return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+}
+
+static guint rocker_int64_hash(gconstpointer v)
+{
+ return (guint)*(const uint64_t *)v;
+}
+
+static int of_dpa_init(World *world)
+{
+ OfDpa *of_dpa = world_private(world);
+
+ of_dpa->world = world;
+
+ of_dpa->flow_tbl = g_hash_table_new_full(rocker_int64_hash,
+ rocker_int64_equal,
+ NULL, g_free);
+ if (!of_dpa->flow_tbl) {
+ return -ENOMEM;
+ }
+
+ of_dpa->group_tbl = g_hash_table_new_full(g_int_hash, g_int_equal,
+ NULL, g_free);
+ if (!of_dpa->group_tbl) {
+ goto err_group_tbl;
+ }
+
+ /* XXX hardcode some artificial table max values */
+ of_dpa->flow_tbl_max_size = 100;
+ of_dpa->group_tbl_max_size = 100;
+
+ return 0;
+
+err_group_tbl:
+ g_hash_table_destroy(of_dpa->flow_tbl);
+ return -ENOMEM;
+}
+
+static void of_dpa_uninit(World *world)
+{
+ OfDpa *of_dpa = world_private(world);
+
+ g_hash_table_destroy(of_dpa->group_tbl);
+ g_hash_table_destroy(of_dpa->flow_tbl);
+}
+
+static WorldOps of_dpa_ops = {
+ .init = of_dpa_init,
+ .uninit = of_dpa_uninit,
+ .ig = of_dpa_ig,
+ .cmd = of_dpa_cmd,
+};
+
+World *of_dpa_world_alloc(Rocker *r)
+{
+ return world_alloc(r, sizeof(OfDpa), ROCKER_WORLD_TYPE_OF_DPA, &of_dpa_ops);
+}
diff --git a/hw/net/rocker/rocker_of_dpa.h b/hw/net/rocker/rocker_of_dpa.h
new file mode 100644
index 0000000000..f3f6d77807
--- /dev/null
+++ b/hw/net/rocker/rocker_of_dpa.h
@@ -0,0 +1,22 @@
+/*
+ * QEMU rocker switch emulation - OF-DPA flow processing support
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ROCKER_OF_DPA_H_
+#define _ROCKER_OF_DPA_H_
+
+World *of_dpa_world_alloc(Rocker *r);
+
+#endif /* _ROCKER_OF_DPA_H_ */
diff --git a/hw/net/rocker/rocker_tlv.h b/hw/net/rocker/rocker_tlv.h
new file mode 100644
index 0000000000..e3c4ab6793
--- /dev/null
+++ b/hw/net/rocker/rocker_tlv.h
@@ -0,0 +1,244 @@
+/*
+ * QEMU rocker switch emulation - TLV parsing and composing
+ *
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ROCKER_TLV_H_
+#define _ROCKER_TLV_H_
+
+#define ROCKER_TLV_ALIGNTO 8U
+#define ROCKER_TLV_ALIGN(len) \
+ (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1))
+#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(RockerTlv))
+
+/*
+ * <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) --->
+ * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (RockerTlv) | ing | | ing |
+ * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+
+ * <--------------------------- tlv->len -------------------------->
+ */
+
+static inline RockerTlv *rocker_tlv_next(const RockerTlv *tlv, int *remaining)
+{
+ int totlen = ROCKER_TLV_ALIGN(le16_to_cpu(tlv->len));
+
+ *remaining -= totlen;
+ return (RockerTlv *) ((char *) tlv + totlen);
+}
+
+static inline int rocker_tlv_ok(const RockerTlv *tlv, int remaining)
+{
+ return remaining >= (int) ROCKER_TLV_HDRLEN &&
+ le16_to_cpu(tlv->len) >= ROCKER_TLV_HDRLEN &&
+ le16_to_cpu(tlv->len) <= remaining;
+}
+
+#define rocker_tlv_for_each(pos, head, len, rem) \
+ for (pos = head, rem = len; \
+ rocker_tlv_ok(pos, rem); \
+ pos = rocker_tlv_next(pos, &(rem)))
+
+#define rocker_tlv_for_each_nested(pos, tlv, rem) \
+ rocker_tlv_for_each(pos, rocker_tlv_data(tlv), rocker_tlv_len(tlv), rem)
+
+static inline int rocker_tlv_size(int payload)
+{
+ return ROCKER_TLV_HDRLEN + payload;
+}
+
+static inline int rocker_tlv_total_size(int payload)
+{
+ return ROCKER_TLV_ALIGN(rocker_tlv_size(payload));
+}
+
+static inline int rocker_tlv_padlen(int payload)
+{
+ return rocker_tlv_total_size(payload) - rocker_tlv_size(payload);
+}
+
+static inline int rocker_tlv_type(const RockerTlv *tlv)
+{
+ return le32_to_cpu(tlv->type);
+}
+
+static inline void *rocker_tlv_data(const RockerTlv *tlv)
+{
+ return (char *) tlv + ROCKER_TLV_HDRLEN;
+}
+
+static inline int rocker_tlv_len(const RockerTlv *tlv)
+{
+ return le16_to_cpu(tlv->len) - ROCKER_TLV_HDRLEN;
+}
+
+static inline uint8_t rocker_tlv_get_u8(const RockerTlv *tlv)
+{
+ return *(uint8_t *) rocker_tlv_data(tlv);
+}
+
+static inline uint16_t rocker_tlv_get_u16(const RockerTlv *tlv)
+{
+ return *(uint16_t *) rocker_tlv_data(tlv);
+}
+
+static inline uint32_t rocker_tlv_get_u32(const RockerTlv *tlv)
+{
+ return *(uint32_t *) rocker_tlv_data(tlv);
+}
+
+static inline uint64_t rocker_tlv_get_u64(const RockerTlv *tlv)
+{
+ return *(uint64_t *) rocker_tlv_data(tlv);
+}
+
+static inline uint16_t rocker_tlv_get_le16(const RockerTlv *tlv)
+{
+ return le16_to_cpup((uint16_t *) rocker_tlv_data(tlv));
+}
+
+static inline uint32_t rocker_tlv_get_le32(const RockerTlv *tlv)
+{
+ return le32_to_cpup((uint32_t *) rocker_tlv_data(tlv));
+}
+
+static inline uint64_t rocker_tlv_get_le64(const RockerTlv *tlv)
+{
+ return le64_to_cpup((uint64_t *) rocker_tlv_data(tlv));
+}
+
+static inline void rocker_tlv_parse(RockerTlv **tb, int maxtype,
+ const char *buf, int buf_len)
+{
+ const RockerTlv *tlv;
+ const RockerTlv *head = (const RockerTlv *) buf;
+ int rem;
+
+ memset(tb, 0, sizeof(RockerTlv *) * (maxtype + 1));
+
+ rocker_tlv_for_each(tlv, head, buf_len, rem) {
+ uint32_t type = rocker_tlv_type(tlv);
+
+ if (type > 0 && type <= maxtype) {
+ tb[type] = (RockerTlv *) tlv;
+ }
+ }
+}
+
+static inline void rocker_tlv_parse_nested(RockerTlv **tb, int maxtype,
+ const RockerTlv *tlv)
+{
+ rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv), rocker_tlv_len(tlv));
+}
+
+static inline RockerTlv *rocker_tlv_start(char *buf, int buf_pos)
+{
+ return (RockerTlv *) (buf + buf_pos);
+}
+
+static inline void rocker_tlv_put_iov(char *buf, int *buf_pos,
+ int type, const struct iovec *iov,
+ const unsigned int iovcnt)
+{
+ size_t len = iov_size(iov, iovcnt);
+ int total_size = rocker_tlv_total_size(len);
+ RockerTlv *tlv;
+
+ tlv = rocker_tlv_start(buf, *buf_pos);
+ *buf_pos += total_size;
+ tlv->type = cpu_to_le32(type);
+ tlv->len = cpu_to_le16(rocker_tlv_size(len));
+ iov_to_buf(iov, iovcnt, 0, rocker_tlv_data(tlv), len);
+ memset((char *) tlv + le16_to_cpu(tlv->len), 0, rocker_tlv_padlen(len));
+}
+
+static inline void rocker_tlv_put(char *buf, int *buf_pos,
+ int type, int len, void *data)
+{
+ struct iovec iov = {
+ .iov_base = data,
+ .iov_len = len,
+ };
+
+ rocker_tlv_put_iov(buf, buf_pos, type, &iov, 1);
+}
+
+static inline void rocker_tlv_put_u8(char *buf, int *buf_pos,
+ int type, uint8_t value)
+{
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint8_t), &value);
+}
+
+static inline void rocker_tlv_put_u16(char *buf, int *buf_pos,
+ int type, uint16_t value)
+{
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value);
+}
+
+static inline void rocker_tlv_put_u32(char *buf, int *buf_pos,
+ int type, uint32_t value)
+{
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value);
+}
+
+static inline void rocker_tlv_put_u64(char *buf, int *buf_pos,
+ int type, uint64_t value)
+{
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value);
+}
+
+static inline void rocker_tlv_put_le16(char *buf, int *buf_pos,
+ int type, uint16_t value)
+{
+ value = cpu_to_le16(value);
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint16_t), &value);
+}
+
+static inline void rocker_tlv_put_le32(char *buf, int *buf_pos,
+ int type, uint32_t value)
+{
+ value = cpu_to_le32(value);
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint32_t), &value);
+}
+
+static inline void rocker_tlv_put_le64(char *buf, int *buf_pos,
+ int type, uint64_t value)
+{
+ value = cpu_to_le64(value);
+ rocker_tlv_put(buf, buf_pos, type, sizeof(uint64_t), &value);
+}
+
+static inline RockerTlv *rocker_tlv_nest_start(char *buf, int *buf_pos,
+ int type)
+{
+ RockerTlv *start = rocker_tlv_start(buf, *buf_pos);
+
+ rocker_tlv_put(buf, buf_pos, type, 0, NULL);
+ return start;
+}
+
+static inline void rocker_tlv_nest_end(char *buf, int *buf_pos,
+ RockerTlv *start)
+{
+ start->len = (char *) rocker_tlv_start(buf, *buf_pos) - (char *) start;
+}
+
+static inline void rocker_tlv_nest_cancel(char *buf, int *buf_pos,
+ RockerTlv *start)
+{
+ *buf_pos = (char *) start - buf;
+}
+
+#endif
diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c
new file mode 100644
index 0000000000..b991e871d3
--- /dev/null
+++ b/hw/net/rocker/rocker_world.c
@@ -0,0 +1,106 @@
+/*
+ * QEMU rocker switch emulation - switch worlds
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "qemu/iov.h"
+
+#include "rocker.h"
+#include "rocker_world.h"
+
+struct world {
+ Rocker *r;
+ enum rocker_world_type type;
+ WorldOps *ops;
+};
+
+ssize_t world_ingress(World *world, uint32_t pport,
+ const struct iovec *iov, int iovcnt)
+{
+ if (world->ops->ig) {
+ return world->ops->ig(world, pport, iov, iovcnt);
+ }
+
+ return iov_size(iov, iovcnt);
+}
+
+int world_do_cmd(World *world, DescInfo *info,
+ char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv)
+{
+ if (world->ops->cmd) {
+ return world->ops->cmd(world, info, buf, cmd, cmd_info_tlv);
+ }
+
+ return -ROCKER_ENOTSUP;
+}
+
+World *world_alloc(Rocker *r, size_t sizeof_private,
+ enum rocker_world_type type, WorldOps *ops)
+{
+ World *w = g_malloc0(sizeof(World) + sizeof_private);
+
+ if (w) {
+ w->r = r;
+ w->type = type;
+ w->ops = ops;
+ if (w->ops->init) {
+ w->ops->init(w);
+ }
+ }
+
+ return w;
+}
+
+void world_free(World *world)
+{
+ if (world->ops->uninit) {
+ world->ops->uninit(world);
+ }
+ g_free(world);
+}
+
+void world_reset(World *world)
+{
+ if (world->ops->uninit) {
+ world->ops->uninit(world);
+ }
+ if (world->ops->init) {
+ world->ops->init(world);
+ }
+}
+
+void *world_private(World *world)
+{
+ return world + 1;
+}
+
+Rocker *world_rocker(World *world)
+{
+ return world->r;
+}
+
+enum rocker_world_type world_type(World *world)
+{
+ return world->type;
+}
+
+const char *world_name(World *world)
+{
+ switch (world->type) {
+ case ROCKER_WORLD_TYPE_OF_DPA:
+ return "OF_DPA";
+ default:
+ return "unknown";
+ }
+}
diff --git a/hw/net/rocker/rocker_world.h b/hw/net/rocker/rocker_world.h
new file mode 100644
index 0000000000..18d277b927
--- /dev/null
+++ b/hw/net/rocker/rocker_world.h
@@ -0,0 +1,60 @@
+/*
+ * QEMU rocker switch emulation - switch worlds
+ *
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ROCKER_WORLD_H_
+#define _ROCKER_WORLD_H_
+
+#include "rocker_hw.h"
+
+enum rocker_world_type {
+ ROCKER_WORLD_TYPE_OF_DPA = ROCKER_PORT_MODE_OF_DPA,
+ ROCKER_WORLD_TYPE_MAX,
+};
+
+typedef int (world_init)(World *world);
+typedef void (world_uninit)(World *world);
+typedef ssize_t (world_ig)(World *world, uint32_t pport,
+ const struct iovec *iov, int iovcnt);
+typedef int (world_cmd)(World *world, DescInfo *info,
+ char *buf, uint16_t cmd,
+ RockerTlv *cmd_info_tlv);
+
+typedef struct world_ops {
+ world_init *init;
+ world_uninit *uninit;
+ world_ig *ig;
+ world_cmd *cmd;
+} WorldOps;
+
+ssize_t world_ingress(World *world, uint32_t pport,
+ const struct iovec *iov, int iovcnt);
+int world_do_cmd(World *world, DescInfo *info,
+ char *buf, uint16_t cmd, RockerTlv *cmd_info_tlv);
+
+World *world_alloc(Rocker *r, size_t sizeof_private,
+ enum rocker_world_type type, WorldOps *ops);
+void world_free(World *world);
+void world_reset(World *world);
+
+void *world_private(World *world);
+Rocker *world_rocker(World *world);
+
+enum rocker_world_type world_type(World *world);
+const char *world_name(World *world);
+
+World *rocker_get_world(Rocker *r, enum rocker_world_type type);
+
+#endif /* _ROCKER_WORLD_H_ */
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index cc252edeff..3af6faf4c8 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -226,12 +226,6 @@ static void rxfilter_notify(NetClientState *nc)
}
}
-static char *mac_strdup_printf(const uint8_t *mac)
-{
- return g_strdup_printf("%.2x:%.2x:%.2x:%.2x:%.2x:%.2x", mac[0],
- mac[1], mac[2], mac[3], mac[4], mac[5]);
-}
-
static intList *get_vlan_table(VirtIONet *n)
{
intList *list, *entry;
@@ -284,12 +278,12 @@ static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
info->multicast_overflow = n->mac_table.multi_overflow;
info->unicast_overflow = n->mac_table.uni_overflow;
- info->main_mac = mac_strdup_printf(n->mac);
+ info->main_mac = qemu_mac_strdup_printf(n->mac);
str_list = NULL;
for (i = 0; i < n->mac_table.first_multi; i++) {
entry = g_malloc0(sizeof(*entry));
- entry->value = mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
+ entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
entry->next = str_list;
str_list = entry;
}
@@ -298,7 +292,7 @@ static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
str_list = NULL;
for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
entry = g_malloc0(sizeof(*entry));
- entry->value = mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
+ entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
entry->next = str_list;
str_list = entry;
}
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d4ffead48a..5d050c830f 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -88,6 +88,7 @@
#define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003
#define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004
#define PCI_DEVICE_ID_REDHAT_TEST 0x0005
+#define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006
#define PCI_DEVICE_ID_REDHAT_SDHCI 0x0007
#define PCI_DEVICE_ID_REDHAT_PCIE_HOST 0x0008
#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d7be386849..c6de71030b 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -23,6 +23,7 @@
#define PCI_CLASS_STORAGE_OTHER 0x0180
#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_OTHER 0x0280
#define PCI_CLASS_DISPLAY_VGA 0x0300
#define PCI_CLASS_DISPLAY_OTHER 0x0380
diff --git a/include/net/net.h b/include/net/net.h
index 50ffcb9281..e66ca03bf3 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -97,6 +97,7 @@ typedef struct NICState {
bool peer_deleted;
} NICState;
+char *qemu_mac_strdup_printf(const uint8_t *macaddr);
NetClientState *qemu_find_netdev(const char *id);
int qemu_find_net_clients_except(const char *id, NetClientState **ncs,
NetClientOptionsKind type, int max);
diff --git a/net/net.c b/net/net.c
index 0be084dfb4..7427f6a65a 100644
--- a/net/net.c
+++ b/net/net.c
@@ -151,6 +151,13 @@ int parse_host_port(struct sockaddr_in *saddr, const char *str)
return 0;
}
+char *qemu_mac_strdup_printf(const uint8_t *macaddr)
+{
+ return g_strdup_printf("%.2x:%.2x:%.2x:%.2x:%.2x:%.2x",
+ macaddr[0], macaddr[1], macaddr[2],
+ macaddr[3], macaddr[4], macaddr[5]);
+}
+
void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6])
{
snprintf(nc->info_str, sizeof(nc->info_str),
diff --git a/tests/rocker/README b/tests/rocker/README
new file mode 100644
index 0000000000..531e6730c0
--- /dev/null
+++ b/tests/rocker/README
@@ -0,0 +1,5 @@
+Tests require simp (simple network simulator) found here:
+
+https://github.com/scottfeldman/simp
+
+Run 'all' to run all tests.
diff --git a/tests/rocker/all b/tests/rocker/all
new file mode 100755
index 0000000000..d5ae9632a2
--- /dev/null
+++ b/tests/rocker/all
@@ -0,0 +1,19 @@
+echo -n "Running port test... "
+./port
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge test... "
+./bridge
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge STP test... "
+./bridge-stp
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge VLAN test... "
+./bridge-vlan
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
+
+echo -n "Running bridge VLAN STP test... "
+./bridge-vlan-stp
+if [ $? -eq 0 ]; then echo "pass"; else echo "FAILED"; exit 1; fi
diff --git a/tests/rocker/bridge b/tests/rocker/bridge
new file mode 100755
index 0000000000..7a03f9a227
--- /dev/null
+++ b/tests/rocker/bridge
@@ -0,0 +1,48 @@
+simp destroy ".*"
+simp create -o sw1:rocker:sw1 tut tut.dot
+simp start tut
+sleep 10
+while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+
+# configure a 2-port bridge
+
+simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0"
+
+# turn off vlan default_pvid on br0
+
+simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null"
+
+# turn off learning and flooding in SW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off"
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off"
+
+# turn on learning in HW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self"
+
+# bring up bridge and ports
+
+simp ssh tut sw1 --cmd "sudo ifconfig br0 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp1 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp2 up"
+simp ssh tut sw1 --cmd "sudo ifconfig br0 11.0.0.3/24"
+
+# config IP on hosts
+
+simp ssh tut h1 --cmd "sudo ifconfig swp1 11.0.0.1/24"
+simp ssh tut h2 --cmd "sudo ifconfig swp1 11.0.0.2/24"
+
+# test...
+
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+if [ $? -ne 0 ]; then exit 1; fi
+simp ssh tut h1 --cmd "ping -c10 11.0.0.3 >/dev/null"
diff --git a/tests/rocker/bridge-stp b/tests/rocker/bridge-stp
new file mode 100755
index 0000000000..4a111a17d3
--- /dev/null
+++ b/tests/rocker/bridge-stp
@@ -0,0 +1,57 @@
+simp destroy ".*"
+simp create -o sw1:rocker:sw1 tut tut.dot
+simp start tut
+sleep 10
+while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+
+# configure a 2-port bridge
+
+simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge"
+simp ssh tut sw1 --cmd "sudo brctl stp br0 on"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0"
+
+# turn off vlan default_pvid on br0
+
+simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null"
+
+# turn off learning and flooding in SW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off"
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off"
+
+# turn on learning in HW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self"
+
+# config IP on hosts
+
+simp ssh tut h1 --cmd "sudo ifconfig swp1 11.0.0.1/24"
+simp ssh tut h2 --cmd "sudo ifconfig swp1 11.0.0.2/24"
+
+# bring up bridge and ports
+
+simp ssh tut sw1 --cmd "sudo ifconfig br0 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp1 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp2 up"
+
+# test...
+
+simp ssh tut h1 --cmd "ping -w 1 -c1 11.0.0.2 >/dev/null"
+if [ $? -eq 0 ]; then exit 1; fi
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan
new file mode 100755
index 0000000000..9fa3431f66
--- /dev/null
+++ b/tests/rocker/bridge-vlan
@@ -0,0 +1,57 @@
+simp destroy ".*"
+simp create -o sw1:rocker:sw1 tut tut.dot
+simp start tut
+sleep 10
+while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+
+# configure a 2-port bridge
+
+simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0"
+
+# turn off vlan default_pvid on br0
+# turn on vlan filtering on br0
+
+simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null"
+simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filtering 2> /dev/null"
+
+# add both ports to VLAN 57
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp1 master"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp2 master"
+
+# turn off learning and flooding in SW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off"
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off"
+
+# turn on learning in HW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self"
+
+# bring up bridge and ports
+
+simp ssh tut sw1 --cmd "sudo ifconfig br0 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp1 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp2 up"
+
+# config IP on host VLANs
+
+simp ssh tut h1 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1"
+simp ssh tut h1 --cmd "sudo ifconfig swp1 up"
+simp ssh tut h1 --cmd "sudo ifconfig swp1.57 11.0.0.1/24"
+
+simp ssh tut h2 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1"
+simp ssh tut h2 --cmd "sudo ifconfig swp1 up"
+simp ssh tut h2 --cmd "sudo ifconfig swp1.57 11.0.0.2/24"
+
+# test...
+
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp
new file mode 100755
index 0000000000..77ab67efe2
--- /dev/null
+++ b/tests/rocker/bridge-vlan-stp
@@ -0,0 +1,69 @@
+simp destroy ".*"
+simp create -o sw1:rocker:sw1 tut tut.dot
+simp start tut
+sleep 10
+while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+
+# configure a 2-port bridge
+
+simp ssh tut sw1 --cmd "sudo /sbin/ip link add name br0 type bridge"
+simp ssh tut sw1 --cmd "sudo brctl stp br0 on"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp1 master br0"
+simp ssh tut sw1 --cmd "sudo /sbin/ip link set dev swp2 master br0"
+
+# turn off vlan default_pvid on br0
+# turn on vlan filtering on br0
+
+simp ssh tut sw1 --cmd "echo 0 | sudo dd of=/sys/class/net/br0/bridge/default_pvid 2> /dev/null"
+simp ssh tut sw1 --cmd "echo 1 | sudo dd of=/sys/class/net/br0/bridge/vlan_filtering 2> /dev/null"
+
+# add both ports to VLAN 57
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp1 master"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev swp2 master"
+
+# turn off learning and flooding in SW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning off"
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 flood off"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 flood off"
+
+# turn on learning in HW
+
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp1 learning on self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge link set dev swp2 learning on self"
+
+# config IP on host VLANs
+
+simp ssh tut h1 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1"
+simp ssh tut h1 --cmd "sudo ifconfig swp1 up"
+simp ssh tut h1 --cmd "sudo ifconfig swp1.57 11.0.0.1/24"
+
+simp ssh tut h2 --cmd "sudo vconfig add swp1 57 >/dev/null 2>&1"
+simp ssh tut h2 --cmd "sudo ifconfig swp1 up"
+simp ssh tut h2 --cmd "sudo ifconfig swp1.57 11.0.0.2/24"
+
+# bring up bridge and ports
+
+simp ssh tut sw1 --cmd "sudo ifconfig br0 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp1 up"
+simp ssh tut sw1 --cmd "sudo ifconfig swp2 up"
+
+# test...
+
+simp ssh tut h1 --cmd "ping -w 1 -c1 11.0.0.2 >/dev/null"
+if [ $? -eq 0 ]; then exit 1; fi
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
+sleep 10
+simp ssh tut h1 --cmd "ping -c10 11.0.0.2 >/dev/null"
diff --git a/tests/rocker/port b/tests/rocker/port
new file mode 100755
index 0000000000..3437f7d7fe
--- /dev/null
+++ b/tests/rocker/port
@@ -0,0 +1,22 @@
+simp destroy ".*"
+simp create -o sw1:rocker:sw1 tut tut.dot
+simp start tut
+while ! simp ssh tut sw1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h1 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+while ! simp ssh tut h2 --cmd "ping -c 1 localhost >/dev/null"; do sleep 1; done
+
+# bring up DUT ports
+
+simp ssh tut sw1 --cmd "sudo ifconfig swp1 11.0.0.1/24"
+simp ssh tut sw1 --cmd "sudo ifconfig swp2 12.0.0.1/24"
+
+# config IP on hosts
+
+simp ssh tut h1 --cmd "sudo ifconfig swp1 11.0.0.2/24"
+simp ssh tut h2 --cmd "sudo ifconfig swp1 12.0.0.2/24"
+
+# test...
+
+simp ssh tut h1 --cmd "ping -c10 11.0.0.1 >/dev/null"
+if [ $? -eq 1 ]; then exit 1; fi
+simp ssh tut h2 --cmd "ping -c10 12.0.0.1 >/dev/null"
diff --git a/tests/rocker/tut.dot b/tests/rocker/tut.dot
new file mode 100644
index 0000000000..87f7266f04
--- /dev/null
+++ b/tests/rocker/tut.dot
@@ -0,0 +1,8 @@
+graph G {
+ graph [hostidtype="hostname", version="1:0", date="04/12/2013"];
+ edge [dir=none, notify="log"];
+ sw1:swp1 -- h1:swp1;
+ sw1:swp2 -- h2:swp1;
+ sw1:swp3 -- h3:swp1;
+ sw1:swp4 -- h4:swp1;
+}