* [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
@ 2014-09-29 13:22 Pawel Wodkowski
2014-09-29 13:51 ` Jastrzebski, MichalX K
2014-09-30 6:19 ` Pawel Wodkowski
0 siblings, 2 replies; 5+ messages in thread
From: Pawel Wodkowski @ 2014-09-29 13:22 UTC (permalink / raw)
To: dev
This patch adds support for mode 4 of link bonding. It depends on Declan Doherty's
patches v3 and the rte alarms patch v2 or above.
New version handles race issues with setting/canceling callbacks,
fixes promiscuous mode setting in mode 4 and some other minor errors in the mode 4
implementation.
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
lib/librte_ether/rte_ether.h | 1 +
lib/librte_pmd_bond/Makefile | 1 +
lib/librte_pmd_bond/rte_eth_bond.h | 4 +
lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++++++---
lib/librte_pmd_bond/rte_eth_bond_args.c | 1 +
lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 +++++++++++++++++++++++++---
lib/librte_pmd_bond/rte_eth_bond_private.h | 42 ++++-
7 files changed, 346 insertions(+), 46 deletions(-)
diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
index 2e08f23..1a3711b 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_ether/rte_ether.h
@@ -293,6 +293,7 @@ struct vlan_hdr {
#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
#ifdef __cplusplus
}
diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
index 953d75e..c2312c2 100644
--- a/lib/librte_pmd_bond/Makefile
+++ b/lib/librte_pmd_bond/Makefile
@@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
#
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
#
diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index 6811c7b..b0223c2 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -75,6 +75,10 @@ extern "C" {
/**< Broadcast (Mode 3).
* In this mode all transmitted packets will be transmitted on all available
* active slaves of the bonded. */
+#define BONDING_MODE_8023AD (4)
+/**< 802.3AD (Mode 4).
+ * In this mode transmission and reception of packets is managed by LACP
+ * protocol specified in 802.3AD documentation. */
/* Balance Mode Transmit Policies */
#define BALANCE_XMIT_POLICY_LAYER2 (0)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index c690ceb..c547164 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <string.h>
+
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
@@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id)
return 0;
}
+/*
+ * Append port_id to the active slave list of the bonded device eth_dev.
+ *
+ * The new entry is written first, then (in 802.3AD mode only) the mode 4
+ * append hook is called, and only after that is the active slave count
+ * raised by one. The count written back is derived from the value sampled
+ * on entry.
+ */
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+	const uint8_t slot = priv->active_slave_count;
+
+	priv->active_slaves[slot] = port_id;
+
+	if (priv->mode == BONDING_MODE_8023AD) {
+		bond_mode_8023ad_slave_append(eth_dev);
+	}
+
+	priv->active_slave_count = slot + 1;
+}
+
+/*
+ * Remove the entry at position slave_pos from the active slave list of the
+ * bonded device eth_dev.
+ *
+ * In 802.3AD mode the mode 4 deactivate hook is called before the list is
+ * touched. Entries behind slave_pos are then moved one position towards the
+ * start and the active slave count is lowered by one. The caller must pass
+ * a valid position; no range check is done here.
+ */
+void
+deactivate_slave(struct rte_eth_dev *eth_dev,
+		uint8_t slave_pos)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+	/* Sampled before the mode 4 hook runs; becomes the count after removal. */
+	uint8_t remaining = priv->active_slave_count;
+
+	if (priv->mode == BONDING_MODE_8023AD) {
+		bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
+	}
+
+	remaining--;
+
+	/* Nothing to move when the removed entry was the last one. */
+	if (slave_pos < remaining) {
+		memmove(&priv->active_slaves[slave_pos],
+			&priv->active_slaves[slave_pos + 1],
+			(remaining - slave_pos) *
+			sizeof(priv->active_slaves[0]));
+	}
+
+	priv->active_slave_count = remaining;
+}
+
uint8_t
number_of_sockets(void)
{
@@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->dev_ops = &default_dev_ops;
eth_dev->pci_dev = pci_dev;
- if (bond_ethdev_mode_set(eth_dev, mode)) {
- RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
- eth_dev->data->port_id, mode);
- goto err;
- }
-
+ internals->port_id = eth_dev->data->port_id;
+ internals->mode = BONDING_MODE_INVALID;
internals->current_primary_port = 0;
internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
internals->user_defined_mac = 0;
@@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
memset(internals->slaves, 0, sizeof(internals->slaves));
+ if (bond_ethdev_mode_set(eth_dev, mode)) {
+ RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
+ eth_dev->data->port_id, mode);
+ goto err;
+ }
+
return eth_dev->data->port_id;
err:
@@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
rte_eth_link_get_nowait(slave_port_id, &link_props);
if (link_props.link_status == 1)
- internals->active_slaves[internals->active_slave_count++] =
- slave_port_id;
+ activate_slave(bonded_eth_dev, slave_port_id);
}
return 0;
}
-
int
rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
{
@@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
return retval;
}
-
static int
__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
{
+ struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
- int i, slave_idx = -1;
+ int i, slave_idx;
if (valid_slave_port_id(slave_port_id) != 0)
return -1;
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
+ bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+ internals = bonded_eth_dev->data->dev_private;
/* first remove from active slave list */
- for (i = 0; i < internals->active_slave_count; i++) {
- if (internals->active_slaves[i] == slave_port_id)
- slave_idx = i;
+ slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count,
+ slave_port_id);
- /* shift active slaves up active array list */
- if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
- internals->active_slaves[i] = internals->active_slaves[i+1];
- }
-
- if (slave_idx >= 0)
- internals->active_slave_count--;
+ if (slave_idx < internals->active_slave_count)
+ deactivate_slave(bonded_eth_dev, slave_idx);
slave_idx = -1;
/* now find in slave list */
@@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
return internals->current_primary_port;
}
+
int
rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
{
@@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
return internals->balance_xmit_policy;
}
-
int
rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
{
@@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
return internals->link_down_delay_ms;
}
-
int
rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index bbbc69b..a0be0e6 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
case BONDING_MODE_ACTIVE_BACKUP:
case BONDING_MODE_BALANCE:
case BONDING_MODE_BROADCAST:
+ case BONDING_MODE_8023AD:
return 0;
default:
RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 6d0fb1b..13630d9 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -44,6 +44,7 @@
#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad.h"
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
@@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue,
bufs, nb_pkts);
}
+/*
+ * Receive burst for 802.3AD (mode 4).
+ *
+ * Polls the active slaves in order until nb_pkts packets have been collected
+ * or every active slave has been polled once. Frames whose ether type is
+ * ETHER_TYPE_SLOW are handed to bond_mode_8023ad_handle_slow_pkt() and are
+ * never returned to the caller. Any other frame is returned only when the
+ * ACTOR_STATE(..., COLLECTING) check passes for the receiving slave and
+ * either the bonded device is in promiscuous mode or the destination MAC
+ * equals the bonded device MAC; otherwise the frame is freed.
+ *
+ * queue   - struct bond_rx_queue of the bonded device
+ * bufs    - output array with room for nb_pkts mbuf pointers
+ * nb_pkts - maximum number of packets to return
+ *
+ * Returns the number of packets stored in bufs (never more than nb_pkts).
+ */
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* Cast to structure, containing bonded device's port id and queue id */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	struct mode8023ad_data *mode4 = &internals->mode4;
+	struct ether_addr bond_mac;
+
+	struct ether_hdr *hdr;
+	/* Scratch array for one slave burst. The extra element only keeps the
+	 * array non-empty when nb_pkts is 0; it is never requested from a slave. */
+	struct rte_mbuf *pkts[nb_pkts + 1];
+
+	uint16_t num_rx_slave = 0; /* Number of packet received on current slave */
+	uint16_t num_rx_total = 0; /* Total number of received packets */
+	uint16_t ether_type;
+
+	uint8_t i;  /* position in the active slave list */
+	uint16_t j; /* packet index; 16 bit wide so it can reach num_rx_slave */
+
+	rte_eth_macaddr_get(internals->port_id, &bond_mac);
+
+	for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts;
+			i++) {
+		/* Read packets from this slave. Ask only for as many as still fit
+		 * into bufs: every received packet may be a data packet that has
+		 * to be stored, so requesting more could overflow bufs. */
+		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
+				bd_rx_q->queue_id, pkts,
+				(uint16_t)(nb_pkts - num_rx_total));
+
+		/* Separate slow protocol packets from other packets */
+		for (j = 0; j < num_rx_slave; j++) {
+			hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
+
+			ether_type = rte_be_to_cpu_16(hdr->ether_type);
+			if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
+				bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
+				continue;
+			}
+
+			/* Check if we can receive this packet. Also filter packets if
+			 * bonding interface is not in promiscuous mode (slaves are always
+			 * in promiscuous mode). */
+			if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) &&
+					likely(internals->promiscuous_en ||
+					is_same_ether_addr(&bond_mac, &hdr->d_addr))) {
+				bufs[num_rx_total++] = pkts[j];
+			} else
+				rte_pktmbuf_free(pkts[j]);
+		}
+	}
+
+	return num_rx_total;
+}
+
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
@@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
}
+/*
+ * Transmit burst for 802.3AD (mode 4).
+ *
+ * Works on snapshots of the active slave list and of the distributing slave
+ * offsets. Slow protocol messages waiting in mode4->tx_ring are placed in
+ * slot 0 of the owning slave's buffer (only the newest message per slave is
+ * kept; messages for slaves that are not in the active list are freed) and
+ * their descriptors are returned to mode4->free_ring. Data packets are
+ * spread over the distributing slaves with xmit_slave_hash().
+ *
+ * Data packets a slave did not accept are copied to the tail of bufs. Slow
+ * packets are never handed back to the caller and are not counted in the
+ * return value.
+ *
+ * queue   - struct bond_tx_queue of the bonded device
+ * bufs    - packets to transmit
+ * nb_pkts - number of packets in bufs
+ *
+ * Returns the number of packets from bufs that were transmitted.
+ */
static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_dev_private *internals;
+	struct mode8023ad_data *mode4;
+	struct bond_tx_queue *bd_tx_q;
+
+	uint8_t num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	/* positions in slaves, not ID */
+	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
+	uint8_t distributing_slaves_count;
+
+	uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
+	uint16_t i, op_slave_idx;
+
+	/* Slow packets from 802.3AX state machines. */
+	struct slow_protocol_msg *slow_msg;
+
+	/* Allocate one additional packet in case 8023AD mode.
+	 * First element if not NULL is slow packet. */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
+	/* Total amount of packets in slave_bufs */
+	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	/* Array of slow packets placed in each slave */
+	uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	internals = bd_tx_q->dev_private;
+	mode4 = &internals->mode4;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = internals->active_slave_count;
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+
+	distributing_slaves_count = mode4->distibuting_slaves_count;
+	memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
+			sizeof(distributing_offsets[0]) * distributing_slaves_count);
+
+	for (i = 0; i < num_of_slaves; i++)
+		slave_bufs[i][0] = NULL;
+
+	/* It is likely that tx ring will be empty. If it is not empty, it is
+	 * likely that there will be only one frame. */
+	while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
+			rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) {
+		i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id);
+
+		/* Assign slow packet to slave or drop it if slave is not in active list
+		 * (ex: link down). */
+		if (likely(i < num_of_slaves)) {
+			/* If there is more than one slow packet for the same slave, send
+			 * only the latest and drop the previous one - tx burst was not
+			 * called quickly enough. */
+			if (slave_bufs[i][0] != NULL)
+				rte_pktmbuf_free(slave_bufs[i][0]);
+
+			slave_bufs[i][0] = slow_msg->pkt;
+			slave_nb_pkts[i] = 1;
+			slave_slow_packets[i] = 1;
+		} else
+			rte_pktmbuf_free(slow_msg->pkt);
+
+		rte_ring_enqueue(mode4->free_ring, slow_msg);
+	}
+
+	if (likely(distributing_slaves_count > 0)) {
+		/* Populate slaves mbuf with the packets which are to be sent on it */
+		for (i = 0; i < nb_pkts; i++) {
+			/* Select output slave using hash based on xmit policy */
+			op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count,
+					internals->balance_xmit_policy);
+
+			/* Populate slave mbuf arrays with mbufs for that slave. Use only
+			 * slaves that are currently distributing. */
+			uint8_t slave_offset = distributing_offsets[op_slave_idx];
+			uint16_t pkt_pos = slave_nb_pkts[slave_offset];
+			slave_nb_pkts[slave_offset]++;
+
+			slave_bufs[slave_offset][pkt_pos] = bufs[i];
+		}
+	}
+
+	/* Send packet burst on each slave device */
+	for (i = 0; i < num_of_slaves; i++) {
+		if (slave_nb_pkts[i] > 0) {
+			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+					slave_bufs[i], slave_nb_pkts[i]);
+
+			/* if tx burst fails move packets to end of bufs */
+			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+				uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+				/* Index of the first unsent packet handed back to caller */
+				uint16_t first_failed = num_tx_slave;
+
+				/* Free the slow packet if it exists and was not sent. It
+				 * sits in slot 0, so the packets to hand back start one
+				 * slot later. */
+				if (slave_slow_packets[i] != 0 && num_tx_slave == 0) {
+					rte_pktmbuf_free(slave_bufs[i][0]);
+					slave_tx_fail_count--;
+					first_failed = 1;
+				}
+
+				/* Copy the unsent mbuf pointers (array entries, not the
+				 * mbufs themselves) to the tail of bufs; the length is in
+				 * bytes, hence the sizeof factor. */
+				tx_fail_total += slave_tx_fail_count;
+				memcpy(&bufs[nb_pkts - tx_fail_total],
+						&slave_bufs[i][first_failed],
+						slave_tx_fail_count * sizeof(bufs[0]));
+			}
+
+			/* The slow packet is not one of the caller's packets */
+			if (num_tx_slave > 0)
+				num_tx_slave -= slave_slow_packets[i];
+
+			num_tx_total += num_tx_slave;
+		}
+	}
+
+	return num_tx_total;
+}
+
+static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
{
@@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link,
}
+/*
+ * Copy the first MAC address of eth_dev (eth_dev->data->mac_addrs) into
+ * dst_mac_addr.
+ *
+ * Returns 0 on success, -1 (after logging an error) when eth_dev or
+ * dst_mac_addr is NULL.
+ */
int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
+{
+	struct ether_addr *mac_addr;
+
+	if (eth_dev == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+		return -1;
+	}
+
+	if (dst_mac_addr == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+		return -1;
+	}
+
+	/* Dereference eth_dev only after it has been checked for NULL */
+	mac_addr = eth_dev->data->mac_addrs;
+
+	ether_addr_copy(mac_addr, dst_mac_addr);
+	return 0;
+}
+
+int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
struct ether_addr *mac_addr;
@@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
mac_addr = eth_dev->data->mac_addrs;
if (eth_dev == NULL) {
- RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
+ RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
return -1;
}
@@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
}
}
break;
+ case BONDING_MODE_8023AD:
+ break;
case BONDING_MODE_ACTIVE_BACKUP:
default:
for (i = 0; i < internals->slave_count; i++) {
@@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
break;
+ case BONDING_MODE_8023AD:
+ if (bond_mode_8023ad_init(eth_dev) != 0)
+ return -1;
+
+ eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+ eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+ break;
default:
return -1;
}
@@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
if (internals->user_defined_primary_port)
bond_ethdev_primary_set(internals, internals->primary_port);
+ if (internals->mode == BONDING_MODE_8023AD)
+ bond_mode_8023ad_start(eth_dev);
if (internals->link_status_polling_enabled)
rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
@@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
struct bond_dev_private *internals = eth_dev->data->dev_private;
+ if (internals->mode == BONDING_MODE_8023AD) {
+ struct mode8023ad_data *data = &internals->mode4;
+ struct slow_protocol_msg *msg;
+
+ bond_mode_8023ad_stop(eth_dev);
+ data->distibuting_slaves_count = 0;
+
+ /* Discard all messages to/from mode 4 state machines */
+ while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) {
+ rte_pktmbuf_free(msg->pkt);
+ rte_ring_enqueue(data->free_ring, msg);
+ }
+
+ while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) {
+ rte_pktmbuf_free(msg->pkt);
+ rte_ring_enqueue(data->free_ring, msg);
+ }
+ }
+
internals->active_slave_count = 0;
internals->link_status_polling_enabled = 0;
@@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
0, dev->pci_dev->numa_node);
if (bd_tx_q == NULL)
- return -1;
+ return -1;
bd_tx_q->queue_id = tx_queue_id;
bd_tx_q->dev_private = dev->data->dev_private;
@@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue)
rte_free(queue);
}
-
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
@@ -884,7 +1105,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
/* If device is currently being configured then don't check slaves link
* status, wait until next period */
- if (rte_spinlock_trylock(&internals->lock)){
+ if (rte_spinlock_trylock(&internals->lock)) {
for (i = 0; i < internals->slave_count; i++) {
if (internals->slaves[i].link_status_polling_enabled) {
slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
@@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
for (i = 0; i < internals->slave_count; i++)
rte_eth_promiscuous_enable(internals->slaves[i].port_id);
break;
+ /* In mode 4 promiscuous mode is managed when a slave is added/removed */
+ case BONDING_MODE_8023AD:
+ break;
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
default:
rte_eth_promiscuous_enable(internals->current_primary_port);
-
}
}
@@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
int i;
internals->promiscuous_en = 0;
-
+
switch (internals->mode) {
/* Promiscuous mode is propagated to all slaves */
case BONDING_MODE_ROUND_ROBIN:
@@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
for (i = 0; i < internals->slave_count; i++)
rte_eth_promiscuous_disable(internals->slaves[i].port_id);
break;
+ /* In mode 4 promiscuous mode is managed when a slave is added/removed */
+ case BONDING_MODE_8023AD:
+ break;
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
default:
@@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
struct bond_dev_private *internals;
struct rte_eth_link link;
- int i, valid_slave = 0, active_pos = -1;
+ int i, valid_slave = 0;
+ uint8_t active_pos;
uint8_t lsc_flag = 0;
if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
@@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
return;
/* Search for port in active port list */
- for (i = 0; i < internals->active_slave_count; i++) {
- if (port_id == internals->active_slaves[i]) {
- active_pos = i;
- break;
- }
- }
+ active_pos = find_slave_by_id(internals->active_slaves,
+ internals->active_slave_count, port_id);
rte_eth_link_get_nowait(port_id, &link);
if (link.link_status) {
- if (active_pos >= 0)
+ if (active_pos < internals->active_slave_count)
return;
/* if no active slave ports then set this port to be primary port */
@@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
link_properties_set(bonded_eth_dev,
&(slave_eth_dev->data->dev_link));
}
- internals->active_slaves[internals->active_slave_count++] = port_id;
+
+ activate_slave(bonded_eth_dev, port_id);
/* If user has defined the primary port then default to using it */
if (internals->user_defined_primary_port &&
internals->primary_port == port_id)
bond_ethdev_primary_set(internals, port_id);
} else {
- if (active_pos < 0)
+ if (active_pos == internals->active_slave_count)
return;
/* Remove from active slave list */
- for (i = active_pos; i < (internals->active_slave_count - 1); i++)
- internals->active_slaves[i] = internals->active_slaves[i+1];
-
- internals->active_slave_count--;
+ deactivate_slave(bonded_eth_dev, active_pos);
/* No active slaves, change link status to down and reset other
* link properties */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 6db5144..77f7bb0 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -42,6 +42,7 @@ extern "C" {
#include <rte_spinlock.h>
#include "rte_eth_bond.h"
+#include "rte_eth_bond_8023ad.h"
#define PMD_BOND_SLAVE_PORT_KVARG ("slave")
#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
@@ -60,6 +61,8 @@ extern "C" {
#define RTE_BOND_LOG(lvl, msg, ...) \
RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__);
+#define BONDING_MODE_INVALID 0xFF
+
extern const char *pmd_bond_init_valid_arguments[];
extern const char *driver_name;
@@ -89,7 +92,13 @@ struct bond_tx_queue {
/**< Copy of TX configuration structure for queue */
};
-
+/**
+ * Persisted Slave Configuration Structure.
+ *
+ * Pairs a slave port id with a copy of that port's original MAC address.
+ * NOTE(review): presumably kept so the address can be restored when the
+ * slave leaves the bond - no user of this struct is visible here, confirm.
+ */
+struct slave_conf {
+ uint8_t port_id;
+ /**< Port Id of slave eth_dev */
+ struct ether_addr mac_addr;
+ /**< Slave eth_dev original MAC address */
+};
/** Bonded slave devices structure */
struct bond_ethdev_slave_ports {
uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
@@ -124,7 +133,7 @@ struct bond_dev_private {
uint8_t user_defined_mac;
/**< Flag for whether MAC address is user defined or not */
uint8_t promiscuous_en;
- /**< Enabled/disable promiscuous mode on slave devices */
+ /**< Enabled/disable promiscuous mode on bonding device */
uint8_t link_props_set;
/**< flag to denote if the link properties are set */
@@ -143,6 +152,9 @@ struct bond_dev_private {
uint8_t slave_count; /**< Number of bonded slaves */
struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
/**< Arary of bonded slaves details */
+
+ struct mode8023ad_data mode4;
+ /**< Mode 4 private data */
};
extern struct eth_dev_ops default_dev_ops;
@@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops;
int
valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
+/*
+ * Linear search for slave_id in the first slaves_count entries of slaves.
+ *
+ * Returns the position of the first matching entry, or slaves_count when
+ * the id is not present (so "result < slaves_count" means "found").
+ */
+static inline uint8_t
+find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
+		uint8_t slave_id)
+{
+	uint8_t idx = 0;
+
+	while (idx < slaves_count && slaves[idx] != slave_id)
+		idx++;
+
+	return idx;
+}
+
int
valid_port_id(uint8_t port_id);
@@ -160,6 +187,14 @@ int
valid_slave_port_id(uint8_t port_id);
void
+deactivate_slave(struct rte_eth_dev *eth_dev,
+ uint8_t slave_pos );
+
+void
+activate_slave(struct rte_eth_dev *eth_dev,
+ uint8_t port_id );
+
+void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
struct rte_eth_link *slave_dev_link);
void
@@ -173,6 +208,9 @@ int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
+
+int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
uint8_t
--
1.7.9.5
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
2014-09-29 13:22 [dpdk-dev] [PATCH v2] bond: Add mode 4 support Pawel Wodkowski
@ 2014-09-29 13:51 ` Jastrzebski, MichalX K
2014-09-30 11:17 ` Wodkowski, PawelX
2014-09-30 6:19 ` Pawel Wodkowski
1 sibling, 1 reply; 5+ messages in thread
From: Jastrzebski, MichalX K @ 2014-09-29 13:51 UTC (permalink / raw)
To: Wodkowski, PawelX, dev
Please don't take this patch into account. Two files are missing.
Best regards
Michal
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pawel Wodkowski
> Sent: Monday, September 29, 2014 3:23 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
>
> This patch adds support mode 4 of link bonding. It depend on Delcan
> Doherty
> patches v3 and rte alarms patch v2 or above.
>
> New version handles race issues with setting/cancelin callbacks,
> fixes promiscus mode setting in mode 4 and some other minor errors in
> mode 4
> implementation.
>
>
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---
> lib/librte_ether/rte_ether.h | 1 +
> lib/librte_pmd_bond/Makefile | 1 +
> lib/librte_pmd_bond/rte_eth_bond.h | 4 +
> lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++++++---
> lib/librte_pmd_bond/rte_eth_bond_args.c | 1 +
> lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261
> +++++++++++++++++++++++++---
> lib/librte_pmd_bond/rte_eth_bond_private.h | 42 ++++-
> 7 files changed, 346 insertions(+), 46 deletions(-)
>
> diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
> index 2e08f23..1a3711b 100644
> --- a/lib/librte_ether/rte_ether.h
> +++ b/lib/librte_ether/rte_ether.h
> @@ -293,6 +293,7 @@ struct vlan_hdr {
> #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
> #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
> #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time
> Protocol. */
> +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker).
> */
>
> #ifdef __cplusplus
> }
> diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
> index 953d75e..c2312c2 100644
> --- a/lib/librte_pmd_bond/Makefile
> +++ b/lib/librte_pmd_bond/Makefile
> @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
> #
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
>
> #
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h
> b/lib/librte_pmd_bond/rte_eth_bond.h
> index 6811c7b..b0223c2 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,10 @@ extern "C" {
> /**< Broadcast (Mode 3).
> * In this mode all transmitted packets will be transmitted on all available
> * active slaves of the bonded. */
> +#define BONDING_MODE_8023AD (4)
> +/**< 802.3AD (Mode 4).
> + * In this mode transmission and reception of packets is managed by LACP
> + * protocol specified in 802.3AD documentation. */
>
> /* Balance Mode Transmit Policies */
> #define BALANCE_XMIT_POLICY_LAYER2 (0)
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c
> b/lib/librte_pmd_bond/rte_eth_bond_api.c
> index c690ceb..c547164 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_api.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
> @@ -31,6 +31,8 @@
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> */
>
> +#include <string.h>
> +
> #include <rte_mbuf.h>
> #include <rte_malloc.h>
> #include <rte_ethdev.h>
> @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id)
> return 0;
> }
>
> +void
> +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
> +{
> + struct bond_dev_private *internals = eth_dev->data->dev_private;
> + uint8_t active_count = internals->active_slave_count;
> +
> + internals->active_slaves[active_count] = port_id;
> +
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_slave_append(eth_dev);
> +
> + internals->active_slave_count = active_count + 1;
> +}
> +
> +void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t slave_pos)
> +{
> + struct bond_dev_private *internals = eth_dev->data->dev_private;
> + uint8_t active_count = internals->active_slave_count;
> +
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
> +
> + active_count--;
> +
> + /* If slave was not at the end of the list
> + * shift active slaves up active array list */
> + if (slave_pos < active_count) {
> + memmove(internals->active_slaves + slave_pos,
> + internals->active_slaves + slave_pos + 1,
> + (active_count - slave_pos) *
> + sizeof(internals->active_slaves[0]));
> + }
> +
> + internals->active_slave_count = active_count;
> +}
> +
> uint8_t
> number_of_sockets(void)
> {
> @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t
> mode, uint8_t socket_id)
> eth_dev->dev_ops = &default_dev_ops;
> eth_dev->pci_dev = pci_dev;
>
> - if (bond_ethdev_mode_set(eth_dev, mode)) {
> - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode
> too %d",
> - eth_dev->data->port_id, mode);
> - goto err;
> - }
> -
> + internals->port_id = eth_dev->data->port_id;
> + internals->mode = BONDING_MODE_INVALID;
> internals->current_primary_port = 0;
> internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
> internals->user_defined_mac = 0;
> @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t
> mode, uint8_t socket_id)
> memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
> memset(internals->slaves, 0, sizeof(internals->slaves));
>
> + if (bond_ethdev_mode_set(eth_dev, mode)) {
> + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode
> too %d",
> + eth_dev->data->port_id, mode);
> + goto err;
> + }
> +
> return eth_dev->data->port_id;
>
> err:
> @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t
> bonded_port_id, uint8_t slave_port_id)
> rte_eth_link_get_nowait(slave_port_id, &link_props);
>
> if (link_props.link_status == 1)
> - internals->active_slaves[internals-
> >active_slave_count++] =
> - slave_port_id;
> + activate_slave(bonded_eth_dev, slave_port_id);
> }
> return 0;
>
> }
>
> -
> int
> rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
> {
> @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id,
> uint8_t slave_port_id)
> return retval;
> }
>
> -
> static int
> __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t
> slave_port_id)
> {
> + struct rte_eth_dev *bonded_eth_dev;
> struct bond_dev_private *internals;
>
> - int i, slave_idx = -1;
> + int i, slave_idx;
>
> if (valid_slave_port_id(slave_port_id) != 0)
> return -1;
>
> - internals = rte_eth_devices[bonded_port_id].data->dev_private;
> + bonded_eth_dev = &rte_eth_devices[bonded_port_id];
> + internals = bonded_eth_dev->data->dev_private;
>
> /* first remove from active slave list */
> - for (i = 0; i < internals->active_slave_count; i++) {
> - if (internals->active_slaves[i] == slave_port_id)
> - slave_idx = i;
> + slave_idx = find_slave_by_id(internals->active_slaves, internals-
> >active_slave_count,
> + slave_port_id);
>
> - /* shift active slaves up active array list */
> - if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
> - internals->active_slaves[i] = internals-
> >active_slaves[i+1];
> - }
> -
> - if (slave_idx >= 0)
> - internals->active_slave_count--;
> + if (slave_idx < internals->active_slave_count)
> + deactivate_slave(bonded_eth_dev, slave_idx);
>
> slave_idx = -1;
> /* now find in slave list */
> @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
>
> return internals->current_primary_port;
> }
> +
> int
> rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t
> len)
> {
> @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t
> bonded_port_id)
> return internals->balance_xmit_policy;
> }
>
> -
> int
> rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t
> internal_ms)
> {
> @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t
> bonded_port_id)
> return internals->link_down_delay_ms;
> }
>
> -
> int
> rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t
> delay_ms)
>
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c
> b/lib/librte_pmd_bond/rte_eth_bond_args.c
> index bbbc69b..a0be0e6 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_args.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
> @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char
> *key __rte_unused,
> case BONDING_MODE_ACTIVE_BACKUP:
> case BONDING_MODE_BALANCE:
> case BONDING_MODE_BROADCAST:
> + case BONDING_MODE_8023AD:
> return 0;
> default:
> RTE_BOND_LOG(ERR, "Invalid slave mode value (%s)
> specified", value);
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> index 6d0fb1b..13630d9 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> @@ -44,6 +44,7 @@
>
> #include "rte_eth_bond.h"
> #include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
>
> static uint16_t
> bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t
> nb_pkts)
> @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue,
> bufs, nb_pkts);
> }
>
> +static uint16_t
> +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> + uint16_t nb_pkts)
> +{
> + /* Cast to structure, containing bonded device's port id and queue id
> */
> + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
> + struct bond_dev_private *internals = bd_rx_q->dev_private;
> + struct mode8023ad_data *mode4 = &internals->mode4;
> + struct ether_addr bond_mac;
> +
> + struct ether_hdr *hdr;
> + struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow
> packet */
> +
> + uint16_t num_rx_slave = 0; /* Number of packet received on
> current slave */
> + uint16_t num_rx_total = 0; /* Total number of received packets
> */
> +
> + uint8_t i, j;
> +
> + rte_eth_macaddr_get(internals->port_id, &bond_mac);
> +
> + for (i = 0; i < internals->active_slave_count && num_rx_total <
> nb_pkts; i++) {
> + /* Read packets from this slave */
> + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
> + bd_rx_q->queue_id, pkts, nb_pkts + 1 -
> num_rx_total);
> +
> + /* Separate slow protocol packets from other packets */
> + for (j = 0; j < num_rx_slave; j++) {
> + hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
> +
> + uint16_t ether_type = rte_be_to_cpu_16(hdr-
> >ether_type);
> + if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
> +
> bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
> + continue;
> + }
> +
> + /* Check if we can receive this packet. Also filter
> packets if
> + * bonding interface is not in promiscuous mode
> (slaves are always
> + * in promiscuous mode). */
> + if (likely(ACTOR_STATE(&mode4->port_list[i],
> COLLECTING)) &&
> + likely(internals->promiscuous_en ||
> + is_same_ether_addr(&bond_mac,
> &hdr->d_addr))) {
> + bufs[num_rx_total++] = pkts[j];
> + } else
> + rte_pktmbuf_free(pkts[j]);
> + }
> + }
> +
> + return num_rx_total;
> +}
> +
> static inline uint16_t
> ether_hash(struct ether_hdr *eth_hdr)
> {
> @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue,
> struct rte_mbuf **bufs,
> }
>
> static uint16_t
> +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> + uint16_t nb_pkts)
> +{
> + struct bond_dev_private *internals;
> + struct mode8023ad_data *mode4;
> + struct bond_tx_queue *bd_tx_q;
> +
> + uint8_t num_of_slaves;
> + uint8_t slaves[RTE_MAX_ETHPORTS];
> + /* possitions in slaves, not ID */
> + uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
> + uint8_t distributing_slaves_count;
> +
> + uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
> + uint16_t i, op_slave_idx;
> +
> + /* Slow packets from 802.3AX state machines. */
> + struct slow_protocol_msg *slow_msg;
> +
> + /* Allocate one additional packet in case 8023AD mode.
> + * First element if not NULL is slow packet. */
> + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
> + /* Total amount of packets in slave_bufs */
> + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
> + /* Array of slow packets placed in each slave */
> + uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
> +
> + bd_tx_q = (struct bond_tx_queue *)queue;
> + internals = bd_tx_q->dev_private;
> + mode4 = &internals->mode4;
> +
> + /* Copy slave list to protect against slave up/down changes during tx
> + * bursting */
> + num_of_slaves = internals->active_slave_count;
> + if (num_of_slaves < 1)
> + return num_tx_total;
> +
> + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
> num_of_slaves);
> +
> + distributing_slaves_count = mode4->distibuting_slaves_count;
> + memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
> + sizeof(slaves[0]) * distributing_slaves_count);
> +
> + for (i = 0; i < num_of_slaves; i++)
> + slave_bufs[i][0] = NULL;
> +
> + /* It is likely that tx ring will be empty. If it is not empty, it is
> + * likely that there will be only one frame. */
> + while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
> + rte_ring_dequeue(mode4->tx_ring, (void
> **)&slow_msg) != -ENOENT) {
> + i = find_slave_by_id(slaves, num_of_slaves, slow_msg-
> >port_id);
> +
> + /* Assign slow packet to slave or drop it if slave is not in
> active list
> + * (ex: link down). */
> + if (likely(i < num_of_slaves)) {
> + /* If there is more than one slow packet to the same
> slave, send
> + * only latest, and drop previouse - tx burst was no
> called quick
> + * enough. */
> + if (slave_bufs[i][0] != NULL)
> + rte_pktmbuf_free(slave_bufs[i][0]);
> +
> + slave_bufs[i][0] = slow_msg->pkt;
> + slave_nb_pkts[i] = 1;
> + slave_slow_packets[i] = 1;
> + } else
> + rte_pktmbuf_free(slow_msg->pkt);
> +
> + rte_ring_enqueue(mode4->free_ring, slow_msg);
> + }
> +
> + if (likely(distributing_slaves_count > 0)) {
> + /* Populate slaves mbuf with the packets which are to be
> sent on it */
> + for (i = 0; i < nb_pkts; i++) {
> + /* Select output slave using hash based on xmit
> policy */
> + op_slave_idx = xmit_slave_hash(bufs[i],
> distributing_slaves_count,
> + internals->balance_xmit_policy);
> +
> + /* Populate slave mbuf arrays with mbufs for that
> slave. Use only
> + * slaves that are currently distributing. */
> + uint8_t slave_offset =
> distributing_offsets[op_slave_idx];
> + uint16_t pkt_pos = slave_nb_pkts[slave_offset];
> + slave_nb_pkts[slave_offset]++;
> +
> + slave_bufs[slave_offset][pkt_pos] = bufs[i];
> + }
> + }
> +
> + /* Send packet burst on each slave device */
> + for (i = 0; i < num_of_slaves; i++) {
> + if (slave_nb_pkts[i] > 0) {
> + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q-
> >queue_id,
> + slave_bufs[i], slave_nb_pkts[i]);
> +
> + /* if tx burst fails move packets to end of bufs */
> + if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
> + uint16_t slave_tx_fail_count =
> slave_nb_pkts[i] - num_tx_slave;
> +
> + /* Free slow packet if it exists and not send.
> */
> + if (slave_slow_packets[i] != 0 &&
> num_tx_slave == 0) {
> + rte_pktmbuf_free(slave_bufs[i][0]);
> + slave_tx_fail_count--;
> + }
> +
> + tx_fail_total += slave_tx_fail_count;
> + memcpy(bufs[nb_pkts - tx_fail_total],
> + slave_bufs[i][num_tx_slave],
> + slave_tx_fail_count);
> + }
> +
> + if (num_tx_slave > 0)
> + num_tx_slave -= slave_slow_packets[i];
> +
> + num_tx_total += num_tx_slave;
> + }
> + }
> +
> + return num_tx_total;
> +}
> +
> +static uint16_t
> bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
> {
> @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link
> *bonded_dev_link,
> }
>
> int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr
> *dst_mac_addr)
> +{
> + struct ether_addr *mac_addr;
> +
> + mac_addr = eth_dev->data->mac_addrs;
> +
> + if (eth_dev == NULL) {
> + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n",
> __func__);
> + return -1;
> + }
> +
> + if (dst_mac_addr == NULL) {
> + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n",
> __func__);
> + return -1;
> + }
> +
> + ether_addr_copy(mac_addr, dst_mac_addr);
> + return 0;
> +}
> +
> +int
> mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr
> *new_mac_addr)
> {
> struct ether_addr *mac_addr;
> @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct
> ether_addr *new_mac_addr)
> mac_addr = eth_dev->data->mac_addrs;
>
> if (eth_dev == NULL) {
> - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
> + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
> return -1;
> }
>
> @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev
> *bonded_eth_dev)
> }
> }
> break;
> + case BONDING_MODE_8023AD:
> + break;
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> for (i = 0; i < internals->slave_count; i++) {
> @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev
> *eth_dev, int mode)
> eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
> eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
> break;
> + case BONDING_MODE_8023AD:
> + if (bond_mode_8023ad_init(eth_dev) != 0)
> + return -1;
> +
> + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
> + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
> + break;
> default:
> return -1;
> }
> @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
> if (internals->user_defined_primary_port)
> bond_ethdev_primary_set(internals, internals-
> >primary_port);
>
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_start(eth_dev);
>
> if (internals->link_status_polling_enabled)
> rte_eal_alarm_set(internals->link_status_polling_interval_ms
> * 1000,
> @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
> {
> struct bond_dev_private *internals = eth_dev->data->dev_private;
>
> + if (internals->mode == BONDING_MODE_8023AD) {
> + struct mode8023ad_data *data = &internals->mode4;
> + struct slow_protocol_msg *msg;
> +
> + bond_mode_8023ad_stop(eth_dev);
> + data->distibuting_slaves_count = 0;
> +
> + /* Discard all messages to/from mode 4 state machines */
> + while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -
> ENOENT) {
> + rte_pktmbuf_free(msg->pkt);
> + rte_ring_enqueue(data->free_ring, msg);
> + }
> +
> + while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -
> ENOENT) {
> + rte_pktmbuf_free(msg->pkt);
> + rte_ring_enqueue(data->free_ring, msg);
> + }
> + }
> +
> internals->active_slave_count = 0;
> internals->link_status_polling_enabled = 0;
>
> @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev
> *dev, uint16_t tx_queue_id,
> 0, dev->pci_dev->numa_node);
>
> if (bd_tx_q == NULL)
> - return -1;
> + return -1;
>
> bd_tx_q->queue_id = tx_queue_id;
> bd_tx_q->dev_private = dev->data->dev_private;
> @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue)
> rte_free(queue);
> }
>
> -
> static void
> bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
> {
> @@ -884,7 +1105,7 @@
> bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
>
> /* If device is currently being configured then don't check slaves link
> * status, wait until next period */
> - if (rte_spinlock_trylock(&internals->lock)){
> + if (rte_spinlock_trylock(&internals->lock)) {
> for (i = 0; i < internals->slave_count; i++) {
> if (internals->slaves[i].link_status_polling_enabled) {
> slave_ethdev = &rte_eth_devices[internals-
> >slaves[i].port_id];
> @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct
> rte_eth_dev *eth_dev)
> for (i = 0; i < internals->slave_count; i++)
> rte_eth_promiscuous_enable(internals-
> >slaves[i].port_id);
> break;
> + /* In mode4 promiscus mode is managed when slave is
> added/removed */
> + case BONDING_MODE_8023AD:
> + break;
> /* Promiscuous mode is propagated only to primary slave */
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> rte_eth_promiscuous_enable(internals-
> >current_primary_port);
> -
> }
> }
>
> @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct
> rte_eth_dev *dev)
> int i;
>
> internals->promiscuous_en = 0;
> -
> +
> switch (internals->mode) {
> /* Promiscuous mode is propagated to all slaves */
> case BONDING_MODE_ROUND_ROBIN:
> @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct
> rte_eth_dev *dev)
> for (i = 0; i < internals->slave_count; i++)
> rte_eth_promiscuous_disable(internals-
> >slaves[i].port_id);
> break;
> + /* In mode4 promiscus mode is set managed when slave is
> added/removed */
> + case BONDING_MODE_8023AD:
> + break;
> /* Promiscuous mode is propagated only to primary slave */
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id,
> enum rte_eth_event_type type,
> struct bond_dev_private *internals;
> struct rte_eth_link link;
>
> - int i, valid_slave = 0, active_pos = -1;
> + int i, valid_slave = 0;
> + uint8_t active_pos;
> uint8_t lsc_flag = 0;
>
> if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
> @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t
> port_id, enum rte_eth_event_type type,
> return;
>
> /* Search for port in active port list */
> - for (i = 0; i < internals->active_slave_count; i++) {
> - if (port_id == internals->active_slaves[i]) {
> - active_pos = i;
> - break;
> - }
> - }
> + active_pos = find_slave_by_id(internals->active_slaves,
> + internals->active_slave_count, port_id);
>
> rte_eth_link_get_nowait(port_id, &link);
> if (link.link_status) {
> - if (active_pos >= 0)
> + if (active_pos < internals->active_slave_count)
> return;
>
> /* if no active slave ports then set this port to be primary
> port */
> @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t
> port_id, enum rte_eth_event_type type,
> link_properties_set(bonded_eth_dev,
> &(slave_eth_dev->data->dev_link));
> }
> - internals->active_slaves[internals->active_slave_count++] =
> port_id;
> +
> + activate_slave(bonded_eth_dev, port_id);
>
> /* If user has defined the primary port then default to using
> it */
> if (internals->user_defined_primary_port &&
> internals->primary_port == port_id)
> bond_ethdev_primary_set(internals, port_id);
> } else {
> - if (active_pos < 0)
> + if (active_pos == internals->active_slave_count)
> return;
>
> /* Remove from active slave list */
> - for (i = active_pos; i < (internals->active_slave_count - 1); i++)
> - internals->active_slaves[i] = internals-
> >active_slaves[i+1];
> -
> - internals->active_slave_count--;
> + deactivate_slave(bonded_eth_dev, active_pos);
>
> /* No active slaves, change link status to down and reset
> other
> * link properties */
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h
> b/lib/librte_pmd_bond/rte_eth_bond_private.h
> index 6db5144..77f7bb0 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_private.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
> @@ -42,6 +42,7 @@ extern "C" {
> #include <rte_spinlock.h>
>
> #include "rte_eth_bond.h"
> +#include "rte_eth_bond_8023ad.h"
>
> #define PMD_BOND_SLAVE_PORT_KVARG ("slave")
> #define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
> @@ -60,6 +61,8 @@ extern "C" {
> #define RTE_BOND_LOG(lvl, msg, ...) \
> RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__,
> ##__VA_ARGS__);
>
> +#define BONDING_MODE_INVALID 0xFF
> +
> extern const char *pmd_bond_init_valid_arguments[];
>
> extern const char *driver_name;
> @@ -89,7 +92,13 @@ struct bond_tx_queue {
> /**< Copy of TX configuration structure for queue */
> };
>
> -
> +/** Persisted Slave Configuration Structure */
> +struct slave_conf {
> + uint8_t port_id;
> + /**< Port Id of slave eth_dev */
> + struct ether_addr mac_addr;
> + /**< Slave eth_dev original MAC address */
> +};
> /** Bonded slave devices structure */
> struct bond_ethdev_slave_ports {
> uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
> @@ -124,7 +133,7 @@ struct bond_dev_private {
> uint8_t user_defined_mac;
> /**< Flag for whether MAC address is user defined or not */
> uint8_t promiscuous_en;
> - /**< Enabled/disable promiscuous mode on slave devices */
> + /**< Enabled/disable promiscuous mode on bonding device */
> uint8_t link_props_set;
> /**< flag to denote if the link properties are set */
>
> @@ -143,6 +152,9 @@ struct bond_dev_private {
> uint8_t slave_count; /**< Number of bonded
> slaves */
> struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
> /**< Arary of bonded slaves details */
> +
> + struct mode8023ad_data mode4;
> + /**< Mode 4 private data */
> };
>
> extern struct eth_dev_ops default_dev_ops;
> @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops;
> int
> valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
>
> +/* Search given slave array to find possition of given id.
> + * Return slave pos or slaves_count if not found. */
> +static inline uint8_t
> +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
> + uint8_t slave_id ) {
> +
> + uint8_t pos;
> + for (pos = 0; pos < slaves_count; pos++) {
> + if (slave_id == slaves[pos])
> + break;
> + }
> +
> + return pos;
> +}
> +
> int
> valid_port_id(uint8_t port_id);
>
> @@ -160,6 +187,14 @@ int
> valid_slave_port_id(uint8_t port_id);
>
> void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t slave_pos );
> +
> +void
> +activate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t port_id );
> +
> +void
> link_properties_set(struct rte_eth_dev *bonded_eth_dev,
> struct rte_eth_link *slave_dev_link);
> void
> @@ -173,6 +208,9 @@ int
> mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr
> *new_mac_addr);
>
> int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr
> *dst_mac_addr);
> +
> +int
> mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
>
> uint8_t
> --
> 1.7.9.5
^ permalink raw reply [flat|nested] 5+ messages in thread
* [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
2014-09-29 13:22 [dpdk-dev] [PATCH v2] bond: Add mode 4 support Pawel Wodkowski
2014-09-29 13:51 ` Jastrzebski, MichalX K
@ 2014-09-30 6:19 ` Pawel Wodkowski
2014-09-30 19:06 ` Neil Horman
1 sibling, 1 reply; 5+ messages in thread
From: Pawel Wodkowski @ 2014-09-30 6:19 UTC (permalink / raw)
To: dev
This patch adds support for mode 4 of link bonding. It depends on Declan Doherty's
patches v3 and the rte alarms patch v2 or above.
The new version handles race issues with setting/cancelling callbacks,
fixes promiscuous mode setting in mode 4 and some other minor errors in the mode 4
implementation.
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
---
lib/librte_ether/rte_ether.h | 1 +
lib/librte_pmd_bond/Makefile | 1 +
lib/librte_pmd_bond/rte_eth_bond.h | 4 +
lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1070 ++++++++++++++++++++++++++++
lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 405 +++++++++++
lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++-
lib/librte_pmd_bond/rte_eth_bond_args.c | 1 +
lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 ++++++-
lib/librte_pmd_bond/rte_eth_bond_private.h | 42 +-
9 files changed, 1821 insertions(+), 46 deletions(-)
create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c
create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h
diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
index 2e08f23..1a3711b 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_ether/rte_ether.h
@@ -293,6 +293,7 @@ struct vlan_hdr {
#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
#ifdef __cplusplus
}
diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
index 953d75e..c2312c2 100644
--- a/lib/librte_pmd_bond/Makefile
+++ b/lib/librte_pmd_bond/Makefile
@@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
#
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
#
diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index 6811c7b..b0223c2 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -75,6 +75,10 @@ extern "C" {
/**< Broadcast (Mode 3).
* In this mode all transmitted packets will be transmitted on all available
* active slaves of the bonded. */
+#define BONDING_MODE_8023AD (4)
+/**< 802.3AD (Mode 4).
+ * In this mode transmission and reception of packets is managed by LACP
+ * protocol specified in 802.3AD documentation. */
/* Balance Mode Transmit Policies */
#define BALANCE_XMIT_POLICY_LAYER2 (0)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
new file mode 100644
index 0000000..de416c6
--- /dev/null
+++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
@@ -0,0 +1,1070 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad.h"
+
+#include <rte_cycles.h>
+
+#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
+#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
+ bond_dbg_get_time_diff_ms(), internals->active_slaves[port_num], \
+ __FUNCTION__, ##__VA_ARGS__)
+
+static unsigned
+bond_dbg_get_time_diff_ms(void)
+{
+ static uint64_t start_time = 0;
+ uint64_t now;
+
+ now = rte_rdtsc();
+ if (start_time == 0)
+ start_time = now;
+
+ return ((now - start_time) * 1000) / rte_get_tsc_hz();
+}
+
+static void
+bond_print_lacp(struct lacpdu *l)
+{
+ char a_address[18];
+ char p_address[18];
+ char a_state[256] = { 0 };
+ char p_state[256] = { 0 };
+
+ static const char *state_labels[] = {
+ "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
+ };
+
+ int a_len = 0;
+ int p_len = 0;
+ uint8_t i;
+ uint8_t *addr;
+
+ addr = l->actor.port_params.system.addr_bytes;
+ snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ addr = l->partner.port_params.system.addr_bytes;
+ snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ for (i = 0; i < 8; i++) {
+ if ((l->actor.state >> i) & 1) {
+ a_len += snprintf(a_state + a_len, sizeof(a_state) - a_len, "%s ",
+ state_labels[i]);
+ }
+
+ if ((l->partner.state >> i) & 1) {
+ p_len += snprintf(p_state + p_len, sizeof(p_state) - p_len, "%s ",
+ state_labels[i]);
+ }
+ }
+
+ if (a_len && a_state[a_len-1] == ' ')
+ a_state[a_len-1] = '\0';
+
+ if (p_len && p_state[p_len-1] == ' ')
+ p_state[p_len-1] = '\0';
+
+ RTE_LOG(DEBUG, PMD, "LACP: {\n"\
+ " subtype= %02X\n"\
+ " ver_num=%02X\n"\
+ " actor={ tlv=%02X, len=%02X\n"\
+ " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
+ " state={ %s }\n"\
+ " }\n"\
+ " partner={ tlv=%02X, len=%02X\n"\
+ " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
+ " state={ %s }\n"\
+ " }\n"\
+ " collector={info=%02X, length=%02X, max_delay=%04X\n, " \
+ "type_term=%02X, terminator_length = %02X}\n",\
+ l->subtype,\
+ l->version_number,\
+ l->actor.tlv_type_info,\
+ l->actor.info_length,\
+ l->actor.port_params.system_priority,\
+ a_address,\
+ l->actor.port_params.key,\
+ l->actor.port_params.port_priority,\
+ l->actor.port_params.port_number,\
+ a_state,\
+ l->partner.tlv_type_info,\
+ l->partner.info_length,\
+ l->partner.port_params.system_priority,\
+ p_address,\
+ l->partner.port_params.key,\
+ l->partner.port_params.port_priority,\
+ l->partner.port_params.port_number,\
+ p_state,\
+ l->tlv_type_collector_info,\
+ l->collector_info_length,\
+ l->collector_max_delay,\
+ l->tlv_type_terminator,\
+ l->terminator_length);
+
+}
+#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
+
+#else
+#define BOND_PRINT_LACP(lacpdu) do { } while (0)
+#define MODE4_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+static const struct ether_addr lacp_mac_addr = {
+ .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
+};
+
+static void
+timer_cancel(uint64_t *timer)
+{
+ *timer = 0;
+}
+
+static void
+timer_set(uint64_t *timer, uint64_t timeout_ms)
+{
+ *timer = rte_rdtsc() + timeout_ms * rte_get_tsc_hz() / 1000;
+}
+
+/* Forces given timer into expired state by storing the current rte_rdtsc() value as its deadline. */
+static void
+timer_force_expired(uint64_t *timer)
+{
+ *timer = rte_rdtsc();
+}
+
+static bool
+timer_is_stopped(uint64_t *timer)
+{
+ return *timer == 0;
+}
+
+static bool
+timer_is_expired(uint64_t *timer)
+{
+ return *timer <= rte_rdtsc();
+}
+
+/* A timer is running when it is neither stopped (see timer_is_stopped) nor expired (see timer_is_expired). */
+static bool
+timer_is_running(uint64_t *timer)
+{
+ return !timer_is_stopped(timer) && !timer_is_expired(timer);
+}
+
+static void
+record_default(struct port *port)
+{
+ /* Record default parameters for the partner. Partner admin parameters
+ * are not implemented, so partner port parameters are left untouched (last
+ * known), partner state becomes STATE_LACP_ACTIVE and actor is marked DEFAULTED. */
+ port->partner_state = STATE_LACP_ACTIVE;
+ ACTOR_STATE_SET(port, DEFAULTED);
+}
+
+/** Function handles rx state machine.
+ *
+ * This function implements Receive State Machine from point 5.4.12 in
+ * 802.1AX documentation. It should be called periodically.
+ * @param internals Bonded device private data (holds mode4.port_list).
+ * @param port_num Index of the handled port in internals->mode4.port_list.
+ * @param lacp LACPDU received on that port, or NULL when none was received.
+ */
+static void
+rx_machine(struct bond_dev_private *internals, uint8_t port_num,
+ struct lacpdu *lacp)
+{
+ struct port *port = &internals->mode4.port_list[port_num];
+
+ if (SM_FLAG(port, BEGIN)) {
+ /* Initialize stuff */
+ MODE4_DEBUG("-> INITIALIZE\n");
+ SM_FLAG_CLR(port, MOVED);
+ port->selected = UNSELECTED;
+
+ record_default(port);
+
+ ACTOR_STATE_CLR(port, EXPIRED);
+ timer_cancel(&port->current_while_timer);
+
+ /* DISABLED: On initialization partner is out of sync */
+ PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+ /* LACP DISABLED stuff if LACP not enabled on this port */
+ if (!SM_FLAG(port, LACP_ENABLED))
+ PARTNER_STATE_CLR(port, AGGREGATION);
+ }
+
+ if (!SM_FLAG(port, LACP_ENABLED)) {
+ /* Update parameters only if state changed */
+ if (!timer_is_stopped(&port->current_while_timer)) {
+ port->selected = UNSELECTED;
+ record_default(port);
+ PARTNER_STATE_CLR(port, AGGREGATION);
+ ACTOR_STATE_CLR(port, EXPIRED);
+ timer_cancel(&port->current_while_timer);
+ }
+ return;
+ }
+
+ if (lacp) {
+ MODE4_DEBUG("LACP -> CURRENT\n");
+ BOND_PRINT_LACP(lacp);
+ /* Update selected flag. If partner parameters are defaulted assume they
+ * match. If not defaulted compare LACP actor with this port's partner
+ * params. */
+ if (!(port->actor_state & STATE_DEFAULTED) &&
+ (((port->partner_state ^ lacp->actor.state) & STATE_AGGREGATION) ||
+ memcmp(&port->partner, &lacp->actor.port_params,
+ sizeof(port->partner)) != 0)) {
+ MODE4_DEBUG("selected <- UNSELECTED\n");
+ port->selected = UNSELECTED;
+ }
+
+ /* Record this PDU actor params as partner params */
+ memcpy(&port->partner, &lacp->actor.port_params,
+ sizeof(struct port_params));
+ port->partner_state = lacp->actor.state;
+
+ /* Partner parameters are not defaulted any more */
+ ACTOR_STATE_CLR(port, DEFAULTED);
+
+ /* Update NTT if partners information are outdated */
+ uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
+ STATE_SYNCHRONIZATION | STATE_AGGREGATION;
+
+ if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
+ memcmp(&port->actor, &lacp->partner.port_params,
+ sizeof(struct port_params)) != 0) {
+ port->sm_flags |= SM_FLAGS_NTT;
+ }
+
+ /* If LACP partner params match this port actor params */
+ if (memcmp(&port->actor, &lacp->partner.port_params,
+ sizeof(port->actor)) == 0 &&
+ (port->partner_state & STATE_AGGREGATION) == (port->actor_state
+ & STATE_AGGREGATION))
+ PARTNER_STATE_SET(port, SYNCHRONIZATION);
+ else if (!(port->partner_state & STATE_AGGREGATION) &&
+ (port->actor_state & STATE_AGGREGATION))
+ PARTNER_STATE_SET(port, SYNCHRONIZATION);
+ else
+ PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+
+ if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
+ timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
+ else
+ timer_set(&port->current_while_timer, BOND_8023AD_LONG_TIMEOUT_MS);
+
+ ACTOR_STATE_CLR(port, EXPIRED);
+ return; /* No state change */
+ }
+
+ /* If CURRENT state timer is not running (stopped or expired)
+ * transit to EXPIRED state from DISABLED or CURRENT */
+ if (!timer_is_running(&port->current_while_timer)) {
+ ACTOR_STATE_SET(port, EXPIRED);
+ PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+ PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
+ timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
+ }
+}
+
+/**
+ * Function handles periodic tx state machine.
+ *
+ * Function implements Periodic Transmission state machine from point 5.4.13
+ * in 802.1AX documentation. It should be called periodically.
+ * @param internals Bonded device private data (holds mode4.port_list).
+ * @param port_num Index of the handled port in internals->mode4.port_list.
+ */
+static void
+periodic_machine(struct bond_dev_private *internals, uint8_t port_num)
+{
+ struct port *port = &internals->mode4.port_list[port_num];
+ /* Calculate if either side is LACP active */
+ uint32_t timeout;
+ uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
+ PARTNER_STATE(port, LACP_ACTIVE);
+
+ uint8_t is_partner_fast, was_partner_fast;
+ /* No periodic tx on BEGIN, LACP disabled or when both sides are passive */
+ if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) ||
+ active == 0) {
+ timer_cancel(&port->periodic_timer);
+ timer_force_expired(&port->tx_machine_timer);
+ SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
+
+ MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
+ SM_FLAG(port, BEGIN) ? "begind " : "",
+ SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
+ active ? "LACP active " : "LACP pasive ");
+ return;
+ }
+
+ is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
+ was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
+
+ /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
+ * Otherwise check whether the timer expired or partner settings changed. */
+ if (!timer_is_stopped(&port->periodic_timer)) {
+ if (timer_is_expired(&port->periodic_timer)) {
+ SM_FLAG_SET(port, NTT);
+ } else if (is_partner_fast != was_partner_fast) {
+ /* Partner's timeout was slow and now it is fast -> send LACP.
+ * In the other case (was fast and now it is slow) just switch
+ * timeout to slow without forcing send of LACP (per the standard),
+ * so NTT is raised only when the partner is fast now. */
+ if (is_partner_fast)
+ SM_FLAG_SET(port, NTT);
+ } else
+ return; /* Nothing changed */
+ }
+
+ /* Handle state transition to FAST/SLOW LACP timeout */
+ if (is_partner_fast) {
+ timeout = BOND_8023AD_FAST_PERIODIC_MS;
+ SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
+ } else {
+ timeout = BOND_8023AD_SLOW_PERIODIC_MS;
+ SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
+ }
+
+ timer_set(&port->periodic_timer, timeout);
+}
+
+/**
+ * Function handles mux state machine.
+ *
+ * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
+ * It should be called periodically.
+ *
+ * The machine state is not stored explicitly; it is derived from the actor
+ * state bits (SYNCHRONIZATION, COLLECTING, DISTRIBUTING) and from
+ * wait_while_timer (stopped = DETACHED, running = WAITING, expired = ready to
+ * attach).
+ *
+ * @param internals Private data of the bonded device.
+ * @param port_num Index of the port in mode4.port_list[].
+ *
+ * @return
+ * Non-zero if this call entered ATTACHED or toggled DISTRIBUTING (NTT is
+ * then also set on the port), 0 otherwise.
+ */
+static int
+mux_machine(struct bond_dev_private *internals, uint8_t port_num)
+{
+ bool ntt = false;
+ struct port *port = &internals->mode4.port_list[port_num];
+
+ /* Actor state bits that are owned by the mux machine */
+ const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+ STATE_COLLECTING;
+
+ /* Enter DETACHED state on BEGIN condition or from any other state if
+ * port was unselected */
+ if (SM_FLAG(port, BEGIN) ||
+ port->selected == UNSELECTED || (port->selected == STANDBY &&
+ (port->actor_state & state_mask) != 0)) {
+ /* detach mux from aggregator not used */
+ port->actor_state &= ~state_mask;
+ /* Set ntt to true if BEGIN condition or transition from any other state
+ * which is indicated that wait_while_timer was started */
+ if (SM_FLAG(port, BEGIN) ||
+ !timer_is_stopped(&port->wait_while_timer)) {
+ SM_FLAG_SET(port, NTT);
+ MODE4_DEBUG("-> DETACHED\n");
+ }
+ timer_cancel(&port->wait_while_timer);
+ }
+
+ /* DETACHED: start waiting as soon as the port is selected or standby */
+ if (timer_is_stopped(&port->wait_while_timer)) {
+ if (port->selected == SELECTED || port->selected == STANDBY) {
+ timer_set(&port->wait_while_timer,
+ BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS);
+
+ MODE4_DEBUG("DETACHED -> WAITING\n");
+ }
+ /* Waiting state entered */
+ return 0;
+ }
+
+ /* Transit next state if port is ready */
+ if (!timer_is_expired(&port->wait_while_timer))
+ return 0;
+
+ /* NOTE(review): the out-of-sync branch below clears DISTRIBUTING without
+ * setting ntt, so this call returns 0; the non-zero return only happens on
+ * the next call, when ATTACHED is re-entered. Confirm this one-cycle delay
+ * is acceptable for callers that use the return value as a "changed"
+ * indication. */
+ if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
+ !PARTNER_STATE(port, SYNCHRONIZATION)) {
+ /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
+ * sync transit to ATTACHED state. */
+ ACTOR_STATE_CLR(port, DISTRIBUTING);
+ ACTOR_STATE_CLR(port, COLLECTING);
+ /* Clear actor sync so the ATTACHED branch below runs on the next call */
+ ACTOR_STATE_CLR(port, SYNCHRONIZATION);
+ MODE4_DEBUG("Out of sync -> ATTACHED\n");
+ } else if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
+ /* attach mux to aggregator */
+ RTE_VERIFY((port->actor_state & (STATE_COLLECTING |
+ STATE_DISTRIBUTING)) == 0);
+ ACTOR_STATE_SET(port, SYNCHRONIZATION);
+ ntt = true;
+ MODE4_DEBUG("ATTACHED Entered\n");
+ } else if (!ACTOR_STATE(port, COLLECTING)) {
+ /* Start collecting if in sync (ntt is not raised for this step) */
+ if (PARTNER_STATE(port, SYNCHRONIZATION)) {
+ MODE4_DEBUG("ATTACHED -> COLLECTING\n");
+ ACTOR_STATE_SET(port, COLLECTING);
+ }
+ } else if (ACTOR_STATE(port, COLLECTING)) {
+ /* Check if partner is in COLLECTING state. If so this port can
+ * distribute frames to it */
+ if (!ACTOR_STATE(port, DISTRIBUTING)) {
+ if (PARTNER_STATE(port, COLLECTING)) {
+ /* Enable DISTRIBUTING if partner is collecting */
+ ACTOR_STATE_SET(port, DISTRIBUTING);
+ ntt = true;
+ MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
+ }
+ } else {
+ if (!PARTNER_STATE(port, COLLECTING)) {
+ /* Disable DISTRIBUTING (enter COLLECTING state) if partner
+ * is not collecting */
+ ACTOR_STATE_CLR(port, DISTRIBUTING);
+ ntt = true;
+ MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
+ }
+ }
+ }
+
+ /* Propagate the local decision to the port so tx_machine sends an LACPDU */
+ if (ntt != false)
+ SM_FLAG_SET(port, NTT);
+
+ return ntt;
+}
+
+/**
+ * Function handles transmit state machine.
+ *
+ * Function implements Transmit Machine from point 5.4.16 in 802.1AX
+ * documentation. When NTT is set and the tx machine timer has expired, it
+ * builds an LACPDU describing the current actor/partner view of the port and
+ * queues it on the mode 4 tx ring.
+ *
+ * On any failure (no free message, no mbuf, tx ring full) nothing is sent and
+ * NTT stays set, so the transmission is retried on the next call.
+ *
+ * @param bond_dev	Bonded device.
+ * @param port_num	Index of the port in mode4.port_list[] (and of the
+ *			corresponding slave in active_slaves[]).
+ */
+static void
+tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct port *port = &internals->mode4.port_list[port_num];
+	struct mode8023ad_data *data = &internals->mode4;
+
+	struct slow_protocol_msg *msg = NULL;
+	struct lacpdu_header *hdr;
+	struct lacpdu *lacpdu;
+
+	/* If periodic timer is not running periodic machine is in NO PERIODIC and
+	 * according to 802.3ax standard tx machine should not transmit any frames
+	 * and set ntt to false. */
+	if (timer_is_stopped(&port->periodic_timer))
+		SM_FLAG_CLR(port, NTT);
+
+	if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer))
+		return;
+
+	/* If all conditions are met construct packet to send.
+	 * rte_ring_dequeue() reports an empty ring with -ENOENT, so treat every
+	 * non-zero result as "no message available"; msg must not be touched. */
+	if (rte_ring_dequeue(data->free_ring, (void **)&msg) != 0) {
+		MODE4_DEBUG("tx_machine: no free_lacpdu_ring\n");
+		return;
+	}
+
+	msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool);
+	if (msg->pkt == NULL) {
+		rte_ring_enqueue(data->free_ring, msg);
+		RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
+		return;
+	}
+
+	msg->port_id = internals->active_slaves[port_num];
+	hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *);
+
+	msg->pkt->data_len = sizeof(*hdr);
+	msg->pkt->pkt_len = sizeof(*hdr);
+	/* Source and destination MAC */
+	ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
+	ether_addr_copy(&port->actor.system, &hdr->eth_hdr.s_addr);
+	hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
+
+	lacpdu = &hdr->lacpdu;
+	memset(lacpdu, 0, sizeof(*lacpdu));
+
+	/* Initialize LACP part */
+	lacpdu->subtype = SUBTYPE_LACP;
+	lacpdu->version_number = 1;
+
+	/* ACTOR */
+	lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
+	lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
+	memcpy(&lacpdu->actor.port_params, &port->actor,
+			sizeof(port->actor));
+	lacpdu->actor.state = port->actor_state;
+
+	/* PARTNER */
+	lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
+	lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
+	memcpy(&lacpdu->partner.port_params, &port->partner,
+			sizeof(struct port_params));
+	lacpdu->partner.state = port->partner_state;
+
+	/* Other fields */
+	lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
+	lacpdu->collector_info_length = 0x10;
+	lacpdu->collector_max_delay = 0;
+
+	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
+	lacpdu->terminator_length = 0;
+
+	if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) {
+		/* If TX ring full, drop packet and free message. Retransmission
+		 * will happen in next function call. */
+		rte_pktmbuf_free(msg->pkt);
+		rte_ring_enqueue(data->free_ring, msg);
+
+		RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring.\n"
+			"Receive and transmit functions must be invoked on bonded interface"
+			" at least 10 times per second or LACP will not work correctly\n");
+		return;
+	}
+
+	MODE4_DEBUG("sending LACP frame\n");
+	BOND_PRINT_LACP(lacpdu);
+
+	SM_FLAG_CLR(port, NTT);
+	/* Add 10% random backoff time to better distribute slow packets
+	 * between tx bursts. */
+	timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS +
+		rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100));
+}
+
+/**
+ * Function assigns port to aggregator.
+ *
+ * A port that is already SELECTED is left untouched. Otherwise the first
+ * aggregator whose actor key and partner identity (system, system priority,
+ * key) match the port's is chosen, provided the partner is known (non-zero
+ * system address) and the aggregator key has the full duplex bit set. If no
+ * aggregator matches, the port becomes its own aggregator. In both cases the
+ * port ends up SELECTED.
+ *
+ * @param internals	Pointer to bond_dev_private structure.
+ * @param port_num	Index in mode4.port_list[] of the port to assign.
+ */
+static void
+selection_logic(struct bond_dev_private *internals, uint8_t port_num)
+{
+	struct mode8023ad_data *data = &internals->mode4;
+	struct port *agg, *port, *port_list;
+	uint8_t ports_count;
+	uint8_t i;
+
+	/* port_list[] is indexed like active_slaves[], so only the first
+	 * active_slave_count entries describe live ports. */
+	ports_count = internals->active_slave_count;
+	port_list = data->port_list;
+	port = &port_list[port_num];
+
+	/* Skip port if it is selected */
+	if (port->selected == SELECTED)
+		return;
+
+	/* Search for aggregator suitable for this port */
+	for (i = 0; i < ports_count; ++i) {
+		agg = &port_list[i];
+		/* Skip ports that are not aggregators: an aggregator is a port
+		 * that is attached to itself. */
+		if (agg->agregator_idx != i)
+			continue;
+
+		/* Actors system ID is not checked since all slave device have the
+		 * same ID (MAC address). */
+		if ((agg->actor.key == port->actor.key &&
+			agg->partner.system_priority == port->partner.system_priority &&
+			is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
+			&& (agg->partner.key == port->partner.key)) &&
+			is_zero_ether_addr(&port->partner.system) != 1 &&
+			(agg->actor.key &
+				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
+
+			port->agregator_idx = i;
+			break;
+		}
+	}
+
+	/* By default, port uses it self as agregator */
+	if (i == ports_count)
+		port->agregator_idx = port_num;
+
+	port->selected = SELECTED;
+
+	/* Arguments follow the "ID=<slave id> pos=<index>" labels of the format */
+	MODE4_DEBUG("-> SELECTED: ID=%3u pos=%3u\n"
+		"\t%s ID=%3u pos=%3u\n",
+		internals->active_slaves[port_num], port_num,
+		port->agregator_idx == port_num ?
+			"agregator not found, using default" : "agregator found",
+		internals->active_slaves[port->agregator_idx],
+		port->agregator_idx);
+}
+
+/**
+ * Helper function which rebuilds the list of distributing slaves.
+ *
+ * Collects the port_list[] indexes of all active ports whose actor state has
+ * DISTRIBUTING set and stores them in distibuting_slaves_offsets[]; the count
+ * is published last.
+ *
+ * @param internals	Private data of the bonded device.
+ */
+static void
+update_mux_slaves(struct bond_dev_private *internals)
+{
+	struct mode8023ad_data *data = &internals->mode4;
+	struct port *port;
+	uint8_t current[RTE_MAX_ETHPORTS];
+	uint8_t count = 0;
+	uint8_t i;
+
+	/* port_list[] runs parallel to active_slaves[]; entries at or beyond
+	 * active_slave_count are unused or stale and must not be reported. */
+	for (i = 0; i < internals->active_slave_count; i++) {
+		port = &data->port_list[i];
+		if (ACTOR_STATE(port, DISTRIBUTING))
+			current[count++] = i;
+	}
+
+	memcpy(data->distibuting_slaves_offsets, current,
+		sizeof(current[0]) * count);
+	data->distibuting_slaves_count = count;
+}
+
+/*
+ * Translates a DPDK link speed constant into the speed part of the LACP key.
+ * Autonegotiation maps to 0 and any speed not listed maps to 0xFFFF.
+ */
+static uint16_t
+link_speed_key(uint16_t speed)
+{
+	switch (speed) {
+	case ETH_LINK_SPEED_AUTONEG:
+		return 0x00;
+	case ETH_LINK_SPEED_10:
+		return BOND_LINK_SPEED_KEY_10M;
+	case ETH_LINK_SPEED_100:
+		return BOND_LINK_SPEED_KEY_100M;
+	case ETH_LINK_SPEED_1000:
+		return BOND_LINK_SPEED_KEY_1000M;
+	case ETH_LINK_SPEED_10G:
+		return BOND_LINK_SPEED_KEY_10G;
+	case ETH_LINK_SPEED_20G:
+		return BOND_LINK_SPEED_KEY_20G;
+	case ETH_LINK_SPEED_40G:
+		return BOND_LINK_SPEED_KEY_40G;
+	default:
+		/* Unknown speed */
+		return 0xFFFF;
+	}
+}
+
+/**
+ * Alarm callback that drives all mode 4 state machines of a bonded device.
+ *
+ * One invocation:
+ * 1. refreshes the actor key (speed/duplex) and system address of every
+ * active port, raising NTT when either changed;
+ * 2. drains the slow protocol frames queued on rx_ring;
+ * 3. for every LACP capable port runs rx, periodic, mux, tx machines and the
+ * selection logic - once per received LACPDU, or once with no LACPDU if
+ * none arrived - and answers Marker information requests;
+ * 4. rebuilds the distributing slaves list if any mux state changed;
+ * 5. releases the messages that were not reused and re-arms itself.
+ *
+ * @param arg Bonded device (struct rte_eth_dev *).
+ */
+static void
+bond_mode_8023ad_periodic_cb(void *arg)
+{
+ struct rte_eth_dev *bond_dev = arg;
+ struct bond_dev_private *internals = bond_dev->data->dev_private;
+ struct mode8023ad_data *data = &internals->mode4;
+
+ struct port *port;
+ struct slow_protocol_frame *slow_hdr;
+ struct rte_eth_link link_info;
+ struct ether_addr slave_addr;
+
+ struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE];
+ uint16_t port_num, j, nb_msgs;
+ /* if not 0 collecting/distributing array needs update */
+ uint16_t slaves_changed = 0;
+ bool machines_invoked;
+
+ /* Update link status on each port */
+ for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
+ uint16_t key;
+
+ rte_eth_link_get(internals->active_slaves[port_num], &link_info);
+ rte_eth_macaddr_get(internals->active_slaves[port_num], &slave_addr);
+
+ /* Key layout: bit 0 is the duplex flag, speed code is shifted above it;
+ * a link that is down gets key 0 */
+ if (link_info.link_status != 0) {
+ key = link_speed_key(link_info.link_speed) << 1;
+ if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
+ key |= BOND_LINK_FULL_DUPLEX_KEY;
+ } else
+ key = 0;
+
+ port = &data->port_list[port_num];
+ /* The key is kept in network byte order inside port parameters */
+ key = rte_cpu_to_be_16(key);
+
+ if (key != port->actor.key) {
+ port->actor.key = key;
+ SM_FLAG_SET(port, NTT);
+ }
+
+ if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
+ SM_FLAG_SET(port, NTT);
+ ether_addr_copy(&slave_addr, &port->actor.system);
+ }
+ }
+
+ /* Take everything currently queued on the rx ring */
+ nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) msgs,
+ BOND_MODE_8023AX_RX_RING_SIZE);
+
+ for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
+ port = &data->port_list[port_num];
+ if ((port->actor.key &
+ rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
+
+ SM_FLAG_SET(port, BEGIN);
+
+ /* LACP is disabled on half duplex or link is down */
+ if (SM_FLAG(port, LACP_ENABLED)) {
+ /* If port was enabled set it to BEGIN state */
+ SM_FLAG_CLR(port, LACP_ENABLED);
+ ACTOR_STATE_CLR(port, DISTRIBUTING);
+ ACTOR_STATE_CLR(port, COLLECTING);
+ slaves_changed++;
+ }
+
+ MODE4_DEBUG("Port %u is not LACP capable!\n",
+ internals->active_slaves[port_num]);
+ /* Skip this port processing */
+ continue;
+ }
+
+ SM_FLAG_SET(port, LACP_ENABLED);
+ machines_invoked = false;
+ /* Find LACP packet */
+ for (j = 0; j < nb_msgs; j++) {
+ /* Skip messages already consumed or received on another slave */
+ if (msgs[j] == NULL || msgs[j]->port_id !=
+ internals->active_slaves[port_num])
+ continue;
+
+ slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt,
+ struct slow_protocol_frame *);
+
+ if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP) {
+ /* This is LACP frame so pass it to rx_machine */
+ struct lacpdu *lacp = (struct lacpdu *)&slow_hdr->slow_protocol;
+ /* Invoke state machines on every active slave port */
+ rx_machine(internals, port_num, lacp);
+ periodic_machine(internals, port_num);
+ slaves_changed += mux_machine(internals, port_num);
+ tx_machine(bond_dev, port_num);
+ selection_logic(internals, port_num);
+
+ machines_invoked = true;
+ } else if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_MARKER) {
+ struct marker *marker;
+
+ marker = (struct marker *) &slow_hdr->slow_protocol;
+ if (marker->tlv_type_marker == MARKER_TLV_TYPE_MARKER_INFO) {
+ /* Reuse received packet to send frame to Marker Responder
+ */
+ marker->tlv_type_marker = MARKER_TLV_TYPE_MARKER_RESP;
+
+ /* Update source MAC, destination MAC is multicast so we
+ * don't update it */
+ mac_address_get(bond_dev, &slow_hdr->eth_hdr.s_addr);
+
+ /* NOTE(review): this log message has no trailing newline,
+ * unlike the other RTE_LOG calls in this file */
+ if (rte_ring_enqueue(data->tx_ring, msgs[j]) == -ENOBUFS) {
+ RTE_LOG(ERR, PMD,
+ "Failed to enqueue packet into tx ring");
+ rte_pktmbuf_free(msgs[j]->pkt);
+ rte_ring_enqueue(data->free_ring, msgs[j]);
+ }
+
+ /* Message was either queued for tx or released above; hide it
+ * from the cleanup loop at the end */
+ msgs[j] = NULL;
+ }
+ }
+ }
+
+ /* No LACPDU for this port: still step the machines so timers advance */
+ if (machines_invoked == false) {
+ rx_machine(internals, port_num, NULL);
+ periodic_machine(internals, port_num);
+ slaves_changed += mux_machine(internals, port_num);
+ tx_machine(bond_dev, port_num);
+ selection_logic(internals, port_num);
+ machines_invoked = true;
+ }
+
+ SM_FLAG_CLR(port, BEGIN);
+ }
+
+ /* Update mux if something changed */
+ if (slaves_changed > 0) {
+ update_mux_slaves(internals);
+ /* The first four offsets are always passed to the format; entries
+ * beyond distibuting_slaves_count are followed by backspaces */
+ MODE4_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n",
+ data->distibuting_slaves_count,
+ data->distibuting_slaves_offsets[0],
+ data->distibuting_slaves_count > 0 ? " " : "\b\b",
+ data->distibuting_slaves_offsets[1],
+ data->distibuting_slaves_count > 1 ? " " : "\b\b",
+ data->distibuting_slaves_offsets[2],
+ data->distibuting_slaves_count > 2 ? " " : "\b\b",
+ data->distibuting_slaves_offsets[3],
+ data->distibuting_slaves_count > 3 ? " " : "\b\b",
+ data->distibuting_slaves_count > 4 ? "..." : "");
+ }
+
+ /* Free packets that were not reused (port_num is only a loop index here) */
+ for (port_num = 0; port_num < nb_msgs; port_num++) {
+ if (msgs[port_num] != NULL) {
+ rte_pktmbuf_free(msgs[port_num]->pkt);
+ rte_ring_enqueue(data->free_ring, msgs[port_num]);
+ }
+ }
+
+ /* Re-arm the alarm; the timeout is converted from ms to us */
+ rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
+ bond_mode_8023ad_periodic_cb, arg);
+}
+
+/**
+ * Initializes the mode 4 port entry of one active slave.
+ *
+ * Resets actor and partner parameters to their defaults, puts the state
+ * machines into BEGIN, makes the port its own aggregator and enables
+ * promiscuous mode on the slave.
+ *
+ * @param bond_dev	Bonded device.
+ * @param slave_idx	Position of the slave in active_slaves[]; the same
+ *			index is used for its entry in mode4.port_list[].
+ */
+static void
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_idx)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+
+	/* Use the entry that belongs to slave_idx. Indexing by
+	 * active_slave_count is only right when appending and made the init
+	 * loop overwrite one unused entry instead of initializing each port. */
+	struct port *port = &data->port_list[slave_idx];
+	struct port_params initial = {
+			.system = { { 0 } },
+			.system_priority = rte_cpu_to_be_16(0xFFFF),
+			.key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
+			.port_priority = rte_cpu_to_be_16(0x00FF),
+			.port_number = 0,
+	};
+
+	uint8_t slave_id = internals->active_slaves[slave_idx];
+
+	memcpy(&port->actor, &initial, sizeof(struct port_params));
+	port->actor.port_number = slave_id_to_port_number(slave_id);
+
+	memcpy(&port->partner, &initial, sizeof(struct port_params));
+
+	/* default states */
+	port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
+	port->partner_state = STATE_LACP_ACTIVE;
+	port->sm_flags = SM_FLAGS_BEGIN;
+
+	/* use this port as agregator */
+	port->agregator_idx = slave_idx;
+
+	rte_eth_promiscuous_enable(slave_id);
+}
+
+/*
+ * Initializes mode 4 data for the slave stored at
+ * active_slaves[active_slave_count], i.e. the one being appended.
+ */
+void
+bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *priv = bond_dev->data->dev_private;
+	uint8_t new_slave_idx = priv->active_slave_count;
+
+	bond_mode_8023ad_activate_slave(bond_dev, new_slave_idx);
+}
+
+/*
+ * Removes the port at slave_pos from the mode 4 port list.
+ *
+ * The state machines are stopped while the list is edited and restarted
+ * afterwards if the bonded device is started. Ports that used the removed
+ * port as aggregator are unselected so that the selection logic picks a new
+ * aggregator for them. port_list[] is compacted, aggregator indexes are
+ * adjusted to the new positions and the distributing list is rebuilt.
+ *
+ * Must be called while active_slave_count still includes the removed slave.
+ *
+ * Returns the result of bond_mode_8023ad_start() if the device is started,
+ * 0 otherwise.
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
+		uint8_t slave_pos)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+	const uint8_t count = internals->active_slave_count;
+	struct port *port;
+	uint8_t i;
+
+	bond_mode_8023ad_stop(bond_dev);
+
+	/* Exclude slave from transmit policy. If this slave is an aggregator
+	 * make all aggregated slaves unselected to force selection logic
+	 * to select suitable aggregator for them */
+	for (i = 0; i < count; i++) {
+		port = &data->port_list[i];
+		if (port->agregator_idx != slave_pos)
+			continue;
+
+		port->selected = UNSELECTED;
+		port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
+				STATE_COLLECTING);
+
+		/* Use default aggregator (the port itself) */
+		port->agregator_idx = i;
+	}
+
+	/* Remove slave port config */
+	if (slave_pos + 1 < count) {
+		memmove(&data->port_list[slave_pos],
+			&data->port_list[slave_pos + 1],
+			sizeof(data->port_list[0]) * (count - slave_pos - 1));
+	}
+
+	if (count > 0) {
+		/* Entries behind slave_pos moved one slot down, so aggregator
+		 * indexes that pointed behind it must follow. */
+		for (i = 0; i + 1 < count; i++) {
+			port = &data->port_list[i];
+			if (port->agregator_idx > slave_pos)
+				port->agregator_idx--;
+		}
+
+		/* Wipe the vacated last slot so no stale state is picked up */
+		memset(&data->port_list[count - 1], 0, sizeof(data->port_list[0]));
+	}
+
+	/* Rebuild the distributing list against the compacted port list */
+	update_mux_slaves(internals);
+
+	if (bond_dev->data->dev_started)
+		return bond_mode_8023ad_start(bond_dev);
+
+	return 0;
+}
+
+/*
+ * Configures mode 4 on the bonded device.
+ *
+ * On the first call creates the LACP mbuf pool, the ring of free
+ * slow_protocol_msg objects and the rx/tx message rings; any allocation
+ * failure there is fatal (rte_panic). On every call resets the distributing
+ * list and initializes the port entry of each active slave.
+ *
+ * Returns 0.
+ */
+int
+bond_mode_8023ad_init(struct rte_eth_dev *bond_dev)
+{
+	struct bond_dev_private *internals = bond_dev->data->dev_private;
+	struct mode8023ad_data *data = &internals->mode4;
+	char mem_name[RTE_ETH_NAME_MAX_LEN];
+	int socket_id = bond_dev->pci_dev->numa_node;
+	/* 16 bit counter: free_cnt below may not fit in 8 bits */
+	uint16_t i;
+
+	if (data->mbuf_pool == NULL) {
+		const uint16_t element_size = sizeof(struct slow_protocol_frame) +
+				sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+
+		snprintf(mem_name, sizeof(mem_name), "%s_POOL", bond_dev->data->name);
+		data->mbuf_pool = rte_mempool_create(mem_name,
+			/* FIXME: How big memory pool should be? If driver will not
+			 * free packets quick enough there will be ENOMEM in tx_machine.
+			 * For now give 512 packets per slave. Hope it will be enough. */
+			(BOND_MODE_8023AX_TX_RING_SIZE + 1) * 512 * RTE_MAX_ETHPORTS,
+			element_size,
+			RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
+			sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
+			NULL, rte_pktmbuf_init, NULL, socket_id, 0);
+
+		/* Any memory allocation failure in initialization is critical because
+		 * resources can't be free, so reinitialization is impossible. */
+		if (data->mbuf_pool == NULL) {
+			RTE_LOG(ERR, PMD, "%s: Failed to create LACP mbuf pool\n",
+				bond_dev->data->name);
+
+			rte_panic("Failed to alocate memory pool ('%s')\n"
+				"for bond device '%s'\n", mem_name, bond_dev->data->name);
+		}
+
+		/* Setup ring for free messages that can be used in RX/TX burst */
+		snprintf(mem_name, sizeof(mem_name), "%s_free", bond_dev->data->name);
+
+		uint16_t free_cnt = BOND_MODE_8023AX_RX_RING_SIZE +
+				BOND_MODE_8023AX_TX_RING_SIZE;
+
+		data->free_ring = rte_ring_create(mem_name, free_cnt, socket_id, 0);
+
+		if (data->free_ring == NULL) {
+			rte_panic("%s: Failed to create slow messages free ring\n",
+				bond_dev->data->name);
+		}
+
+		for (i = 0; i < free_cnt; i++) {
+			struct slow_protocol_msg *msg;
+
+			snprintf(mem_name, sizeof(mem_name), "%s_slow_msg_%u",
+				bond_dev->data->name, (unsigned)i);
+
+			msg = rte_malloc_socket(mem_name, sizeof(*msg), 0, socket_id);
+
+			if (msg == NULL) {
+				rte_panic("%s: Failed to allocate slow message\n",
+					bond_dev->data->name);
+			}
+
+			/* A ring created with size N stores at most N - 1 objects,
+			 * so stop filling once it is full instead of leaking the
+			 * message that did not fit. */
+			if (rte_ring_enqueue(data->free_ring, msg) == -ENOBUFS) {
+				rte_free(msg);
+				break;
+			}
+		}
+
+		/* Setup rings for usage in rx/tx bursts and machines state
+		 * call back */
+		snprintf(mem_name, sizeof(mem_name), "%s_rx", bond_dev->data->name);
+		data->rx_ring = rte_ring_create(mem_name,
+				BOND_MODE_8023AX_RX_RING_SIZE, socket_id, 0);
+
+		if (data->rx_ring == NULL) {
+			rte_panic("%s: Failed to create slow messages rx ring\n",
+				bond_dev->data->name);
+		}
+
+		snprintf(mem_name, sizeof(mem_name), "%s_tx", bond_dev->data->name);
+		data->tx_ring = rte_ring_create(mem_name, BOND_MODE_8023AX_TX_RING_SIZE,
+				socket_id, RING_F_SP_ENQ);
+
+		if (data->tx_ring == NULL) {
+			rte_panic("%s: Failed to create slow messages tx ring\n",
+				bond_dev->data->name);
+		}
+	}
+
+	data->distibuting_slaves_count = 0;
+
+	for (i = 0; i < internals->active_slave_count; i++)
+		bond_mode_8023ad_activate_slave(bond_dev, (uint8_t)i);
+
+	return 0;
+}
+
+/*
+ * Schedules the first run of the mode 4 periodic callback; the timeout is
+ * converted from milliseconds to microseconds. Returns whatever
+ * rte_eal_alarm_set() returns.
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
+{
+	const uint64_t period_us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
+
+	return rte_eal_alarm_set(period_us, &bond_mode_8023ad_periodic_cb,
+			bond_dev);
+}
+
+/*
+ * Cancels the pending mode 4 periodic callback of the given device.
+ * Returns 0 when rte_eal_alarm_cancel() reports a non-zero result,
+ * -ENOENT when it reports 0.
+ */
+int
+bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
+{
+	const int cancelled = rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb,
+			bond_dev);
+
+	return cancelled != 0 ? 0 : -ENOENT;
+}
+
+/*
+ * Hands a received slow protocol frame over to the state machine callback.
+ *
+ * The frame is wrapped in a slow_protocol_msg taken from the free ring and
+ * queued on the rx ring. The function takes ownership of slot_pkt: if no free
+ * message is available or the rx ring is full, the packet is freed here.
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+		uint8_t slave_pos, struct rte_mbuf *slot_pkt)
+{
+	struct mode8023ad_data *data;
+	struct slow_protocol_msg *msg = NULL;
+	int retval;
+
+	data = &internals->mode4;
+
+	/* rte_ring_dequeue() signals an empty ring with -ENOENT, so any non-zero
+	 * result means there is no message and msg must not be dereferenced. */
+	if (unlikely(rte_ring_dequeue(data->free_ring, (void **)&msg) != 0)) {
+		rte_pktmbuf_free(slot_pkt);
+		return;
+	}
+
+	msg->pkt = slot_pkt;
+	msg->port_id = internals->active_slaves[slave_pos];
+
+	retval = rte_ring_enqueue(data->rx_ring, msg);
+	if (unlikely(retval == -ENOBUFS)) {
+		/* If RX ring is full, release the message and drop the packet */
+		rte_ring_enqueue(data->free_ring, msg);
+		rte_pktmbuf_free(slot_pkt);
+	}
+}
diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
new file mode 100644
index 0000000..df250bb
--- /dev/null
+++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
@@ -0,0 +1,405 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_ETH_BOND_8023AD_H_
+#define RTE_ETH_BOND_8023AD_H_
+
+#include <stdint.h>
+
+#include <rte_ether.h>
+#include <rte_byteorder.h>
+#include <rte_spinlock.h>
+
+/* Use the standard boolean type. A private "typedef int bool" together with
+ * true/false macros breaks every file that also includes <stdbool.h>. */
+#include <stdbool.h>
+
+/**
+ * Timeout definitions (5.4.4 in 802.1AX documentation).
+ * All values are in milliseconds.
+ */
+#define BOND_8023AD_FAST_PERIODIC_MS 1000
+#define BOND_8023AD_SLOW_PERIODIC_MS 30000
+#define BOND_8023AD_SHORT_TIMEOUT_MS 3000
+#define BOND_8023AD_LONG_TIMEOUT_MS 90000
+#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000
+#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000
+#define BOND_8023AD_TX_PERIOD_MS 333
+/**
+ * Actor/partner states. Bit masks for struct port::actor_state and
+ * struct port::partner_state.
+ */
+#define STATE_LACP_ACTIVE 0x01
+#define STATE_LACP_SHORT_TIMEOUT 0x02
+#define STATE_AGGREGATION 0x04
+#define STATE_SYNCHRONIZATION 0x08
+#define STATE_COLLECTING 0x10
+#define STATE_DISTRIBUTING 0x20
+/** Partners parameters are defaulted */
+#define STATE_DEFAULTED 0x40
+#define STATE_EXPIRED 0x80
+
+/**
+ * State machine flags. Bit masks for struct port::sm_flags.
+ */
+#define SM_FLAGS_BEGIN 0x0001
+#define SM_FLAGS_LACP_ENABLED 0x0002
+#define SM_FLAGS_ACTOR_CHURN 0x0004
+#define SM_FLAGS_PARTNER_CHURN 0x0008
+#define SM_FLAGS_MOVED 0x0100
+#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200
+#define SM_FLAGS_NTT 0x0400
+
+/** Period of the state machines callback, in milliseconds */
+#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100
+/** Sizes of the rings carrying slow protocol messages */
+#define BOND_MODE_8023AX_RX_RING_SIZE (2 * RTE_MAX_ETHPORTS)
+#define BOND_MODE_8023AX_TX_RING_SIZE (2 * RTE_MAX_ETHPORTS)
+
+/** Duplex bit of the port key (host byte order, before conversion) */
+#define BOND_LINK_FULL_DUPLEX_KEY 0x01
+/** Speed codes of the port key; they are shifted left by one bit when the
+ * key is built, so that they do not overlap the duplex bit */
+#define BOND_LINK_SPEED_KEY_10M 0x02
+#define BOND_LINK_SPEED_KEY_100M 0x04
+#define BOND_LINK_SPEED_KEY_1000M 0x08
+#define BOND_LINK_SPEED_KEY_10G 0x10
+#define BOND_LINK_SPEED_KEY_20G 0x11
+#define BOND_LINK_SPEED_KEY_40G 0x12
+
+/** Value of lacpdu::subtype in transmitted LACPDUs */
+#define SUBTYPE_LACP 0x01
+
+/** TLV type values used in LACPDU */
+#define TLV_TYPE_ACTOR_INFORMATION 0x01
+#define TLV_TYPE_PARTNER_INFORMATION 0x02
+#define TLV_TYPE_COLLECTOR_INFORMATION 0x03
+#define TLV_TYPE_TERMINATOR_INFORMATION 0x00
+
+/* Generic bit mask helpers: test, set and clear _flags in _variable */
+#define CHECK_FLAGS(_variable, _flags) ((_variable) & (_flags))
+#define SET_FLAGS(_variable, _flags) ((_variable) |= (_flags))
+#define CLEAR_FLAGS(_variable, _flags) ((_variable) &= ~(_flags))
+
+/* State machine flags of a port; "flag" is the SM_FLAGS_ name without prefix.
+ * SM_FLAG() evaluates to 0 or 1. */
+#define SM_FLAG(port, flag) (!!CHECK_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag))
+#define SM_FLAG_SET(port, flag) SET_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)
+#define SM_FLAG_CLR(port, flag) CLEAR_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)
+
+/* Actor state bits of a port; "flag" is the STATE_ name without prefix.
+ * ACTOR_STATE() evaluates to 0 or 1. */
+#define ACTOR_STATE(port, flag) (!!CHECK_FLAGS((port)->actor_state, STATE_ ## flag))
+#define ACTOR_STATE_SET(port, flag) SET_FLAGS((port)->actor_state, STATE_ ## flag)
+#define ACTOR_STATE_CLR(port, flag) CLEAR_FLAGS((port)->actor_state, STATE_ ## flag)
+
+/* Partner state bits of a port; "flag" is the STATE_ name without prefix.
+ * PARTNER_STATE() evaluates to 0 or 1. */
+#define PARTNER_STATE(port, flag) (!!CHECK_FLAGS((port)->partner_state, STATE_ ## flag))
+#define PARTNER_STATE_SET(port, flag) SET_FLAGS((port)->partner_state, STATE_ ## flag)
+#define PARTNER_STATE_CLR(port, flag) CLEAR_FLAGS((port)->partner_state, STATE_ ## flag)
+
+/** Slow protocol LACP frame subtype */
+#define SLOW_SUBTYPE_LACP 0x01
+
+/** Slow protocol marker frame subtype */
+#define SLOW_SUBTYPE_MARKER 0x02
+
+/** Marker type info request */
+#define MARKER_TLV_TYPE_MARKER_INFO 0x01
+
+/** Marker type info response */
+#define MARKER_TLV_TYPE_MARKER_RESP 0x02
+
+/**
+ * Generic slow protocol structure. Only the subtype is interpreted here; the
+ * remaining bytes are accessed by casting to struct lacpdu or struct marker
+ * once the subtype is known.
+ */
+struct slow_protocol {
+ uint8_t subtype;
+ uint8_t reserved_119[119];
+} __attribute__((__packed__));
+
+/** Generic slow protocol frame type structure: ethernet header followed by
+ * the slow protocol payload */
+struct slow_protocol_frame {
+ struct ether_hdr eth_hdr;
+ struct slow_protocol slow_protocol;
+} __attribute__((__packed__));
+
+/**
+ * Port parameters of an actor or a partner. The structure is copied verbatim
+ * into LACPDUs, so multi-byte fields are kept in network byte order.
+ */
+struct port_params {
+ uint16_t system_priority;
+ /**< System priority (unused in current implementation) */
+ struct ether_addr system;
+ /**< System ID - Slave MAC address, same as bonding MAC address */
+ uint16_t key;
+ /**< Speed information (implementation dependent) and duplex. */
+ uint16_t port_priority;
+ /**< Priority of this (unused in current implementation) */
+ uint16_t port_number;
+ /**< Port number. It corresponds to slave port id (slave id + 1). */
+} __attribute__((__packed__));
+
+/** Actor or Partner information TLV of an LACPDU */
+struct lacpdu_actor_partner_params {
+ uint8_t tlv_type_info; /**< TLV_TYPE_ACTOR_INFORMATION or
+ * TLV_TYPE_PARTNER_INFORMATION */
+ uint8_t info_length; /**< Length of this TLV in bytes */
+ struct port_params port_params;
+ uint8_t state; /**< Bit mask of STATE_* flags */
+ uint8_t reserved_3[3];
+} __attribute__((__packed__));
+
+/** LACPDU structure (5.4.2 in 802.1AX documentation), without the ethernet
+ * header. */
+struct lacpdu {
+ uint8_t subtype; /**< Slow protocol subtype */
+ uint8_t version_number;
+
+ struct lacpdu_actor_partner_params actor;
+ struct lacpdu_actor_partner_params partner;
+
+ /* Collector information TLV */
+ uint8_t tlv_type_collector_info;
+ uint8_t collector_info_length;
+ uint16_t collector_max_delay;
+ uint8_t reserved_12[12];
+
+ /* Terminator TLV */
+ uint8_t tlv_type_terminator;
+ uint8_t terminator_length;
+ uint8_t reserved_50[50];
+} __attribute__((__packed__));
+
+/** LACPDU frame: Contains ethernet header and LACPDU. This is the layout of
+ * the frames built for transmission. */
+struct lacpdu_header {
+ struct ether_hdr eth_hdr;
+ struct lacpdu lacpdu;
+} __attribute__((__packed__));
+
+/** Marker PDU, without the ethernet header */
+struct marker {
+ uint8_t subtype; /**< Slow protocol subtype */
+ uint8_t version_number;
+
+ /* Marker information / response TLV */
+ uint8_t tlv_type_marker; /**< MARKER_TLV_TYPE_MARKER_INFO or
+ * MARKER_TLV_TYPE_MARKER_RESP */
+ uint8_t info_length;
+ uint16_t requester_port;
+ struct ether_addr requester_system;
+ uint32_t requester_transaction_id;
+ uint8_t reserved_2[2];
+
+ /* Terminator TLV */
+ uint8_t tlv_type_terminator;
+ uint8_t terminator_length;
+ uint8_t reserved_90[90];
+} __attribute__((__packed__));
+
+/** Marker frame: Contains ethernet header and Marker PDU. */
+struct marker_header {
+ struct ether_hdr eth_hdr;
+ struct marker marker;
+} __attribute__((__packed__));
+
+/** Variables associated with the system (5.4.5 in 802.1AX documentation).
+ * NOTE(review): no user of this structure is visible in this part of the
+ * patch - confirm it is still needed. */
+struct system {
+ struct ether_addr actor_system;
+ /**< The MAC address component of the System Identifier of the System */
+ uint16_t actor_system_priority;
+ /**< The System Priority of the System */
+};
+
+/** Selection status of a port, as used by the selection logic and the mux
+ * machine. UNSELECTED is 0, so a zeroed port is unselected. */
+enum selection {
+ UNSELECTED,
+ STANDBY,
+ SELECTED
+};
+
+/** Variables associated with each port (5.4.7 in 802.1AX documentation). */
+struct port {
+ /**
+ * The operational values of the Actor's state parameters. Bitmask
+ * of port states.
+ */
+ uint8_t actor_state;
+
+ /** The operational Actor's port parameters */
+ struct port_params actor;
+
+ /**
+ * The operational value of the Actor's view of the current values of
+ * the Partner's state parameters. The Actor sets this variable either
+ * to the value received from the Partner in an LACPDU, or to the value
+ * of Partner_Admin_Port_State. Bitmask of port states.
+ */
+ uint8_t partner_state;
+
+ /** The operational Partner's port parameters */
+ struct port_params partner;
+
+ /* Additional port parameters not listed in documentation */
+ /** State machine flags (bitmask of SM_FLAGS_*) */
+ uint16_t sm_flags;
+ /** Selection status of the port */
+ enum selection selected;
+
+ /* Timers of the individual state machines; they are only accessed through
+ * the timer_*() helpers */
+ uint64_t current_while_timer;
+ uint64_t periodic_timer;
+ uint64_t wait_while_timer;
+ uint64_t tx_machine_timer;
+ /* Aggregator parameters */
+ /**
+ * Index in mode8023ad_data::port_list[] of Aggregator
+ * the port is currently attached to. A port whose index equals its own
+ * position is an aggregator itself.
+ */
+ uint16_t agregator_idx;
+};
+
+
+/**
+ * Struct used to communicate with 8023ad logic. Carries one slow protocol
+ * frame between the burst functions and the state machines callback.
+ */
+struct slow_protocol_msg {
+ struct rte_mbuf *pkt; /**< The slow protocol frame */
+ uint8_t port_id; /**< Id of the slave the frame belongs to */
+};
+
+/** Data specific to mode 802.1AX */
+struct mode8023ad_data {
+ /** Memory pool from which mbufs for generated LACP frames are allocated */
+ struct rte_mempool *mbuf_pool;
+
+ /** Ring containing free slow_protocol_msg objects. Used to avoid
+ * allocating/freeing memory in RX/TX bursts */
+ struct rte_ring *free_ring;
+
+ /** Ring of struct slow_protocol_msg from RX burst function */
+ struct rte_ring *rx_ring;
+
+ /** Ring of struct slow_protocol_msg queued by the state machines for
+ * transmission (presumably drained by the TX burst function - confirm) */
+ struct rte_ring *tx_ring;
+
+ /** List of ports in mode 802.1AX; entry i describes active_slaves[i] */
+ struct port port_list[RTE_MAX_ETHPORTS];
+
+ /** List of offsets in active slaves array used to transmit packets. */
+ uint8_t distibuting_slaves_offsets[RTE_MAX_ETHPORTS];
+ /** Number of valid entries in distibuting_slaves_offsets[] */
+ uint8_t distibuting_slaves_count;
+};
+
+/* Forward declaration */
+struct bond_dev_private;
+
+/**
+ * Configures 802.1AX mode and all active slaves on bonded interface.
+ * Failure to allocate the internal pool or rings is fatal (rte_panic).
+ *
+ * @param dev Bonded interface
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+bond_mode_8023ad_init(struct rte_eth_dev *dev);
+
+/**
+ * Deconfigures 802.1AX mode of the bonded interface and slaves.
+ *
+ * NOTE(review): no definition of this function is visible in this part of
+ * the patch - confirm it is implemented.
+ *
+ * @param dev Bonded interface
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int bond_mode_8023ad_disable(struct rte_eth_dev *dev);
+
+/**
+ * Starts 802.3AX state machines management logic by scheduling the periodic
+ * callback with rte_eal_alarm_set().
+ * @param dev Bonded interface
+ * @return
+ * Result of rte_eal_alarm_set(): 0 if the callback was scheduled,
+ * negative value otherwise.
+ */
+int
+bond_mode_8023ad_start(struct rte_eth_dev *dev);
+
+/**
+ * Stops 802.3AX state machines management logic by cancelling the periodic
+ * callback.
+ * @param dev Bonded interface
+ * @return
+ * 0 if this call stopped state machines, -ENOENT if alarm was not set.
+ */
+int
+bond_mode_8023ad_stop(struct rte_eth_dev *dev);
+
+/**
+ * Passes given slow packet to state machines management logic.
+ * The function takes ownership of the packet; it is freed if it cannot be
+ * queued.
+ * @param internals Bonded device private data.
+ * @param slave_pos Position in active slaves array on which this packet was received.
+ * @param slot_pkt Slow packet
+ */
+void
+bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
+ uint8_t slave_pos, struct rte_mbuf *slot_pkt);
+
+/**
+ * Appends and initializes slave active_slaves[active_slave_count] to use with
+ * 802.1AX mode.
+ *
+ * @pre active_slaves[active_slave_count] must contain valid slave id.
+ * @post active_slave_count must be incremented by the caller.
+ *
+ * @param dev Bonded interface.
+ */
+void
+bond_mode_8023ad_slave_append(struct rte_eth_dev *dev);
+
+/**
+ * Deinitializes and removes given slave from 802.1AX mode.
+ *
+ * @pre active_slaves[slave_pos] must contain valid slave id corresponding to
+ * slave initialized in 802.1AX mode.
+ * @post active_slaves[slave_pos] must be removed by the caller.
+ *
+ * @param dev Bonded interface.
+ * @param slave_pos Position of slave in active_slaves array
+ *
+ * @return
+ * 0 on success, negative value otherwise.
+ *
+ */
+int
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
+
+/**
+ * Maps an 802.3ad port number back to the slave id it was derived from.
+ *
+ * @param port_number The 8023ad port number, in network byte order.
+ * @return corresponding slave id
+ */
+static inline uint8_t
+port_number_to_slave_id(uint16_t port_number)
+{
+	/* Port numbers start at 1 (the standard forbids 0) while slave ids
+	 * start at 0, hence the offset of one. */
+	const uint16_t host_order = rte_be_to_cpu_16(port_number);
+
+	return host_order - 1;
+}
+
+/**
+ * Maps a slave id to the port number used for it in mode 8023ad.
+ *
+ * @param slave_id Id of slave to convert.
+ * @return corresponding Port number in network byte order.
+ */
+static inline uint16_t
+slave_id_to_port_number(uint8_t slave_id)
+{
+	/* The standard requires port numbers greater than 0, so slave id 0
+	 * becomes port number 1 and so on. */
+	return rte_cpu_to_be_16((uint16_t)(slave_id + 1));
+}
+
+#endif /* RTE_ETH_BOND_8023AD_H_ */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
index c690ceb..c547164 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <string.h>
+
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
@@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id)
return 0;
}
+/*
+ * Append port_id to the bonded device's active slave list. In 802.3AD mode
+ * the new slave is also initialized for mode 4 before the active slave count
+ * is bumped.
+ */
+void
+activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+	const uint8_t slot = priv->active_slave_count;
+
+	/* Store the id first: bond_mode_8023ad_slave_append() expects it at
+	 * active_slaves[active_slave_count]. */
+	priv->active_slaves[slot] = port_id;
+
+	if (priv->mode == BONDING_MODE_8023AD) {
+		bond_mode_8023ad_slave_append(eth_dev);
+	}
+
+	priv->active_slave_count = slot + 1;
+}
+
+/*
+ * Remove the slave at position slave_pos from the bonded device's active
+ * slave list and close the gap it leaves. In 802.3AD mode the slave is first
+ * deactivated in mode 4, while it is still present in the list.
+ */
+void
+deactivate_slave(struct rte_eth_dev *eth_dev,
+		uint8_t slave_pos)
+{
+	struct bond_dev_private *priv = eth_dev->data->dev_private;
+	uint8_t remaining = priv->active_slave_count;
+
+	if (priv->mode == BONDING_MODE_8023AD)
+		bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
+
+	remaining--;
+
+	/* Only a slave that was not the last entry leaves a hole to close. */
+	if (slave_pos < remaining) {
+		uint8_t *hole = &priv->active_slaves[slave_pos];
+
+		memmove(hole, hole + 1,
+				(remaining - slave_pos) * sizeof(*hole));
+	}
+
+	priv->active_slave_count = remaining;
+}
+
uint8_t
number_of_sockets(void)
{
@@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
eth_dev->dev_ops = &default_dev_ops;
eth_dev->pci_dev = pci_dev;
- if (bond_ethdev_mode_set(eth_dev, mode)) {
- RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
- eth_dev->data->port_id, mode);
- goto err;
- }
-
+ internals->port_id = eth_dev->data->port_id;
+ internals->mode = BONDING_MODE_INVALID;
internals->current_primary_port = 0;
internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
internals->user_defined_mac = 0;
@@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
memset(internals->slaves, 0, sizeof(internals->slaves));
+ if (bond_ethdev_mode_set(eth_dev, mode)) {
+ RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
+ eth_dev->data->port_id, mode);
+ goto err;
+ }
+
return eth_dev->data->port_id;
err:
@@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
rte_eth_link_get_nowait(slave_port_id, &link_props);
if (link_props.link_status == 1)
- internals->active_slaves[internals->active_slave_count++] =
- slave_port_id;
+ activate_slave(bonded_eth_dev, slave_port_id);
}
return 0;
}
-
int
rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
{
@@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
return retval;
}
-
static int
__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
{
+ struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
- int i, slave_idx = -1;
+ int i, slave_idx;
if (valid_slave_port_id(slave_port_id) != 0)
return -1;
- internals = rte_eth_devices[bonded_port_id].data->dev_private;
+ bonded_eth_dev = &rte_eth_devices[bonded_port_id];
+ internals = bonded_eth_dev->data->dev_private;
/* first remove from active slave list */
- for (i = 0; i < internals->active_slave_count; i++) {
- if (internals->active_slaves[i] == slave_port_id)
- slave_idx = i;
+ slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count,
+ slave_port_id);
- /* shift active slaves up active array list */
- if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
- internals->active_slaves[i] = internals->active_slaves[i+1];
- }
-
- if (slave_idx >= 0)
- internals->active_slave_count--;
+ if (slave_idx < internals->active_slave_count)
+ deactivate_slave(bonded_eth_dev, slave_idx);
slave_idx = -1;
/* now find in slave list */
@@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
return internals->current_primary_port;
}
+
int
rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
{
@@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
return internals->balance_xmit_policy;
}
-
int
rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
{
@@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
return internals->link_down_delay_ms;
}
-
int
rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index bbbc69b..a0be0e6 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
case BONDING_MODE_ACTIVE_BACKUP:
case BONDING_MODE_BALANCE:
case BONDING_MODE_BROADCAST:
+ case BONDING_MODE_8023AD:
return 0;
default:
RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 6d0fb1b..13630d9 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -44,6 +44,7 @@
#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
+#include "rte_eth_bond_8023ad.h"
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
@@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue,
bufs, nb_pkts);
}
+/**
+ * RX burst function for 802.3AD (mode 4).
+ *
+ * Polls the active slaves in order until nb_pkts packets were collected.
+ * Slow protocol frames are handed to bond_mode_8023ad_handle_slow_pkt().
+ * Any other frame is returned to the caller only if the receiving port is
+ * in COLLECTING state and either the bonded device is in promiscuous mode
+ * or the frame is addressed to the bonded device's MAC; otherwise it is
+ * freed.
+ *
+ * @param queue		Bonded device RX queue (struct bond_rx_queue *).
+ * @param bufs		Array to store received packets in.
+ * @param nb_pkts	Capacity of bufs.
+ *
+ * @return
+ *   Number of packets stored in bufs (never more than nb_pkts).
+ */
+static uint16_t
+bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	/* Cast to structure, containing bonded device's port id and queue id */
+	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
+	struct bond_dev_private *internals = bd_rx_q->dev_private;
+	struct mode8023ad_data *mode4 = &internals->mode4;
+	struct ether_addr bond_mac;
+
+	struct ether_hdr *hdr;
+	/* Scratch array; the extra element keeps the VLA non-empty when
+	 * nb_pkts is 0. */
+	struct rte_mbuf *pkts[nb_pkts + 1];
+
+	uint16_t num_rx_slave = 0; /* Number of packet received on current slave */
+	uint16_t num_rx_total = 0; /* Total number of received packets */
+
+	uint8_t i;
+	/* Must be as wide as num_rx_slave, otherwise the inner loop never
+	 * terminates for bursts of 256 packets or more. */
+	uint16_t j;
+
+	rte_eth_macaddr_get(internals->port_id, &bond_mac);
+
+	for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) {
+		/* Never ask for more packets than there is room left in bufs:
+		 * if none of them is a slow packet, all of them are stored. */
+		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
+				bd_rx_q->queue_id, pkts, nb_pkts - num_rx_total);
+
+		/* Separate slow protocol packets from other packets */
+		for (j = 0; j < num_rx_slave; j++) {
+			hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
+
+			uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type);
+			if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
+				bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
+				continue;
+			}
+
+			/* Check if we can receive this packet. Also filter packets if
+			 * bonding interface is not in promiscuous mode (slaves are always
+			 * in promiscuous mode). */
+			if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) &&
+					likely(internals->promiscuous_en ||
+					is_same_ether_addr(&bond_mac, &hdr->d_addr))) {
+				bufs[num_rx_total++] = pkts[j];
+			} else
+				rte_pktmbuf_free(pkts[j]);
+		}
+	}
+
+	return num_rx_total;
+}
+
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
@@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
}
+/**
+ * TX burst function for 802.3AD (mode 4).
+ *
+ * Slow protocol frames queued on mode4->tx_ring are placed at position 0 of
+ * the buffer of the active slave they are addressed to. Data packets from
+ * bufs are spread over the distributing slaves using the xmit policy hash.
+ * Data packets a slave failed to transmit are moved to the end of bufs;
+ * an unsent slow packet is freed.
+ *
+ * @param queue		Bonded device TX queue (struct bond_tx_queue *).
+ * @param bufs		Packets to transmit.
+ * @param nb_pkts	Number of packets in bufs.
+ *
+ * @return
+ *   Number of packets from bufs that were transmitted (slow packets are
+ *   not counted).
+ */
static uint16_t
+bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
+		uint16_t nb_pkts)
+{
+	struct bond_dev_private *internals;
+	struct mode8023ad_data *mode4;
+	struct bond_tx_queue *bd_tx_q;
+
+	uint8_t num_of_slaves;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+	/* positions in slaves, not ID */
+	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
+	uint8_t distributing_slaves_count;
+
+	uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
+	uint16_t i, op_slave_idx;
+
+	/* Slow packets from 802.3AX state machines. */
+	struct slow_protocol_msg *slow_msg;
+
+	/* Allocate one additional packet in case 8023AD mode.
+	 * First element if not NULL is slow packet. */
+	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
+	/* Total amount of packets in slave_bufs */
+	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
+	/* Array of slow packets placed in each slave */
+	uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
+
+	bd_tx_q = (struct bond_tx_queue *)queue;
+	internals = bd_tx_q->dev_private;
+	mode4 = &internals->mode4;
+
+	/* Copy slave list to protect against slave up/down changes during tx
+	 * bursting */
+	num_of_slaves = internals->active_slave_count;
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
+
+	distributing_slaves_count = mode4->distibuting_slaves_count;
+	memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
+			sizeof(distributing_offsets[0]) * distributing_slaves_count);
+
+	for (i = 0; i < num_of_slaves; i++)
+		slave_bufs[i][0] = NULL;
+
+	/* It is likely that tx ring will be empty. If it is not empty, it is
+	 * likely that there will be only one frame. */
+	while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
+			rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) {
+		i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id);
+
+		/* Assign slow packet to slave or drop it if slave is not in active list
+		 * (ex: link down). */
+		if (likely(i < num_of_slaves)) {
+			/* If there is more than one slow packet to the same slave, send
+			 * only the latest and drop the previous one - tx burst was not
+			 * called quickly enough. */
+			if (slave_bufs[i][0] != NULL)
+				rte_pktmbuf_free(slave_bufs[i][0]);
+
+			slave_bufs[i][0] = slow_msg->pkt;
+			slave_nb_pkts[i] = 1;
+			slave_slow_packets[i] = 1;
+		} else
+			rte_pktmbuf_free(slow_msg->pkt);
+
+		rte_ring_enqueue(mode4->free_ring, slow_msg);
+	}
+
+	if (likely(distributing_slaves_count > 0)) {
+		/* Populate slaves mbuf with the packets which are to be sent on it */
+		for (i = 0; i < nb_pkts; i++) {
+			/* Select output slave using hash based on xmit policy */
+			op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count,
+					internals->balance_xmit_policy);
+
+			/* Populate slave mbuf arrays with mbufs for that slave. Use only
+			 * slaves that are currently distributing. */
+			uint8_t slave_offset = distributing_offsets[op_slave_idx];
+			uint16_t pkt_pos = slave_nb_pkts[slave_offset];
+			slave_nb_pkts[slave_offset]++;
+
+			slave_bufs[slave_offset][pkt_pos] = bufs[i];
+		}
+	}
+
+	/* Send packet burst on each slave device */
+	for (i = 0; i < num_of_slaves; i++) {
+		if (slave_nb_pkts[i] > 0) {
+			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+					slave_bufs[i], slave_nb_pkts[i]);
+
+			/* if tx burst fails move packets to end of bufs */
+			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
+				uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
+				/* Index of the first unsent packet to hand back */
+				uint16_t first_failed = num_tx_slave;
+
+				/* Free slow packet if it exists and was not sent. It must
+				 * not be handed back to the caller, so skip over it. */
+				if (slave_slow_packets[i] != 0 && num_tx_slave == 0) {
+					rte_pktmbuf_free(slave_bufs[i][0]);
+					slave_tx_fail_count--;
+					first_failed = 1;
+				}
+
+				/* Copy the mbuf pointers (not the mbufs themselves) of
+				 * the unsent packets to the tail of bufs. */
+				tx_fail_total += slave_tx_fail_count;
+				memcpy(&bufs[nb_pkts - tx_fail_total],
+						&slave_bufs[i][first_failed],
+						slave_tx_fail_count * sizeof(bufs[0]));
+			}
+
+			/* The slow packet, if sent, is not one of the caller's packets */
+			if (num_tx_slave > 0)
+				num_tx_slave -= slave_slow_packets[i];
+
+			num_tx_total += num_tx_slave;
+		}
+	}
+
+	return num_tx_total;
+}
+
+static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts)
{
@@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link,
}
+/*
+ * Copy the first MAC address of eth_dev into dst_mac_addr.
+ *
+ * Returns 0 on success, -1 if eth_dev or dst_mac_addr is NULL.
+ */
int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
+{
+	/* Validate both pointers before eth_dev is dereferenced. */
+	if (eth_dev == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
+		return -1;
+	}
+
+	if (dst_mac_addr == NULL) {
+		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
+		return -1;
+	}
+
+	ether_addr_copy(eth_dev->data->mac_addrs, dst_mac_addr);
+	return 0;
+}
+
+int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
struct ether_addr *mac_addr;
@@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
mac_addr = eth_dev->data->mac_addrs;
if (eth_dev == NULL) {
- RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
+ RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
return -1;
}
@@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
}
}
break;
+ case BONDING_MODE_8023AD:
+ break;
case BONDING_MODE_ACTIVE_BACKUP:
default:
for (i = 0; i < internals->slave_count; i++) {
@@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
break;
+ case BONDING_MODE_8023AD:
+ if (bond_mode_8023ad_init(eth_dev) != 0)
+ return -1;
+
+ eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
+ eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
+ break;
default:
return -1;
}
@@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
if (internals->user_defined_primary_port)
bond_ethdev_primary_set(internals, internals->primary_port);
+ if (internals->mode == BONDING_MODE_8023AD)
+ bond_mode_8023ad_start(eth_dev);
if (internals->link_status_polling_enabled)
rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
@@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
struct bond_dev_private *internals = eth_dev->data->dev_private;
+ if (internals->mode == BONDING_MODE_8023AD) {
+ struct mode8023ad_data *data = &internals->mode4;
+ struct slow_protocol_msg *msg;
+
+ bond_mode_8023ad_stop(eth_dev);
+ data->distibuting_slaves_count = 0;
+
+ /* Discard all messages to/from mode 4 state machines */
+ while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) {
+ rte_pktmbuf_free(msg->pkt);
+ rte_ring_enqueue(data->free_ring, msg);
+ }
+
+ while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) {
+ rte_pktmbuf_free(msg->pkt);
+ rte_ring_enqueue(data->free_ring, msg);
+ }
+ }
+
internals->active_slave_count = 0;
internals->link_status_polling_enabled = 0;
@@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
0, dev->pci_dev->numa_node);
if (bd_tx_q == NULL)
- return -1;
+ return -1;
bd_tx_q->queue_id = tx_queue_id;
bd_tx_q->dev_private = dev->data->dev_private;
@@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue)
rte_free(queue);
}
-
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
{
@@ -884,7 +1105,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
/* If device is currently being configured then don't check slaves link
* status, wait until next period */
- if (rte_spinlock_trylock(&internals->lock)){
+ if (rte_spinlock_trylock(&internals->lock)) {
for (i = 0; i < internals->slave_count; i++) {
if (internals->slaves[i].link_status_polling_enabled) {
slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
@@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
for (i = 0; i < internals->slave_count; i++)
rte_eth_promiscuous_enable(internals->slaves[i].port_id);
break;
+ /* In mode 4 promiscuous mode is managed when a slave is added/removed */
+ case BONDING_MODE_8023AD:
+ break;
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
default:
rte_eth_promiscuous_enable(internals->current_primary_port);
-
}
}
@@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
int i;
internals->promiscuous_en = 0;
-
+
switch (internals->mode) {
/* Promiscuous mode is propagated to all slaves */
case BONDING_MODE_ROUND_ROBIN:
@@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
for (i = 0; i < internals->slave_count; i++)
rte_eth_promiscuous_disable(internals->slaves[i].port_id);
break;
+ /* In mode 4 promiscuous mode is managed when a slave is added/removed */
+ case BONDING_MODE_8023AD:
+ break;
/* Promiscuous mode is propagated only to primary slave */
case BONDING_MODE_ACTIVE_BACKUP:
default:
@@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
struct bond_dev_private *internals;
struct rte_eth_link link;
- int i, valid_slave = 0, active_pos = -1;
+ int i, valid_slave = 0;
+ uint8_t active_pos;
uint8_t lsc_flag = 0;
if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
@@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
return;
/* Search for port in active port list */
- for (i = 0; i < internals->active_slave_count; i++) {
- if (port_id == internals->active_slaves[i]) {
- active_pos = i;
- break;
- }
- }
+ active_pos = find_slave_by_id(internals->active_slaves,
+ internals->active_slave_count, port_id);
rte_eth_link_get_nowait(port_id, &link);
if (link.link_status) {
- if (active_pos >= 0)
+ if (active_pos < internals->active_slave_count)
return;
/* if no active slave ports then set this port to be primary port */
@@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
link_properties_set(bonded_eth_dev,
&(slave_eth_dev->data->dev_link));
}
- internals->active_slaves[internals->active_slave_count++] = port_id;
+
+ activate_slave(bonded_eth_dev, port_id);
/* If user has defined the primary port then default to using it */
if (internals->user_defined_primary_port &&
internals->primary_port == port_id)
bond_ethdev_primary_set(internals, port_id);
} else {
- if (active_pos < 0)
+ if (active_pos == internals->active_slave_count)
return;
/* Remove from active slave list */
- for (i = active_pos; i < (internals->active_slave_count - 1); i++)
- internals->active_slaves[i] = internals->active_slaves[i+1];
-
- internals->active_slave_count--;
+ deactivate_slave(bonded_eth_dev, active_pos);
/* No active slaves, change link status to down and reset other
* link properties */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 6db5144..77f7bb0 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -42,6 +42,7 @@ extern "C" {
#include <rte_spinlock.h>
#include "rte_eth_bond.h"
+#include "rte_eth_bond_8023ad.h"
#define PMD_BOND_SLAVE_PORT_KVARG ("slave")
#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
@@ -60,6 +61,8 @@ extern "C" {
#define RTE_BOND_LOG(lvl, msg, ...) \
RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__);
+#define BONDING_MODE_INVALID 0xFF
+
extern const char *pmd_bond_init_valid_arguments[];
extern const char *driver_name;
@@ -89,7 +92,13 @@ struct bond_tx_queue {
/**< Copy of TX configuration structure for queue */
};
-
+/** Persisted slave configuration: a slave's port id together with the MAC
+ * address it originally had.
+ * NOTE(review): presumably kept so the original address can be restored
+ * later - confirm against the users of this structure. */
+struct slave_conf {
+ uint8_t port_id;
+ /**< Port Id of slave eth_dev */
+ struct ether_addr mac_addr;
+ /**< Slave eth_dev original MAC address */
+};
/** Bonded slave devices structure */
struct bond_ethdev_slave_ports {
uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
@@ -124,7 +133,7 @@ struct bond_dev_private {
uint8_t user_defined_mac;
/**< Flag for whether MAC address is user defined or not */
uint8_t promiscuous_en;
- /**< Enabled/disable promiscuous mode on slave devices */
+ /**< Enabled/disable promiscuous mode on bonding device */
uint8_t link_props_set;
/**< flag to denote if the link properties are set */
@@ -143,6 +152,9 @@ struct bond_dev_private {
uint8_t slave_count; /**< Number of bonded slaves */
struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
/**< Arary of bonded slaves details */
+
+ struct mode8023ad_data mode4;
+ /**< Mode 4 private data */
};
extern struct eth_dev_ops default_dev_ops;
@@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops;
int
valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
+/* Linear search of the first slaves_count entries of slaves for slave_id.
+ * Returns the position of the first match, or slaves_count if there is none. */
+static inline uint8_t
+find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
+		uint8_t slave_id)
+{
+	uint8_t idx = 0;
+
+	while (idx < slaves_count && slaves[idx] != slave_id)
+		idx++;
+
+	return idx;
+}
+
int
valid_port_id(uint8_t port_id);
@@ -160,6 +187,14 @@ int
valid_slave_port_id(uint8_t port_id);
void
+deactivate_slave(struct rte_eth_dev *eth_dev,
+ uint8_t slave_pos );
+
+void
+activate_slave(struct rte_eth_dev *eth_dev,
+ uint8_t port_id );
+
+void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
struct rte_eth_link *slave_dev_link);
void
@@ -173,6 +208,9 @@ int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
int
+mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
+
+int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
uint8_t
--
1.7.9.5
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
2014-09-29 13:51 ` Jastrzebski, MichalX K
@ 2014-09-30 11:17 ` Wodkowski, PawelX
0 siblings, 0 replies; 5+ messages in thread
From: Wodkowski, PawelX @ 2014-09-30 11:17 UTC (permalink / raw)
To: Jastrzebski, MichalX K, dev
Fixed patch version sent.
Pawel
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
2014-09-30 6:19 ` Pawel Wodkowski
@ 2014-09-30 19:06 ` Neil Horman
0 siblings, 0 replies; 5+ messages in thread
From: Neil Horman @ 2014-09-30 19:06 UTC (permalink / raw)
To: Pawel Wodkowski; +Cc: dev
On Tue, Sep 30, 2014 at 07:19:33AM +0100, Pawel Wodkowski wrote:
> This patch adds support for mode 4 of link bonding. It depends on Declan Doherty's
> patches v3 and rte alarms patch v2 or above.
>
> New version handles race issues with setting/cancelling callbacks,
> fixes promiscuous mode setting in mode 4 and some other minor errors in mode 4
> implementation.
>
>
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Looks good.
Acked-by: Neil Horman <nhorman@tuxdriver.com>
> ---
> lib/librte_ether/rte_ether.h | 1 +
> lib/librte_pmd_bond/Makefile | 1 +
> lib/librte_pmd_bond/rte_eth_bond.h | 4 +
> lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1070 ++++++++++++++++++++++++++++
> lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 405 +++++++++++
> lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++-
> lib/librte_pmd_bond/rte_eth_bond_args.c | 1 +
> lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 ++++++-
> lib/librte_pmd_bond/rte_eth_bond_private.h | 42 +-
> 9 files changed, 1821 insertions(+), 46 deletions(-)
> create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h
>
> diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
> index 2e08f23..1a3711b 100644
> --- a/lib/librte_ether/rte_ether.h
> +++ b/lib/librte_ether/rte_ether.h
> @@ -293,6 +293,7 @@ struct vlan_hdr {
> #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
> #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
> #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
> +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
>
> #ifdef __cplusplus
> }
> diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
> index 953d75e..c2312c2 100644
> --- a/lib/librte_pmd_bond/Makefile
> +++ b/lib/librte_pmd_bond/Makefile
> @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
> #
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
>
> #
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
> index 6811c7b..b0223c2 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,10 @@ extern "C" {
> /**< Broadcast (Mode 3).
> * In this mode all transmitted packets will be transmitted on all available
> * active slaves of the bonded. */
> +#define BONDING_MODE_8023AD (4)
> +/**< 802.3AD (Mode 4).
> + * In this mode transmission and reception of packets is managed by LACP
> + * protocol specified in 802.3AD documentation. */
>
> /* Balance Mode Transmit Policies */
> #define BALANCE_XMIT_POLICY_LAYER2 (0)
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> new file mode 100644
> index 0000000..de416c6
> --- /dev/null
> +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c
> @@ -0,0 +1,1070 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stddef.h>
> +#include <string.h>
> +
> +#include <rte_alarm.h>
> +#include <rte_malloc.h>
> +#include <rte_errno.h>
> +
> +#include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
> +
> +#include <rte_cycles.h>
> +
> +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
> +#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
> + bond_dbg_get_time_diff_ms(), internals->active_slaves[port_num], \
> + __FUNCTION__, ##__VA_ARGS__)
> +
> +/* Debug helper: milliseconds elapsed since the first call of this function,
> + * computed from the TSC. The first call returns 0. */
> +static unsigned
> +bond_dbg_get_time_diff_ms(void)
> +{
> +	static uint64_t first_call_tsc = 0;
> +	const uint64_t tsc = rte_rdtsc();
> +
> +	/* The first call latches the reference point. */
> +	if (first_call_tsc == 0)
> +		first_call_tsc = tsc;
> +
> +	return ((tsc - first_call_tsc) * 1000) / rte_get_tsc_hz();
> +}
> +
> +/* Debug helper: logs the content of LACPDU l at DEBUG level. Actor and
> + * partner system addresses are printed as colon separated hex bytes and
> + * their state bytes as a list of flag labels.
> + * NOTE(review): the multi-byte fields are printed exactly as stored in the
> + * PDU, without byte order conversion - confirm this is intended. */
> +static void
> +bond_print_lacp(struct lacpdu *l)
> +{
> + /* 18 bytes: "XX:XX:XX:XX:XX:XX" plus terminating NUL */
> + char a_address[18];
> + char p_address[18];
> + char a_state[256] = { 0 };
> + char p_state[256] = { 0 };
> +
> + /* state_labels[i] is the label printed when bit i of a state byte is set */
> + static const char *state_labels[] = {
> + "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
> + };
> +
> + int a_len = 0;
> + int p_len = 0;
> + uint8_t i;
> + uint8_t *addr;
> +
> + addr = l->actor.port_params.system.addr_bytes;
> + snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
> + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
> +
> + addr = l->partner.port_params.system.addr_bytes;
> + snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
> + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
> +
> + /* Append the label of every set state bit, each followed by a space */
> + for (i = 0; i < 8; i++) {
> + if ((l->actor.state >> i) & 1) {
> + a_len += snprintf(a_state + a_len, sizeof(a_state) - a_len, "%s ",
> + state_labels[i]);
> + }
> +
> + if ((l->partner.state >> i) & 1) {
> + p_len += snprintf(p_state + p_len, sizeof(p_state) - p_len, "%s ",
> + state_labels[i]);
> + }
> + }
> +
> + /* Strip the trailing space left behind by the last label */
> + if (a_len && a_state[a_len-1] == ' ')
> + a_state[a_len-1] = '\0';
> +
> + if (p_len && p_state[p_len-1] == ' ')
> + p_state[p_len-1] = '\0';
> +
> + RTE_LOG(DEBUG, PMD, "LACP: {\n"\
> + " subtype= %02X\n"\
> + " ver_num=%02X\n"\
> + " actor={ tlv=%02X, len=%02X\n"\
> + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
> + " state={ %s }\n"\
> + " }\n"\
> + " partner={ tlv=%02X, len=%02X\n"\
> + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
> + " state={ %s }\n"\
> + " }\n"\
> + " collector={info=%02X, length=%02X, max_delay=%04X\n, " \
> + "type_term=%02X, terminator_length = %02X}\n",\
> + l->subtype,\
> + l->version_number,\
> + l->actor.tlv_type_info,\
> + l->actor.info_length,\
> + l->actor.port_params.system_priority,\
> + a_address,\
> + l->actor.port_params.key,\
> + l->actor.port_params.port_priority,\
> + l->actor.port_params.port_number,\
> + a_state,\
> + l->partner.tlv_type_info,\
> + l->partner.info_length,\
> + l->partner.port_params.system_priority,\
> + p_address,\
> + l->partner.port_params.key,\
> + l->partner.port_params.port_priority,\
> + l->partner.port_params.port_number,\
> + p_state,\
> + l->tlv_type_collector_info,\
> + l->collector_info_length,\
> + l->collector_max_delay,\
> + l->tlv_type_terminator,\
> + l->terminator_length);
> +
> +}
> +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
> +
> +#else
> +#define BOND_PRINT_LACP(lacpdu) do { } while (0)
> +#define MODE4_DEBUG(fmt, ...) do { } while (0)
> +#endif
> +
> +/* MAC address 01:80:C2:00:00:02.
> + * NOTE(review): presumably the destination address used for LACP frames
> + * (the IEEE Slow Protocols multicast address); its use is outside this
> + * excerpt - confirm. */
> +static const struct ether_addr lacp_mac_addr = {
> + .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
> +};
> +
> +/* Stops the timer by storing 0, the value timer_is_stopped() tests for. */
> +static void
> +timer_cancel(uint64_t *timer)
> +{
> + *timer = 0;
> +}
> +
> +/* Arms the timer to expire timeout_ms milliseconds from now: stores the
> + * current TSC value plus the timeout converted to TSC cycles. */
> +static void
> +timer_set(uint64_t *timer, uint64_t timeout_ms)
> +{
> + *timer = rte_rdtsc() + timeout_ms * rte_get_tsc_hz() / 1000;
> +}
> +
> +/* Forces given timer to be in expired state by storing the current TSC
> + * value, which timer_is_expired() treats as already reached. */
> +static void
> +timer_force_expired(uint64_t *timer)
> +{
> + *timer = rte_rdtsc();
> +}
> +
> +/* Returns true when the timer holds 0, the value written by timer_cancel(). */
> +static bool
> +timer_is_stopped(uint64_t *timer)
> +{
> + return *timer == 0;
> +}
> +
> +/* Returns true when the stored deadline is not later than the current TSC
> + * value. A stopped timer (value 0) satisfies this test as well, so callers
> + * that need to tell the two apart must also check timer_is_stopped(). */
> +static bool
> +timer_is_expired(uint64_t *timer)
> +{
> + return *timer <= rte_rdtsc();
> +}
> +
> +/* A timer counts as running only while it is armed (not stopped) and its
> + * deadline has not been reached yet. */
> +static bool
> +timer_is_running(uint64_t *timer)
> +{
> +	if (timer_is_stopped(timer))
> +		return false;
> +
> +	return !timer_is_expired(timer);
> +}
> +
> +static void
> +record_default(struct port *port)
> +{
> + /* Record default parameters for the partner. Partner admin parameters
> + * are not implemented, so the partner state is reset to LACP_ACTIVE only
> + * and the actor is marked as using defaulted partner information. */
> + port->partner_state = STATE_LACP_ACTIVE;
> + ACTOR_STATE_SET(port, DEFAULTED);
> +}
> +
> +/** Function handles rx state machine.
> + *
> + * This function implements Receive State Machine from point 5.4.12 in
> + * 802.1AX documentation. It should be called periodically.
> + *
> + * @param internals Bonded device private data holding the mode 4 port list.
> + * @param port_num Index of the port in internals->mode4.port_list.
> + * @param lacp LACPDU received for this port, or NULL if none was received.
> + */
> +static void
> +rx_machine(struct bond_dev_private *internals, uint8_t port_num,
> + struct lacpdu *lacp)
> +{
> + struct port *port = &internals->mode4.port_list[port_num];
> +
> + if (SM_FLAG(port, BEGIN)) {
> + /* INITIALIZE: reset selection, partner information and the
> + * current_while timer */
> + MODE4_DEBUG("-> INITIALIZE\n");
> + SM_FLAG_CLR(port, MOVED);
> + port->selected = UNSELECTED;
> +
> + record_default(port);
> +
> + ACTOR_STATE_CLR(port, EXPIRED);
> + timer_cancel(&port->current_while_timer);
> +
> + /* DISABLED: On initialization partner is out of sync */
> + PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +
> + /* LACP DISABLED stuff if LACP not enabled on this port */
> + if (!SM_FLAG(port, LACP_ENABLED))
> + PARTNER_STATE_CLR(port, AGGREGATION);
> + }
> +
> + if (!SM_FLAG(port, LACP_ENABLED)) {
> + /* Update parameters only if state changed. A non-stopped
> + * current_while timer is used as the "was enabled before" marker. */
> + if (!timer_is_stopped(&port->current_while_timer)) {
> + port->selected = UNSELECTED;
> + record_default(port);
> + PARTNER_STATE_CLR(port, AGGREGATION);
> + ACTOR_STATE_CLR(port, EXPIRED);
> + timer_cancel(&port->current_while_timer);
> + }
> + return;
> + }
> +
> + if (lacp) {
> + MODE4_DEBUG("LACP -> CURRENT\n");
> + BOND_PRINT_LACP(lacp);
> + /* Update selected flag. If partner parameters are defaulted assume
> + * they match. If not defaulted compare the LACPDU actor information
> + * with the partner parameters stored for this port. */
> + if (!(port->actor_state & STATE_DEFAULTED) &&
> + (((port->partner_state ^ lacp->actor.state) & STATE_AGGREGATION) ||
> + memcmp(&port->partner, &lacp->actor.port_params,
> + sizeof(port->partner)) != 0)) {
> + MODE4_DEBUG("selected <- UNSELECTED\n");
> + port->selected = UNSELECTED;
> + }
> +
> + /* Record this PDU actor params as partner params */
> + memcpy(&port->partner, &lacp->actor.port_params,
> + sizeof(struct port_params));
> + port->partner_state = lacp->actor.state;
> +
> + /* Partner parameters are not defaulted any more */
> + ACTOR_STATE_CLR(port, DEFAULTED);
> +
> + /* Request a transmission (NTT) if the partner's view of this actor
> + * is outdated */
> + uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
> + STATE_SYNCHRONIZATION | STATE_AGGREGATION;
> +
> + if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
> + memcmp(&port->actor, &lacp->partner.port_params,
> + sizeof(struct port_params)) != 0) {
> + port->sm_flags |= SM_FLAGS_NTT;
> + }
> +
> + /* Partner is in sync if the partner information in the LACPDU matches
> + * this port's actor parameters and both sides agree on AGGREGATION,
> + * or if the partner is individual while this actor is aggregatable */
> + if (memcmp(&port->actor, &lacp->partner.port_params,
> + sizeof(port->actor)) == 0 &&
> + (port->partner_state & STATE_AGGREGATION) == (port->actor_state
> + & STATE_AGGREGATION))
> + PARTNER_STATE_SET(port, SYNCHRONIZATION);
> + else if (!(port->partner_state & STATE_AGGREGATION) &&
> + (port->actor_state & STATE_AGGREGATION))
> + PARTNER_STATE_SET(port, SYNCHRONIZATION);
> + else
> + PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> +
> + /* Restart current_while timer using the actor's timeout setting */
> + if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
> + timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
> + else
> + timer_set(&port->current_while_timer, BOND_8023AD_LONG_TIMEOUT_MS);
> +
> + ACTOR_STATE_CLR(port, EXPIRED);
> + return; /* No state change */
> + }
> +
> + /* If CURRENT state timer is not running (stopped or expired)
> + * transit to EXPIRED state from DISABLED or CURRENT */
> + if (!timer_is_running(&port->current_while_timer)) {
> + ACTOR_STATE_SET(port, EXPIRED);
> + PARTNER_STATE_CLR(port, SYNCHRONIZATION);
> + PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
> + timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS);
> + }
> +}
> +
> +/**
> + * Function handles periodic tx state machine.
> + *
> + * Function implements Periodic Transmission state machine from point 5.4.13
> + * in 802.1AX documentation. It should be called periodically.
> + *
> + * @param internals	Bonded device private data holding the mode 4 port list.
> + * @param port_num	Index of the port in internals->mode4.port_list.
> + */
> +static void
> +periodic_machine(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +	struct port *port = &internals->mode4.port_list[port_num];
> +	uint32_t timeout;
> +	/* Calculate if either side is LACP active */
> +	uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
> +		PARTNER_STATE(port, LACP_ACTIVE);
> +
> +	uint8_t is_partner_fast, was_partner_fast;
> +
> +	/* No periodic on BEGIN, LACP disabled or when both sides are passive */
> +	if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) ||
> +			active == 0) {
> +		timer_cancel(&port->periodic_timer);
> +		timer_force_expired(&port->tx_machine_timer);
> +		SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
> +
> +		MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
> +			SM_FLAG(port, BEGIN) ? "begind " : "",
> +			SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
> +			active ? "LACP active " : "LACP pasive ");
> +		return;
> +	}
> +
> +	/* Current partner timeout setting versus the one recorded on the
> +	 * previous FAST/SLOW transition */
> +	is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
> +	was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
> +
> +	/* If periodic timer is not started, transit from NO PERIODIC to
> +	 * FAST/SLOW. Otherwise check if the timer expired or the partner's
> +	 * timeout setting changed. */
> +	if (!timer_is_stopped(&port->periodic_timer)) {
> +		if (timer_is_expired(&port->periodic_timer)) {
> +			SM_FLAG_SET(port, NTT);
> +		} else if (is_partner_fast != was_partner_fast) {
> +			/* Partner's timeout was slow and now it is fast -> send
> +			 * LACP. In the other case (was fast and now it is slow)
> +			 * just switch the timeout to slow without forcing a
> +			 * transmission (because the standard says so). */
> +			if (is_partner_fast)
> +				SM_FLAG_SET(port, NTT);
> +		} else
> +			return; /* Nothing changed */
> +	}
> +
> +	/* Handle state transition to FAST/SLOW LACP timeout */
> +	if (is_partner_fast) {
> +		timeout = BOND_8023AD_FAST_PERIODIC_MS;
> +		SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
> +	} else {
> +		timeout = BOND_8023AD_SLOW_PERIODIC_MS;
> +		SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
> +	}
> +
> +	timer_set(&port->periodic_timer, timeout);
> +}
> +
> +/**
> + * Function handles mux state machine.
> + *
> + * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
> + * It should be called periodically.
> + *
> + * @param internals Bonded device private data holding the mode 4 port list.
> + * @param port_num Index of the port in internals->mode4.port_list.
> + *
> + * @return
> + * 1 if a transition handled after the wait_while timer expired raised NTT
> + * (ATTACHED entered, DISTRIBUTING enabled or disabled), 0 otherwise. The
> + * DETACHED path sets the NTT flag itself but still returns 0.
> + */
> +static int
> +mux_machine(struct bond_dev_private *internals, uint8_t port_num)
> +{
> + bool ntt = false;
> + struct port *port = &internals->mode4.port_list[port_num];
> +
> + /* Actor state bits that are owned by the mux machine */
> + const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
> + STATE_COLLECTING;
> +
> + /* Enter DETACHED state on BEGIN condition or from any other state if
> + * port was unselected */
> + if (SM_FLAG(port, BEGIN) ||
> + port->selected == UNSELECTED || (port->selected == STANDBY &&
> + (port->actor_state & state_mask) != 0)) {
> + /* detach mux from aggregator not used */
> + port->actor_state &= ~state_mask;
> + /* Set ntt to true if BEGIN condition or transition from any other state
> + * which is indicated that wait_while_timer was started */
> + if (SM_FLAG(port, BEGIN) ||
> + !timer_is_stopped(&port->wait_while_timer)) {
> + SM_FLAG_SET(port, NTT);
> + MODE4_DEBUG("-> DETACHED\n");
> + }
> + timer_cancel(&port->wait_while_timer);
> + }
> +
> + /* A stopped wait_while timer means the port is in DETACHED state */
> + if (timer_is_stopped(&port->wait_while_timer)) {
> + if (port->selected == SELECTED || port->selected == STANDBY) {
> + timer_set(&port->wait_while_timer,
> + BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS);
> +
> + MODE4_DEBUG("DETACHED -> WAITING\n");
> + }
> + /* Waiting state entered */
> + return 0;
> + }
> +
> + /* Transit next state if port is ready */
> + if (!timer_is_expired(&port->wait_while_timer))
> + return 0;
> +
> + if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
> + !PARTNER_STATE(port, SYNCHRONIZATION)) {
> + /* If in COLLECTING or DISTRIBUTING state and partner becomes out of
> + * sync transit to ATTACHED state. */
> + ACTOR_STATE_CLR(port, DISTRIBUTING);
> + ACTOR_STATE_CLR(port, COLLECTING);
> + /* Clear actor sync so that a later call takes the ATTACHED branch
> + * below */
> + ACTOR_STATE_CLR(port, SYNCHRONIZATION);
> + MODE4_DEBUG("Out of sync -> ATTACHED\n");
> + } else if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
> + /* attach mux to aggregator */
> + RTE_VERIFY((port->actor_state & (STATE_COLLECTING |
> + STATE_DISTRIBUTING)) == 0);
> + ACTOR_STATE_SET(port, SYNCHRONIZATION);
> + ntt = true;
> + MODE4_DEBUG("ATTACHED Entered\n");
> + } else if (!ACTOR_STATE(port, COLLECTING)) {
> + /* Start collecting if in sync */
> + if (PARTNER_STATE(port, SYNCHRONIZATION)) {
> + MODE4_DEBUG("ATTACHED -> COLLECTING\n");
> + ACTOR_STATE_SET(port, COLLECTING);
> + }
> + } else if (ACTOR_STATE(port, COLLECTING)) {
> + /* Check if partner is in COLLECTING state. If so this port can
> + * distribute frames to it */
> + if (!ACTOR_STATE(port, DISTRIBUTING)) {
> + if (PARTNER_STATE(port, COLLECTING)) {
> + /* Enable DISTRIBUTING if partner is collecting */
> + ACTOR_STATE_SET(port, DISTRIBUTING);
> + ntt = true;
> + MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
> + }
> + } else {
> + if (!PARTNER_STATE(port, COLLECTING)) {
> + /* Disable DISTRIBUTING (enter COLLECTING state) if partner
> + * is not collecting */
> + ACTOR_STATE_CLR(port, DISTRIBUTING);
> + ntt = true;
> + MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
> + }
> + }
> + }
> +
> + if (ntt != false)
> + SM_FLAG_SET(port, NTT);
> +
> + return ntt;
> +}
> +
> +/**
> + * Function handles transmit state machine.
> + *
> + * Function implements Transmit Machine from point 5.4.16 in 802.1AX
> + * documentation. When NTT is set and the tx timer has expired, an LACPDU is
> + * built from the port's actor/partner information and queued on the mode 4
> + * tx ring.
> + *
> + * @param bond_dev	Bonded ethernet device.
> + * @param port_num	Index of the port in the mode 4 port list.
> + */
> +static void
> +tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct port *port = &internals->mode4.port_list[port_num];
> +	struct mode8023ad_data *data = &internals->mode4;
> +
> +	struct slow_protocol_msg *msg = NULL;
> +	struct lacpdu_header *hdr;
> +	struct lacpdu *lacpdu;
> +
> +	/* If periodic timer is not running periodic machine is in NO PERIODIC and
> +	 * according to 802.3ax standard tx machine should not transmit any frames
> +	 * and set ntt to false. */
> +	if (timer_is_stopped(&port->periodic_timer))
> +		SM_FLAG_CLR(port, NTT);
> +
> +	if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer))
> +		return;
> +
> +	/* If all conditions are met construct packet to send. An empty ring is
> +	 * reported by rte_ring_dequeue() as -ENOENT, so any non-zero result must
> +	 * be treated as "no message available" to avoid using a NULL msg. */
> +	if (rte_ring_dequeue(data->free_ring, (void **)&msg) != 0) {
> +		MODE4_DEBUG("tx_machine: no free_lacpdu_ring\n");
> +		return;
> +	}
> +
> +	msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool);
> +	if (msg->pkt == NULL) {
> +		rte_ring_enqueue(data->free_ring, msg);
> +		RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
> +		return;
> +	}
> +
> +	msg->port_id = internals->active_slaves[port_num];
> +	hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *);
> +
> +	msg->pkt->data_len = sizeof(*hdr);
> +	msg->pkt->pkt_len = sizeof(*hdr);
> +	/* Source and destination MAC */
> +	ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
> +	ether_addr_copy(&port->actor.system, &hdr->eth_hdr.s_addr);
> +	hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
> +
> +	lacpdu = &hdr->lacpdu;
> +	memset(lacpdu, 0, sizeof(*lacpdu));
> +
> +	/* Initialize LACP part */
> +	lacpdu->subtype = SUBTYPE_LACP;
> +	lacpdu->version_number = 1;
> +
> +	/* ACTOR */
> +	lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
> +	lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
> +	memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
> +			sizeof(port->actor));
> +	lacpdu->actor.state = port->actor_state;
> +
> +	/* PARTNER */
> +	lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
> +	lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
> +	memcpy(&lacpdu->partner.port_params, &port->partner,
> +			sizeof(struct port_params));
> +	lacpdu->partner.state = port->partner_state;
> +
> +	/* Other fields */
> +	lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
> +	lacpdu->collector_info_length = 0x10;
> +	lacpdu->collector_max_delay = 0;
> +
> +	lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
> +	lacpdu->terminator_length = 0;
> +
> +	if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) {
> +		/* If TX ring full, drop packet and free message. Retransmission
> +		 * will happen in next function call (NTT stays set). */
> +		rte_pktmbuf_free(msg->pkt);
> +		rte_ring_enqueue(data->free_ring, msg);
> +
> +		RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring.\n"
> +			"Receive and transmit functions must be invoked on bonded interface"
> +			" at least 10 times per second or LACP will not work correctly\n");
> +		return;
> +	}
> +
> +	MODE4_DEBUG("sending LACP frame\n");
> +	BOND_PRINT_LACP(lacpdu);
> +
> +	SM_FLAG_CLR(port, NTT);
> +	/* Add 10% random backoff time to better distribute slow packets
> +	 * between tx bursts. */
> +	timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS +
> +			rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100));
> +}
> +
> +/**
> + * Function assigns port to aggregator.
> + *
> + * A port that is already SELECTED is left untouched. Otherwise the first
> + * aggregator whose actor key and partner identity match the port is chosen;
> + * if none matches the port becomes its own aggregator.
> + *
> + * @param internals	Bonded device private data holding the mode 4 port list.
> + * @param port_num	Index (in the mode 4 port list) of the port to assign.
> + */
> +static void
> +selection_logic(struct bond_dev_private *internals, uint8_t port_num)
> +{
> +	struct mode8023ad_data *data = &internals->mode4;
> +	struct port *agg, *port, *port_list;
> +	uint8_t ports_count;
> +	uint8_t i;
> +
> +	/* port_list entries are set up per active slave and are indexed the same
> +	 * way as active_slaves[], so only the active entries are searched. */
> +	ports_count = internals->active_slave_count;
> +	port_list = data->port_list;
> +	port = &port_list[port_num];
> +
> +	/* Skip port if it is selected */
> +	if (port->selected == SELECTED)
> +		return;
> +
> +	/* Search for aggregator suitable for this port */
> +	for (i = 0; i < ports_count; ++i) {
> +		agg = &port_list[i];
> +		/* Skip ports that are not aggregators, i.e. ports that are
> +		 * attached to some other port's aggregator */
> +		if (agg->agregator_idx != i)
> +			continue;
> +
> +		/* Actors system ID is not checked since all slave device have the same
> +		 * ID (MAC address). */
> +		if ((agg->actor.key == port->actor.key &&
> +			agg->partner.system_priority == port->partner.system_priority &&
> +			is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
> +			&& (agg->partner.key == port->partner.key)) &&
> +			is_zero_ether_addr(&port->partner.system) != 1 &&
> +			(agg->actor.key &
> +				rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
> +
> +			port->agregator_idx = i;
> +			break;
> +		}
> +	}
> +
> +	/* By default, port uses itself as aggregator */
> +	if (i == ports_count)
> +		port->agregator_idx = port_num;
> +
> +	port->selected = SELECTED;
> +
> +	/* "ID" is the slave port id, "pos" the index in the port list */
> +	MODE4_DEBUG("-> SELECTED: ID=%3u pos=%3u\n"
> +		"\t%s ID=%3u pos=%3u\n",
> +		internals->active_slaves[port_num], port_num,
> +		port->agregator_idx == port_num ?
> +			"agregator not found, using default" : "agregator found",
> +		internals->active_slaves[port->agregator_idx],
> +		port->agregator_idx);
> +}
> +
> +/**
> + * Helper function which rebuilds the list of distributing slaves.
> + *
> + * Collects the port list indexes of all ports whose actor state has
> + * DISTRIBUTING set and stores them, together with their count, in the
> + * mode 4 data of the bonded device.
> + *
> + * @param internals	Bonded device private data.
> + */
> +static void
> +update_mux_slaves(struct bond_dev_private *internals)
> +{
> +	struct mode8023ad_data *data = &internals->mode4;
> +	struct port *port;
> +	uint8_t current[RTE_MAX_ETHPORTS];
> +	uint8_t count = 0;
> +	uint8_t i;
> +
> +	/* Only entries of active slaves are initialised in port_list, so the scan
> +	 * is limited to them. */
> +	for (i = 0; i < internals->active_slave_count; i++) {
> +		port = &data->port_list[i];
> +		if (ACTOR_STATE(port, DISTRIBUTING))
> +			current[count++] = i;
> +	}
> +
> +	/* Build the list in a local array first and publish it in one copy */
> +	memcpy(data->distibuting_slaves_offsets, current,
> +		sizeof(current[0]) * count);
> +	data->distibuting_slaves_count = count;
> +}
> +
> +/* Translate a DPDK link speed value into the speed bits used in the LACP
> + * key. Speeds without a mapping yield 0xFFFF. */
> +static uint16_t
> +link_speed_key(uint16_t speed)
> +{
> +	switch (speed) {
> +	case ETH_LINK_SPEED_AUTONEG:
> +		return 0x00;
> +	case ETH_LINK_SPEED_10:
> +		return BOND_LINK_SPEED_KEY_10M;
> +	case ETH_LINK_SPEED_100:
> +		return BOND_LINK_SPEED_KEY_100M;
> +	case ETH_LINK_SPEED_1000:
> +		return BOND_LINK_SPEED_KEY_1000M;
> +	case ETH_LINK_SPEED_10G:
> +		return BOND_LINK_SPEED_KEY_10G;
> +	case ETH_LINK_SPEED_20G:
> +		return BOND_LINK_SPEED_KEY_20G;
> +	case ETH_LINK_SPEED_40G:
> +		return BOND_LINK_SPEED_KEY_40G;
> +	default:
> +		break;
> +	}
> +
> +	/* Unknown speed */
> +	return 0xFFFF;
> +}
> +
> +/*
> + * Alarm callback driving the mode 4 state machines.
> + *
> + * For every active slave it refreshes the actor key (link speed/duplex) and
> + * system address, drains the slow protocol rx ring, answers marker requests
> + * and runs the rx, periodic, mux, tx and selection machines. At the end it
> + * re-arms itself with rte_eal_alarm_set().
> + *
> + * arg is the bonded device (struct rte_eth_dev *).
> + */
> +static void
> +bond_mode_8023ad_periodic_cb(void *arg)
> +{
> + struct rte_eth_dev *bond_dev = arg;
> + struct bond_dev_private *internals = bond_dev->data->dev_private;
> + struct mode8023ad_data *data = &internals->mode4;
> +
> + struct port *port;
> + struct slow_protocol_frame *slow_hdr;
> + struct rte_eth_link link_info;
> + struct ether_addr slave_addr;
> +
> + struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE];
> + uint16_t port_num, j, nb_msgs;
> + /* if not 0 the distributing slaves array needs an update */
> + uint16_t slaves_changed = 0;
> + bool machines_invoked;
> +
> + /* Update link status on each port */
> + for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
> + uint16_t key;
> +
> + rte_eth_link_get(internals->active_slaves[port_num], &link_info);
> + rte_eth_macaddr_get(internals->active_slaves[port_num], &slave_addr);
> +
> + /* Key layout: speed bits shifted left by one, bit 0 = full duplex.
> + * A port with link down gets key 0. */
> + if (link_info.link_status != 0) {
> + key = link_speed_key(link_info.link_speed) << 1;
> + if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
> + key |= BOND_LINK_FULL_DUPLEX_KEY;
> + } else
> + key = 0;
> +
> + port = &data->port_list[port_num];
> + key = rte_cpu_to_be_16(key);
> +
> + /* Any change of key or system address must be announced (NTT) */
> + if (key != port->actor.key) {
> + port->actor.key = key;
> + SM_FLAG_SET(port, NTT);
> + }
> +
> + if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
> + SM_FLAG_SET(port, NTT);
> + ether_addr_copy(&slave_addr, &port->actor.system);
> + }
> + }
> +
> + /* Fetch all slow protocol frames queued by the rx path */
> + nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) msgs,
> + BOND_MODE_8023AX_RX_RING_SIZE);
> +
> + for (port_num = 0; port_num < internals->active_slave_count; port_num++) {
> + port = &data->port_list[port_num];
> + if ((port->actor.key &
> + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
> +
> + SM_FLAG_SET(port, BEGIN);
> +
> + /* LACP is disabled on half duplex or when link is down */
> + if (SM_FLAG(port, LACP_ENABLED)) {
> + /* If port was enabled set it to BEGIN state */
> + SM_FLAG_CLR(port, LACP_ENABLED);
> + ACTOR_STATE_CLR(port, DISTRIBUTING);
> + ACTOR_STATE_CLR(port, COLLECTING);
> + slaves_changed++;
> + }
> +
> + MODE4_DEBUG("Port %u is not LACP capable!\n",
> + internals->active_slaves[port_num]);
> + /* Skip this port processing */
> + continue;
> + }
> +
> + SM_FLAG_SET(port, LACP_ENABLED);
> + machines_invoked = false;
> + /* Find slow protocol frames received on this port */
> + for (j = 0; j < nb_msgs; j++) {
> + if (msgs[j] == NULL || msgs[j]->port_id !=
> + internals->active_slaves[port_num])
> + continue;
> +
> + slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt,
> + struct slow_protocol_frame *);
> +
> + if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP) {
> + /* This is LACP frame so pass it to rx_machine */
> + struct lacpdu *lacp = (struct lacpdu *)&slow_hdr->slow_protocol;
> + /* Run all state machines of this port with the received
> + * LACPDU */
> + rx_machine(internals, port_num, lacp);
> + periodic_machine(internals, port_num);
> + slaves_changed += mux_machine(internals, port_num);
> + tx_machine(bond_dev, port_num);
> + selection_logic(internals, port_num);
> +
> + machines_invoked = true;
> + } else if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_MARKER) {
> + struct marker *marker;
> +
> + marker = (struct marker *) &slow_hdr->slow_protocol;
> + if (marker->tlv_type_marker == MARKER_TLV_TYPE_MARKER_INFO) {
> + /* Reuse received packet to send frame to Marker Responder
> + */
> + marker->tlv_type_marker = MARKER_TLV_TYPE_MARKER_RESP;
> +
> + /* Update source MAC, destination MAC is multicast so we
> + * don't update it */
> + mac_address_get(bond_dev, &slow_hdr->eth_hdr.s_addr);
> +
> + if (rte_ring_enqueue(data->tx_ring, msgs[j]) == -ENOBUFS) {
> + RTE_LOG(ERR, PMD,
> + "Failed to enqueue packet into tx ring");
> + rte_pktmbuf_free(msgs[j]->pkt);
> + rte_ring_enqueue(data->free_ring, msgs[j]);
> + }
> +
> + /* Message was either queued for tx or released above, so
> + * it must not be freed again at the end of this function */
> + msgs[j] = NULL;
> + }
> + }
> + }
> +
> + /* No LACPDU for this port: still run the machines once so that
> + * timers are evaluated */
> + if (machines_invoked == false) {
> + rx_machine(internals, port_num, NULL);
> + periodic_machine(internals, port_num);
> + slaves_changed += mux_machine(internals, port_num);
> + tx_machine(bond_dev, port_num);
> + selection_logic(internals, port_num);
> + machines_invoked = true;
> + }
> +
> + SM_FLAG_CLR(port, BEGIN);
> + }
> +
> + /* Update mux if something changed */
> + if (slaves_changed > 0) {
> + update_mux_slaves(internals);
> + MODE4_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n",
> + data->distibuting_slaves_count,
> + data->distibuting_slaves_offsets[0],
> + data->distibuting_slaves_count > 0 ? " " : "\b\b",
> + data->distibuting_slaves_offsets[1],
> + data->distibuting_slaves_count > 1 ? " " : "\b\b",
> + data->distibuting_slaves_offsets[2],
> + data->distibuting_slaves_count > 2 ? " " : "\b\b",
> + data->distibuting_slaves_offsets[3],
> + data->distibuting_slaves_count > 3 ? " " : "\b\b",
> + data->distibuting_slaves_count > 4 ? "..." : "");
> + }
> +
> + /* Free packets that were not reused (port_num is used here as a plain
> + * message index) */
> + for (port_num = 0; port_num < nb_msgs; port_num++) {
> + if (msgs[port_num] != NULL) {
> + rte_pktmbuf_free(msgs[port_num]->pkt);
> + rte_ring_enqueue(data->free_ring, msgs[port_num]);
> + }
> + }
> +
> + /* Re-arm the alarm; the timeout constant is in ms, the alarm API is fed
> + * with the value multiplied by 1000 */
> + rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
> + bond_mode_8023ad_periodic_cb, arg);
> +}
> +
> +/*
> + * Initialise the mode 4 port entry of one active slave.
> + *
> + * bond_dev  - bonded device.
> + * slave_idx - position of the slave in internals->active_slaves[]; the port
> + *             entry with the same index in the mode 4 port list is set up.
> + *
> + * Actor and partner parameters are reset to defaults, the state machines are
> + * put into BEGIN, the port becomes its own aggregator and promiscuous mode is
> + * enabled on the slave.
> + */
> +static void
> +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_idx)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_data *data = &internals->mode4;
> +
> +	/* The port entry must be the one matching slave_idx. Indexing with
> +	 * active_slave_count only worked for the append case and made the
> +	 * init loop re-initialise a single entry over and over. */
> +	struct port *port = &data->port_list[slave_idx];
> +	struct port_params initial = {
> +		.system = { { 0 } },
> +		.system_priority = rte_cpu_to_be_16(0xFFFF),
> +		.key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
> +		.port_priority = rte_cpu_to_be_16(0x00FF),
> +		.port_number = 0,
> +	};
> +
> +	uint8_t slave_id = internals->active_slaves[slave_idx];
> +
> +	memcpy(&port->actor, &initial, sizeof(struct port_params));
> +	port->actor.port_number = slave_id_to_port_number(slave_id);
> +
> +	memcpy(&port->partner, &initial, sizeof(struct port_params));
> +
> +	/* default states */
> +	port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
> +	port->partner_state = STATE_LACP_ACTIVE;
> +	port->sm_flags = SM_FLAGS_BEGIN;
> +
> +	/* use this port as aggregator */
> +	port->agregator_idx = slave_idx;
> +
> +	rte_eth_promiscuous_enable(slave_id);
> +}
> +
> +/* Set up the mode 4 port entry for the slave stored at index
> + * active_slave_count of the active slaves array. */
> +void
> +bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	const uint8_t new_slave_idx = internals->active_slave_count;
> +
> +	bond_mode_8023ad_activate_slave(bond_dev, new_slave_idx);
> +}
> +
> +/*
> + * Remove the mode 4 port entry of a slave.
> + *
> + * bond_dev  - bonded device.
> + * slave_pos - index of the slave's entry in the mode 4 port list.
> + *
> + * The periodic callback is stopped while the port list is modified and is
> + * started again if the bonded device is started.
> + *
> + * Returns the result of bond_mode_8023ad_start() when the device is started,
> + * 0 otherwise.
> + */
> +int
> +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
> +		uint8_t slave_pos)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_data *data = &internals->mode4;
> +	struct port *port;
> +	uint8_t i;
> +
> +	bond_mode_8023ad_stop(bond_dev);
> +
> +	/* Exclude slave from transmit policy. If this slave is an aggregator
> +	 * make all aggregated slaves unselected to force selection logic
> +	 * to select suitable aggregator for this port */
> +	for (i = 0; i < internals->active_slave_count; i++) {
> +		/* Every port has to be inspected, not only the removed one */
> +		port = &data->port_list[i];
> +		if (port->agregator_idx == slave_pos) {
> +			port->selected = UNSELECTED;
> +			port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
> +				STATE_COLLECTING);
> +
> +			/* Use default aggregator */
> +			port->agregator_idx = i;
> +		}
> +	}
> +
> +	update_mux_slaves(internals);
> +
> +	/* Remove slave port config.
> +	 * NOTE(review): agregator_idx values still refer to positions before
> +	 * this shift; unselected ports are re-assigned by selection_logic(), but
> +	 * ports that stay SELECTED with an index above slave_pos are not
> +	 * renumbered here - confirm whether that needs handling. */
> +	if (slave_pos + 1 < internals->active_slave_count) {
> +		memmove(&data->port_list[slave_pos],
> +			&data->port_list[slave_pos + 1],
> +			sizeof(data->port_list[0]) * (internals->active_slave_count -
> +				slave_pos - 1));
> +	}
> +
> +	if (bond_dev->data->dev_started)
> +		return bond_mode_8023ad_start(bond_dev);
> +
> +	return 0;
> +}
> +
> +/*
> + * Initialise mode 4 (802.3ad) data of a bonded device.
> + *
> + * On the first call (mbuf_pool == NULL) the LACP mbuf pool, the free message
> + * ring with its messages and the rx/tx rings are created; any failure there
> + * ends in rte_panic(). On every call the distributing slaves list is cleared
> + * and the port entries of all active slaves are (re)initialised.
> + *
> + * Always returns 0.
> + */
> +int
> +bond_mode_8023ad_init(struct rte_eth_dev *bond_dev)
> +{
> +	struct bond_dev_private *internals = bond_dev->data->dev_private;
> +	struct mode8023ad_data *data = &internals->mode4;
> +	char mem_name[RTE_ETH_NAME_MAX_LEN];
> +	int socket_id = bond_dev->pci_dev->numa_node;
> +	uint16_t msg_idx;
> +	uint8_t i;
> +
> +	if (data->mbuf_pool == NULL) {
> +		const uint16_t element_size = sizeof(struct slow_protocol_frame) +
> +				sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
> +
> +		snprintf(mem_name, sizeof(mem_name), "%s_POOL", bond_dev->data->name);
> +		data->mbuf_pool = rte_mempool_create(mem_name,
> +			/* FIXME: How big memory pool should be? If driver will not
> +			 * free packets quick enough there will be ENOMEM in tx_machine.
> +			 * For now give 512 packets per slave. Hope it will be enough. */
> +			(BOND_MODE_8023AX_TX_RING_SIZE + 1) * 512 * RTE_MAX_ETHPORTS,
> +			element_size,
> +			RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
> +			sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init,
> +			NULL, rte_pktmbuf_init, NULL, socket_id, 0);
> +
> +		/* Any memory allocation failure in initialization is critical because
> +		 * resources can't be freed, so reinitialization is impossible. */
> +		if (data->mbuf_pool == NULL) {
> +			RTE_LOG(ERR, PMD, "%s: Failed to initialize LACP rx ring\n",
> +				bond_dev->data->name);
> +
> +			rte_panic("Failed to alocate memory pool ('%s')\n"
> +				"for bond device '%s'\n", mem_name, bond_dev->data->name);
> +		}
> +
> +		/* Setup ring for free messages that can be used in RX/TX burst */
> +		snprintf(mem_name, sizeof(mem_name), "%s_free", bond_dev->data->name);
> +
> +		uint16_t free_cnt = BOND_MODE_8023AX_RX_RING_SIZE +
> +				BOND_MODE_8023AX_TX_RING_SIZE;
> +
> +		data->free_ring = rte_ring_create(mem_name, free_cnt, socket_id, 0);
> +
> +		if (data->free_ring == NULL) {
> +			rte_panic("%s: Failed to create slow messages free ring\n",
> +				bond_dev->data->name);
> +		}
> +
> +		/* A ring created with 'count' slots stores at most count - 1
> +		 * objects, so only that many messages are allocated; one more would
> +		 * be rejected by the ring and leaked. The counter is 16 bit wide
> +		 * like free_cnt, so it cannot wrap before reaching the bound. */
> +		for (msg_idx = 0; msg_idx + 1 < free_cnt; msg_idx++) {
> +			struct slow_protocol_msg *msg;
> +
> +			snprintf(mem_name, sizeof(mem_name), "%s_slow_msg_%u",
> +				bond_dev->data->name, msg_idx);
> +
> +			msg = (struct slow_protocol_msg *) rte_malloc_socket(mem_name,
> +				sizeof(struct slow_protocol_msg), 0, socket_id);
> +
> +			if (msg == NULL) {
> +				rte_panic("%s: Failed to allocate slow message\n",
> +					bond_dev->data->name);
> +			}
> +
> +			if (rte_ring_enqueue(data->free_ring, msg) == -ENOBUFS) {
> +				rte_panic("%s: Failed to fill slow messages free ring\n",
> +					bond_dev->data->name);
> +			}
> +		}
> +
> +		/* Setup rings for usage in rx/tx bursts and machines state
> +		 * call back */
> +		snprintf(mem_name, sizeof(mem_name), "%s_rx", bond_dev->data->name);
> +		data->rx_ring = rte_ring_create(mem_name,
> +			BOND_MODE_8023AX_RX_RING_SIZE, socket_id, 0);
> +
> +		if (data->rx_ring == NULL) {
> +			rte_panic("%s: Failed to create slow messages rx ring\n",
> +				bond_dev->data->name);
> +		}
> +
> +		snprintf(mem_name, sizeof(mem_name), "%s_tx", bond_dev->data->name);
> +		data->tx_ring = rte_ring_create(mem_name, BOND_MODE_8023AX_TX_RING_SIZE,
> +			socket_id, RING_F_SP_ENQ);
> +
> +		if (data->tx_ring == NULL) {
> +			rte_panic("%s: Failed to create slow messages tx ring\n",
> +				bond_dev->data->name);
> +		}
> +	}
> +
> +	data->distibuting_slaves_count = 0;
> +
> +	for (i = 0; i < internals->active_slave_count; i++)
> +		bond_mode_8023ad_activate_slave(bond_dev, i);
> +
> +	return 0;
> +}
> +
> +/* Arm the alarm that runs the mode 4 periodic callback for bond_dev and
> + * return the result of rte_eal_alarm_set(). */
> +int
> +bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
> +{
> +	int ret;
> +
> +	ret = rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000,
> +			&bond_mode_8023ad_periodic_cb, bond_dev);
> +
> +	return ret;
> +}
> +
> +/* Cancel the mode 4 periodic callback alarm of bond_dev.
> + * Returns 0 if rte_eal_alarm_cancel() reported a non-zero result and
> + * -ENOENT if it returned 0. */
> +int
> +bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
> +{
> +	if (rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev) == 0)
> +		return -ENOENT;
> +
> +	return 0;
> +}
> +
> +/*
> + * Hand a received slow protocol packet over to the periodic callback.
> + *
> + * internals - bonded device private data.
> + * slave_pos - index in active_slaves[] of the slave the packet came from.
> + * slot_pkt  - received packet; it is consumed by this function (queued on
> + *             the mode 4 rx ring or freed when no message/ring space is
> + *             available).
> + */
> +void
> +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
> +		uint8_t slave_pos, struct rte_mbuf *slot_pkt)
> +{
> +	struct mode8023ad_data *data;
> +	struct slow_protocol_msg *msg = NULL;
> +	int retval;
> +
> +	data = &internals->mode4;
> +
> +	/* rte_ring_dequeue() reports an empty ring as -ENOENT, so every non-zero
> +	 * result means that no message was obtained and msg is still NULL. */
> +	if (unlikely(rte_ring_dequeue(data->free_ring, (void **)&msg) != 0)) {
> +		rte_pktmbuf_free(slot_pkt);
> +		return;
> +	}
> +
> +	msg->pkt = slot_pkt;
> +	msg->port_id = internals->active_slaves[slave_pos];
> +
> +	retval = rte_ring_enqueue(data->rx_ring, msg);
> +	if (unlikely(retval == -ENOBUFS)) {
> +		/* If RX ring full free lacpdu message and drop packet */
> +		rte_ring_enqueue(data->free_ring, msg);
> +		rte_pktmbuf_free(slot_pkt);
> +	}
> +}
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> new file mode 100644
> index 0000000..df250bb
> --- /dev/null
> +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h
> @@ -0,0 +1,405 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef RTE_ETH_BOND_8023AD_H_
> +#define RTE_ETH_BOND_8023AD_H_
> +
> +#include <stdint.h>
> +
> +#include <rte_ether.h>
> +#include <rte_byteorder.h>
> +#include <rte_spinlock.h>
> +
> +/* Use the standard boolean type instead of a private "typedef int bool":
> + * the private typedef and true/false macros clash with <stdbool.h> as soon
> + * as both end up in the same translation unit. */
> +#include <stdbool.h>
> +
> +/**
> + * Timeout definitions (5.4.4 in 802.1AX documentation).
> + * All values are in milliseconds, as the _MS suffix indicates.
> + */
> +#define BOND_8023AD_FAST_PERIODIC_MS 1000
> +#define BOND_8023AD_SLOW_PERIODIC_MS 30000
> +#define BOND_8023AD_SHORT_TIMEOUT_MS 3000
> +#define BOND_8023AD_LONG_TIMEOUT_MS 90000
> +#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000
> +#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000
> +#define BOND_8023AD_TX_PERIOD_MS 333
> +/**
> + * Actor/partner state bits. They are stored in the actor_state and
> + * partner_state fields of a port and copied into the state byte of the
> + * actor/partner information of an LACPDU.
> + */
> +#define STATE_LACP_ACTIVE 0x01
> +#define STATE_LACP_SHORT_TIMEOUT 0x02
> +#define STATE_AGGREGATION 0x04
> +#define STATE_SYNCHRONIZATION 0x08
> +#define STATE_COLLECTING 0x10
> +#define STATE_DISTRIBUTING 0x20
> +/** Partners parameters are defaulted */
> +#define STATE_DEFAULTED 0x40
> +#define STATE_EXPIRED 0x80
> +
> +/**
> + * State machine flags (bits of the sm_flags field of a port)
> + */
> +#define SM_FLAGS_BEGIN 0x0001
> +#define SM_FLAGS_LACP_ENABLED 0x0002
> +#define SM_FLAGS_ACTOR_CHURN 0x0004
> +#define SM_FLAGS_PARTNER_CHURN 0x0008
> +#define SM_FLAGS_MOVED 0x0100
> +#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200
> +#define SM_FLAGS_NTT 0x0400
> +
> +/* Period (in ms) of the alarm that runs the mode 4 state machines */
> +#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100
> +/* Sizes passed to rte_ring_create() for the slow protocol rx and tx rings */
> +#define BOND_MODE_8023AX_RX_RING_SIZE (2 * RTE_MAX_ETHPORTS)
> +#define BOND_MODE_8023AX_TX_RING_SIZE (2 * RTE_MAX_ETHPORTS)
> +
> +/* Actor key encoding: bit 0 marks full duplex; the speed values below are
> + * shifted left by one before being combined with the duplex bit. */
> +#define BOND_LINK_FULL_DUPLEX_KEY 0x01
> +#define BOND_LINK_SPEED_KEY_10M 0x02
> +#define BOND_LINK_SPEED_KEY_100M 0x04
> +#define BOND_LINK_SPEED_KEY_1000M 0x08
> +#define BOND_LINK_SPEED_KEY_10G 0x10
> +#define BOND_LINK_SPEED_KEY_20G 0x11
> +#define BOND_LINK_SPEED_KEY_40G 0x12
> +
> +/* Subtype value written into transmitted LACPDUs (same value as
> + * SLOW_SUBTYPE_LACP) */
> +#define SUBTYPE_LACP 0x01
> +
> +/* TLV type values used when building an LACPDU */
> +#define TLV_TYPE_ACTOR_INFORMATION 0x01
> +#define TLV_TYPE_PARTNER_INFORMATION 0x02
> +#define TLV_TYPE_COLLECTOR_INFORMATION 0x03
> +#define TLV_TYPE_TERMINATOR_INFORMATION 0x00
> +
> +/* Generic bit helpers: test, set and clear _flags in _variable */
> +#define CHECK_FLAGS(_variable, _flags) ((_variable) & (_flags))
> +#define SET_FLAGS(_variable, _flags) ((_variable) |= (_flags))
> +#define CLEAR_FLAGS(_variable, _flags) ((_variable) &= ~(_flags))
> +
> +/* State machine flag accessors; 'flag' is the name without the SM_FLAGS_
> + * prefix. SM_FLAG() evaluates to 0 or 1. */
> +#define SM_FLAG(port, flag) (!!CHECK_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag))
> +#define SM_FLAG_SET(port, flag) SET_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)
> +#define SM_FLAG_CLR(port, flag) CLEAR_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)
> +
> +/* Actor state accessors; 'flag' is the name without the STATE_ prefix.
> + * ACTOR_STATE() evaluates to 0 or 1. */
> +#define ACTOR_STATE(port, flag) (!!CHECK_FLAGS((port)->actor_state, STATE_ ## flag))
> +#define ACTOR_STATE_SET(port, flag) SET_FLAGS((port)->actor_state, STATE_ ## flag)
> +#define ACTOR_STATE_CLR(port, flag) CLEAR_FLAGS((port)->actor_state, STATE_ ## flag)
> +
> +/* Partner state accessors; 'flag' is the name without the STATE_ prefix.
> + * PARTNER_STATE() evaluates to 0 or 1. */
> +#define PARTNER_STATE(port, flag) (!!CHECK_FLAGS((port)->partner_state, STATE_ ## flag))
> +#define PARTNER_STATE_SET(port, flag) SET_FLAGS((port)->partner_state, STATE_ ## flag)
> +#define PARTNER_STATE_CLR(port, flag) CLEAR_FLAGS((port)->partner_state, STATE_ ## flag)
> +
> +/** Slow protocol LACP frame subtype */
> +#define SLOW_SUBTYPE_LACP 0x01
> +
> +/** Slow protocol marker frame subtype */
> +#define SLOW_SUBTYPE_MARKER 0x02
> +
> +/** Marker type info request */
> +#define MARKER_TLV_TYPE_MARKER_INFO 0x01
> +
> +/** Marker type info response */
> +#define MARKER_TLV_TYPE_MARKER_RESP 0x02
> +
> +/** Generic slow protocol structure */
> +struct slow_protocol {
> + uint8_t subtype;
> + uint8_t reserved_119[119];
> +} __attribute__((__packed__));
> +
> +/** Generic slow protocol frame type structure */
> +struct slow_protocol_frame {
> + struct ether_hdr eth_hdr;
> + struct slow_protocol slow_protocol;
> +} __attribute__((__packed__));
> +
> +struct port_params {
> + uint16_t system_priority;
> + /**< System priority (unused in current implementation) */
> + struct ether_addr system;
> + /**< System ID - Slave MAC address, same as bonding MAC address */
> + uint16_t key;
> + /**< Speed information (implementation dependent) and duplex. */
> + uint16_t port_priority;
> + /**< Priority of this port (unused in current implementation) */
> + uint16_t port_number;
> + /**< Port number: slave port id + 1, big endian (see slave_id_to_port_number()). */
> +} __attribute__((__packed__));
> +
> +struct lacpdu_actor_partner_params {
> + uint8_t tlv_type_info;
> + uint8_t info_length;
> + struct port_params port_params;
> + uint8_t state;
> + uint8_t reserved_3[3];
> +} __attribute__((__packed__));
> +
> +/** LACPDU structure (5.4.2 in 802.1AX documentation). */
> +struct lacpdu {
> + uint8_t subtype;
> + uint8_t version_number;
> +
> + struct lacpdu_actor_partner_params actor;
> + struct lacpdu_actor_partner_params partner;
> +
> + uint8_t tlv_type_collector_info;
> + uint8_t collector_info_length;
> + uint16_t collector_max_delay;
> + uint8_t reserved_12[12];
> +
> + uint8_t tlv_type_terminator;
> + uint8_t terminator_length;
> + uint8_t reserved_50[50];
> +} __attribute__((__packed__));
> +
> +/** LACPDU frame: Contains ethernet header and LACPDU. */
> +struct lacpdu_header {
> + struct ether_hdr eth_hdr;
> + struct lacpdu lacpdu;
> +} __attribute__((__packed__));
> +
> +struct marker {
> + uint8_t subtype;
> + uint8_t version_number;
> +
> + uint8_t tlv_type_marker;
> + uint8_t info_length;
> + uint16_t requester_port;
> + struct ether_addr requester_system;
> + uint32_t requester_transaction_id;
> + uint8_t reserved_2[2];
> +
> + uint8_t tlv_type_terminator;
> + uint8_t terminator_length;
> + uint8_t reserved_90[90];
> +} __attribute__((__packed__));
> +
> +struct marker_header {
> + struct ether_hdr eth_hdr;
> + struct marker marker;
> +} __attribute__((__packed__));
> +
> +/** Variables associated with the system (5.4.5 in 802.1AX documentation). */
> +struct system {
> + struct ether_addr actor_system;
> + /**< The MAC address component of the System Identifier of the System */
> + uint16_t actor_system_priority;
> + /**< The System Priority of the System */
> +};
> +
> +enum selection {
> + UNSELECTED,
> + STANDBY,
> + SELECTED
> +};
> +
> +/** Variables associated with each port (5.4.7 in 802.1AX documentation). */
> +struct port {
> + /**
> + * The operational values of the Actor's state parameters. Bitmask
> + * of STATE_* flags (see ACTOR_STATE()).
> + */
> + uint8_t actor_state;
> +
> + /** The operational Actor's port parameters */
> + struct port_params actor;
> +
> + /**
> + * The operational value of the Actor's view of the current values of
> + * the Partner's state parameters. The Actor sets this variable either
> + * to the value received from the Partner in an LACPDU, or to the value
> + * of Partner_Admin_Port_State. Bitmask of STATE_* flags.
> + */
> + uint8_t partner_state;
> +
> + /** The operational Partner's port parameters */
> + struct port_params partner;
> +
> + /* Additional port parameters not listed in documentation */
> + /** State machine flags (bitmask of SM_FLAGS_* values, see SM_FLAG()) */
> + uint16_t sm_flags;
> + enum selection selected;
> +
> + uint64_t current_while_timer;
> + uint64_t periodic_timer;
> + uint64_t wait_while_timer;
> + uint64_t tx_machine_timer;
> + /* Aggregator parameters */
> + /**
> + * Index in mode8023ad_data::port_list[] of Aggregator
> + * the port is currently attached to.
> + */
> + uint16_t agregator_idx;
> +};
> +
> +
> +/**
> + * Struct used to communicate with 8023ad logic (a packet plus a slave port id).
> + */
> +struct slow_protocol_msg {
> + struct rte_mbuf *pkt;
> + uint8_t port_id;
> +};
> +
> +/** Data specific to mode 802.1AX */
> +struct mode8023ad_data {
> + /** Memory pool. NOTE(review): presumably for slow protocol mbufs - confirm */
> + struct rte_mempool *mbuf_pool;
> +
> + /** Ring containing free slow_protocol_msg objects. Used to avoid
> + * allocating/freeing memory in RX/TX bursts */
> + struct rte_ring *free_ring;
> +
> + /** Ring of struct slow_protocol_msg from RX burst function */
> + struct rte_ring *rx_ring;
> +
> + /** Ring of struct slow_protocol_msg to TX burst function */
> + struct rte_ring *tx_ring;
> +
> + /** List of all enslaved ports in mode 802.1AX */
> + struct port port_list[RTE_MAX_ETHPORTS];
> +
> + /** List of offsets in active slaves array used to transmit packets. */
> + uint8_t distibuting_slaves_offsets[RTE_MAX_ETHPORTS];
> + uint8_t distibuting_slaves_count;
> +};
> +
> +/* Forward declaration */
> +struct bond_dev_private;
> +
> +/**
> + * Configures 802.1AX mode and all active slaves on bonded interface.
> + *
> + * @param dev Bonded interface
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> +int
> +bond_mode_8023ad_init(struct rte_eth_dev *dev);
> +
> +/**
> + * Deconfigures 802.1AX mode of the bonded interface and slaves.
> + *
> + * @param dev Bonded interface
> + * @return
> + * 0 on success, negative value otherwise.
> + */
> +int bond_mode_8023ad_disable(struct rte_eth_dev *dev);
> +
> +/**
> + * Starts 802.3AX state machines management logic.
> + * @param dev Bonded interface
> + * @return
> + * 0 if machines was started, 1 if machines was already running,
> + * negative value otherwise.
> + */
> +int
> +bond_mode_8023ad_start(struct rte_eth_dev *dev);
> +
> +/**
> + * Stops 802.3AX state machines management logic.
> + * @param dev Bonded interface
> + * @return
> + * 0 if this call stopped state machines, -ENOENT if alarm was not set.
> + */
> +int
> +bond_mode_8023ad_stop(struct rte_eth_dev *dev);
> +
> +/**
> + * Passes given slow packet to state machines management logic.
> + * @param internals Bonded device private data.
> + * @param slave_pos Position in active slaves array of the slave that received this packet.
> + * @param slot_pkt Slow packet
> + */
> +void
> +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
> + uint8_t slave_pos, struct rte_mbuf *slot_pkt);
> +
> +/**
> + * Appends and initializes slave active_slaves[active_slave_count] to use with
> + * 802.1AX mode.
> + *
> + * @pre active_slaves[active_slave_count] must contain valid slave id.
> + * @post active_slave_count must be incremented by the caller.
> + *
> + * @param dev Bonded interface.
> + *
> + * @note No value is returned: the function is declared void, so a failure
> + * cannot be reported to the caller.
> + */
> +void
> +bond_mode_8023ad_slave_append(struct rte_eth_dev *dev);
> +
> +/**
> + * Deinitializes and removes given slave from 802.1AX mode.
> + *
> + * @pre active_slaves[slave_pos] must contain valid slave id corresponding to
> + * slave initialized in 802.1AX mode.
> + * @post active_slaves[slave_pos] must be removed by the caller.
> + *
> + * @param dev Bonded interface.
> + * @param slave_pos Position of slave in active_slaves array
> + *
> + * @return
> + * 0 on success, negative value otherwise.
> + *
> + */
> +int
> +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
> +
> +/**
> + * Converts port_number from network byte order to slave port id.
> + *
> + * @param port_number The 8023ad port number (network byte order) to convert.
> + * @return corresponding slave id (host-order port number minus one)
> + */
> +static inline uint8_t
> +port_number_to_slave_id(uint16_t port_number)
> +{
> + uint16_t port_id = rte_be_to_cpu_16(port_number);
> + /* Standard requires that port number must be greater than 0.
> + * Subtract 1 to get corresponding slave id */
> + return port_id - 1;
> +}
> +
> +/**
> + * Converts slave port id to mode 8023ad port number.
> + *
> + * @param slave_id Id of slave to convert.
> + * @return corresponding port number (slave id plus one) in network byte order.
> + */
> +static inline uint16_t
> +slave_id_to_port_number(uint8_t slave_id)
> +{
> + /* Standard requires that port number must be greater than 0.
> + * Add 1 to get corresponding port_number */
> + uint16_t port_number = (uint16_t)slave_id + 1;
> + return rte_cpu_to_be_16(port_number);
> +}
> +
> +#endif /* RTE_ETH_BOND_8023AD_H_ */
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c
> index c690ceb..c547164 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_api.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
> @@ -31,6 +31,8 @@
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> */
>
> +#include <string.h>
> +
> #include <rte_mbuf.h>
> #include <rte_malloc.h>
> #include <rte_ethdev.h>
> @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id)
> return 0;
> }
>
> +void
> +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
> +{
> + struct bond_dev_private *internals = eth_dev->data->dev_private;
> + uint8_t active_count = internals->active_slave_count;
> +
> + internals->active_slaves[active_count] = port_id;
> +
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_slave_append(eth_dev);
> +
> + internals->active_slave_count = active_count + 1;
> +}
> +
> +void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t slave_pos)
> +{
> + struct bond_dev_private *internals = eth_dev->data->dev_private;
> + uint8_t active_count = internals->active_slave_count;
> +
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
> +
> + active_count--;
> +
> + /* If slave was not at the end of the list
> + * shift active slaves up active array list */
> + if (slave_pos < active_count) {
> + memmove(internals->active_slaves + slave_pos,
> + internals->active_slaves + slave_pos + 1,
> + (active_count - slave_pos) *
> + sizeof(internals->active_slaves[0]));
> + }
> +
> + internals->active_slave_count = active_count;
> +}
> +
> uint8_t
> number_of_sockets(void)
> {
> @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
> eth_dev->dev_ops = &default_dev_ops;
> eth_dev->pci_dev = pci_dev;
>
> - if (bond_ethdev_mode_set(eth_dev, mode)) {
> - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
> - eth_dev->data->port_id, mode);
> - goto err;
> - }
> -
> + internals->port_id = eth_dev->data->port_id;
> + internals->mode = BONDING_MODE_INVALID;
> internals->current_primary_port = 0;
> internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
> internals->user_defined_mac = 0;
> @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
> memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
> memset(internals->slaves, 0, sizeof(internals->slaves));
>
> + if (bond_ethdev_mode_set(eth_dev, mode)) {
> + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
> + eth_dev->data->port_id, mode);
> + goto err;
> + }
> +
> return eth_dev->data->port_id;
>
> err:
> @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
> rte_eth_link_get_nowait(slave_port_id, &link_props);
>
> if (link_props.link_status == 1)
> - internals->active_slaves[internals->active_slave_count++] =
> - slave_port_id;
> + activate_slave(bonded_eth_dev, slave_port_id);
> }
> return 0;
>
> }
>
> -
> int
> rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
> {
> @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
> return retval;
> }
>
> -
> static int
> __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
> {
> + struct rte_eth_dev *bonded_eth_dev;
> struct bond_dev_private *internals;
>
> - int i, slave_idx = -1;
> + int i, slave_idx;
>
> if (valid_slave_port_id(slave_port_id) != 0)
> return -1;
>
> - internals = rte_eth_devices[bonded_port_id].data->dev_private;
> + bonded_eth_dev = &rte_eth_devices[bonded_port_id];
> + internals = bonded_eth_dev->data->dev_private;
>
> /* first remove from active slave list */
> - for (i = 0; i < internals->active_slave_count; i++) {
> - if (internals->active_slaves[i] == slave_port_id)
> - slave_idx = i;
> + slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count,
> + slave_port_id);
>
> - /* shift active slaves up active array list */
> - if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
> - internals->active_slaves[i] = internals->active_slaves[i+1];
> - }
> -
> - if (slave_idx >= 0)
> - internals->active_slave_count--;
> + if (slave_idx < internals->active_slave_count)
> + deactivate_slave(bonded_eth_dev, slave_idx);
>
> slave_idx = -1;
> /* now find in slave list */
> @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
>
> return internals->current_primary_port;
> }
> +
> int
> rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
> {
> @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
> return internals->balance_xmit_policy;
> }
>
> -
> int
> rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
> {
> @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
> return internals->link_down_delay_ms;
> }
>
> -
> int
> rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
>
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
> index bbbc69b..a0be0e6 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_args.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
> @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
> case BONDING_MODE_ACTIVE_BACKUP:
> case BONDING_MODE_BALANCE:
> case BONDING_MODE_BROADCAST:
> + case BONDING_MODE_8023AD:
> return 0;
> default:
> RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> index 6d0fb1b..13630d9 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> @@ -44,6 +44,7 @@
>
> #include "rte_eth_bond.h"
> #include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
>
> static uint16_t
> bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue,
> bufs, nb_pkts);
> }
>
> +static uint16_t
> +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> + uint16_t nb_pkts)
> +{
> + /* Cast to structure, containing bonded device's port id and queue id */
> + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
> + struct bond_dev_private *internals = bd_rx_q->dev_private;
> + struct mode8023ad_data *mode4 = &internals->mode4;
> + struct ether_addr bond_mac;
> +
> + struct ether_hdr *hdr;
> + struct rte_mbuf *pkts[nb_pkts + 1]; /* per-slave scratch; +1 keeps VLA non-empty */
> +
> + uint16_t num_rx_slave = 0; /* Number of packets received on current slave */
> + uint16_t num_rx_total = 0; /* Total number of packets stored in bufs */
> +
> + uint16_t i, j; /* j counts up to nb_pkts, which may exceed UINT8_MAX */
> +
> + rte_eth_macaddr_get(internals->port_id, &bond_mac);
> +
> + for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) {
> + /* Never read more than the room left in bufs, so it cannot overflow */
> + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
> + bd_rx_q->queue_id, pkts, nb_pkts - num_rx_total);
> +
> + /* Separate slow protocol packets from other packets */
> + for (j = 0; j < num_rx_slave; j++) {
> + hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
> +
> + uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type);
> + if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
> + bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
> + continue;
> + }
> +
> + /* Check if we can receive this packet. Also filter packets if
> + * bonding interface is not in promiscuous mode (slaves are always
> + * in promiscuous mode). */
> + if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) &&
> + likely(internals->promiscuous_en ||
> + is_same_ether_addr(&bond_mac, &hdr->d_addr))) {
> + bufs[num_rx_total++] = pkts[j];
> + } else
> + rte_pktmbuf_free(pkts[j]);
> + }
> + }
> +
> + return num_rx_total;
> +}
> +
> static inline uint16_t
> ether_hash(struct ether_hdr *eth_hdr)
> {
> @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
> }
>
> static uint16_t
> +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> + uint16_t nb_pkts)
> +{
> + struct bond_dev_private *internals;
> + struct mode8023ad_data *mode4;
> + struct bond_tx_queue *bd_tx_q;
> +
> + uint8_t num_of_slaves;
> + uint8_t slaves[RTE_MAX_ETHPORTS];
> + /* positions in slaves, not ID */
> + uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
> + uint8_t distributing_slaves_count;
> +
> + uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
> + uint16_t i, op_slave_idx;
> +
> + /* Slow packets from 802.3AX state machines. */
> + struct slow_protocol_msg *slow_msg;
> +
> + /* Allocate one additional packet in case 8023AD mode.
> + * First element if not NULL is slow packet. */
> + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
> + /* Total amount of packets in slave_bufs */
> + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
> + /* Array of slow packets placed in each slave */
> + uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
> +
> + bd_tx_q = (struct bond_tx_queue *)queue;
> + internals = bd_tx_q->dev_private;
> + mode4 = &internals->mode4;
> +
> + /* Copy slave list to protect against slave up/down changes during tx
> + * bursting */
> + num_of_slaves = internals->active_slave_count;
> + if (num_of_slaves < 1)
> + return num_tx_total;
> +
> + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
> +
> + distributing_slaves_count = mode4->distibuting_slaves_count;
> + memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
> + sizeof(slaves[0]) * distributing_slaves_count);
> +
> + for (i = 0; i < num_of_slaves; i++)
> + slave_bufs[i][0] = NULL;
> +
> + /* It is likely that tx ring will be empty. If it is not empty, it is
> + * likely that there will be only one frame. */
> + while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
> + rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) {
> + i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id);
> +
> + /* Assign slow packet to slave or drop it if slave is not in active list
> + * (ex: link down). */
> + if (likely(i < num_of_slaves)) {
> + /* If there is more than one slow packet to the same slave, send
> + * only latest, and drop previous - tx burst was not called quick
> + * enough. */
> + if (slave_bufs[i][0] != NULL)
> + rte_pktmbuf_free(slave_bufs[i][0]);
> +
> + slave_bufs[i][0] = slow_msg->pkt;
> + slave_nb_pkts[i] = 1;
> + slave_slow_packets[i] = 1;
> + } else
> + rte_pktmbuf_free(slow_msg->pkt);
> +
> + rte_ring_enqueue(mode4->free_ring, slow_msg);
> + }
> +
> + if (likely(distributing_slaves_count > 0)) {
> + /* Populate slaves mbuf with the packets which are to be sent on it */
> + for (i = 0; i < nb_pkts; i++) {
> + /* Select output slave using hash based on xmit policy */
> + op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count,
> + internals->balance_xmit_policy);
> +
> + /* Populate slave mbuf arrays with mbufs for that slave. Use only
> + * slaves that are currently distributing. */
> + uint8_t slave_offset = distributing_offsets[op_slave_idx];
> + uint16_t pkt_pos = slave_nb_pkts[slave_offset];
> + slave_nb_pkts[slave_offset]++;
> +
> + slave_bufs[slave_offset][pkt_pos] = bufs[i];
> + }
> + }
> +
> + /* Send packet burst on each slave device */
> + for (i = 0; i < num_of_slaves; i++) {
> + if (slave_nb_pkts[i] > 0) {
> + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
> + slave_bufs[i], slave_nb_pkts[i]);
> +
> + /* if tx burst fails move unsent mbuf pointers to end of bufs */
> + if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
> + uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
> +
> + /* Free slow packet if it exists and was not sent. */
> + if (slave_slow_packets[i] != 0 && num_tx_slave == 0) {
> + rte_pktmbuf_free(slave_bufs[i][0]);
> + slave_tx_fail_count--;
> + }
> +
> + tx_fail_total += slave_tx_fail_count;
> + memcpy(&bufs[nb_pkts - tx_fail_total],
> + &slave_bufs[i][slave_nb_pkts[i] - slave_tx_fail_count],
> + slave_tx_fail_count * sizeof(bufs[0]));
> + }
> +
> + if (num_tx_slave > 0)
> + num_tx_slave -= slave_slow_packets[i];
> +
> + num_tx_total += num_tx_slave;
> + }
> + }
> +
> + return num_tx_total;
> +}
> +
> +static uint16_t
> bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
> {
> @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link,
> }
>
> int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
> +{
> + /* Validate both pointers before eth_dev is dereferenced; reading
> + * eth_dev->data ahead of the NULL check would defeat the check. */
> + if (eth_dev == NULL) {
> + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
> + return -1;
> + }
> +
> + if (dst_mac_addr == NULL) {
> + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
> + return -1;
> + }
> +
> + /* Copy the first entry of the device's MAC address array to the
> + * caller's buffer. Returns 0 on success, -1 on a NULL argument. */
> + ether_addr_copy(eth_dev->data->mac_addrs, dst_mac_addr);
> + return 0;
> +}
> +
> +int
> mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
> {
> struct ether_addr *mac_addr;
> @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
> mac_addr = eth_dev->data->mac_addrs;
>
> if (eth_dev == NULL) {
> - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
> + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
> return -1;
> }
>
> @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
> }
> }
> break;
> + case BONDING_MODE_8023AD:
> + break;
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> for (i = 0; i < internals->slave_count; i++) {
> @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
> eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
> eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
> break;
> + case BONDING_MODE_8023AD:
> + if (bond_mode_8023ad_init(eth_dev) != 0)
> + return -1;
> +
> + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
> + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
> + break;
> default:
> return -1;
> }
> @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
> if (internals->user_defined_primary_port)
> bond_ethdev_primary_set(internals, internals->primary_port);
>
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_start(eth_dev);
>
> if (internals->link_status_polling_enabled)
> rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
> @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
> {
> struct bond_dev_private *internals = eth_dev->data->dev_private;
>
> + if (internals->mode == BONDING_MODE_8023AD) {
> + struct mode8023ad_data *data = &internals->mode4;
> + struct slow_protocol_msg *msg;
> +
> + bond_mode_8023ad_stop(eth_dev);
> + data->distibuting_slaves_count = 0;
> +
> + /* Discard all messages to/from mode 4 state machines */
> + while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) {
> + rte_pktmbuf_free(msg->pkt);
> + rte_ring_enqueue(data->free_ring, msg);
> + }
> +
> + while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) {
> + rte_pktmbuf_free(msg->pkt);
> + rte_ring_enqueue(data->free_ring, msg);
> + }
> + }
> +
> internals->active_slave_count = 0;
> internals->link_status_polling_enabled = 0;
>
> @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> 0, dev->pci_dev->numa_node);
>
> if (bd_tx_q == NULL)
> - return -1;
> + return -1;
>
> bd_tx_q->queue_id = tx_queue_id;
> bd_tx_q->dev_private = dev->data->dev_private;
> @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue)
> rte_free(queue);
> }
>
> -
> static void
> bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
> {
> @@ -884,7 +1105,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
>
> /* If device is currently being configured then don't check slaves link
> * status, wait until next period */
> - if (rte_spinlock_trylock(&internals->lock)){
> + if (rte_spinlock_trylock(&internals->lock)) {
> for (i = 0; i < internals->slave_count; i++) {
> if (internals->slaves[i].link_status_polling_enabled) {
> slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
> @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
> for (i = 0; i < internals->slave_count; i++)
> rte_eth_promiscuous_enable(internals->slaves[i].port_id);
> break;
> + /* In mode4 promiscuous mode is managed when slave is added/removed */
> + case BONDING_MODE_8023AD:
> + break;
> /* Promiscuous mode is propagated only to primary slave */
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> rte_eth_promiscuous_enable(internals->current_primary_port);
> -
> }
> }
>
> @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
> int i;
>
> internals->promiscuous_en = 0;
> -
> +
> switch (internals->mode) {
> /* Promiscuous mode is propagated to all slaves */
> case BONDING_MODE_ROUND_ROBIN:
> @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
> for (i = 0; i < internals->slave_count; i++)
> rte_eth_promiscuous_disable(internals->slaves[i].port_id);
> break;
> + /* In mode4 promiscuous mode is managed when slave is added/removed */
> + case BONDING_MODE_8023AD:
> + break;
> /* Promiscuous mode is propagated only to primary slave */
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
> struct bond_dev_private *internals;
> struct rte_eth_link link;
>
> - int i, valid_slave = 0, active_pos = -1;
> + int i, valid_slave = 0;
> + uint8_t active_pos;
> uint8_t lsc_flag = 0;
>
> if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
> @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
> return;
>
> /* Search for port in active port list */
> - for (i = 0; i < internals->active_slave_count; i++) {
> - if (port_id == internals->active_slaves[i]) {
> - active_pos = i;
> - break;
> - }
> - }
> + active_pos = find_slave_by_id(internals->active_slaves,
> + internals->active_slave_count, port_id);
>
> rte_eth_link_get_nowait(port_id, &link);
> if (link.link_status) {
> - if (active_pos >= 0)
> + if (active_pos < internals->active_slave_count)
> return;
>
> /* if no active slave ports then set this port to be primary port */
> @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
> link_properties_set(bonded_eth_dev,
> &(slave_eth_dev->data->dev_link));
> }
> - internals->active_slaves[internals->active_slave_count++] = port_id;
> +
> + activate_slave(bonded_eth_dev, port_id);
>
> /* If user has defined the primary port then default to using it */
> if (internals->user_defined_primary_port &&
> internals->primary_port == port_id)
> bond_ethdev_primary_set(internals, port_id);
> } else {
> - if (active_pos < 0)
> + if (active_pos == internals->active_slave_count)
> return;
>
> /* Remove from active slave list */
> - for (i = active_pos; i < (internals->active_slave_count - 1); i++)
> - internals->active_slaves[i] = internals->active_slaves[i+1];
> -
> - internals->active_slave_count--;
> + deactivate_slave(bonded_eth_dev, active_pos);
>
> /* No active slaves, change link status to down and reset other
> * link properties */
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
> index 6db5144..77f7bb0 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_private.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
> @@ -42,6 +42,7 @@ extern "C" {
> #include <rte_spinlock.h>
>
> #include "rte_eth_bond.h"
> +#include "rte_eth_bond_8023ad.h"
>
> #define PMD_BOND_SLAVE_PORT_KVARG ("slave")
> #define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
> @@ -60,6 +61,8 @@ extern "C" {
> #define RTE_BOND_LOG(lvl, msg, ...) \
> RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__);
>
> +#define BONDING_MODE_INVALID 0xFF
> +
> extern const char *pmd_bond_init_valid_arguments[];
>
> extern const char *driver_name;
> @@ -89,7 +92,13 @@ struct bond_tx_queue {
> /**< Copy of TX configuration structure for queue */
> };
>
> -
> +/** Persisted Slave Configuration Structure */
> +struct slave_conf {
> + uint8_t port_id;
> + /**< Port Id of slave eth_dev */
> + struct ether_addr mac_addr;
> + /**< Slave eth_dev original MAC address */
> +};
> /** Bonded slave devices structure */
> struct bond_ethdev_slave_ports {
> uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
> @@ -124,7 +133,7 @@ struct bond_dev_private {
> uint8_t user_defined_mac;
> /**< Flag for whether MAC address is user defined or not */
> uint8_t promiscuous_en;
> - /**< Enabled/disable promiscuous mode on slave devices */
> + /**< Enabled/disable promiscuous mode on bonding device */
> uint8_t link_props_set;
> /**< flag to denote if the link properties are set */
>
> @@ -143,6 +152,9 @@ struct bond_dev_private {
> uint8_t slave_count; /**< Number of bonded slaves */
> struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
> /**< Arary of bonded slaves details */
> +
> + struct mode8023ad_data mode4;
> + /**< Mode 4 private data */
> };
>
> extern struct eth_dev_ops default_dev_ops;
> @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops;
> int
> valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
>
> +/* Search given slave array to find position of given id.
> + * Return slave pos or slaves_count if not found. */
> +static inline uint8_t
> +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
> + uint8_t slave_id ) {
> +
> + uint8_t pos;
> + for (pos = 0; pos < slaves_count; pos++) {
> + if (slave_id == slaves[pos])
> + break;
> + }
> +
> + return pos;
> +}
> +
> int
> valid_port_id(uint8_t port_id);
>
> @@ -160,6 +187,14 @@ int
> valid_slave_port_id(uint8_t port_id);
>
> void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t slave_pos );
> +
> +void
> +activate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t port_id );
> +
> +void
> link_properties_set(struct rte_eth_dev *bonded_eth_dev,
> struct rte_eth_link *slave_dev_link);
> void
> @@ -173,6 +208,9 @@ int
> mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr);
>
> int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr);
> +
> +int
> mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
>
> uint8_t
> --
> 1.7.9.5
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-09-30 19:00 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-29 13:22 [dpdk-dev] [PATCH v2] bond: Add mode 4 support Pawel Wodkowski
2014-09-29 13:51 ` Jastrzebski, MichalX K
2014-09-30 11:17 ` Wodkowski, PawelX
2014-09-30 6:19 ` Pawel Wodkowski
2014-09-30 19:06 ` Neil Horman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).