This patch adds support mode 4 of link bonding. It depend on Delcan Doherty patches v3 and rte alarms patch v2 or above. New version handles race issues with setting/cancelin callbacks, fixes promiscus mode setting in mode 4 and some other minor errors in mode 4 implementation. Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com> --- lib/librte_ether/rte_ether.h | 1 + lib/librte_pmd_bond/Makefile | 1 + lib/librte_pmd_bond/rte_eth_bond.h | 4 + lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++++++--- lib/librte_pmd_bond/rte_eth_bond_args.c | 1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 +++++++++++++++++++++++++--- lib/librte_pmd_bond/rte_eth_bond_private.h | 42 ++++- 7 files changed, 346 insertions(+), 46 deletions(-) diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h index 2e08f23..1a3711b 100644 --- a/lib/librte_ether/rte_ether.h +++ b/lib/librte_ether/rte_ether.h @@ -293,6 +293,7 @@ struct vlan_hdr { #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #ifdef __cplusplus } diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile index 953d75e..c2312c2 100644 --- a/lib/librte_pmd_bond/Makefile +++ b/lib/librte_pmd_bond/Makefile @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS) # SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c # diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h index 6811c7b..b0223c2 100644 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ b/lib/librte_pmd_bond/rte_eth_bond.h @@ -75,6 +75,10 @@ extern "C" { /**< Broadcast (Mode 3). * In this mode all transmitted packets will be transmitted on all available * active slaves of the bonded. */ +#define BONDING_MODE_8023AD (4) +/**< 802.3AD (Mode 4). + * In this mode transmission and reception of packets is managed by LACP + * protocol specified in 802.3AD documentation. */ /* Balance Mode Transmit Policies */ #define BALANCE_XMIT_POLICY_LAYER2 (0) diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c index c690ceb..c547164 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_api.c +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c @@ -31,6 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <string.h> + #include <rte_mbuf.h> #include <rte_malloc.h> #include <rte_ethdev.h> @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id) return 0; } +void +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + internals->active_slaves[active_count] = port_id; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_slave_append(eth_dev); + + internals->active_slave_count = active_count + 1; +} + +void +deactivate_slave(struct rte_eth_dev *eth_dev, + uint8_t slave_pos) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos); + + active_count--; + + /* If slave was not at the end of the list + * shift active slaves up active array list */ + if (slave_pos < active_count) { + memmove(internals->active_slaves + slave_pos, + internals->active_slaves + slave_pos + 1, + (active_count - slave_pos) * + sizeof(internals->active_slaves[0])); + } + + internals->active_slave_count = active_count; +} + uint8_t number_of_sockets(void) { @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) eth_dev->dev_ops = &default_dev_ops; eth_dev->pci_dev = pci_dev; - if (bond_ethdev_mode_set(eth_dev, mode)) { - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", - eth_dev->data->port_id, mode); - goto err; - } - + internals->port_id = eth_dev->data->port_id; + internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = 0; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; internals->user_defined_mac = 0; @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); memset(internals->slaves, 0, sizeof(internals->slaves)); + if (bond_ethdev_mode_set(eth_dev, mode)) { + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", + eth_dev->data->port_id, mode); + goto err; + } + return eth_dev->data->port_id; err: @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) rte_eth_link_get_nowait(slave_port_id, &link_props); if (link_props.link_status == 1) - internals->active_slaves[internals->active_slave_count++] = - slave_port_id; + activate_slave(bonded_eth_dev, slave_port_id); } return 0; } - int rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) { @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) return retval; } - static int __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) { + struct rte_eth_dev *bonded_eth_dev; struct bond_dev_private *internals; - int i, slave_idx = -1; + int i, slave_idx; if (valid_slave_port_id(slave_port_id) != 0) return -1; - internals = rte_eth_devices[bonded_port_id].data->dev_private; + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; /* first remove from active slave list */ - for (i = 0; i < internals->active_slave_count; i++) { - if (internals->active_slaves[i] == slave_port_id) - slave_idx = i; + slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count, + slave_port_id); - /* shift active slaves up active array list */ - if (slave_idx >= 0 && i < (internals->active_slave_count - 1)) - internals->active_slaves[i] = internals->active_slaves[i+1]; - } - - if (slave_idx >= 0) - internals->active_slave_count--; + if (slave_idx < internals->active_slave_count) + deactivate_slave(bonded_eth_dev, slave_idx); slave_idx = -1; /* now find in slave list */ @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id) return internals->current_primary_port; } + int rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len) { @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id) return internals->balance_xmit_policy; } - int rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms) { @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id) return internals->link_down_delay_ms; } - int rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c index bbbc69b..a0be0e6 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_args.c +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, case BONDING_MODE_ACTIVE_BACKUP: case BONDING_MODE_BALANCE: case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: return 0; default: RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c index 6d0fb1b..13630d9 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c @@ -44,6 +44,7 @@ #include "rte_eth_bond.h" #include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad.h" static uint16_t bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue, bufs, nb_pkts); } +static uint16_t +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + struct bond_dev_private *internals = bd_rx_q->dev_private; + struct mode8023ad_data *mode4 = &internals->mode4; + struct ether_addr bond_mac; + + struct ether_hdr *hdr; + struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow packet */ + + uint16_t num_rx_slave = 0; /* Number of packet received on current slave */ + uint16_t num_rx_total = 0; /* Total number of received packets */ + + uint8_t i, j; + + rte_eth_macaddr_get(internals->port_id, &bond_mac); + + for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) { + /* Read packets from this slave */ + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i], + bd_rx_q->queue_id, pkts, nb_pkts + 1 - num_rx_total); + + /* Separate slow protocol packets from other packets */ + for (j = 0; j < num_rx_slave; j++) { + hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *); + + uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type); + if (unlikely(ether_type == ETHER_TYPE_SLOW)) { + bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]); + continue; + } + + /* Check if we can receive this packet. Also filter packets if + * bonding interface is not in promiscuous mode (slaves are always + * in promiscuous mode). */ + if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) && + likely(internals->promiscuous_en || + is_same_ether_addr(&bond_mac, &hdr->d_addr))) { + bufs[num_rx_total++] = pkts[j]; + } else + rte_pktmbuf_free(pkts[j]); + } + } + + return num_rx_total; +} + static inline uint16_t ether_hash(struct ether_hdr *eth_hdr) { @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, } static uint16_t +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct mode8023ad_data *mode4; + struct bond_tx_queue *bd_tx_q; + + uint8_t num_of_slaves; + uint8_t slaves[RTE_MAX_ETHPORTS]; + /* possitions in slaves, not ID */ + uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; + uint8_t distributing_slaves_count; + + uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0; + uint16_t i, op_slave_idx; + + /* Slow packets from 802.3AX state machines. */ + struct slow_protocol_msg *slow_msg; + + /* Allocate one additional packet in case 8023AD mode. + * First element if not NULL is slow packet. */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1]; + /* Total amount of packets in slave_bufs */ + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + /* Array of slow packets placed in each slave */ + uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 }; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + mode4 = &internals->mode4; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + if (num_of_slaves < 1) + return num_tx_total; + + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); + + distributing_slaves_count = mode4->distibuting_slaves_count; + memcpy(distributing_offsets, mode4->distibuting_slaves_offsets, + sizeof(slaves[0]) * distributing_slaves_count); + + for (i = 0; i < num_of_slaves; i++) + slave_bufs[i][0] = NULL; + + /* It is likely that tx ring will be empty. If it is not empty, it is + * likely that there will be only one frame. */ + while (unlikely(!rte_ring_empty(mode4->tx_ring)) && + rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) { + i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id); + + /* Assign slow packet to slave or drop it if slave is not in active list + * (ex: link down). */ + if (likely(i < num_of_slaves)) { + /* If there is more than one slow packet to the same slave, send + * only latest, and drop previouse - tx burst was no called quick + * enough. */ + if (slave_bufs[i][0] != NULL) + rte_pktmbuf_free(slave_bufs[i][0]); + + slave_bufs[i][0] = slow_msg->pkt; + slave_nb_pkts[i] = 1; + slave_slow_packets[i] = 1; + } else + rte_pktmbuf_free(slow_msg->pkt); + + rte_ring_enqueue(mode4->free_ring, slow_msg); + } + + if (likely(distributing_slaves_count > 0)) { + /* Populate slaves mbuf with the packets which are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { + /* Select output slave using hash based on xmit policy */ + op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count, + internals->balance_xmit_policy); + + /* Populate slave mbuf arrays with mbufs for that slave. Use only + * slaves that are currently distributing. */ + uint8_t slave_offset = distributing_offsets[op_slave_idx]; + uint16_t pkt_pos = slave_nb_pkts[slave_offset]; + slave_nb_pkts[slave_offset]++; + + slave_bufs[slave_offset][pkt_pos] = bufs[i]; + } + } + + /* Send packet burst on each slave device */ + for (i = 0; i < num_of_slaves; i++) { + if (slave_nb_pkts[i] > 0) { + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + slave_bufs[i], slave_nb_pkts[i]); + + /* if tx burst fails move packets to end of bufs */ + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { + uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; + + /* Free slow packet if it exists and not send. */ + if (slave_slow_packets[i] != 0 && num_tx_slave == 0) { + rte_pktmbuf_free(slave_bufs[i][0]); + slave_tx_fail_count--; + } + + tx_fail_total += slave_tx_fail_count; + memcpy(bufs[nb_pkts - tx_fail_total], + slave_bufs[i][num_tx_slave], + slave_tx_fail_count); + } + + if (num_tx_slave > 0) + num_tx_slave -= slave_slow_packets[i]; + + num_tx_total += num_tx_slave; + } + } + + return num_tx_total; +} + +static uint16_t bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link, } int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) +{ + struct ether_addr *mac_addr; + + mac_addr = eth_dev->data->mac_addrs; + + if (eth_dev == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); + return -1; + } + + if (dst_mac_addr == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); + return -1; + } + + ether_addr_copy(mac_addr, dst_mac_addr); + return 0; +} + +int mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) { struct ether_addr *mac_addr; @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) mac_addr = eth_dev->data->mac_addrs; if (eth_dev == NULL) { - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); return -1; } @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) } } break; + case BONDING_MODE_8023AD: + break; case BONDING_MODE_ACTIVE_BACKUP: default: for (i = 0; i < internals->slave_count; i++) { @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast; eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; break; + case BONDING_MODE_8023AD: + if (bond_mode_8023ad_init(eth_dev) != 0) + return -1; + + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; + break; default: return -1; } @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) if (internals->user_defined_primary_port) bond_ethdev_primary_set(internals, internals->primary_port); + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_start(eth_dev); if (internals->link_status_polling_enabled) rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) { struct bond_dev_private *internals = eth_dev->data->dev_private; + if (internals->mode == BONDING_MODE_8023AD) { + struct mode8023ad_data *data = &internals->mode4; + struct slow_protocol_msg *msg; + + bond_mode_8023ad_stop(eth_dev); + data->distibuting_slaves_count = 0; + + /* Discard all messages to/from mode 4 state machines */ + while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) { + rte_pktmbuf_free(msg->pkt); + rte_ring_enqueue(data->free_ring, msg); + } + + while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) { + rte_pktmbuf_free(msg->pkt); + rte_ring_enqueue(data->free_ring, msg); + } + } + internals->active_slave_count = 0; internals->link_status_polling_enabled = 0; @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 0, dev->pci_dev->numa_node); if (bd_tx_q == NULL) - return -1; + return -1; bd_tx_q->queue_id = tx_queue_id; bd_tx_q->dev_private = dev->data->dev_private; @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue) rte_free(queue); } - static void bond_ethdev_slave_link_status_change_monitor(void *cb_arg) { @@ -884,7 +1105,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg) /* If device is currently being configured then don't check slaves link * status, wait until next period */ - if (rte_spinlock_trylock(&internals->lock)){ + if (rte_spinlock_trylock(&internals->lock)) { for (i = 0; i < internals->slave_count; i++) { if (internals->slaves[i].link_status_polling_enabled) { slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) for (i = 0; i < internals->slave_count; i++) rte_eth_promiscuous_enable(internals->slaves[i].port_id); break; + /* In mode4 promiscus mode is managed when slave is added/removed */ + case BONDING_MODE_8023AD: + break; /* Promiscuous mode is propagated only to primary slave */ case BONDING_MODE_ACTIVE_BACKUP: default: rte_eth_promiscuous_enable(internals->current_primary_port); - } } @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) int i; internals->promiscuous_en = 0; - + switch (internals->mode) { /* Promiscuous mode is propagated to all slaves */ case BONDING_MODE_ROUND_ROBIN: @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) for (i = 0; i < internals->slave_count; i++) rte_eth_promiscuous_disable(internals->slaves[i].port_id); break; + /* In mode4 promiscus mode is set managed when slave is added/removed */ + case BONDING_MODE_8023AD: + break; /* Promiscuous mode is propagated only to primary slave */ case BONDING_MODE_ACTIVE_BACKUP: default: @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, struct bond_dev_private *internals; struct rte_eth_link link; - int i, valid_slave = 0, active_pos = -1; + int i, valid_slave = 0; + uint8_t active_pos; uint8_t lsc_flag = 0; if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, return; /* Search for port in active port list */ - for (i = 0; i < internals->active_slave_count; i++) { - if (port_id == internals->active_slaves[i]) { - active_pos = i; - break; - } - } + active_pos = find_slave_by_id(internals->active_slaves, + internals->active_slave_count, port_id); rte_eth_link_get_nowait(port_id, &link); if (link.link_status) { - if (active_pos >= 0) + if (active_pos < internals->active_slave_count) return; /* if no active slave ports then set this port to be primary port */ @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, link_properties_set(bonded_eth_dev, &(slave_eth_dev->data->dev_link)); } - internals->active_slaves[internals->active_slave_count++] = port_id; + + activate_slave(bonded_eth_dev, port_id); /* If user has defined the primary port then default to using it */ if (internals->user_defined_primary_port && internals->primary_port == port_id) bond_ethdev_primary_set(internals, port_id); } else { - if (active_pos < 0) + if (active_pos == internals->active_slave_count) return; /* Remove from active slave list */ - for (i = active_pos; i < (internals->active_slave_count - 1); i++) - internals->active_slaves[i] = internals->active_slaves[i+1]; - - internals->active_slave_count--; + deactivate_slave(bonded_eth_dev, active_pos); /* No active slaves, change link status to down and reset other * link properties */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h index 6db5144..77f7bb0 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_private.h +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h @@ -42,6 +42,7 @@ extern "C" { #include <rte_spinlock.h> #include "rte_eth_bond.h" +#include "rte_eth_bond_8023ad.h" #define PMD_BOND_SLAVE_PORT_KVARG ("slave") #define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary") @@ -60,6 +61,8 @@ extern "C" { #define RTE_BOND_LOG(lvl, msg, ...) \ RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__); +#define BONDING_MODE_INVALID 0xFF + extern const char *pmd_bond_init_valid_arguments[]; extern const char *driver_name; @@ -89,7 +92,13 @@ struct bond_tx_queue { /**< Copy of TX configuration structure for queue */ }; - +/** Persisted Slave Configuration Structure */ +struct slave_conf { + uint8_t port_id; + /**< Port Id of slave eth_dev */ + struct ether_addr mac_addr; + /**< Slave eth_dev original MAC address */ +}; /** Bonded slave devices structure */ struct bond_ethdev_slave_ports { uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */ @@ -124,7 +133,7 @@ struct bond_dev_private { uint8_t user_defined_mac; /**< Flag for whether MAC address is user defined or not */ uint8_t promiscuous_en; - /**< Enabled/disable promiscuous mode on slave devices */ + /**< Enabled/disable promiscuous mode on bonding device */ uint8_t link_props_set; /**< flag to denote if the link properties are set */ @@ -143,6 +152,9 @@ struct bond_dev_private { uint8_t slave_count; /**< Number of bonded slaves */ struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; /**< Arary of bonded slaves details */ + + struct mode8023ad_data mode4; + /**< Mode 4 private data */ }; extern struct eth_dev_ops default_dev_ops; @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops; int valid_bonded_ethdev(struct rte_eth_dev *eth_dev); +/* Search given slave array to find possition of given id. + * Return slave pos or slaves_count if not found. */ +static inline uint8_t +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, + uint8_t slave_id ) { + + uint8_t pos; + for (pos = 0; pos < slaves_count; pos++) { + if (slave_id == slaves[pos]) + break; + } + + return pos; +} + int valid_port_id(uint8_t port_id); @@ -160,6 +187,14 @@ int valid_slave_port_id(uint8_t port_id); void +deactivate_slave(struct rte_eth_dev *eth_dev, + uint8_t slave_pos ); + +void +activate_slave(struct rte_eth_dev *eth_dev, + uint8_t port_id ); + +void link_properties_set(struct rte_eth_dev *bonded_eth_dev, struct rte_eth_link *slave_dev_link); void @@ -173,6 +208,9 @@ int mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr); int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr); + +int mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev); uint8_t -- 1.7.9.5
Please don't take this patch into account. Two files are missing.
Best regards
Michal
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Pawel Wodkowski
> Sent: Monday, September 29, 2014 3:23 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] bond: Add mode 4 support.
>
> This patch adds support mode 4 of link bonding. It depend on Delcan
> Doherty
> patches v3 and rte alarms patch v2 or above.
>
> New version handles race issues with setting/cancelin callbacks,
> fixes promiscus mode setting in mode 4 and some other minor errors in
> mode 4
> implementation.
>
>
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> ---
> lib/librte_ether/rte_ether.h | 1 +
> lib/librte_pmd_bond/Makefile | 1 +
> lib/librte_pmd_bond/rte_eth_bond.h | 4 +
> lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++++++---
> lib/librte_pmd_bond/rte_eth_bond_args.c | 1 +
> lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261
> +++++++++++++++++++++++++---
> lib/librte_pmd_bond/rte_eth_bond_private.h | 42 ++++-
> 7 files changed, 346 insertions(+), 46 deletions(-)
>
> diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
> index 2e08f23..1a3711b 100644
> --- a/lib/librte_ether/rte_ether.h
> +++ b/lib/librte_ether/rte_ether.h
> @@ -293,6 +293,7 @@ struct vlan_hdr {
> #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
> #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
> #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time
> Protocol. */
> +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker).
> */
>
> #ifdef __cplusplus
> }
> diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile
> index 953d75e..c2312c2 100644
> --- a/lib/librte_pmd_bond/Makefile
> +++ b/lib/librte_pmd_bond/Makefile
> @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
> #
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
> SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
>
> #
> diff --git a/lib/librte_pmd_bond/rte_eth_bond.h
> b/lib/librte_pmd_bond/rte_eth_bond.h
> index 6811c7b..b0223c2 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond.h
> @@ -75,6 +75,10 @@ extern "C" {
> /**< Broadcast (Mode 3).
> * In this mode all transmitted packets will be transmitted on all available
> * active slaves of the bonded. */
> +#define BONDING_MODE_8023AD (4)
> +/**< 802.3AD (Mode 4).
> + * In this mode transmission and reception of packets is managed by LACP
> + * protocol specified in 802.3AD documentation. */
>
> /* Balance Mode Transmit Policies */
> #define BALANCE_XMIT_POLICY_LAYER2 (0)
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c
> b/lib/librte_pmd_bond/rte_eth_bond_api.c
> index c690ceb..c547164 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_api.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
> @@ -31,6 +31,8 @@
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> */
>
> +#include <string.h>
> +
> #include <rte_mbuf.h>
> #include <rte_malloc.h>
> #include <rte_ethdev.h>
> @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id)
> return 0;
> }
>
> +void
> +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
> +{
> + struct bond_dev_private *internals = eth_dev->data->dev_private;
> + uint8_t active_count = internals->active_slave_count;
> +
> + internals->active_slaves[active_count] = port_id;
> +
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_slave_append(eth_dev);
> +
> + internals->active_slave_count = active_count + 1;
> +}
> +
> +void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t slave_pos)
> +{
> + struct bond_dev_private *internals = eth_dev->data->dev_private;
> + uint8_t active_count = internals->active_slave_count;
> +
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos);
> +
> + active_count--;
> +
> + /* If slave was not at the end of the list
> + * shift active slaves up active array list */
> + if (slave_pos < active_count) {
> + memmove(internals->active_slaves + slave_pos,
> + internals->active_slaves + slave_pos + 1,
> + (active_count - slave_pos) *
> + sizeof(internals->active_slaves[0]));
> + }
> +
> + internals->active_slave_count = active_count;
> +}
> +
> uint8_t
> number_of_sockets(void)
> {
> @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t
> mode, uint8_t socket_id)
> eth_dev->dev_ops = &default_dev_ops;
> eth_dev->pci_dev = pci_dev;
>
> - if (bond_ethdev_mode_set(eth_dev, mode)) {
> - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode
> too %d",
> - eth_dev->data->port_id, mode);
> - goto err;
> - }
> -
> + internals->port_id = eth_dev->data->port_id;
> + internals->mode = BONDING_MODE_INVALID;
> internals->current_primary_port = 0;
> internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
> internals->user_defined_mac = 0;
> @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t
> mode, uint8_t socket_id)
> memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
> memset(internals->slaves, 0, sizeof(internals->slaves));
>
> + if (bond_ethdev_mode_set(eth_dev, mode)) {
> + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode
> too %d",
> + eth_dev->data->port_id, mode);
> + goto err;
> + }
> +
> return eth_dev->data->port_id;
>
> err:
> @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t
> bonded_port_id, uint8_t slave_port_id)
> rte_eth_link_get_nowait(slave_port_id, &link_props);
>
> if (link_props.link_status == 1)
> - internals->active_slaves[internals-
> >active_slave_count++] =
> - slave_port_id;
> + activate_slave(bonded_eth_dev, slave_port_id);
> }
> return 0;
>
> }
>
> -
> int
> rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
> {
> @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id,
> uint8_t slave_port_id)
> return retval;
> }
>
> -
> static int
> __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t
> slave_port_id)
> {
> + struct rte_eth_dev *bonded_eth_dev;
> struct bond_dev_private *internals;
>
> - int i, slave_idx = -1;
> + int i, slave_idx;
>
> if (valid_slave_port_id(slave_port_id) != 0)
> return -1;
>
> - internals = rte_eth_devices[bonded_port_id].data->dev_private;
> + bonded_eth_dev = &rte_eth_devices[bonded_port_id];
> + internals = bonded_eth_dev->data->dev_private;
>
> /* first remove from active slave list */
> - for (i = 0; i < internals->active_slave_count; i++) {
> - if (internals->active_slaves[i] == slave_port_id)
> - slave_idx = i;
> + slave_idx = find_slave_by_id(internals->active_slaves, internals-
> >active_slave_count,
> + slave_port_id);
>
> - /* shift active slaves up active array list */
> - if (slave_idx >= 0 && i < (internals->active_slave_count - 1))
> - internals->active_slaves[i] = internals-
> >active_slaves[i+1];
> - }
> -
> - if (slave_idx >= 0)
> - internals->active_slave_count--;
> + if (slave_idx < internals->active_slave_count)
> + deactivate_slave(bonded_eth_dev, slave_idx);
>
> slave_idx = -1;
> /* now find in slave list */
> @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
>
> return internals->current_primary_port;
> }
> +
> int
> rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t
> len)
> {
> @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t
> bonded_port_id)
> return internals->balance_xmit_policy;
> }
>
> -
> int
> rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t
> internal_ms)
> {
> @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t
> bonded_port_id)
> return internals->link_down_delay_ms;
> }
>
> -
> int
> rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t
> delay_ms)
>
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c
> b/lib/librte_pmd_bond/rte_eth_bond_args.c
> index bbbc69b..a0be0e6 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_args.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
> @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char
> *key __rte_unused,
> case BONDING_MODE_ACTIVE_BACKUP:
> case BONDING_MODE_BALANCE:
> case BONDING_MODE_BROADCAST:
> + case BONDING_MODE_8023AD:
> return 0;
> default:
> RTE_BOND_LOG(ERR, "Invalid slave mode value (%s)
> specified", value);
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> index 6d0fb1b..13630d9 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
> @@ -44,6 +44,7 @@
>
> #include "rte_eth_bond.h"
> #include "rte_eth_bond_private.h"
> +#include "rte_eth_bond_8023ad.h"
>
> static uint16_t
> bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t
> nb_pkts)
> @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue,
> bufs, nb_pkts);
> }
>
> +static uint16_t
> +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> + uint16_t nb_pkts)
> +{
> + /* Cast to structure, containing bonded device's port id and queue id
> */
> + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
> + struct bond_dev_private *internals = bd_rx_q->dev_private;
> + struct mode8023ad_data *mode4 = &internals->mode4;
> + struct ether_addr bond_mac;
> +
> + struct ether_hdr *hdr;
> + struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow
> packet */
> +
> + uint16_t num_rx_slave = 0; /* Number of packet received on
> current slave */
> + uint16_t num_rx_total = 0; /* Total number of received packets
> */
> +
> + uint8_t i, j;
> +
> + rte_eth_macaddr_get(internals->port_id, &bond_mac);
> +
> + for (i = 0; i < internals->active_slave_count && num_rx_total <
> nb_pkts; i++) {
> + /* Read packets from this slave */
> + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
> + bd_rx_q->queue_id, pkts, nb_pkts + 1 -
> num_rx_total);
> +
> + /* Separate slow protocol packets from other packets */
> + for (j = 0; j < num_rx_slave; j++) {
> + hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *);
> +
> + uint16_t ether_type = rte_be_to_cpu_16(hdr-
> >ether_type);
> + if (unlikely(ether_type == ETHER_TYPE_SLOW)) {
> +
> bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]);
> + continue;
> + }
> +
> + /* Check if we can receive this packet. Also filter
> packets if
> + * bonding interface is not in promiscuous mode
> (slaves are always
> + * in promiscuous mode). */
> + if (likely(ACTOR_STATE(&mode4->port_list[i],
> COLLECTING)) &&
> + likely(internals->promiscuous_en ||
> + is_same_ether_addr(&bond_mac,
> &hdr->d_addr))) {
> + bufs[num_rx_total++] = pkts[j];
> + } else
> + rte_pktmbuf_free(pkts[j]);
> + }
> + }
> +
> + return num_rx_total;
> +}
> +
> static inline uint16_t
> ether_hash(struct ether_hdr *eth_hdr)
> {
> @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue,
> struct rte_mbuf **bufs,
> }
>
> static uint16_t
> +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
> + uint16_t nb_pkts)
> +{
> + struct bond_dev_private *internals;
> + struct mode8023ad_data *mode4;
> + struct bond_tx_queue *bd_tx_q;
> +
> + uint8_t num_of_slaves;
> + uint8_t slaves[RTE_MAX_ETHPORTS];
> + /* possitions in slaves, not ID */
> + uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
> + uint8_t distributing_slaves_count;
> +
> + uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0;
> + uint16_t i, op_slave_idx;
> +
> + /* Slow packets from 802.3AX state machines. */
> + struct slow_protocol_msg *slow_msg;
> +
> + /* Allocate one additional packet in case 8023AD mode.
> + * First element if not NULL is slow packet. */
> + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1];
> + /* Total amount of packets in slave_bufs */
> + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
> + /* Array of slow packets placed in each slave */
> + uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 };
> +
> + bd_tx_q = (struct bond_tx_queue *)queue;
> + internals = bd_tx_q->dev_private;
> + mode4 = &internals->mode4;
> +
> + /* Copy slave list to protect against slave up/down changes during tx
> + * bursting */
> + num_of_slaves = internals->active_slave_count;
> + if (num_of_slaves < 1)
> + return num_tx_total;
> +
> + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
> num_of_slaves);
> +
> + distributing_slaves_count = mode4->distibuting_slaves_count;
> + memcpy(distributing_offsets, mode4->distibuting_slaves_offsets,
> + sizeof(slaves[0]) * distributing_slaves_count);
> +
> + for (i = 0; i < num_of_slaves; i++)
> + slave_bufs[i][0] = NULL;
> +
> + /* It is likely that tx ring will be empty. If it is not empty, it is
> + * likely that there will be only one frame. */
> + while (unlikely(!rte_ring_empty(mode4->tx_ring)) &&
> + rte_ring_dequeue(mode4->tx_ring, (void
> **)&slow_msg) != -ENOENT) {
> + i = find_slave_by_id(slaves, num_of_slaves, slow_msg-
> >port_id);
> +
> + /* Assign slow packet to slave or drop it if slave is not in
> active list
> + * (ex: link down). */
> + if (likely(i < num_of_slaves)) {
> + /* If there is more than one slow packet to the same
> slave, send
> + * only latest, and drop previouse - tx burst was no
> called quick
> + * enough. */
> + if (slave_bufs[i][0] != NULL)
> + rte_pktmbuf_free(slave_bufs[i][0]);
> +
> + slave_bufs[i][0] = slow_msg->pkt;
> + slave_nb_pkts[i] = 1;
> + slave_slow_packets[i] = 1;
> + } else
> + rte_pktmbuf_free(slow_msg->pkt);
> +
> + rte_ring_enqueue(mode4->free_ring, slow_msg);
> + }
> +
> + if (likely(distributing_slaves_count > 0)) {
> + /* Populate slaves mbuf with the packets which are to be
> sent on it */
> + for (i = 0; i < nb_pkts; i++) {
> + /* Select output slave using hash based on xmit
> policy */
> + op_slave_idx = xmit_slave_hash(bufs[i],
> distributing_slaves_count,
> + internals->balance_xmit_policy);
> +
> + /* Populate slave mbuf arrays with mbufs for that
> slave. Use only
> + * slaves that are currently distributing. */
> + uint8_t slave_offset =
> distributing_offsets[op_slave_idx];
> + uint16_t pkt_pos = slave_nb_pkts[slave_offset];
> + slave_nb_pkts[slave_offset]++;
> +
> + slave_bufs[slave_offset][pkt_pos] = bufs[i];
> + }
> + }
> +
> + /* Send packet burst on each slave device */
> + for (i = 0; i < num_of_slaves; i++) {
> + if (slave_nb_pkts[i] > 0) {
> + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q-
> >queue_id,
> + slave_bufs[i], slave_nb_pkts[i]);
> +
> + /* if tx burst fails move packets to end of bufs */
> + if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
> + uint16_t slave_tx_fail_count =
> slave_nb_pkts[i] - num_tx_slave;
> +
> + /* Free slow packet if it exists and not send.
> */
> + if (slave_slow_packets[i] != 0 &&
> num_tx_slave == 0) {
> + rte_pktmbuf_free(slave_bufs[i][0]);
> + slave_tx_fail_count--;
> + }
> +
> + tx_fail_total += slave_tx_fail_count;
> + memcpy(bufs[nb_pkts - tx_fail_total],
> + slave_bufs[i][num_tx_slave],
> + slave_tx_fail_count);
> + }
> +
> + if (num_tx_slave > 0)
> + num_tx_slave -= slave_slow_packets[i];
> +
> + num_tx_total += num_tx_slave;
> + }
> + }
> +
> + return num_tx_total;
> +}
> +
> +static uint16_t
> bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
> {
> @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link
> *bonded_dev_link,
> }
>
> int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr
> *dst_mac_addr)
> +{
> + struct ether_addr *mac_addr;
> +
> + mac_addr = eth_dev->data->mac_addrs;
> +
> + if (eth_dev == NULL) {
> + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n",
> __func__);
> + return -1;
> + }
> +
> + if (dst_mac_addr == NULL) {
> + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n",
> __func__);
> + return -1;
> + }
> +
> + ether_addr_copy(mac_addr, dst_mac_addr);
> + return 0;
> +}
> +
> +int
> mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr
> *new_mac_addr)
> {
> struct ether_addr *mac_addr;
> @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct
> ether_addr *new_mac_addr)
> mac_addr = eth_dev->data->mac_addrs;
>
> if (eth_dev == NULL) {
> - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
> + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
> return -1;
> }
>
> @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev
> *bonded_eth_dev)
> }
> }
> break;
> + case BONDING_MODE_8023AD:
> + break;
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> for (i = 0; i < internals->slave_count; i++) {
> @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev
> *eth_dev, int mode)
> eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
> eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
> break;
> + case BONDING_MODE_8023AD:
> + if (bond_mode_8023ad_init(eth_dev) != 0)
> + return -1;
> +
> + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
> + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
> + break;
> default:
> return -1;
> }
> @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
> if (internals->user_defined_primary_port)
> bond_ethdev_primary_set(internals, internals-
> >primary_port);
>
> + if (internals->mode == BONDING_MODE_8023AD)
> + bond_mode_8023ad_start(eth_dev);
>
> if (internals->link_status_polling_enabled)
> rte_eal_alarm_set(internals->link_status_polling_interval_ms
> * 1000,
> @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
> {
> struct bond_dev_private *internals = eth_dev->data->dev_private;
>
> + if (internals->mode == BONDING_MODE_8023AD) {
> + struct mode8023ad_data *data = &internals->mode4;
> + struct slow_protocol_msg *msg;
> +
> + bond_mode_8023ad_stop(eth_dev);
> + data->distibuting_slaves_count = 0;
> +
> + /* Discard all messages to/from mode 4 state machines */
> + while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -
> ENOENT) {
> + rte_pktmbuf_free(msg->pkt);
> + rte_ring_enqueue(data->free_ring, msg);
> + }
> +
> + while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -
> ENOENT) {
> + rte_pktmbuf_free(msg->pkt);
> + rte_ring_enqueue(data->free_ring, msg);
> + }
> + }
> +
> internals->active_slave_count = 0;
> internals->link_status_polling_enabled = 0;
>
> @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev
> *dev, uint16_t tx_queue_id,
> 0, dev->pci_dev->numa_node);
>
> if (bd_tx_q == NULL)
> - return -1;
> + return -1;
>
> bd_tx_q->queue_id = tx_queue_id;
> bd_tx_q->dev_private = dev->data->dev_private;
> @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue)
> rte_free(queue);
> }
>
> -
> static void
> bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
> {
> @@ -884,7 +1105,7 @@
> bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
>
> /* If device is currently being configured then don't check slaves link
> * status, wait until next period */
> - if (rte_spinlock_trylock(&internals->lock)){
> + if (rte_spinlock_trylock(&internals->lock)) {
> for (i = 0; i < internals->slave_count; i++) {
> if (internals->slaves[i].link_status_polling_enabled) {
> slave_ethdev = &rte_eth_devices[internals-
> >slaves[i].port_id];
> @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct
> rte_eth_dev *eth_dev)
> for (i = 0; i < internals->slave_count; i++)
> rte_eth_promiscuous_enable(internals-
> >slaves[i].port_id);
> break;
> + /* In mode4 promiscus mode is managed when slave is
> added/removed */
> + case BONDING_MODE_8023AD:
> + break;
> /* Promiscuous mode is propagated only to primary slave */
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> rte_eth_promiscuous_enable(internals-
> >current_primary_port);
> -
> }
> }
>
> @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct
> rte_eth_dev *dev)
> int i;
>
> internals->promiscuous_en = 0;
> -
> +
> switch (internals->mode) {
> /* Promiscuous mode is propagated to all slaves */
> case BONDING_MODE_ROUND_ROBIN:
> @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct
> rte_eth_dev *dev)
> for (i = 0; i < internals->slave_count; i++)
> rte_eth_promiscuous_disable(internals-
> >slaves[i].port_id);
> break;
> + /* In mode4 promiscus mode is set managed when slave is
> added/removed */
> + case BONDING_MODE_8023AD:
> + break;
> /* Promiscuous mode is propagated only to primary slave */
> case BONDING_MODE_ACTIVE_BACKUP:
> default:
> @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id,
> enum rte_eth_event_type type,
> struct bond_dev_private *internals;
> struct rte_eth_link link;
>
> - int i, valid_slave = 0, active_pos = -1;
> + int i, valid_slave = 0;
> + uint8_t active_pos;
> uint8_t lsc_flag = 0;
>
> if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
> @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t
> port_id, enum rte_eth_event_type type,
> return;
>
> /* Search for port in active port list */
> - for (i = 0; i < internals->active_slave_count; i++) {
> - if (port_id == internals->active_slaves[i]) {
> - active_pos = i;
> - break;
> - }
> - }
> + active_pos = find_slave_by_id(internals->active_slaves,
> + internals->active_slave_count, port_id);
>
> rte_eth_link_get_nowait(port_id, &link);
> if (link.link_status) {
> - if (active_pos >= 0)
> + if (active_pos < internals->active_slave_count)
> return;
>
> /* if no active slave ports then set this port to be primary
> port */
> @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t
> port_id, enum rte_eth_event_type type,
> link_properties_set(bonded_eth_dev,
> &(slave_eth_dev->data->dev_link));
> }
> - internals->active_slaves[internals->active_slave_count++] =
> port_id;
> +
> + activate_slave(bonded_eth_dev, port_id);
>
> /* If user has defined the primary port then default to using
> it */
> if (internals->user_defined_primary_port &&
> internals->primary_port == port_id)
> bond_ethdev_primary_set(internals, port_id);
> } else {
> - if (active_pos < 0)
> + if (active_pos == internals->active_slave_count)
> return;
>
> /* Remove from active slave list */
> - for (i = active_pos; i < (internals->active_slave_count - 1); i++)
> - internals->active_slaves[i] = internals-
> >active_slaves[i+1];
> -
> - internals->active_slave_count--;
> + deactivate_slave(bonded_eth_dev, active_pos);
>
> /* No active slaves, change link status to down and reset
> other
> * link properties */
> diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h
> b/lib/librte_pmd_bond/rte_eth_bond_private.h
> index 6db5144..77f7bb0 100644
> --- a/lib/librte_pmd_bond/rte_eth_bond_private.h
> +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
> @@ -42,6 +42,7 @@ extern "C" {
> #include <rte_spinlock.h>
>
> #include "rte_eth_bond.h"
> +#include "rte_eth_bond_8023ad.h"
>
> #define PMD_BOND_SLAVE_PORT_KVARG ("slave")
> #define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary")
> @@ -60,6 +61,8 @@ extern "C" {
> #define RTE_BOND_LOG(lvl, msg, ...) \
> RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__,
> ##__VA_ARGS__);
>
> +#define BONDING_MODE_INVALID 0xFF
> +
> extern const char *pmd_bond_init_valid_arguments[];
>
> extern const char *driver_name;
> @@ -89,7 +92,13 @@ struct bond_tx_queue {
> /**< Copy of TX configuration structure for queue */
> };
>
> -
> +/** Persisted Slave Configuration Structure */
> +struct slave_conf {
> + uint8_t port_id;
> + /**< Port Id of slave eth_dev */
> + struct ether_addr mac_addr;
> + /**< Slave eth_dev original MAC address */
> +};
> /** Bonded slave devices structure */
> struct bond_ethdev_slave_ports {
> uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
> @@ -124,7 +133,7 @@ struct bond_dev_private {
> uint8_t user_defined_mac;
> /**< Flag for whether MAC address is user defined or not */
> uint8_t promiscuous_en;
> - /**< Enabled/disable promiscuous mode on slave devices */
> + /**< Enabled/disable promiscuous mode on bonding device */
> uint8_t link_props_set;
> /**< flag to denote if the link properties are set */
>
> @@ -143,6 +152,9 @@ struct bond_dev_private {
> uint8_t slave_count; /**< Number of bonded
> slaves */
> struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
> /**< Arary of bonded slaves details */
> +
> + struct mode8023ad_data mode4;
> + /**< Mode 4 private data */
> };
>
> extern struct eth_dev_ops default_dev_ops;
> @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops;
> int
> valid_bonded_ethdev(struct rte_eth_dev *eth_dev);
>
> +/* Search given slave array to find possition of given id.
> + * Return slave pos or slaves_count if not found. */
> +static inline uint8_t
> +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count,
> + uint8_t slave_id ) {
> +
> + uint8_t pos;
> + for (pos = 0; pos < slaves_count; pos++) {
> + if (slave_id == slaves[pos])
> + break;
> + }
> +
> + return pos;
> +}
> +
> int
> valid_port_id(uint8_t port_id);
>
> @@ -160,6 +187,14 @@ int
> valid_slave_port_id(uint8_t port_id);
>
> void
> +deactivate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t slave_pos );
> +
> +void
> +activate_slave(struct rte_eth_dev *eth_dev,
> + uint8_t port_id );
> +
> +void
> link_properties_set(struct rte_eth_dev *bonded_eth_dev,
> struct rte_eth_link *slave_dev_link);
> void
> @@ -173,6 +208,9 @@ int
> mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr
> *new_mac_addr);
>
> int
> +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr
> *dst_mac_addr);
> +
> +int
> mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev);
>
> uint8_t
> --
> 1.7.9.5
This patch adds support mode 4 of link bonding. It depend on Delcan Doherty patches v3 and rte alarms patch v2 or above. New version handles race issues with setting/cancelin callbacks, fixes promiscus mode setting in mode 4 and some other minor errors in mode 4 implementation. Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com> --- lib/librte_ether/rte_ether.h | 1 + lib/librte_pmd_bond/Makefile | 1 + lib/librte_pmd_bond/rte_eth_bond.h | 4 + lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1070 ++++++++++++++++++++++++++++ lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 405 +++++++++++ lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++- lib/librte_pmd_bond/rte_eth_bond_args.c | 1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 ++++++- lib/librte_pmd_bond/rte_eth_bond_private.h | 42 +- 9 files changed, 1821 insertions(+), 46 deletions(-) create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h index 2e08f23..1a3711b 100644 --- a/lib/librte_ether/rte_ether.h +++ b/lib/librte_ether/rte_ether.h @@ -293,6 +293,7 @@ struct vlan_hdr { #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #ifdef __cplusplus } diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile index 953d75e..c2312c2 100644 --- a/lib/librte_pmd_bond/Makefile +++ b/lib/librte_pmd_bond/Makefile @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS) # SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c # diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h index 6811c7b..b0223c2 100644 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ b/lib/librte_pmd_bond/rte_eth_bond.h @@ -75,6 +75,10 @@ extern "C" { /**< Broadcast (Mode 3). * In this mode all transmitted packets will be transmitted on all available * active slaves of the bonded. */ +#define BONDING_MODE_8023AD (4) +/**< 802.3AD (Mode 4). + * In this mode transmission and reception of packets is managed by LACP + * protocol specified in 802.3AD documentation. */ /* Balance Mode Transmit Policies */ #define BALANCE_XMIT_POLICY_LAYER2 (0) diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c new file mode 100644 index 0000000..de416c6 --- /dev/null +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c @@ -0,0 +1,1070 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stddef.h> +#include <string.h> + +#include <rte_alarm.h> +#include <rte_malloc.h> +#include <rte_errno.h> + +#include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad.h" + +#include <rte_cycles.h> + +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD +#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \ + bond_dbg_get_time_diff_ms(), internals->active_slaves[port_num], \ + __FUNCTION__, ##__VA_ARGS__) + +static unsigned +bond_dbg_get_time_diff_ms(void) +{ + static uint64_t start_time = 0; + uint64_t now; + + now = rte_rdtsc(); + if (start_time == 0) + start_time = now; + + return ((now - start_time) * 1000) / rte_get_tsc_hz(); +} + +static void +bond_print_lacp(struct lacpdu *l) +{ + char a_address[18]; + char p_address[18]; + char a_state[256] = { 0 }; + char p_state[256] = { 0 }; + + static const char *state_labels[] = { + "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP" + }; + + int a_len = 0; + int p_len = 0; + uint8_t i; + uint8_t *addr; + + addr = l->actor.port_params.system.addr_bytes; + snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + addr = l->partner.port_params.system.addr_bytes; + snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + for (i = 0; i < 8; i++) { + if ((l->actor.state >> i) & 1) { + a_len += snprintf(a_state + a_len, sizeof(a_state) - a_len, "%s ", + state_labels[i]); + } + + if ((l->partner.state >> i) & 1) { + p_len += snprintf(p_state + p_len, sizeof(p_state) - p_len, "%s ", + state_labels[i]); + } + } + + if (a_len && a_state[a_len-1] == ' ') + a_state[a_len-1] = '\0'; + + if (p_len && p_state[p_len-1] == ' ') + p_state[p_len-1] = '\0'; + + RTE_LOG(DEBUG, PMD, "LACP: {\n"\ + " subtype= %02X\n"\ + " ver_num=%02X\n"\ + " actor={ tlv=%02X, len=%02X\n"\ + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ + " state={ %s }\n"\ + " }\n"\ + " partner={ tlv=%02X, len=%02X\n"\ + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ + " state={ %s }\n"\ + " }\n"\ + " collector={info=%02X, length=%02X, max_delay=%04X\n, " \ + "type_term=%02X, terminator_length = %02X}\n",\ + l->subtype,\ + l->version_number,\ + l->actor.tlv_type_info,\ + l->actor.info_length,\ + l->actor.port_params.system_priority,\ + a_address,\ + l->actor.port_params.key,\ + l->actor.port_params.port_priority,\ + l->actor.port_params.port_number,\ + a_state,\ + l->partner.tlv_type_info,\ + l->partner.info_length,\ + l->partner.port_params.system_priority,\ + p_address,\ + l->partner.port_params.key,\ + l->partner.port_params.port_priority,\ + l->partner.port_params.port_number,\ + p_state,\ + l->tlv_type_collector_info,\ + l->collector_info_length,\ + l->collector_max_delay,\ + l->tlv_type_terminator,\ + l->terminator_length); + +} +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu) + +#else +#define BOND_PRINT_LACP(lacpdu) do { } while (0) +#define MODE4_DEBUG(fmt, ...) do { } while (0) +#endif + +static const struct ether_addr lacp_mac_addr = { + .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 } +}; + +static void +timer_cancel(uint64_t *timer) +{ + *timer = 0; +} + +static void +timer_set(uint64_t *timer, uint64_t timeout_ms) +{ + *timer = rte_rdtsc() + timeout_ms * rte_get_tsc_hz() / 1000; +} + +/* Forces given timer to be in expired state. */ +static void +timer_force_expired(uint64_t *timer) +{ + *timer = rte_rdtsc(); +} + +static bool +timer_is_stopped(uint64_t *timer) +{ + return *timer == 0; +} + +static bool +timer_is_expired(uint64_t *timer) +{ + return *timer <= rte_rdtsc(); +} + +/* Timer is in running state if it is not stopped nor expired */ +static bool +timer_is_running(uint64_t *timer) +{ + return !timer_is_stopped(timer) && !timer_is_expired(timer); +} + +static void +record_default(struct port *port) +{ + /* Record default parametes for partner. Partner admin parameters + * are not implemented so set them to arbitrary default (last known) and + * mark actor that parner is in defaulted state. */ + port->partner_state = STATE_LACP_ACTIVE; + ACTOR_STATE_SET(port, DEFAULTED); +} + +/** Function handles rx state machine. + * + * This function implements Receive State Machine from point 5.4.12 in + * 802.1AX documentation. It should be called periodically. + * + * @param lacpdu LACPDU received. + * @param port Port on which LACPDU was received. + */ +static void +rx_machine(struct bond_dev_private *internals, uint8_t port_num, + struct lacpdu *lacp) +{ + struct port *port = &internals->mode4.port_list[port_num]; + + if (SM_FLAG(port, BEGIN)) { + /* Initialize stuff */ + MODE4_DEBUG("-> INITIALIZE\n"); + SM_FLAG_CLR(port, MOVED); + port->selected = UNSELECTED; + + record_default(port); + + ACTOR_STATE_CLR(port, EXPIRED); + timer_cancel(&port->current_while_timer); + + /* DISABLED: On initialization partner is out of sync */ + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + + /* LACP DISABLED stuff if LACP not enabled on this port */ + if (!SM_FLAG(port, LACP_ENABLED)) + PARTNER_STATE_CLR(port, AGGREGATION); + } + + if (!SM_FLAG(port, LACP_ENABLED)) { + /* Update parameters only if state changed */ + if (!timer_is_stopped(&port->current_while_timer)) { + port->selected = UNSELECTED; + record_default(port); + PARTNER_STATE_CLR(port, AGGREGATION); + ACTOR_STATE_CLR(port, EXPIRED); + timer_cancel(&port->current_while_timer); + } + return; + } + + if (lacp) { + MODE4_DEBUG("LACP -> CURRENT\n"); + BOND_PRINT_LACP(lacp); + /* Update selected flag. If partner parameters are defaulted assume they + * are match. If not defaulted compare LACP actor with ports parner + * params. */ + if (!(port->actor_state & STATE_DEFAULTED) && + (((port->partner_state ^ lacp->actor.state) & STATE_AGGREGATION) || + memcmp(&port->partner, &lacp->actor.port_params, + sizeof(port->partner)) != 0)) { + MODE4_DEBUG("selected <- UNSELECTED\n"); + port->selected = UNSELECTED; + } + + /* Record this PDU actor params as partner params */ + memcpy(&port->partner, &lacp->actor.port_params, + sizeof(struct port_params)); + port->partner_state = lacp->actor.state; + + /* Partner parameters are not defaulted any more */ + ACTOR_STATE_CLR(port, DEFAULTED); + + /* Update NTT if partners information are outdated */ + uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT | + STATE_SYNCHRONIZATION | STATE_AGGREGATION; + + if (((port->actor_state ^ lacp->partner.state) & state_mask) || + memcmp(&port->actor, &lacp->partner.port_params, + sizeof(struct port_params)) != 0) { + port->sm_flags |= SM_FLAGS_NTT; + } + + /* If LACP partner params match this port actor params */ + if (memcmp(&port->actor, &lacp->partner.port_params, + sizeof(port->actor)) == 0 && + (port->partner_state & STATE_AGGREGATION) == (port->actor_state + & STATE_AGGREGATION)) + PARTNER_STATE_SET(port, SYNCHRONIZATION); + else if (!(port->partner_state & STATE_AGGREGATION) && + (port->actor_state & STATE_AGGREGATION)) + PARTNER_STATE_SET(port, SYNCHRONIZATION); + else + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + + if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT)) + timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS); + else + timer_set(&port->current_while_timer, BOND_8023AD_LONG_TIMEOUT_MS); + + ACTOR_STATE_CLR(port, EXPIRED); + return; /* No state change */ + } + + /* If CURRENT state timer is not running (stopped or expired) + * transit to EXPIRED state from DISABLED or CURRENT */ + if (!timer_is_running(&port->current_while_timer)) { + ACTOR_STATE_SET(port, EXPIRED); + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT); + timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS); + } +} + +/** + * Function handles periodic tx state machine. + * + * Function implements Periodic Transmission state machine from point 5.4.13 + * in 802.1AX documentation. It should be called periodically. + * + * @param port Port to handle state machine. + */ +static void +periodic_machine(struct bond_dev_private *internals, uint8_t port_num) +{ + struct port *port = &internals->mode4.port_list[port_num]; + /* Calculate if either site is LACP enabled */ + uint32_t timeout; + uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) || + PARTNER_STATE(port, LACP_ACTIVE); + + uint8_t is_partner_fast, was_partner_fast; + /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */ + if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || + active == 0) { + timer_cancel(&port->periodic_timer); + timer_force_expired(&port->tx_machine_timer); + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + + MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n", + SM_FLAG(port, BEGIN) ? "begind " : "", + SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ", + active ? "LACP active " : "LACP pasive "); + return; + } + + is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT); + was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT); + + /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW. + * Other case: check if timer expire or partners settings changed. */ + if (!timer_is_stopped(&port->periodic_timer)) { + if (timer_is_expired(&port->periodic_timer)) { + SM_FLAG_SET(port, NTT); + } else if (is_partner_fast != was_partner_fast) { + /* Partners timeout was slow and now it is fast -> send LACP. + * In other case (was fast and now it is slow) just switch + * timeout to slow without forcing send of LACP (because standard + * say so)*/ + if (!is_partner_fast) + SM_FLAG_SET(port, NTT); + } else + return; /* Nothing changed */ + } + + /* Handle state transition to FAST/SLOW LACP timeout */ + if (is_partner_fast) { + timeout = BOND_8023AD_FAST_PERIODIC_MS; + SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT); + } else { + timeout = BOND_8023AD_SLOW_PERIODIC_MS; + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + } + + timer_set(&port->periodic_timer, timeout); +} + +/** + * Function handles mux state machine. + * + * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation. + * It should be called periodically. + * + * @param port Port to handle state machine. + */ +static int +mux_machine(struct bond_dev_private *internals, uint8_t port_num) +{ + bool ntt = false; + struct port *port = &internals->mode4.port_list[port_num]; + + /* Save current state for later use */ + const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | + STATE_COLLECTING; + + /* Enter DETACHED state on BEGIN condition or from any other state if + * port was unselected */ + if (SM_FLAG(port, BEGIN) || + port->selected == UNSELECTED || (port->selected == STANDBY && + (port->actor_state & state_mask) != 0)) { + /* detach mux from aggregator not used */ + port->actor_state &= ~state_mask; + /* Set ntt to true if BEGIN condition or transition from any other state + * which is indicated that wait_while_timer was started */ + if (SM_FLAG(port, BEGIN) || + !timer_is_stopped(&port->wait_while_timer)) { + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("-> DETACHED\n"); + } + timer_cancel(&port->wait_while_timer); + } + + if (timer_is_stopped(&port->wait_while_timer)) { + if (port->selected == SELECTED || port->selected == STANDBY) { + timer_set(&port->wait_while_timer, + BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS); + + MODE4_DEBUG("DETACHED -> WAITING\n"); + } + /* Waiting state entered */ + return 0; + } + + /* Transit next state if port is ready */ + if (!timer_is_expired(&port->wait_while_timer)) + return 0; + + if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) && + !PARTNER_STATE(port, SYNCHRONIZATION)) { + /* If in COLLECTING or DISTRIBUTING state and partner becomes out of + * sync transit to ATACHED state. */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); + /* Clear actor sync to activate transit ATACHED in condition bellow */ + ACTOR_STATE_CLR(port, SYNCHRONIZATION); + MODE4_DEBUG("Out of sync -> ATTACHED\n"); + } else if (!ACTOR_STATE(port, SYNCHRONIZATION)) { + /* attach mux to aggregator */ + RTE_VERIFY((port->actor_state & (STATE_COLLECTING | + STATE_DISTRIBUTING)) == 0); + ACTOR_STATE_SET(port, SYNCHRONIZATION); + ntt = true; + MODE4_DEBUG("ATTACHED Entered\n"); + } else if (!ACTOR_STATE(port, COLLECTING)) { + /* Start collecting if in sync */ + if (PARTNER_STATE(port, SYNCHRONIZATION)) { + MODE4_DEBUG("ATTACHED -> COLLECTING\n"); + ACTOR_STATE_SET(port, COLLECTING); + } + } else if (ACTOR_STATE(port, COLLECTING)) { + /* Check if partner is in COLLECTING state. If so this port can + * distribute frames to it */ + if (!ACTOR_STATE(port, DISTRIBUTING)) { + if (PARTNER_STATE(port, COLLECTING)) { + /* Enable DISTRIBUTING if partner is collecting */ + ACTOR_STATE_SET(port, DISTRIBUTING); + ntt = true; + MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n"); + } + } else { + if (!PARTNER_STATE(port, COLLECTING)) { + /* Disable DISTRIBUTING (enter COLLECTING state) if partner + * is not collecting */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + ntt = true; + MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n"); + } + } + } + + if (ntt != false) + SM_FLAG_SET(port, NTT); + + return ntt; +} + +/** + * Function handles transmit state machine. + * + * Function implements Transmit Machine from point 5.4.16 in 802.1AX + * documentation. + * + * @param port + */ +static void +tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct port *port = &internals->mode4.port_list[port_num]; + struct mode8023ad_data *data = &internals->mode4; + + struct slow_protocol_msg *msg = NULL; + struct lacpdu_header *hdr; + struct lacpdu *lacpdu; + + /* If periodic timer is not running periodic machine is in NO PERIODIC and + * acording to 802.3ax standard tx machine should not transmit any frames + * and set ntt to false. */ + if (timer_is_stopped(&port->periodic_timer)) + SM_FLAG_CLR(port, NTT); + + if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer)) + return; + + /* If all conditions are met construct packet to send */ + if (rte_ring_dequeue(data->free_ring, (void **)&msg) == -ENOBUFS) { + MODE4_DEBUG("tx_machine: no free_lacpdu_ring\n"); + return; + } + + msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool); + if (msg->pkt == NULL) { + rte_ring_enqueue(data->free_ring, msg); + RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n"); + return; + } + + msg->port_id = internals->active_slaves[port_num]; + hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *); + + msg->pkt->data_len = sizeof(*hdr); + msg->pkt->pkt_len = sizeof(*hdr); + /* Source and destination MAC */ + ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr); + ether_addr_copy(&port->actor.system, &hdr->eth_hdr.s_addr); + hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW); + + lacpdu = &hdr->lacpdu; + memset(lacpdu, 0, sizeof(*lacpdu)); + + /* Initialize LACP part */ + lacpdu->subtype = SUBTYPE_LACP; + lacpdu->version_number = 1; + + /* ACTOR */ + lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION; + lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params); + memcpy(&hdr->lacpdu.actor.port_params, &port->actor, + sizeof(port->actor)); + lacpdu->actor.state = port->actor_state; + + /* PARTNER */ + lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION; + lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params); + memcpy(&lacpdu->partner.port_params, &port->partner, + sizeof(struct port_params)); + lacpdu->partner.state = port->partner_state; + + /* Other fields */ + lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION; + lacpdu->collector_info_length = 0x10; + lacpdu->collector_max_delay = 0; + + lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION; + lacpdu->terminator_length = 0; + + if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) { + /* If TX ring full, drop packet and free message. Retransmission + * will happen in next function call. */ + rte_pktmbuf_free(msg->pkt); + rte_ring_enqueue(data->free_ring, msg); + + RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring.\n" + "Receive and transmit functions must be invoked on bonded interface" + " at least 10 times per second or LACP will not work correctly\n"); + return; + } + + MODE4_DEBUG("sending LACP frame\n"); + BOND_PRINT_LACP(lacpdu); + + SM_FLAG_CLR(port, NTT); + /* Add 10% random backoff time to better distribute slow packets + * between tx bursts. */ + timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS + + rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100)); +} + +/** + * Function assigns port to aggregator. + * + * @param bond_dev_private Pointer to bond_dev_private structure. + * @param port_pos Port to assign. + */ +static void +selection_logic(struct bond_dev_private *internals, uint8_t port_num) +{ + struct mode8023ad_data *data = &internals->mode4; + struct port *agg, *port, *port_list; + uint8_t ports_count; + uint8_t i; + + ports_count = internals->slave_count; + port_list = data->port_list; + port = &port_list[port_num]; + + /* Skip port if it is selected */ + if (port->selected == SELECTED) + return; + + /* Search for aggregator suitable for this port */ + for (i = 0; i < ports_count; ++i) { + agg = &port_list[i]; + /* Skip ports that are not aggreagators */ + if (agg->agregator_idx != i && i == port_num) + continue; + + /* Actors system ID is not checked since all slave device have the same + * ID (MAC address). */ + if ((agg->actor.key == port->actor.key && + agg->partner.system_priority == port->partner.system_priority && + is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1 + && (agg->partner.key == port->partner.key)) && + is_zero_ether_addr(&port->partner.system) != 1 && + (agg->actor.key & + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) { + + port->agregator_idx = i; + break; + } + } + + /* By default, port uses it self as agregator */ + if (i == ports_count) + port->agregator_idx = port_num; + + port->selected = SELECTED; + + MODE4_DEBUG("-> SELECTED: ID=%3u pos=%3u\n" + "\t%s ID=%3u pos=%3u\n", + internals->active_slaves[port_num], port_num, + port->agregator_idx == port_num ? + "agregator not found, using default" : "agregator found", + port->agregator_idx, + internals->active_slaves[port->agregator_idx]); +} + +/** + * Helper function which updates current port + */ +static void +update_mux_slaves(struct bond_dev_private *internals) +{ + struct mode8023ad_data *data = &internals->mode4; + struct port *port; + uint8_t current[RTE_MAX_ETHPORTS]; + uint8_t count = 0; + uint8_t i; + + for (i = 0; i < internals->slave_count; i++) { + port = &data->port_list[i]; + if (ACTOR_STATE(port, DISTRIBUTING)) + current[count++] = i; + } + + memcpy(data->distibuting_slaves_offsets, current, + sizeof(current[0]) * count); + data->distibuting_slaves_count = count; +} + +/* Function maps DPDK speed to bonding speed stored in key field */ +static uint16_t +link_speed_key(uint16_t speed) { + uint16_t key_speed; + + switch (speed) { + case ETH_LINK_SPEED_AUTONEG: + key_speed = 0x00; + break; + case ETH_LINK_SPEED_10: + key_speed = BOND_LINK_SPEED_KEY_10M; + break; + case ETH_LINK_SPEED_100: + key_speed = BOND_LINK_SPEED_KEY_100M; + break; + case ETH_LINK_SPEED_1000: + key_speed = BOND_LINK_SPEED_KEY_1000M; + break; + case ETH_LINK_SPEED_10G: + key_speed = BOND_LINK_SPEED_KEY_10G; + break; + case ETH_LINK_SPEED_20G: + key_speed = BOND_LINK_SPEED_KEY_20G; + break; + case ETH_LINK_SPEED_40G: + key_speed = BOND_LINK_SPEED_KEY_40G; + break; + default: + /* Unknown speed*/ + key_speed = 0xFFFF; + } + + return key_speed; +} + +static void +bond_mode_8023ad_periodic_cb(void *arg) +{ + struct rte_eth_dev *bond_dev = arg; + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_data *data = &internals->mode4; + + struct port *port; + struct slow_protocol_frame *slow_hdr; + struct rte_eth_link link_info; + struct ether_addr slave_addr; + + struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE]; + uint16_t port_num, j, nb_msgs; + /* if not 0 collecting/distibuting array need update */ + uint16_t slaves_changed = 0; + bool machines_invoked; + + /* Update link status on each port */ + for (port_num = 0; port_num < internals->active_slave_count; port_num++) { + uint16_t key; + + rte_eth_link_get(internals->active_slaves[port_num], &link_info); + rte_eth_macaddr_get(internals->active_slaves[port_num], &slave_addr); + + if (link_info.link_status != 0) { + key = link_speed_key(link_info.link_speed) << 1; + if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX) + key |= BOND_LINK_FULL_DUPLEX_KEY; + } else + key = 0; + + port = &data->port_list[port_num]; + key = rte_cpu_to_be_16(key); + + if (key != port->actor.key) { + port->actor.key = key; + SM_FLAG_SET(port, NTT); + } + + if (!is_same_ether_addr(&port->actor.system, &slave_addr)) { + SM_FLAG_SET(port, NTT); + ether_addr_copy(&slave_addr, &port->actor.system); + } + } + + nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) msgs, + BOND_MODE_8023AX_RX_RING_SIZE); + + for (port_num = 0; port_num < internals->active_slave_count; port_num++) { + port = &data->port_list[port_num]; + if ((port->actor.key & + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) { + + SM_FLAG_SET(port, BEGIN); + + /* LACP is disabled on half duples or link is down */ + if (SM_FLAG(port, LACP_ENABLED)) { + /* If port was enabled set it to BEGIN state */ + SM_FLAG_CLR(port, LACP_ENABLED); + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); + slaves_changed++; + } + + MODE4_DEBUG("Port %u is not LACP capable!\n", + internals->active_slaves[port_num]); + /* Skip this port processing */ + continue; + } + + SM_FLAG_SET(port, LACP_ENABLED); + machines_invoked = false; + /* Find LACP packet */ + for (j = 0; j < nb_msgs; j++) { + if (msgs[j] == NULL || msgs[j]->port_id != + internals->active_slaves[port_num]) + continue; + + slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt, + struct slow_protocol_frame *); + + if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP) { + /* This is LACP frame so pass it to rx_machine */ + struct lacpdu *lacp = (struct lacpdu *)&slow_hdr->slow_protocol; + /* Invoke state machines on every active slave port */ + rx_machine(internals, port_num, lacp); + periodic_machine(internals, port_num); + slaves_changed += mux_machine(internals, port_num); + tx_machine(bond_dev, port_num); + selection_logic(internals, port_num); + + machines_invoked = true; + } else if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_MARKER) { + struct marker *marker; + + marker = (struct marker *) &slow_hdr->slow_protocol; + if (marker->tlv_type_marker == MARKER_TLV_TYPE_MARKER_INFO) { + /* Reuse received packet to send frame to Marker Responder + */ + marker->tlv_type_marker = MARKER_TLV_TYPE_MARKER_RESP; + + /* Update source MAC, destination MAC is multicast so we + * don't update it */ + mac_address_get(bond_dev, &slow_hdr->eth_hdr.s_addr); + + if (rte_ring_enqueue(data->tx_ring, msgs[j]) == -ENOBUFS) { + RTE_LOG(ERR, PMD, + "Failed to enqueue packet into tx ring"); + rte_pktmbuf_free(msgs[j]->pkt); + rte_ring_enqueue(data->free_ring, msgs[j]); + } + + msgs[j] = NULL; + } + } + } + + if (machines_invoked == false) { + rx_machine(internals, port_num, NULL); + periodic_machine(internals, port_num); + slaves_changed += mux_machine(internals, port_num); + tx_machine(bond_dev, port_num); + selection_logic(internals, port_num); + machines_invoked = true; + } + + SM_FLAG_CLR(port, BEGIN); + } + + /* Update mux if something changed */ + if (slaves_changed > 0) { + update_mux_slaves(internals); + MODE4_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n", + data->distibuting_slaves_count, + data->distibuting_slaves_offsets[0], + data->distibuting_slaves_count > 0 ? " " : "\b\b", + data->distibuting_slaves_offsets[1], + data->distibuting_slaves_count > 1 ? " " : "\b\b", + data->distibuting_slaves_offsets[2], + data->distibuting_slaves_count > 2 ? " " : "\b\b", + data->distibuting_slaves_offsets[3], + data->distibuting_slaves_count > 3 ? " " : "\b\b", + data->distibuting_slaves_count > 4 ? "..." : ""); + } + + /* Free packets that was not reused */ + for (port_num = 0; port_num < nb_msgs; port_num++) { + if (msgs[port_num] != NULL) { + rte_pktmbuf_free(msgs[port_num]->pkt); + rte_ring_enqueue(data->free_ring, msgs[port_num]); + } + } + + rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, + bond_mode_8023ad_periodic_cb, arg); +} + +static void +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_idx) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_data *data = &internals->mode4; + + struct port *port = &data->port_list[internals->active_slave_count]; + struct port_params initial = { + .system = { { 0 } }, + .system_priority = rte_cpu_to_be_16(0xFFFF), + .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY), + .port_priority = rte_cpu_to_be_16(0x00FF), + .port_number = 0, + }; + + uint8_t slave_id = internals->active_slaves[slave_idx]; + + memcpy(&port->actor, &initial, sizeof(struct port_params)); + port->actor.port_number = slave_id_to_port_number(slave_id); + + memcpy(&port->partner, &initial, sizeof(struct port_params)); + + /* default states */ + port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED; + port->partner_state = STATE_LACP_ACTIVE; + port->sm_flags = SM_FLAGS_BEGIN; + + /* use this port as agregator */ + port->agregator_idx = slave_idx; + + rte_eth_promiscuous_enable(slave_id); +} + +void +bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + + bond_mode_8023ad_activate_slave(bond_dev, internals->active_slave_count); +} + +int +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev, + uint8_t slave_pos) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_data *data = &internals->mode4; + struct port *port; + uint8_t i; + + bond_mode_8023ad_stop(bond_dev); + + /* Exclude slave from transmit policy. If this slave is an aggregator + * make all aggregated slaves unselected to force sellection logic + * to select suitable aggregator for this port */ + for (i = 0; i < internals->active_slave_count; i++) { + port = &data->port_list[slave_pos]; + if (port->agregator_idx == slave_pos) { + port->selected = UNSELECTED; + port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | + STATE_COLLECTING); + + /* Use default aggregator */ + port->agregator_idx = i; + } + } + + port = &data->port_list[slave_pos]; + + update_mux_slaves(internals); + + /* Remove slave port config */ + if (slave_pos + 1 < internals->active_slave_count) { + memmove(&data->port_list[slave_pos], + &data->port_list[slave_pos + 1], + sizeof(data->port_list[0]) * (internals->active_slave_count - + slave_pos - 1)); + } + + if (bond_dev->data->dev_started) + return bond_mode_8023ad_start(bond_dev); + + return 0; +} + +int +bond_mode_8023ad_init(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_data *data = &internals->mode4; + char mem_name[RTE_ETH_NAME_MAX_LEN]; + int socket_id = bond_dev->pci_dev->numa_node; + uint8_t i; + + if (data->mbuf_pool == NULL) { + const uint16_t element_size = sizeof(struct slow_protocol_frame) + + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM; + + snprintf(mem_name, sizeof(mem_name), "%s_POOL", bond_dev->data->name); + data->mbuf_pool = rte_mempool_create(mem_name, + /* FIXME: How big memory pool should be? If driver will not + * free packets quick enough there will be ENOMEM in tx_machine. + * For now give 512 packets per slave. Hope it will be enough. */ + (BOND_MODE_8023AX_TX_RING_SIZE + 1) * 512 * RTE_MAX_ETHPORTS, + element_size, + RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, + sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, + NULL, rte_pktmbuf_init, NULL, socket_id, 0); + + /* Any memory allocation failure in initalization is critical because + * resources can't be free, so reinitialization is impossible. */ + if (data->mbuf_pool == NULL) { + RTE_LOG(ERR, PMD, "%s: Failed to initialize LACP rx ring\n", + bond_dev->data->name); + + rte_panic("Failed to alocate memory pool ('%s')\n" + "for bond device '%s'\n", mem_name, bond_dev->data->name); + } + + /* Setup ring for free messages that can be used in RX/TX burst */ + snprintf(mem_name, sizeof(mem_name), "%s_free", bond_dev->data->name); + + uint16_t free_cnt = BOND_MODE_8023AX_RX_RING_SIZE + + BOND_MODE_8023AX_TX_RING_SIZE; + + data->free_ring = rte_ring_create(mem_name, free_cnt, socket_id, 0); + + if (data->free_ring == NULL) { + rte_panic("%s: Failed to create slow messages free ring\n", + bond_dev->data->name); + } + + for (i = 0; i < free_cnt; i++) { + struct slow_protocol_msg *msg; + + snprintf(mem_name, sizeof(mem_name), "%s_slow_msg_%u", + bond_dev->data->name, i); + + msg = (struct slow_protocol_msg *) rte_malloc_socket(mem_name, + sizeof(struct slow_protocol_msg), 0, socket_id); + + if (msg == NULL) { + rte_panic("%s: Failed to allocate slow message\n", + bond_dev->data->name); + } + + rte_ring_enqueue(data->free_ring, msg); + } + + /* Setup rings for usage in rx/tx bursts and machines state + * call back */ + snprintf(mem_name, sizeof(mem_name), "%s_rx", bond_dev->data->name); + data->rx_ring = rte_ring_create(mem_name, + BOND_MODE_8023AX_RX_RING_SIZE, socket_id, 0); + + if (data->rx_ring == NULL) { + rte_panic("%s: Failed to create slow messages rx ring\n", + bond_dev->data->name); + } + + snprintf(mem_name, sizeof(mem_name), "%s_tx", bond_dev->data->name); + data->tx_ring = rte_ring_create(mem_name, BOND_MODE_8023AX_TX_RING_SIZE, + socket_id, RING_F_SP_ENQ); + + if (data->tx_ring == NULL) { + rte_panic("%s: Failed to create slow messages tx ring\n", + bond_dev->data->name); + } + } + + data->distibuting_slaves_count = 0; + + for (i = 0; i < internals->active_slave_count; i++) + bond_mode_8023ad_activate_slave(bond_dev, i); + + return 0; +} + +int +bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) +{ + return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, + &bond_mode_8023ad_periodic_cb, bond_dev); +} + +int +bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) +{ + if (rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev)) + return 0; + + return -ENOENT; +} + +void +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, + uint8_t slave_pos, struct rte_mbuf *slot_pkt) +{ + struct mode8023ad_data *data; + struct slow_protocol_msg *msg = NULL; + int retval; + + data = &internals->mode4; + + if (unlikely(rte_ring_dequeue(data->free_ring, (void **)&msg) == + -ENOBUFS)) { + rte_pktmbuf_free(slot_pkt); + return; + } + + msg->pkt = slot_pkt; + msg->port_id = internals->active_slaves[slave_pos]; + + retval = rte_ring_enqueue(data->rx_ring, msg); + if (unlikely(retval == -ENOBUFS)) { + /* If RX fing full free lacpdu message and drop packet */ + rte_ring_enqueue(data->free_ring, msg); + rte_pktmbuf_free(slot_pkt); + } +} diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h new file mode 100644 index 0000000..df250bb --- /dev/null +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h @@ -0,0 +1,405 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_ETH_BOND_8023AD_H_ +#define RTE_ETH_BOND_8023AD_H_ + +#include <stdint.h> + +#include <rte_ether.h> +#include <rte_byteorder.h> +#include <rte_spinlock.h> + +typedef int bool; + +#define true 1 +#define false 0 + +/** + * Timeouts deffinitions (5.4.4 in 802.1AX documentation). + */ +#define BOND_8023AD_FAST_PERIODIC_MS 1000 +#define BOND_8023AD_SLOW_PERIODIC_MS 30000 +#define BOND_8023AD_SHORT_TIMEOUT_MS 3000 +#define BOND_8023AD_LONG_TIMEOUT_MS 90000 +#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000 +#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000 +#define BOND_8023AD_TX_PERIOD_MS 333 +/** + * Actor/partner states + */ +#define STATE_LACP_ACTIVE 0x01 +#define STATE_LACP_SHORT_TIMEOUT 0x02 +#define STATE_AGGREGATION 0x04 +#define STATE_SYNCHRONIZATION 0x08 +#define STATE_COLLECTING 0x10 +#define STATE_DISTRIBUTING 0x20 +/** Partners parameters are defaulted */ +#define STATE_DEFAULTED 0x40 +#define STATE_EXPIRED 0x80 + +/** + * State machine flags + */ +#define SM_FLAGS_BEGIN 0x0001 +#define SM_FLAGS_LACP_ENABLED 0x0002 +#define SM_FLAGS_ACTOR_CHURN 0x0004 +#define SM_FLAGS_PARTNER_CHURN 0x0008 +#define SM_FLAGS_MOVED 0x0100 +#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200 +#define SM_FLAGS_NTT 0x0400 + +#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100 +#define BOND_MODE_8023AX_RX_RING_SIZE (2 * RTE_MAX_ETHPORTS) +#define BOND_MODE_8023AX_TX_RING_SIZE (2 * RTE_MAX_ETHPORTS) + +#define BOND_LINK_FULL_DUPLEX_KEY 0x01 +#define BOND_LINK_SPEED_KEY_10M 0x02 +#define BOND_LINK_SPEED_KEY_100M 0x04 +#define BOND_LINK_SPEED_KEY_1000M 0x08 +#define BOND_LINK_SPEED_KEY_10G 0x10 +#define BOND_LINK_SPEED_KEY_20G 0x11 +#define BOND_LINK_SPEED_KEY_40G 0x12 + +#define SUBTYPE_LACP 0x01 + +#define TLV_TYPE_ACTOR_INFORMATION 0x01 +#define TLV_TYPE_PARTNER_INFORMATION 0x02 +#define TLV_TYPE_COLLECTOR_INFORMATION 0x03 +#define TLV_TYPE_TERMINATOR_INFORMATION 0x00 + +#define CHECK_FLAGS(_variable, _flags) ((_variable) & (_flags)) +#define SET_FLAGS(_variable, _flags) ((_variable) |= (_flags)) +#define CLEAR_FLAGS(_variable, _flags) ((_variable) &= ~(_flags)) + +#define SM_FLAG(port, flag) (!!CHECK_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)) +#define SM_FLAG_SET(port, flag) SET_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag) +#define SM_FLAG_CLR(port, flag) CLEAR_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag) + +#define ACTOR_STATE(port, flag) (!!CHECK_FLAGS((port)->actor_state, STATE_ ## flag)) +#define ACTOR_STATE_SET(port, flag) SET_FLAGS((port)->actor_state, STATE_ ## flag) +#define ACTOR_STATE_CLR(port, flag) CLEAR_FLAGS((port)->actor_state, STATE_ ## flag) + +#define PARTNER_STATE(port, flag) (!!CHECK_FLAGS((port)->partner_state, STATE_ ## flag)) +#define PARTNER_STATE_SET(port, flag) SET_FLAGS((port)->partner_state, STATE_ ## flag) +#define PARTNER_STATE_CLR(port, flag) CLEAR_FLAGS((port)->partner_state, STATE_ ## flag) + +/** Slow protocol LACP frame subtype */ +#define SLOW_SUBTYPE_LACP 0x01 + +/** Slow procotol marker frame subtype */ +#define SLOW_SUBTYPE_MARKER 0x02 + +/** Marker type info request */ +#define MARKER_TLV_TYPE_MARKER_INFO 0x01 + +/** Marker type info response */ +#define MARKER_TLV_TYPE_MARKER_RESP 0x02 + +/** Generic slow protocol structure */ +struct slow_protocol { + uint8_t subtype; + uint8_t reserved_119[119]; +} __attribute__((__packed__)); + +/** Generic slow protocol frame type structure */ +struct slow_protocol_frame { + struct ether_hdr eth_hdr; + struct slow_protocol slow_protocol; +} __attribute__((__packed__)); + +struct port_params { + uint16_t system_priority; + /**< System priority (unused in current implementation) */ + struct ether_addr system; + /**< System ID - Slave MAC address, same as bonding MAC address */ + uint16_t key; + /**< Speed information (implementation dependednt) and duplex. */ + uint16_t port_priority; + /**< Priority of this (unused in current implementation) */ + uint16_t port_number; + /**< Port number. It corresponds to slave port id. */ +} __attribute__((__packed__)); + +struct lacpdu_actor_partner_params { + uint8_t tlv_type_info; + uint8_t info_length; + struct port_params port_params; + uint8_t state; + uint8_t reserved_3[3]; +} __attribute__((__packed__)); + +/** LACPDU structure (5.4.2 in 802.1AX documentation). */ +struct lacpdu { + uint8_t subtype; + uint8_t version_number; + + struct lacpdu_actor_partner_params actor; + struct lacpdu_actor_partner_params partner; + + uint8_t tlv_type_collector_info; + uint8_t collector_info_length; + uint16_t collector_max_delay; + uint8_t reserved_12[12]; + + uint8_t tlv_type_terminator; + uint8_t terminator_length; + uint8_t reserved_50[50]; +} __attribute__((__packed__)); + +/** LACPDU frame: Contains ethernet header and LACPDU. */ +struct lacpdu_header { + struct ether_hdr eth_hdr; + struct lacpdu lacpdu; +} __attribute__((__packed__)); + +struct marker { + uint8_t subtype; + uint8_t version_number; + + uint8_t tlv_type_marker; + uint8_t info_length; + uint16_t requester_port; + struct ether_addr requester_system; + uint32_t requester_transaction_id; + uint8_t reserved_2[2]; + + uint8_t tlv_type_terminator; + uint8_t terminator_length; + uint8_t reserved_90[90]; +} __attribute__((__packed__)); + +struct marker_header { + struct ether_hdr eth_hdr; + struct marker marker; +} __attribute__((__packed__)); + +/** Variables associated with the system (5.4.5 in 802.1AX documentation). */ +struct system { + struct ether_addr actor_system; + /**< The MAC address component of the System Identifier of the System */ + uint16_t actor_system_priority; + /**< The System Priority of the System */ +}; + +enum selection { + UNSELECTED, + STANDBY, + SELECTED +}; + +/** Variables associated with each port (5.4.7 in 802.1AX documentation). */ +struct port { + /** + * The operational values of the Actor's state parameters. Bitmask + * of port states. + */ + uint8_t actor_state; + + /** The operational Actor's port parameters */ + struct port_params actor; + + /** + * The operational value of the Actor's view of the current values of + * the Partner's state parameters. The Actor sets this variable either + * to the value received from the Partner in an LACPDU, or to the value + * of Partner_Admin_Port_State. Bitmask of port states. + */ + uint8_t partner_state; + + /** The operational Partner's port parameters */ + struct port_params partner; + + /* Additional port parameters not listed in documentation */ + /** State machine flags */ + uint16_t sm_flags; + enum selection selected; + + uint64_t current_while_timer; + uint64_t periodic_timer; + uint64_t wait_while_timer; + uint64_t tx_machine_timer; + /* Agregator parameters */ + /** + * Index in mode8023ad_data::port_list[] of Aggregator + * the port is currently attached to. + */ + uint16_t agregator_idx; +}; + + +/** + * Struct used to comunicate with 8023ad logic. + */ +struct slow_protocol_msg { + struct rte_mbuf *pkt; + uint8_t port_id; +}; + +/** Data specific to mode 802.1AX */ +struct mode8023ad_data { + /** Memory pool used to allocated rings */ + struct rte_mempool *mbuf_pool; + + /** Ring containing free slow_protocol_msg objects. Used to avoid + * alocating/freeing memory in RX/TX bursts */ + struct rte_ring *free_ring; + + /** Ring of struct slow_protocol_msg from RX burst function */ + struct rte_ring *rx_ring; + + /** Ring of struct slow_protocol_msg to RX burst function */ + struct rte_ring *tx_ring; + + /** list of all enslaved ports in mode 802.1AX */ + struct port port_list[RTE_MAX_ETHPORTS]; + + /** List of offsets in active slaves array used to tansmit packets. */ + uint8_t distibuting_slaves_offsets[RTE_MAX_ETHPORTS]; + uint8_t distibuting_slaves_count; +}; + +/* Forward declaration */ +struct bond_dev_private; + +/** + * Configures 802.1AX mode and all active slaves on bonded interface. + * + * @param dev Bonded interface + * @return + * 0 on success, negative value otherwise. + */ +int +bond_mode_8023ad_init(struct rte_eth_dev *dev); + +/** + * Deconfigures 802.1AX mode of the bonded interface and slaves. + * + * @param dev Bonded interface + * @return + * 0 on success, negative value otherwise. + */ +int bond_mode_8023ad_disable(struct rte_eth_dev *dev); + +/** + * Starts 802.3AX state machines management logic. + * @param dev Bonded interface + * @return + * 0 if machines was started, 1 if machines was already running, + * negative value otherwise. + */ +int +bond_mode_8023ad_start(struct rte_eth_dev *dev); + +/** + * Stops 802.3AX state machines management logic. + * @param dev Bonded interface + * @return + * 0 if this call stopped state machines, -ENOENT if alarm was not set. + */ +int +bond_mode_8023ad_stop(struct rte_eth_dev *dev); + +/** + * Passes given slow packet to state machines management logic. + * @param internals Bonded device private data. + * @param slave_pos Possition in active slaves array on which this packet was received. + * @param slot_pkt Slow packet + */ +void +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, + uint8_t slave_pos, struct rte_mbuf *slot_pkt); + +/** + * Appends and initializes slave active_slaves[slave_num] to use with + * 802.1AX mode. + * + * @pre active_slaves[active_slave_count] must contain valid slave id. + * @post active_slave_count must be incremented. + * + * @param dev Bonded interface. + * + * @return + * 0 on success, negative value otherwise. + */ +void +bond_mode_8023ad_slave_append(struct rte_eth_dev *dev); + +/** + * Denitializes and removes given slave from 802.1AX mode. + * + * @pre active_slaves[slave_num] must contain valid slave id corresponding to + * slave initialized in 802.1AX mode. + * @post active_slaves[slave_num] must be removed. + * + * @param dev Bonded interface. + * @param slave_num Position of slave in active_slaves array + * + * @return + * 0 on success, negative value otherwise. + * + */ +int +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos); + +/** + * Converts port_number from network byte order to port id. + * + * @param port_number The 8023ad port number to convert. + * @return corresponding slave id + */ +static inline uint8_t +port_number_to_slave_id(uint16_t port_number) +{ + uint16_t port_id = rte_be_to_cpu_16(port_number); + /* Standard requires that port number must be grater than 0. + * Substract 1 to get corresponding slave id */ + return port_id - 1; +} + +/** + * Converts port id to mode 8023ad port number. + * + * @param slave_id Id of slave to convert. + * @return corresponding Port number in network byte order. + */ +static inline uint16_t +slave_id_to_port_number(uint8_t slave_id) +{ + /* Standard requires that port ID must be grater than 0. + * Add 1 do get corresponding port_number */ + uint16_t port_number = (uint16_t)slave_id + 1; + return rte_cpu_to_be_16(port_number); +} + +#endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c index c690ceb..c547164 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_api.c +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c @@ -31,6 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <string.h> + #include <rte_mbuf.h> #include <rte_malloc.h> #include <rte_ethdev.h> @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id) return 0; } +void +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + internals->active_slaves[active_count] = port_id; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_slave_append(eth_dev); + + internals->active_slave_count = active_count + 1; +} + +void +deactivate_slave(struct rte_eth_dev *eth_dev, + uint8_t slave_pos) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos); + + active_count--; + + /* If slave was not at the end of the list + * shift active slaves up active array list */ + if (slave_pos < active_count) { + memmove(internals->active_slaves + slave_pos, + internals->active_slaves + slave_pos + 1, + (active_count - slave_pos) * + sizeof(internals->active_slaves[0])); + } + + internals->active_slave_count = active_count; +} + uint8_t number_of_sockets(void) { @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) eth_dev->dev_ops = &default_dev_ops; eth_dev->pci_dev = pci_dev; - if (bond_ethdev_mode_set(eth_dev, mode)) { - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", - eth_dev->data->port_id, mode); - goto err; - } - + internals->port_id = eth_dev->data->port_id; + internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = 0; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; internals->user_defined_mac = 0; @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); memset(internals->slaves, 0, sizeof(internals->slaves)); + if (bond_ethdev_mode_set(eth_dev, mode)) { + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", + eth_dev->data->port_id, mode); + goto err; + } + return eth_dev->data->port_id; err: @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) rte_eth_link_get_nowait(slave_port_id, &link_props); if (link_props.link_status == 1) - internals->active_slaves[internals->active_slave_count++] = - slave_port_id; + activate_slave(bonded_eth_dev, slave_port_id); } return 0; } - int rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) { @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) return retval; } - static int __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) { + struct rte_eth_dev *bonded_eth_dev; struct bond_dev_private *internals; - int i, slave_idx = -1; + int i, slave_idx; if (valid_slave_port_id(slave_port_id) != 0) return -1; - internals = rte_eth_devices[bonded_port_id].data->dev_private; + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; /* first remove from active slave list */ - for (i = 0; i < internals->active_slave_count; i++) { - if (internals->active_slaves[i] == slave_port_id) - slave_idx = i; + slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count, + slave_port_id); - /* shift active slaves up active array list */ - if (slave_idx >= 0 && i < (internals->active_slave_count - 1)) - internals->active_slaves[i] = internals->active_slaves[i+1]; - } - - if (slave_idx >= 0) - internals->active_slave_count--; + if (slave_idx < internals->active_slave_count) + deactivate_slave(bonded_eth_dev, slave_idx); slave_idx = -1; /* now find in slave list */ @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id) return internals->current_primary_port; } + int rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len) { @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id) return internals->balance_xmit_policy; } - int rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms) { @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id) return internals->link_down_delay_ms; } - int rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c index bbbc69b..a0be0e6 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_args.c +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, case BONDING_MODE_ACTIVE_BACKUP: case BONDING_MODE_BALANCE: case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: return 0; default: RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c index 6d0fb1b..13630d9 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c @@ -44,6 +44,7 @@ #include "rte_eth_bond.h" #include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad.h" static uint16_t bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue, bufs, nb_pkts); } +static uint16_t +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + struct bond_dev_private *internals = bd_rx_q->dev_private; + struct mode8023ad_data *mode4 = &internals->mode4; + struct ether_addr bond_mac; + + struct ether_hdr *hdr; + struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow packet */ + + uint16_t num_rx_slave = 0; /* Number of packet received on current slave */ + uint16_t num_rx_total = 0; /* Total number of received packets */ + + uint8_t i, j; + + rte_eth_macaddr_get(internals->port_id, &bond_mac); + + for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) { + /* Read packets from this slave */ + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i], + bd_rx_q->queue_id, pkts, nb_pkts + 1 - num_rx_total); + + /* Separate slow protocol packets from other packets */ + for (j = 0; j < num_rx_slave; j++) { + hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *); + + uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type); + if (unlikely(ether_type == ETHER_TYPE_SLOW)) { + bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]); + continue; + } + + /* Check if we can receive this packet. Also filter packets if + * bonding interface is not in promiscuous mode (slaves are always + * in promiscuous mode). */ + if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) && + likely(internals->promiscuous_en || + is_same_ether_addr(&bond_mac, &hdr->d_addr))) { + bufs[num_rx_total++] = pkts[j]; + } else + rte_pktmbuf_free(pkts[j]); + } + } + + return num_rx_total; +} + static inline uint16_t ether_hash(struct ether_hdr *eth_hdr) { @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, } static uint16_t +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct mode8023ad_data *mode4; + struct bond_tx_queue *bd_tx_q; + + uint8_t num_of_slaves; + uint8_t slaves[RTE_MAX_ETHPORTS]; + /* possitions in slaves, not ID */ + uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; + uint8_t distributing_slaves_count; + + uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0; + uint16_t i, op_slave_idx; + + /* Slow packets from 802.3AX state machines. */ + struct slow_protocol_msg *slow_msg; + + /* Allocate one additional packet in case 8023AD mode. + * First element if not NULL is slow packet. */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1]; + /* Total amount of packets in slave_bufs */ + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + /* Array of slow packets placed in each slave */ + uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 }; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + mode4 = &internals->mode4; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + if (num_of_slaves < 1) + return num_tx_total; + + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); + + distributing_slaves_count = mode4->distibuting_slaves_count; + memcpy(distributing_offsets, mode4->distibuting_slaves_offsets, + sizeof(slaves[0]) * distributing_slaves_count); + + for (i = 0; i < num_of_slaves; i++) + slave_bufs[i][0] = NULL; + + /* It is likely that tx ring will be empty. If it is not empty, it is + * likely that there will be only one frame. */ + while (unlikely(!rte_ring_empty(mode4->tx_ring)) && + rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) { + i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id); + + /* Assign slow packet to slave or drop it if slave is not in active list + * (ex: link down). */ + if (likely(i < num_of_slaves)) { + /* If there is more than one slow packet to the same slave, send + * only latest, and drop previouse - tx burst was no called quick + * enough. */ + if (slave_bufs[i][0] != NULL) + rte_pktmbuf_free(slave_bufs[i][0]); + + slave_bufs[i][0] = slow_msg->pkt; + slave_nb_pkts[i] = 1; + slave_slow_packets[i] = 1; + } else + rte_pktmbuf_free(slow_msg->pkt); + + rte_ring_enqueue(mode4->free_ring, slow_msg); + } + + if (likely(distributing_slaves_count > 0)) { + /* Populate slaves mbuf with the packets which are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { + /* Select output slave using hash based on xmit policy */ + op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count, + internals->balance_xmit_policy); + + /* Populate slave mbuf arrays with mbufs for that slave. Use only + * slaves that are currently distributing. */ + uint8_t slave_offset = distributing_offsets[op_slave_idx]; + uint16_t pkt_pos = slave_nb_pkts[slave_offset]; + slave_nb_pkts[slave_offset]++; + + slave_bufs[slave_offset][pkt_pos] = bufs[i]; + } + } + + /* Send packet burst on each slave device */ + for (i = 0; i < num_of_slaves; i++) { + if (slave_nb_pkts[i] > 0) { + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + slave_bufs[i], slave_nb_pkts[i]); + + /* if tx burst fails move packets to end of bufs */ + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { + uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; + + /* Free slow packet if it exists and not send. */ + if (slave_slow_packets[i] != 0 && num_tx_slave == 0) { + rte_pktmbuf_free(slave_bufs[i][0]); + slave_tx_fail_count--; + } + + tx_fail_total += slave_tx_fail_count; + memcpy(bufs[nb_pkts - tx_fail_total], + slave_bufs[i][num_tx_slave], + slave_tx_fail_count); + } + + if (num_tx_slave > 0) + num_tx_slave -= slave_slow_packets[i]; + + num_tx_total += num_tx_slave; + } + } + + return num_tx_total; +} + +static uint16_t bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) { @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link, } int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) +{ + struct ether_addr *mac_addr; + + mac_addr = eth_dev->data->mac_addrs; + + if (eth_dev == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); + return -1; + } + + if (dst_mac_addr == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); + return -1; + } + + ether_addr_copy(mac_addr, dst_mac_addr); + return 0; +} + +int mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) { struct ether_addr *mac_addr; @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) mac_addr = eth_dev->data->mac_addrs; if (eth_dev == NULL) { - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); return -1; } @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) } } break; + case BONDING_MODE_8023AD: + break; case BONDING_MODE_ACTIVE_BACKUP: default: for (i = 0; i < internals->slave_count; i++) { @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast; eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; break; + case BONDING_MODE_8023AD: + if (bond_mode_8023ad_init(eth_dev) != 0) + return -1; + + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; + break; default: return -1; } @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) if (internals->user_defined_primary_port) bond_ethdev_primary_set(internals, internals->primary_port); + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_start(eth_dev); if (internals->link_status_polling_enabled) rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) { struct bond_dev_private *internals = eth_dev->data->dev_private; + if (internals->mode == BONDING_MODE_8023AD) { + struct mode8023ad_data *data = &internals->mode4; + struct slow_protocol_msg *msg; + + bond_mode_8023ad_stop(eth_dev); + data->distibuting_slaves_count = 0; + + /* Discard all messages to/from mode 4 state machines */ + while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) { + rte_pktmbuf_free(msg->pkt); + rte_ring_enqueue(data->free_ring, msg); + } + + while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) { + rte_pktmbuf_free(msg->pkt); + rte_ring_enqueue(data->free_ring, msg); + } + } + internals->active_slave_count = 0; internals->link_status_polling_enabled = 0; @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, 0, dev->pci_dev->numa_node); if (bd_tx_q == NULL) - return -1; + return -1; bd_tx_q->queue_id = tx_queue_id; bd_tx_q->dev_private = dev->data->dev_private; @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue) rte_free(queue); } - static void bond_ethdev_slave_link_status_change_monitor(void *cb_arg) { @@ -884,7 +1105,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg) /* If device is currently being configured then don't check slaves link * status, wait until next period */ - if (rte_spinlock_trylock(&internals->lock)){ + if (rte_spinlock_trylock(&internals->lock)) { for (i = 0; i < internals->slave_count; i++) { if (internals->slaves[i].link_status_polling_enabled) { slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) for (i = 0; i < internals->slave_count; i++) rte_eth_promiscuous_enable(internals->slaves[i].port_id); break; + /* In mode4 promiscus mode is managed when slave is added/removed */ + case BONDING_MODE_8023AD: + break; /* Promiscuous mode is propagated only to primary slave */ case BONDING_MODE_ACTIVE_BACKUP: default: rte_eth_promiscuous_enable(internals->current_primary_port); - } } @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) int i; internals->promiscuous_en = 0; - + switch (internals->mode) { /* Promiscuous mode is propagated to all slaves */ case BONDING_MODE_ROUND_ROBIN: @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) for (i = 0; i < internals->slave_count; i++) rte_eth_promiscuous_disable(internals->slaves[i].port_id); break; + /* In mode4 promiscus mode is set managed when slave is added/removed */ + case BONDING_MODE_8023AD: + break; /* Promiscuous mode is propagated only to primary slave */ case BONDING_MODE_ACTIVE_BACKUP: default: @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, struct bond_dev_private *internals; struct rte_eth_link link; - int i, valid_slave = 0, active_pos = -1; + int i, valid_slave = 0; + uint8_t active_pos; uint8_t lsc_flag = 0; if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, return; /* Search for port in active port list */ - for (i = 0; i < internals->active_slave_count; i++) { - if (port_id == internals->active_slaves[i]) { - active_pos = i; - break; - } - } + active_pos = find_slave_by_id(internals->active_slaves, + internals->active_slave_count, port_id); rte_eth_link_get_nowait(port_id, &link); if (link.link_status) { - if (active_pos >= 0) + if (active_pos < internals->active_slave_count) return; /* if no active slave ports then set this port to be primary port */ @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, link_properties_set(bonded_eth_dev, &(slave_eth_dev->data->dev_link)); } - internals->active_slaves[internals->active_slave_count++] = port_id; + + activate_slave(bonded_eth_dev, port_id); /* If user has defined the primary port then default to using it */ if (internals->user_defined_primary_port && internals->primary_port == port_id) bond_ethdev_primary_set(internals, port_id); } else { - if (active_pos < 0) + if (active_pos == internals->active_slave_count) return; /* Remove from active slave list */ - for (i = active_pos; i < (internals->active_slave_count - 1); i++) - internals->active_slaves[i] = internals->active_slaves[i+1]; - - internals->active_slave_count--; + deactivate_slave(bonded_eth_dev, active_pos); /* No active slaves, change link status to down and reset other * link properties */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h index 6db5144..77f7bb0 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_private.h +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h @@ -42,6 +42,7 @@ extern "C" { #include <rte_spinlock.h> #include "rte_eth_bond.h" +#include "rte_eth_bond_8023ad.h" #define PMD_BOND_SLAVE_PORT_KVARG ("slave") #define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary") @@ -60,6 +61,8 @@ extern "C" { #define RTE_BOND_LOG(lvl, msg, ...) \ RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__); +#define BONDING_MODE_INVALID 0xFF + extern const char *pmd_bond_init_valid_arguments[]; extern const char *driver_name; @@ -89,7 +92,13 @@ struct bond_tx_queue { /**< Copy of TX configuration structure for queue */ }; - +/** Persisted Slave Configuration Structure */ +struct slave_conf { + uint8_t port_id; + /**< Port Id of slave eth_dev */ + struct ether_addr mac_addr; + /**< Slave eth_dev original MAC address */ +}; /** Bonded slave devices structure */ struct bond_ethdev_slave_ports { uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */ @@ -124,7 +133,7 @@ struct bond_dev_private { uint8_t user_defined_mac; /**< Flag for whether MAC address is user defined or not */ uint8_t promiscuous_en; - /**< Enabled/disable promiscuous mode on slave devices */ + /**< Enabled/disable promiscuous mode on bonding device */ uint8_t link_props_set; /**< flag to denote if the link properties are set */ @@ -143,6 +152,9 @@ struct bond_dev_private { uint8_t slave_count; /**< Number of bonded slaves */ struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; /**< Arary of bonded slaves details */ + + struct mode8023ad_data mode4; + /**< Mode 4 private data */ }; extern struct eth_dev_ops default_dev_ops; @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops; int valid_bonded_ethdev(struct rte_eth_dev *eth_dev); +/* Search given slave array to find possition of given id. + * Return slave pos or slaves_count if not found. */ +static inline uint8_t +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, + uint8_t slave_id ) { + + uint8_t pos; + for (pos = 0; pos < slaves_count; pos++) { + if (slave_id == slaves[pos]) + break; + } + + return pos; +} + int valid_port_id(uint8_t port_id); @@ -160,6 +187,14 @@ int valid_slave_port_id(uint8_t port_id); void +deactivate_slave(struct rte_eth_dev *eth_dev, + uint8_t slave_pos ); + +void +activate_slave(struct rte_eth_dev *eth_dev, + uint8_t port_id ); + +void link_properties_set(struct rte_eth_dev *bonded_eth_dev, struct rte_eth_link *slave_dev_link); void @@ -173,6 +208,9 @@ int mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr); int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr); + +int mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev); uint8_t -- 1.7.9.5
Fixed patch version sent. Pawel
On Tue, Sep 30, 2014 at 07:19:33AM +0100, Pawel Wodkowski wrote: > This patch adds support mode 4 of link bonding. It depend on Delcan Doherty > patches v3 and rte alarms patch v2 or above. > > New version handles race issues with setting/cancelin callbacks, > fixes promiscus mode setting in mode 4 and some other minor errors in mode 4 > implementation. > > > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com> Looks good. Acked-by: Neil Horman <nhorman@tuxdriver.com> > --- > lib/librte_ether/rte_ether.h | 1 + > lib/librte_pmd_bond/Makefile | 1 + > lib/librte_pmd_bond/rte_eth_bond.h | 4 + > lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1070 ++++++++++++++++++++++++++++ > lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 405 +++++++++++ > lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++- > lib/librte_pmd_bond/rte_eth_bond_args.c | 1 + > lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 ++++++- > lib/librte_pmd_bond/rte_eth_bond_private.h | 42 +- > 9 files changed, 1821 insertions(+), 46 deletions(-) > create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c > create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h > > diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h > index 2e08f23..1a3711b 100644 > --- a/lib/librte_ether/rte_ether.h > +++ b/lib/librte_ether/rte_ether.h > @@ -293,6 +293,7 @@ struct vlan_hdr { > #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ > #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ > #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ > +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ > > #ifdef __cplusplus > } > diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile > index 953d75e..c2312c2 100644 > --- a/lib/librte_pmd_bond/Makefile > +++ b/lib/librte_pmd_bond/Makefile > @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS) > # > SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c > SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c > +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c > SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c > > # > diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h > index 6811c7b..b0223c2 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond.h > +++ b/lib/librte_pmd_bond/rte_eth_bond.h > @@ -75,6 +75,10 @@ extern "C" { > /**< Broadcast (Mode 3). > * In this mode all transmitted packets will be transmitted on all available > * active slaves of the bonded. */ > +#define BONDING_MODE_8023AD (4) > +/**< 802.3AD (Mode 4). > + * In this mode transmission and reception of packets is managed by LACP > + * protocol specified in 802.3AD documentation. */ > > /* Balance Mode Transmit Policies */ > #define BALANCE_XMIT_POLICY_LAYER2 (0) > diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > new file mode 100644 > index 0000000..de416c6 > --- /dev/null > +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > @@ -0,0 +1,1070 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include <stddef.h> > +#include <string.h> > + > +#include <rte_alarm.h> > +#include <rte_malloc.h> > +#include <rte_errno.h> > + > +#include "rte_eth_bond_private.h" > +#include "rte_eth_bond_8023ad.h" > + > +#include <rte_cycles.h> > + > +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD > +#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \ > + bond_dbg_get_time_diff_ms(), internals->active_slaves[port_num], \ > + __FUNCTION__, ##__VA_ARGS__) > + > +static unsigned > +bond_dbg_get_time_diff_ms(void) > +{ > + static uint64_t start_time = 0; > + uint64_t now; > + > + now = rte_rdtsc(); > + if (start_time == 0) > + start_time = now; > + > + return ((now - start_time) * 1000) / rte_get_tsc_hz(); > +} > + > +static void > +bond_print_lacp(struct lacpdu *l) > +{ > + char a_address[18]; > + char p_address[18]; > + char a_state[256] = { 0 }; > + char p_state[256] = { 0 }; > + > + static const char *state_labels[] = { > + "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP" > + }; > + > + int a_len = 0; > + int p_len = 0; > + uint8_t i; > + uint8_t *addr; > + > + addr = l->actor.port_params.system.addr_bytes; > + snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X", > + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); > + > + addr = l->partner.port_params.system.addr_bytes; > + snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X", > + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); > + > + for (i = 0; i < 8; i++) { > + if ((l->actor.state >> i) & 1) { > + a_len += snprintf(a_state + a_len, sizeof(a_state) - a_len, "%s ", > + state_labels[i]); > + } > + > + if ((l->partner.state >> i) & 1) { > + p_len += snprintf(p_state + p_len, sizeof(p_state) - p_len, "%s ", > + state_labels[i]); > + } > + } > + > + if (a_len && a_state[a_len-1] == ' ') > + a_state[a_len-1] = '\0'; > + > + if (p_len && p_state[p_len-1] == ' ') > + p_state[p_len-1] = '\0'; > + > + RTE_LOG(DEBUG, PMD, "LACP: {\n"\ > + " subtype= %02X\n"\ > + " ver_num=%02X\n"\ > + " actor={ tlv=%02X, len=%02X\n"\ > + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ > + " state={ %s }\n"\ > + " }\n"\ > + " partner={ tlv=%02X, len=%02X\n"\ > + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\ > + " state={ %s }\n"\ > + " }\n"\ > + " collector={info=%02X, length=%02X, max_delay=%04X\n, " \ > + "type_term=%02X, terminator_length = %02X}\n",\ > + l->subtype,\ > + l->version_number,\ > + l->actor.tlv_type_info,\ > + l->actor.info_length,\ > + l->actor.port_params.system_priority,\ > + a_address,\ > + l->actor.port_params.key,\ > + l->actor.port_params.port_priority,\ > + l->actor.port_params.port_number,\ > + a_state,\ > + l->partner.tlv_type_info,\ > + l->partner.info_length,\ > + l->partner.port_params.system_priority,\ > + p_address,\ > + l->partner.port_params.key,\ > + l->partner.port_params.port_priority,\ > + l->partner.port_params.port_number,\ > + p_state,\ > + l->tlv_type_collector_info,\ > + l->collector_info_length,\ > + l->collector_max_delay,\ > + l->tlv_type_terminator,\ > + l->terminator_length); > + > +} > +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu) > + > +#else > +#define BOND_PRINT_LACP(lacpdu) do { } while (0) > +#define MODE4_DEBUG(fmt, ...) do { } while (0) > +#endif > + > +static const struct ether_addr lacp_mac_addr = { > + .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 } > +}; > + > +static void > +timer_cancel(uint64_t *timer) > +{ > + *timer = 0; > +} > + > +static void > +timer_set(uint64_t *timer, uint64_t timeout_ms) > +{ > + *timer = rte_rdtsc() + timeout_ms * rte_get_tsc_hz() / 1000; > +} > + > +/* Forces given timer to be in expired state. */ > +static void > +timer_force_expired(uint64_t *timer) > +{ > + *timer = rte_rdtsc(); > +} > + > +static bool > +timer_is_stopped(uint64_t *timer) > +{ > + return *timer == 0; > +} > + > +static bool > +timer_is_expired(uint64_t *timer) > +{ > + return *timer <= rte_rdtsc(); > +} > + > +/* Timer is in running state if it is not stopped nor expired */ > +static bool > +timer_is_running(uint64_t *timer) > +{ > + return !timer_is_stopped(timer) && !timer_is_expired(timer); > +} > + > +static void > +record_default(struct port *port) > +{ > + /* Record default parametes for partner. Partner admin parameters > + * are not implemented so set them to arbitrary default (last known) and > + * mark actor that parner is in defaulted state. */ > + port->partner_state = STATE_LACP_ACTIVE; > + ACTOR_STATE_SET(port, DEFAULTED); > +} > + > +/** Function handles rx state machine. > + * > + * This function implements Receive State Machine from point 5.4.12 in > + * 802.1AX documentation. It should be called periodically. > + * > + * @param lacpdu LACPDU received. > + * @param port Port on which LACPDU was received. > + */ > +static void > +rx_machine(struct bond_dev_private *internals, uint8_t port_num, > + struct lacpdu *lacp) > +{ > + struct port *port = &internals->mode4.port_list[port_num]; > + > + if (SM_FLAG(port, BEGIN)) { > + /* Initialize stuff */ > + MODE4_DEBUG("-> INITIALIZE\n"); > + SM_FLAG_CLR(port, MOVED); > + port->selected = UNSELECTED; > + > + record_default(port); > + > + ACTOR_STATE_CLR(port, EXPIRED); > + timer_cancel(&port->current_while_timer); > + > + /* DISABLED: On initialization partner is out of sync */ > + PARTNER_STATE_CLR(port, SYNCHRONIZATION); > + > + /* LACP DISABLED stuff if LACP not enabled on this port */ > + if (!SM_FLAG(port, LACP_ENABLED)) > + PARTNER_STATE_CLR(port, AGGREGATION); > + } > + > + if (!SM_FLAG(port, LACP_ENABLED)) { > + /* Update parameters only if state changed */ > + if (!timer_is_stopped(&port->current_while_timer)) { > + port->selected = UNSELECTED; > + record_default(port); > + PARTNER_STATE_CLR(port, AGGREGATION); > + ACTOR_STATE_CLR(port, EXPIRED); > + timer_cancel(&port->current_while_timer); > + } > + return; > + } > + > + if (lacp) { > + MODE4_DEBUG("LACP -> CURRENT\n"); > + BOND_PRINT_LACP(lacp); > + /* Update selected flag. If partner parameters are defaulted assume they > + * are match. If not defaulted compare LACP actor with ports parner > + * params. */ > + if (!(port->actor_state & STATE_DEFAULTED) && > + (((port->partner_state ^ lacp->actor.state) & STATE_AGGREGATION) || > + memcmp(&port->partner, &lacp->actor.port_params, > + sizeof(port->partner)) != 0)) { > + MODE4_DEBUG("selected <- UNSELECTED\n"); > + port->selected = UNSELECTED; > + } > + > + /* Record this PDU actor params as partner params */ > + memcpy(&port->partner, &lacp->actor.port_params, > + sizeof(struct port_params)); > + port->partner_state = lacp->actor.state; > + > + /* Partner parameters are not defaulted any more */ > + ACTOR_STATE_CLR(port, DEFAULTED); > + > + /* Update NTT if partners information are outdated */ > + uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT | > + STATE_SYNCHRONIZATION | STATE_AGGREGATION; > + > + if (((port->actor_state ^ lacp->partner.state) & state_mask) || > + memcmp(&port->actor, &lacp->partner.port_params, > + sizeof(struct port_params)) != 0) { > + port->sm_flags |= SM_FLAGS_NTT; > + } > + > + /* If LACP partner params match this port actor params */ > + if (memcmp(&port->actor, &lacp->partner.port_params, > + sizeof(port->actor)) == 0 && > + (port->partner_state & STATE_AGGREGATION) == (port->actor_state > + & STATE_AGGREGATION)) > + PARTNER_STATE_SET(port, SYNCHRONIZATION); > + else if (!(port->partner_state & STATE_AGGREGATION) && > + (port->actor_state & STATE_AGGREGATION)) > + PARTNER_STATE_SET(port, SYNCHRONIZATION); > + else > + PARTNER_STATE_CLR(port, SYNCHRONIZATION); > + > + if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT)) > + timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS); > + else > + timer_set(&port->current_while_timer, BOND_8023AD_LONG_TIMEOUT_MS); > + > + ACTOR_STATE_CLR(port, EXPIRED); > + return; /* No state change */ > + } > + > + /* If CURRENT state timer is not running (stopped or expired) > + * transit to EXPIRED state from DISABLED or CURRENT */ > + if (!timer_is_running(&port->current_while_timer)) { > + ACTOR_STATE_SET(port, EXPIRED); > + PARTNER_STATE_CLR(port, SYNCHRONIZATION); > + PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT); > + timer_set(&port->current_while_timer, BOND_8023AD_SHORT_TIMEOUT_MS); > + } > +} > + > +/** > + * Function handles periodic tx state machine. > + * > + * Function implements Periodic Transmission state machine from point 5.4.13 > + * in 802.1AX documentation. It should be called periodically. > + * > + * @param port Port to handle state machine. > + */ > +static void > +periodic_machine(struct bond_dev_private *internals, uint8_t port_num) > +{ > + struct port *port = &internals->mode4.port_list[port_num]; > + /* Calculate if either site is LACP enabled */ > + uint32_t timeout; > + uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) || > + PARTNER_STATE(port, LACP_ACTIVE); > + > + uint8_t is_partner_fast, was_partner_fast; > + /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */ > + if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || > + active == 0) { > + timer_cancel(&port->periodic_timer); > + timer_force_expired(&port->tx_machine_timer); > + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); > + > + MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n", > + SM_FLAG(port, BEGIN) ? "begind " : "", > + SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ", > + active ? "LACP active " : "LACP pasive "); > + return; > + } > + > + is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT); > + was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT); > + > + /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW. > + * Other case: check if timer expire or partners settings changed. */ > + if (!timer_is_stopped(&port->periodic_timer)) { > + if (timer_is_expired(&port->periodic_timer)) { > + SM_FLAG_SET(port, NTT); > + } else if (is_partner_fast != was_partner_fast) { > + /* Partners timeout was slow and now it is fast -> send LACP. > + * In other case (was fast and now it is slow) just switch > + * timeout to slow without forcing send of LACP (because standard > + * say so)*/ > + if (!is_partner_fast) > + SM_FLAG_SET(port, NTT); > + } else > + return; /* Nothing changed */ > + } > + > + /* Handle state transition to FAST/SLOW LACP timeout */ > + if (is_partner_fast) { > + timeout = BOND_8023AD_FAST_PERIODIC_MS; > + SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT); > + } else { > + timeout = BOND_8023AD_SLOW_PERIODIC_MS; > + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); > + } > + > + timer_set(&port->periodic_timer, timeout); > +} > + > +/** > + * Function handles mux state machine. > + * > + * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation. > + * It should be called periodically. > + * > + * @param port Port to handle state machine. > + */ > +static int > +mux_machine(struct bond_dev_private *internals, uint8_t port_num) > +{ > + bool ntt = false; > + struct port *port = &internals->mode4.port_list[port_num]; > + > + /* Save current state for later use */ > + const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | > + STATE_COLLECTING; > + > + /* Enter DETACHED state on BEGIN condition or from any other state if > + * port was unselected */ > + if (SM_FLAG(port, BEGIN) || > + port->selected == UNSELECTED || (port->selected == STANDBY && > + (port->actor_state & state_mask) != 0)) { > + /* detach mux from aggregator not used */ > + port->actor_state &= ~state_mask; > + /* Set ntt to true if BEGIN condition or transition from any other state > + * which is indicated that wait_while_timer was started */ > + if (SM_FLAG(port, BEGIN) || > + !timer_is_stopped(&port->wait_while_timer)) { > + SM_FLAG_SET(port, NTT); > + MODE4_DEBUG("-> DETACHED\n"); > + } > + timer_cancel(&port->wait_while_timer); > + } > + > + if (timer_is_stopped(&port->wait_while_timer)) { > + if (port->selected == SELECTED || port->selected == STANDBY) { > + timer_set(&port->wait_while_timer, > + BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS); > + > + MODE4_DEBUG("DETACHED -> WAITING\n"); > + } > + /* Waiting state entered */ > + return 0; > + } > + > + /* Transit next state if port is ready */ > + if (!timer_is_expired(&port->wait_while_timer)) > + return 0; > + > + if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) && > + !PARTNER_STATE(port, SYNCHRONIZATION)) { > + /* If in COLLECTING or DISTRIBUTING state and partner becomes out of > + * sync transit to ATACHED state. */ > + ACTOR_STATE_CLR(port, DISTRIBUTING); > + ACTOR_STATE_CLR(port, COLLECTING); > + /* Clear actor sync to activate transit ATACHED in condition bellow */ > + ACTOR_STATE_CLR(port, SYNCHRONIZATION); > + MODE4_DEBUG("Out of sync -> ATTACHED\n"); > + } else if (!ACTOR_STATE(port, SYNCHRONIZATION)) { > + /* attach mux to aggregator */ > + RTE_VERIFY((port->actor_state & (STATE_COLLECTING | > + STATE_DISTRIBUTING)) == 0); > + ACTOR_STATE_SET(port, SYNCHRONIZATION); > + ntt = true; > + MODE4_DEBUG("ATTACHED Entered\n"); > + } else if (!ACTOR_STATE(port, COLLECTING)) { > + /* Start collecting if in sync */ > + if (PARTNER_STATE(port, SYNCHRONIZATION)) { > + MODE4_DEBUG("ATTACHED -> COLLECTING\n"); > + ACTOR_STATE_SET(port, COLLECTING); > + } > + } else if (ACTOR_STATE(port, COLLECTING)) { > + /* Check if partner is in COLLECTING state. If so this port can > + * distribute frames to it */ > + if (!ACTOR_STATE(port, DISTRIBUTING)) { > + if (PARTNER_STATE(port, COLLECTING)) { > + /* Enable DISTRIBUTING if partner is collecting */ > + ACTOR_STATE_SET(port, DISTRIBUTING); > + ntt = true; > + MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n"); > + } > + } else { > + if (!PARTNER_STATE(port, COLLECTING)) { > + /* Disable DISTRIBUTING (enter COLLECTING state) if partner > + * is not collecting */ > + ACTOR_STATE_CLR(port, DISTRIBUTING); > + ntt = true; > + MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n"); > + } > + } > + } > + > + if (ntt != false) > + SM_FLAG_SET(port, NTT); > + > + return ntt; > +} > + > +/** > + * Function handles transmit state machine. > + * > + * Function implements Transmit Machine from point 5.4.16 in 802.1AX > + * documentation. > + * > + * @param port > + */ > +static void > +tx_machine(struct rte_eth_dev *bond_dev, uint8_t port_num) > +{ > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct port *port = &internals->mode4.port_list[port_num]; > + struct mode8023ad_data *data = &internals->mode4; > + > + struct slow_protocol_msg *msg = NULL; > + struct lacpdu_header *hdr; > + struct lacpdu *lacpdu; > + > + /* If periodic timer is not running periodic machine is in NO PERIODIC and > + * acording to 802.3ax standard tx machine should not transmit any frames > + * and set ntt to false. */ > + if (timer_is_stopped(&port->periodic_timer)) > + SM_FLAG_CLR(port, NTT); > + > + if (!SM_FLAG(port, NTT) || !timer_is_expired(&port->tx_machine_timer)) > + return; > + > + /* If all conditions are met construct packet to send */ > + if (rte_ring_dequeue(data->free_ring, (void **)&msg) == -ENOBUFS) { > + MODE4_DEBUG("tx_machine: no free_lacpdu_ring\n"); > + return; > + } > + > + msg->pkt = rte_pktmbuf_alloc(data->mbuf_pool); > + if (msg->pkt == NULL) { > + rte_ring_enqueue(data->free_ring, msg); > + RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n"); > + return; > + } > + > + msg->port_id = internals->active_slaves[port_num]; > + hdr = rte_pktmbuf_mtod(msg->pkt, struct lacpdu_header *); > + > + msg->pkt->data_len = sizeof(*hdr); > + msg->pkt->pkt_len = sizeof(*hdr); > + /* Source and destination MAC */ > + ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr); > + ether_addr_copy(&port->actor.system, &hdr->eth_hdr.s_addr); > + hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW); > + > + lacpdu = &hdr->lacpdu; > + memset(lacpdu, 0, sizeof(*lacpdu)); > + > + /* Initialize LACP part */ > + lacpdu->subtype = SUBTYPE_LACP; > + lacpdu->version_number = 1; > + > + /* ACTOR */ > + lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION; > + lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params); > + memcpy(&hdr->lacpdu.actor.port_params, &port->actor, > + sizeof(port->actor)); > + lacpdu->actor.state = port->actor_state; > + > + /* PARTNER */ > + lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION; > + lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params); > + memcpy(&lacpdu->partner.port_params, &port->partner, > + sizeof(struct port_params)); > + lacpdu->partner.state = port->partner_state; > + > + /* Other fields */ > + lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION; > + lacpdu->collector_info_length = 0x10; > + lacpdu->collector_max_delay = 0; > + > + lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION; > + lacpdu->terminator_length = 0; > + > + if (rte_ring_enqueue(data->tx_ring, msg) == -ENOBUFS) { > + /* If TX ring full, drop packet and free message. Retransmission > + * will happen in next function call. */ > + rte_pktmbuf_free(msg->pkt); > + rte_ring_enqueue(data->free_ring, msg); > + > + RTE_LOG(ERR, PMD, "Failed to enqueue LACP packet into tx ring.\n" > + "Receive and transmit functions must be invoked on bonded interface" > + " at least 10 times per second or LACP will not work correctly\n"); > + return; > + } > + > + MODE4_DEBUG("sending LACP frame\n"); > + BOND_PRINT_LACP(lacpdu); > + > + SM_FLAG_CLR(port, NTT); > + /* Add 10% random backoff time to better distribute slow packets > + * between tx bursts. */ > + timer_set(&port->tx_machine_timer, BOND_8023AD_TX_PERIOD_MS + > + rand() % ((BOND_8023AD_TX_PERIOD_MS * 10) / 100)); > +} > + > +/** > + * Function assigns port to aggregator. > + * > + * @param bond_dev_private Pointer to bond_dev_private structure. > + * @param port_pos Port to assign. > + */ > +static void > +selection_logic(struct bond_dev_private *internals, uint8_t port_num) > +{ > + struct mode8023ad_data *data = &internals->mode4; > + struct port *agg, *port, *port_list; > + uint8_t ports_count; > + uint8_t i; > + > + ports_count = internals->slave_count; > + port_list = data->port_list; > + port = &port_list[port_num]; > + > + /* Skip port if it is selected */ > + if (port->selected == SELECTED) > + return; > + > + /* Search for aggregator suitable for this port */ > + for (i = 0; i < ports_count; ++i) { > + agg = &port_list[i]; > + /* Skip ports that are not aggreagators */ > + if (agg->agregator_idx != i && i == port_num) > + continue; > + > + /* Actors system ID is not checked since all slave device have the same > + * ID (MAC address). */ > + if ((agg->actor.key == port->actor.key && > + agg->partner.system_priority == port->partner.system_priority && > + is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1 > + && (agg->partner.key == port->partner.key)) && > + is_zero_ether_addr(&port->partner.system) != 1 && > + (agg->actor.key & > + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) { > + > + port->agregator_idx = i; > + break; > + } > + } > + > + /* By default, port uses it self as agregator */ > + if (i == ports_count) > + port->agregator_idx = port_num; > + > + port->selected = SELECTED; > + > + MODE4_DEBUG("-> SELECTED: ID=%3u pos=%3u\n" > + "\t%s ID=%3u pos=%3u\n", > + internals->active_slaves[port_num], port_num, > + port->agregator_idx == port_num ? > + "agregator not found, using default" : "agregator found", > + port->agregator_idx, > + internals->active_slaves[port->agregator_idx]); > +} > + > +/** > + * Helper function which updates current port > + */ > +static void > +update_mux_slaves(struct bond_dev_private *internals) > +{ > + struct mode8023ad_data *data = &internals->mode4; > + struct port *port; > + uint8_t current[RTE_MAX_ETHPORTS]; > + uint8_t count = 0; > + uint8_t i; > + > + for (i = 0; i < internals->slave_count; i++) { > + port = &data->port_list[i]; > + if (ACTOR_STATE(port, DISTRIBUTING)) > + current[count++] = i; > + } > + > + memcpy(data->distibuting_slaves_offsets, current, > + sizeof(current[0]) * count); > + data->distibuting_slaves_count = count; > +} > + > +/* Function maps DPDK speed to bonding speed stored in key field */ > +static uint16_t > +link_speed_key(uint16_t speed) { > + uint16_t key_speed; > + > + switch (speed) { > + case ETH_LINK_SPEED_AUTONEG: > + key_speed = 0x00; > + break; > + case ETH_LINK_SPEED_10: > + key_speed = BOND_LINK_SPEED_KEY_10M; > + break; > + case ETH_LINK_SPEED_100: > + key_speed = BOND_LINK_SPEED_KEY_100M; > + break; > + case ETH_LINK_SPEED_1000: > + key_speed = BOND_LINK_SPEED_KEY_1000M; > + break; > + case ETH_LINK_SPEED_10G: > + key_speed = BOND_LINK_SPEED_KEY_10G; > + break; > + case ETH_LINK_SPEED_20G: > + key_speed = BOND_LINK_SPEED_KEY_20G; > + break; > + case ETH_LINK_SPEED_40G: > + key_speed = BOND_LINK_SPEED_KEY_40G; > + break; > + default: > + /* Unknown speed*/ > + key_speed = 0xFFFF; > + } > + > + return key_speed; > +} > + > +static void > +bond_mode_8023ad_periodic_cb(void *arg) > +{ > + struct rte_eth_dev *bond_dev = arg; > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_data *data = &internals->mode4; > + > + struct port *port; > + struct slow_protocol_frame *slow_hdr; > + struct rte_eth_link link_info; > + struct ether_addr slave_addr; > + > + struct slow_protocol_msg *msgs[BOND_MODE_8023AX_RX_RING_SIZE]; > + uint16_t port_num, j, nb_msgs; > + /* if not 0 collecting/distibuting array need update */ > + uint16_t slaves_changed = 0; > + bool machines_invoked; > + > + /* Update link status on each port */ > + for (port_num = 0; port_num < internals->active_slave_count; port_num++) { > + uint16_t key; > + > + rte_eth_link_get(internals->active_slaves[port_num], &link_info); > + rte_eth_macaddr_get(internals->active_slaves[port_num], &slave_addr); > + > + if (link_info.link_status != 0) { > + key = link_speed_key(link_info.link_speed) << 1; > + if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX) > + key |= BOND_LINK_FULL_DUPLEX_KEY; > + } else > + key = 0; > + > + port = &data->port_list[port_num]; > + key = rte_cpu_to_be_16(key); > + > + if (key != port->actor.key) { > + port->actor.key = key; > + SM_FLAG_SET(port, NTT); > + } > + > + if (!is_same_ether_addr(&port->actor.system, &slave_addr)) { > + SM_FLAG_SET(port, NTT); > + ether_addr_copy(&slave_addr, &port->actor.system); > + } > + } > + > + nb_msgs = (uint16_t)rte_ring_dequeue_burst(data->rx_ring, (void **) msgs, > + BOND_MODE_8023AX_RX_RING_SIZE); > + > + for (port_num = 0; port_num < internals->active_slave_count; port_num++) { > + port = &data->port_list[port_num]; > + if ((port->actor.key & > + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) { > + > + SM_FLAG_SET(port, BEGIN); > + > + /* LACP is disabled on half duples or link is down */ > + if (SM_FLAG(port, LACP_ENABLED)) { > + /* If port was enabled set it to BEGIN state */ > + SM_FLAG_CLR(port, LACP_ENABLED); > + ACTOR_STATE_CLR(port, DISTRIBUTING); > + ACTOR_STATE_CLR(port, COLLECTING); > + slaves_changed++; > + } > + > + MODE4_DEBUG("Port %u is not LACP capable!\n", > + internals->active_slaves[port_num]); > + /* Skip this port processing */ > + continue; > + } > + > + SM_FLAG_SET(port, LACP_ENABLED); > + machines_invoked = false; > + /* Find LACP packet */ > + for (j = 0; j < nb_msgs; j++) { > + if (msgs[j] == NULL || msgs[j]->port_id != > + internals->active_slaves[port_num]) > + continue; > + > + slow_hdr = rte_pktmbuf_mtod(msgs[j]->pkt, > + struct slow_protocol_frame *); > + > + if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_LACP) { > + /* This is LACP frame so pass it to rx_machine */ > + struct lacpdu *lacp = (struct lacpdu *)&slow_hdr->slow_protocol; > + /* Invoke state machines on every active slave port */ > + rx_machine(internals, port_num, lacp); > + periodic_machine(internals, port_num); > + slaves_changed += mux_machine(internals, port_num); > + tx_machine(bond_dev, port_num); > + selection_logic(internals, port_num); > + > + machines_invoked = true; > + } else if (slow_hdr->slow_protocol.subtype == SLOW_SUBTYPE_MARKER) { > + struct marker *marker; > + > + marker = (struct marker *) &slow_hdr->slow_protocol; > + if (marker->tlv_type_marker == MARKER_TLV_TYPE_MARKER_INFO) { > + /* Reuse received packet to send frame to Marker Responder > + */ > + marker->tlv_type_marker = MARKER_TLV_TYPE_MARKER_RESP; > + > + /* Update source MAC, destination MAC is multicast so we > + * don't update it */ > + mac_address_get(bond_dev, &slow_hdr->eth_hdr.s_addr); > + > + if (rte_ring_enqueue(data->tx_ring, msgs[j]) == -ENOBUFS) { > + RTE_LOG(ERR, PMD, > + "Failed to enqueue packet into tx ring"); > + rte_pktmbuf_free(msgs[j]->pkt); > + rte_ring_enqueue(data->free_ring, msgs[j]); > + } > + > + msgs[j] = NULL; > + } > + } > + } > + > + if (machines_invoked == false) { > + rx_machine(internals, port_num, NULL); > + periodic_machine(internals, port_num); > + slaves_changed += mux_machine(internals, port_num); > + tx_machine(bond_dev, port_num); > + selection_logic(internals, port_num); > + machines_invoked = true; > + } > + > + SM_FLAG_CLR(port, BEGIN); > + } > + > + /* Update mux if something changed */ > + if (slaves_changed > 0) { > + update_mux_slaves(internals); > + MODE4_DEBUG("mux count %u [%2u%s%2u%s%2u%s%2u%s%s]\n", > + data->distibuting_slaves_count, > + data->distibuting_slaves_offsets[0], > + data->distibuting_slaves_count > 0 ? " " : "\b\b", > + data->distibuting_slaves_offsets[1], > + data->distibuting_slaves_count > 1 ? " " : "\b\b", > + data->distibuting_slaves_offsets[2], > + data->distibuting_slaves_count > 2 ? " " : "\b\b", > + data->distibuting_slaves_offsets[3], > + data->distibuting_slaves_count > 3 ? " " : "\b\b", > + data->distibuting_slaves_count > 4 ? "..." : ""); > + } > + > + /* Free packets that was not reused */ > + for (port_num = 0; port_num < nb_msgs; port_num++) { > + if (msgs[port_num] != NULL) { > + rte_pktmbuf_free(msgs[port_num]->pkt); > + rte_ring_enqueue(data->free_ring, msgs[port_num]); > + } > + } > + > + rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, > + bond_mode_8023ad_periodic_cb, arg); > +} > + > +static void > +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_idx) > +{ > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_data *data = &internals->mode4; > + > + struct port *port = &data->port_list[internals->active_slave_count]; > + struct port_params initial = { > + .system = { { 0 } }, > + .system_priority = rte_cpu_to_be_16(0xFFFF), > + .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY), > + .port_priority = rte_cpu_to_be_16(0x00FF), > + .port_number = 0, > + }; > + > + uint8_t slave_id = internals->active_slaves[slave_idx]; > + > + memcpy(&port->actor, &initial, sizeof(struct port_params)); > + port->actor.port_number = slave_id_to_port_number(slave_id); > + > + memcpy(&port->partner, &initial, sizeof(struct port_params)); > + > + /* default states */ > + port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED; > + port->partner_state = STATE_LACP_ACTIVE; > + port->sm_flags = SM_FLAGS_BEGIN; > + > + /* use this port as agregator */ > + port->agregator_idx = slave_idx; > + > + rte_eth_promiscuous_enable(slave_id); > +} > + > +void > +bond_mode_8023ad_slave_append(struct rte_eth_dev *bond_dev) > +{ > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + > + bond_mode_8023ad_activate_slave(bond_dev, internals->active_slave_count); > +} > + > +int > +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev, > + uint8_t slave_pos) > +{ > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_data *data = &internals->mode4; > + struct port *port; > + uint8_t i; > + > + bond_mode_8023ad_stop(bond_dev); > + > + /* Exclude slave from transmit policy. If this slave is an aggregator > + * make all aggregated slaves unselected to force sellection logic > + * to select suitable aggregator for this port */ > + for (i = 0; i < internals->active_slave_count; i++) { > + port = &data->port_list[slave_pos]; > + if (port->agregator_idx == slave_pos) { > + port->selected = UNSELECTED; > + port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | > + STATE_COLLECTING); > + > + /* Use default aggregator */ > + port->agregator_idx = i; > + } > + } > + > + port = &data->port_list[slave_pos]; > + > + update_mux_slaves(internals); > + > + /* Remove slave port config */ > + if (slave_pos + 1 < internals->active_slave_count) { > + memmove(&data->port_list[slave_pos], > + &data->port_list[slave_pos + 1], > + sizeof(data->port_list[0]) * (internals->active_slave_count - > + slave_pos - 1)); > + } > + > + if (bond_dev->data->dev_started) > + return bond_mode_8023ad_start(bond_dev); > + > + return 0; > +} > + > +int > +bond_mode_8023ad_init(struct rte_eth_dev *bond_dev) > +{ > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_data *data = &internals->mode4; > + char mem_name[RTE_ETH_NAME_MAX_LEN]; > + int socket_id = bond_dev->pci_dev->numa_node; > + uint8_t i; > + > + if (data->mbuf_pool == NULL) { > + const uint16_t element_size = sizeof(struct slow_protocol_frame) + > + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM; > + > + snprintf(mem_name, sizeof(mem_name), "%s_POOL", bond_dev->data->name); > + data->mbuf_pool = rte_mempool_create(mem_name, > + /* FIXME: How big memory pool should be? If driver will not > + * free packets quick enough there will be ENOMEM in tx_machine. > + * For now give 512 packets per slave. Hope it will be enough. */ > + (BOND_MODE_8023AX_TX_RING_SIZE + 1) * 512 * RTE_MAX_ETHPORTS, > + element_size, > + RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, > + sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, > + NULL, rte_pktmbuf_init, NULL, socket_id, 0); > + > + /* Any memory allocation failure in initalization is critical because > + * resources can't be free, so reinitialization is impossible. */ > + if (data->mbuf_pool == NULL) { > + RTE_LOG(ERR, PMD, "%s: Failed to initialize LACP rx ring\n", > + bond_dev->data->name); > + > + rte_panic("Failed to alocate memory pool ('%s')\n" > + "for bond device '%s'\n", mem_name, bond_dev->data->name); > + } > + > + /* Setup ring for free messages that can be used in RX/TX burst */ > + snprintf(mem_name, sizeof(mem_name), "%s_free", bond_dev->data->name); > + > + uint16_t free_cnt = BOND_MODE_8023AX_RX_RING_SIZE + > + BOND_MODE_8023AX_TX_RING_SIZE; > + > + data->free_ring = rte_ring_create(mem_name, free_cnt, socket_id, 0); > + > + if (data->free_ring == NULL) { > + rte_panic("%s: Failed to create slow messages free ring\n", > + bond_dev->data->name); > + } > + > + for (i = 0; i < free_cnt; i++) { > + struct slow_protocol_msg *msg; > + > + snprintf(mem_name, sizeof(mem_name), "%s_slow_msg_%u", > + bond_dev->data->name, i); > + > + msg = (struct slow_protocol_msg *) rte_malloc_socket(mem_name, > + sizeof(struct slow_protocol_msg), 0, socket_id); > + > + if (msg == NULL) { > + rte_panic("%s: Failed to allocate slow message\n", > + bond_dev->data->name); > + } > + > + rte_ring_enqueue(data->free_ring, msg); > + } > + > + /* Setup rings for usage in rx/tx bursts and machines state > + * call back */ > + snprintf(mem_name, sizeof(mem_name), "%s_rx", bond_dev->data->name); > + data->rx_ring = rte_ring_create(mem_name, > + BOND_MODE_8023AX_RX_RING_SIZE, socket_id, 0); > + > + if (data->rx_ring == NULL) { > + rte_panic("%s: Failed to create slow messages rx ring\n", > + bond_dev->data->name); > + } > + > + snprintf(mem_name, sizeof(mem_name), "%s_tx", bond_dev->data->name); > + data->tx_ring = rte_ring_create(mem_name, BOND_MODE_8023AX_TX_RING_SIZE, > + socket_id, RING_F_SP_ENQ); > + > + if (data->tx_ring == NULL) { > + rte_panic("%s: Failed to create slow messages tx ring\n", > + bond_dev->data->name); > + } > + } > + > + data->distibuting_slaves_count = 0; > + > + for (i = 0; i < internals->active_slave_count; i++) > + bond_mode_8023ad_activate_slave(bond_dev, i); > + > + return 0; > +} > + > +int > +bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) > +{ > + return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, > + &bond_mode_8023ad_periodic_cb, bond_dev); > +} > + > +int > +bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) > +{ > + if (rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev)) > + return 0; > + > + return -ENOENT; > +} > + > +void > +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, > + uint8_t slave_pos, struct rte_mbuf *slot_pkt) > +{ > + struct mode8023ad_data *data; > + struct slow_protocol_msg *msg = NULL; > + int retval; > + > + data = &internals->mode4; > + > + if (unlikely(rte_ring_dequeue(data->free_ring, (void **)&msg) == > + -ENOBUFS)) { > + rte_pktmbuf_free(slot_pkt); > + return; > + } > + > + msg->pkt = slot_pkt; > + msg->port_id = internals->active_slaves[slave_pos]; > + > + retval = rte_ring_enqueue(data->rx_ring, msg); > + if (unlikely(retval == -ENOBUFS)) { > + /* If RX fing full free lacpdu message and drop packet */ > + rte_ring_enqueue(data->free_ring, msg); > + rte_pktmbuf_free(slot_pkt); > + } > +} > diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h > new file mode 100644 > index 0000000..df250bb > --- /dev/null > +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h > @@ -0,0 +1,405 @@ > +/*- > + * BSD LICENSE > + * > + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in > + * the documentation and/or other materials provided with the > + * distribution. > + * * Neither the name of Intel Corporation nor the names of its > + * contributors may be used to endorse or promote products derived > + * from this software without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#ifndef RTE_ETH_BOND_8023AD_H_ > +#define RTE_ETH_BOND_8023AD_H_ > + > +#include <stdint.h> > + > +#include <rte_ether.h> > +#include <rte_byteorder.h> > +#include <rte_spinlock.h> > + > +typedef int bool; > + > +#define true 1 > +#define false 0 > + > +/** > + * Timeouts deffinitions (5.4.4 in 802.1AX documentation). > + */ > +#define BOND_8023AD_FAST_PERIODIC_MS 1000 > +#define BOND_8023AD_SLOW_PERIODIC_MS 30000 > +#define BOND_8023AD_SHORT_TIMEOUT_MS 3000 > +#define BOND_8023AD_LONG_TIMEOUT_MS 90000 > +#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000 > +#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000 > +#define BOND_8023AD_TX_PERIOD_MS 333 > +/** > + * Actor/partner states > + */ > +#define STATE_LACP_ACTIVE 0x01 > +#define STATE_LACP_SHORT_TIMEOUT 0x02 > +#define STATE_AGGREGATION 0x04 > +#define STATE_SYNCHRONIZATION 0x08 > +#define STATE_COLLECTING 0x10 > +#define STATE_DISTRIBUTING 0x20 > +/** Partners parameters are defaulted */ > +#define STATE_DEFAULTED 0x40 > +#define STATE_EXPIRED 0x80 > + > +/** > + * State machine flags > + */ > +#define SM_FLAGS_BEGIN 0x0001 > +#define SM_FLAGS_LACP_ENABLED 0x0002 > +#define SM_FLAGS_ACTOR_CHURN 0x0004 > +#define SM_FLAGS_PARTNER_CHURN 0x0008 > +#define SM_FLAGS_MOVED 0x0100 > +#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200 > +#define SM_FLAGS_NTT 0x0400 > + > +#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100 > +#define BOND_MODE_8023AX_RX_RING_SIZE (2 * RTE_MAX_ETHPORTS) > +#define BOND_MODE_8023AX_TX_RING_SIZE (2 * RTE_MAX_ETHPORTS) > + > +#define BOND_LINK_FULL_DUPLEX_KEY 0x01 > +#define BOND_LINK_SPEED_KEY_10M 0x02 > +#define BOND_LINK_SPEED_KEY_100M 0x04 > +#define BOND_LINK_SPEED_KEY_1000M 0x08 > +#define BOND_LINK_SPEED_KEY_10G 0x10 > +#define BOND_LINK_SPEED_KEY_20G 0x11 > +#define BOND_LINK_SPEED_KEY_40G 0x12 > + > +#define SUBTYPE_LACP 0x01 > + > +#define TLV_TYPE_ACTOR_INFORMATION 0x01 > +#define TLV_TYPE_PARTNER_INFORMATION 0x02 > +#define TLV_TYPE_COLLECTOR_INFORMATION 0x03 > +#define TLV_TYPE_TERMINATOR_INFORMATION 0x00 > + > +#define CHECK_FLAGS(_variable, _flags) ((_variable) & (_flags)) > +#define SET_FLAGS(_variable, _flags) ((_variable) |= (_flags)) > +#define CLEAR_FLAGS(_variable, _flags) ((_variable) &= ~(_flags)) > + > +#define SM_FLAG(port, flag) (!!CHECK_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag)) > +#define SM_FLAG_SET(port, flag) SET_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag) > +#define SM_FLAG_CLR(port, flag) CLEAR_FLAGS((port)->sm_flags, SM_FLAGS_ ## flag) > + > +#define ACTOR_STATE(port, flag) (!!CHECK_FLAGS((port)->actor_state, STATE_ ## flag)) > +#define ACTOR_STATE_SET(port, flag) SET_FLAGS((port)->actor_state, STATE_ ## flag) > +#define ACTOR_STATE_CLR(port, flag) CLEAR_FLAGS((port)->actor_state, STATE_ ## flag) > + > +#define PARTNER_STATE(port, flag) (!!CHECK_FLAGS((port)->partner_state, STATE_ ## flag)) > +#define PARTNER_STATE_SET(port, flag) SET_FLAGS((port)->partner_state, STATE_ ## flag) > +#define PARTNER_STATE_CLR(port, flag) CLEAR_FLAGS((port)->partner_state, STATE_ ## flag) > + > +/** Slow protocol LACP frame subtype */ > +#define SLOW_SUBTYPE_LACP 0x01 > + > +/** Slow procotol marker frame subtype */ > +#define SLOW_SUBTYPE_MARKER 0x02 > + > +/** Marker type info request */ > +#define MARKER_TLV_TYPE_MARKER_INFO 0x01 > + > +/** Marker type info response */ > +#define MARKER_TLV_TYPE_MARKER_RESP 0x02 > + > +/** Generic slow protocol structure */ > +struct slow_protocol { > + uint8_t subtype; > + uint8_t reserved_119[119]; > +} __attribute__((__packed__)); > + > +/** Generic slow protocol frame type structure */ > +struct slow_protocol_frame { > + struct ether_hdr eth_hdr; > + struct slow_protocol slow_protocol; > +} __attribute__((__packed__)); > + > +struct port_params { > + uint16_t system_priority; > + /**< System priority (unused in current implementation) */ > + struct ether_addr system; > + /**< System ID - Slave MAC address, same as bonding MAC address */ > + uint16_t key; > + /**< Speed information (implementation dependednt) and duplex. */ > + uint16_t port_priority; > + /**< Priority of this (unused in current implementation) */ > + uint16_t port_number; > + /**< Port number. It corresponds to slave port id. */ > +} __attribute__((__packed__)); > + > +struct lacpdu_actor_partner_params { > + uint8_t tlv_type_info; > + uint8_t info_length; > + struct port_params port_params; > + uint8_t state; > + uint8_t reserved_3[3]; > +} __attribute__((__packed__)); > + > +/** LACPDU structure (5.4.2 in 802.1AX documentation). */ > +struct lacpdu { > + uint8_t subtype; > + uint8_t version_number; > + > + struct lacpdu_actor_partner_params actor; > + struct lacpdu_actor_partner_params partner; > + > + uint8_t tlv_type_collector_info; > + uint8_t collector_info_length; > + uint16_t collector_max_delay; > + uint8_t reserved_12[12]; > + > + uint8_t tlv_type_terminator; > + uint8_t terminator_length; > + uint8_t reserved_50[50]; > +} __attribute__((__packed__)); > + > +/** LACPDU frame: Contains ethernet header and LACPDU. */ > +struct lacpdu_header { > + struct ether_hdr eth_hdr; > + struct lacpdu lacpdu; > +} __attribute__((__packed__)); > + > +struct marker { > + uint8_t subtype; > + uint8_t version_number; > + > + uint8_t tlv_type_marker; > + uint8_t info_length; > + uint16_t requester_port; > + struct ether_addr requester_system; > + uint32_t requester_transaction_id; > + uint8_t reserved_2[2]; > + > + uint8_t tlv_type_terminator; > + uint8_t terminator_length; > + uint8_t reserved_90[90]; > +} __attribute__((__packed__)); > + > +struct marker_header { > + struct ether_hdr eth_hdr; > + struct marker marker; > +} __attribute__((__packed__)); > + > +/** Variables associated with the system (5.4.5 in 802.1AX documentation). */ > +struct system { > + struct ether_addr actor_system; > + /**< The MAC address component of the System Identifier of the System */ > + uint16_t actor_system_priority; > + /**< The System Priority of the System */ > +}; > + > +enum selection { > + UNSELECTED, > + STANDBY, > + SELECTED > +}; > + > +/** Variables associated with each port (5.4.7 in 802.1AX documentation). */ > +struct port { > + /** > + * The operational values of the Actor's state parameters. Bitmask > + * of port states. > + */ > + uint8_t actor_state; > + > + /** The operational Actor's port parameters */ > + struct port_params actor; > + > + /** > + * The operational value of the Actor's view of the current values of > + * the Partner's state parameters. The Actor sets this variable either > + * to the value received from the Partner in an LACPDU, or to the value > + * of Partner_Admin_Port_State. Bitmask of port states. > + */ > + uint8_t partner_state; > + > + /** The operational Partner's port parameters */ > + struct port_params partner; > + > + /* Additional port parameters not listed in documentation */ > + /** State machine flags */ > + uint16_t sm_flags; > + enum selection selected; > + > + uint64_t current_while_timer; > + uint64_t periodic_timer; > + uint64_t wait_while_timer; > + uint64_t tx_machine_timer; > + /* Agregator parameters */ > + /** > + * Index in mode8023ad_data::port_list[] of Aggregator > + * the port is currently attached to. > + */ > + uint16_t agregator_idx; > +}; > + > + > +/** > + * Struct used to comunicate with 8023ad logic. > + */ > +struct slow_protocol_msg { > + struct rte_mbuf *pkt; > + uint8_t port_id; > +}; > + > +/** Data specific to mode 802.1AX */ > +struct mode8023ad_data { > + /** Memory pool used to allocated rings */ > + struct rte_mempool *mbuf_pool; > + > + /** Ring containing free slow_protocol_msg objects. Used to avoid > + * alocating/freeing memory in RX/TX bursts */ > + struct rte_ring *free_ring; > + > + /** Ring of struct slow_protocol_msg from RX burst function */ > + struct rte_ring *rx_ring; > + > + /** Ring of struct slow_protocol_msg to RX burst function */ > + struct rte_ring *tx_ring; > + > + /** list of all enslaved ports in mode 802.1AX */ > + struct port port_list[RTE_MAX_ETHPORTS]; > + > + /** List of offsets in active slaves array used to tansmit packets. */ > + uint8_t distibuting_slaves_offsets[RTE_MAX_ETHPORTS]; > + uint8_t distibuting_slaves_count; > +}; > + > +/* Forward declaration */ > +struct bond_dev_private; > + > +/** > + * Configures 802.1AX mode and all active slaves on bonded interface. > + * > + * @param dev Bonded interface > + * @return > + * 0 on success, negative value otherwise. > + */ > +int > +bond_mode_8023ad_init(struct rte_eth_dev *dev); > + > +/** > + * Deconfigures 802.1AX mode of the bonded interface and slaves. > + * > + * @param dev Bonded interface > + * @return > + * 0 on success, negative value otherwise. > + */ > +int bond_mode_8023ad_disable(struct rte_eth_dev *dev); > + > +/** > + * Starts 802.3AX state machines management logic. > + * @param dev Bonded interface > + * @return > + * 0 if machines was started, 1 if machines was already running, > + * negative value otherwise. > + */ > +int > +bond_mode_8023ad_start(struct rte_eth_dev *dev); > + > +/** > + * Stops 802.3AX state machines management logic. > + * @param dev Bonded interface > + * @return > + * 0 if this call stopped state machines, -ENOENT if alarm was not set. > + */ > +int > +bond_mode_8023ad_stop(struct rte_eth_dev *dev); > + > +/** > + * Passes given slow packet to state machines management logic. > + * @param internals Bonded device private data. > + * @param slave_pos Possition in active slaves array on which this packet was received. > + * @param slot_pkt Slow packet > + */ > +void > +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, > + uint8_t slave_pos, struct rte_mbuf *slot_pkt); > + > +/** > + * Appends and initializes slave active_slaves[slave_num] to use with > + * 802.1AX mode. > + * > + * @pre active_slaves[active_slave_count] must contain valid slave id. > + * @post active_slave_count must be incremented. > + * > + * @param dev Bonded interface. > + * > + * @return > + * 0 on success, negative value otherwise. > + */ > +void > +bond_mode_8023ad_slave_append(struct rte_eth_dev *dev); > + > +/** > + * Denitializes and removes given slave from 802.1AX mode. > + * > + * @pre active_slaves[slave_num] must contain valid slave id corresponding to > + * slave initialized in 802.1AX mode. > + * @post active_slaves[slave_num] must be removed. > + * > + * @param dev Bonded interface. > + * @param slave_num Position of slave in active_slaves array > + * > + * @return > + * 0 on success, negative value otherwise. > + * > + */ > +int > +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos); > + > +/** > + * Converts port_number from network byte order to port id. > + * > + * @param port_number The 8023ad port number to convert. > + * @return corresponding slave id > + */ > +static inline uint8_t > +port_number_to_slave_id(uint16_t port_number) > +{ > + uint16_t port_id = rte_be_to_cpu_16(port_number); > + /* Standard requires that port number must be grater than 0. > + * Substract 1 to get corresponding slave id */ > + return port_id - 1; > +} > + > +/** > + * Converts port id to mode 8023ad port number. > + * > + * @param slave_id Id of slave to convert. > + * @return corresponding Port number in network byte order. > + */ > +static inline uint16_t > +slave_id_to_port_number(uint8_t slave_id) > +{ > + /* Standard requires that port ID must be grater than 0. > + * Add 1 do get corresponding port_number */ > + uint16_t port_number = (uint16_t)slave_id + 1; > + return rte_cpu_to_be_16(port_number); > +} > + > +#endif /* RTE_ETH_BOND_8023AD_H_ */ > diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c > index c690ceb..c547164 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_api.c > +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c > @@ -31,6 +31,8 @@ > * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > */ > > +#include <string.h> > + > #include <rte_mbuf.h> > #include <rte_malloc.h> > #include <rte_ethdev.h> > @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id) > return 0; > } > > +void > +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) > +{ > + struct bond_dev_private *internals = eth_dev->data->dev_private; > + uint8_t active_count = internals->active_slave_count; > + > + internals->active_slaves[active_count] = port_id; > + > + if (internals->mode == BONDING_MODE_8023AD) > + bond_mode_8023ad_slave_append(eth_dev); > + > + internals->active_slave_count = active_count + 1; > +} > + > +void > +deactivate_slave(struct rte_eth_dev *eth_dev, > + uint8_t slave_pos) > +{ > + struct bond_dev_private *internals = eth_dev->data->dev_private; > + uint8_t active_count = internals->active_slave_count; > + > + if (internals->mode == BONDING_MODE_8023AD) > + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos); > + > + active_count--; > + > + /* If slave was not at the end of the list > + * shift active slaves up active array list */ > + if (slave_pos < active_count) { > + memmove(internals->active_slaves + slave_pos, > + internals->active_slaves + slave_pos + 1, > + (active_count - slave_pos) * > + sizeof(internals->active_slaves[0])); > + } > + > + internals->active_slave_count = active_count; > +} > + > uint8_t > number_of_sockets(void) > { > @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) > eth_dev->dev_ops = &default_dev_ops; > eth_dev->pci_dev = pci_dev; > > - if (bond_ethdev_mode_set(eth_dev, mode)) { > - RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", > - eth_dev->data->port_id, mode); > - goto err; > - } > - > + internals->port_id = eth_dev->data->port_id; > + internals->mode = BONDING_MODE_INVALID; > internals->current_primary_port = 0; > internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; > internals->user_defined_mac = 0; > @@ -241,6 +277,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) > memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); > memset(internals->slaves, 0, sizeof(internals->slaves)); > > + if (bond_ethdev_mode_set(eth_dev, mode)) { > + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", > + eth_dev->data->port_id, mode); > + goto err; > + } > + > return eth_dev->data->port_id; > > err: > @@ -348,14 +390,12 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) > rte_eth_link_get_nowait(slave_port_id, &link_props); > > if (link_props.link_status == 1) > - internals->active_slaves[internals->active_slave_count++] = > - slave_port_id; > + activate_slave(bonded_eth_dev, slave_port_id); > } > return 0; > > } > > - > int > rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) > { > @@ -380,31 +420,26 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) > return retval; > } > > - > static int > __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) > { > + struct rte_eth_dev *bonded_eth_dev; > struct bond_dev_private *internals; > > - int i, slave_idx = -1; > + int i, slave_idx; > > if (valid_slave_port_id(slave_port_id) != 0) > return -1; > > - internals = rte_eth_devices[bonded_port_id].data->dev_private; > + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; > + internals = bonded_eth_dev->data->dev_private; > > /* first remove from active slave list */ > - for (i = 0; i < internals->active_slave_count; i++) { > - if (internals->active_slaves[i] == slave_port_id) > - slave_idx = i; > + slave_idx = find_slave_by_id(internals->active_slaves, internals->active_slave_count, > + slave_port_id); > > - /* shift active slaves up active array list */ > - if (slave_idx >= 0 && i < (internals->active_slave_count - 1)) > - internals->active_slaves[i] = internals->active_slaves[i+1]; > - } > - > - if (slave_idx >= 0) > - internals->active_slave_count--; > + if (slave_idx < internals->active_slave_count) > + deactivate_slave(bonded_eth_dev, slave_idx); > > slave_idx = -1; > /* now find in slave list */ > @@ -538,6 +573,7 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id) > > return internals->current_primary_port; > } > + > int > rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len) > { > @@ -673,7 +709,6 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id) > return internals->balance_xmit_policy; > } > > - > int > rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms) > { > @@ -729,7 +764,6 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id) > return internals->link_down_delay_ms; > } > > - > int > rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms) > > diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c > index bbbc69b..a0be0e6 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_args.c > +++ b/lib/librte_pmd_bond/rte_eth_bond_args.c > @@ -171,6 +171,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, > case BONDING_MODE_ACTIVE_BACKUP: > case BONDING_MODE_BALANCE: > case BONDING_MODE_BROADCAST: > + case BONDING_MODE_8023AD: > return 0; > default: > RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); > diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c > index 6d0fb1b..13630d9 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c > +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c > @@ -44,6 +44,7 @@ > > #include "rte_eth_bond.h" > #include "rte_eth_bond_private.h" > +#include "rte_eth_bond_8023ad.h" > > static uint16_t > bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) > @@ -168,6 +169,56 @@ bond_ethdev_tx_burst_active_backup(void *queue, > bufs, nb_pkts); > } > > +static uint16_t > +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, > + uint16_t nb_pkts) > +{ > + /* Cast to structure, containing bonded device's port id and queue id */ > + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; > + struct bond_dev_private *internals = bd_rx_q->dev_private; > + struct mode8023ad_data *mode4 = &internals->mode4; > + struct ether_addr bond_mac; > + > + struct ether_hdr *hdr; > + struct rte_mbuf *pkts[nb_pkts + 1]; /* one packet more for slow packet */ > + > + uint16_t num_rx_slave = 0; /* Number of packet received on current slave */ > + uint16_t num_rx_total = 0; /* Total number of received packets */ > + > + uint8_t i, j; > + > + rte_eth_macaddr_get(internals->port_id, &bond_mac); > + > + for (i = 0; i < internals->active_slave_count && num_rx_total < nb_pkts; i++) { > + /* Read packets from this slave */ > + num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i], > + bd_rx_q->queue_id, pkts, nb_pkts + 1 - num_rx_total); > + > + /* Separate slow protocol packets from other packets */ > + for (j = 0; j < num_rx_slave; j++) { > + hdr = rte_pktmbuf_mtod(pkts[j], struct ether_hdr *); > + > + uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type); > + if (unlikely(ether_type == ETHER_TYPE_SLOW)) { > + bond_mode_8023ad_handle_slow_pkt(internals, i, pkts[j]); > + continue; > + } > + > + /* Check if we can receive this packet. Also filter packets if > + * bonding interface is not in promiscuous mode (slaves are always > + * in promiscuous mode). */ > + if (likely(ACTOR_STATE(&mode4->port_list[i], COLLECTING)) && > + likely(internals->promiscuous_en || > + is_same_ether_addr(&bond_mac, &hdr->d_addr))) { > + bufs[num_rx_total++] = pkts[j]; > + } else > + rte_pktmbuf_free(pkts[j]); > + } > + } > + > + return num_rx_total; > +} > + > static inline uint16_t > ether_hash(struct ether_hdr *eth_hdr) > { > @@ -350,6 +401,126 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, > } > > static uint16_t > +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, > + uint16_t nb_pkts) > +{ > + struct bond_dev_private *internals; > + struct mode8023ad_data *mode4; > + struct bond_tx_queue *bd_tx_q; > + > + uint8_t num_of_slaves; > + uint8_t slaves[RTE_MAX_ETHPORTS]; > + /* possitions in slaves, not ID */ > + uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; > + uint8_t distributing_slaves_count; > + > + uint16_t num_tx_slave, num_tx_total = 0, tx_fail_total = 0; > + uint16_t i, op_slave_idx; > + > + /* Slow packets from 802.3AX state machines. */ > + struct slow_protocol_msg *slow_msg; > + > + /* Allocate one additional packet in case 8023AD mode. > + * First element if not NULL is slow packet. */ > + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts + 1]; > + /* Total amount of packets in slave_bufs */ > + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; > + /* Array of slow packets placed in each slave */ > + uint8_t slave_slow_packets[RTE_MAX_ETHPORTS] = { 0 }; > + > + bd_tx_q = (struct bond_tx_queue *)queue; > + internals = bd_tx_q->dev_private; > + mode4 = &internals->mode4; > + > + /* Copy slave list to protect against slave up/down changes during tx > + * bursting */ > + num_of_slaves = internals->active_slave_count; > + if (num_of_slaves < 1) > + return num_tx_total; > + > + memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); > + > + distributing_slaves_count = mode4->distibuting_slaves_count; > + memcpy(distributing_offsets, mode4->distibuting_slaves_offsets, > + sizeof(slaves[0]) * distributing_slaves_count); > + > + for (i = 0; i < num_of_slaves; i++) > + slave_bufs[i][0] = NULL; > + > + /* It is likely that tx ring will be empty. If it is not empty, it is > + * likely that there will be only one frame. */ > + while (unlikely(!rte_ring_empty(mode4->tx_ring)) && > + rte_ring_dequeue(mode4->tx_ring, (void **)&slow_msg) != -ENOENT) { > + i = find_slave_by_id(slaves, num_of_slaves, slow_msg->port_id); > + > + /* Assign slow packet to slave or drop it if slave is not in active list > + * (ex: link down). */ > + if (likely(i < num_of_slaves)) { > + /* If there is more than one slow packet to the same slave, send > + * only latest, and drop previouse - tx burst was no called quick > + * enough. */ > + if (slave_bufs[i][0] != NULL) > + rte_pktmbuf_free(slave_bufs[i][0]); > + > + slave_bufs[i][0] = slow_msg->pkt; > + slave_nb_pkts[i] = 1; > + slave_slow_packets[i] = 1; > + } else > + rte_pktmbuf_free(slow_msg->pkt); > + > + rte_ring_enqueue(mode4->free_ring, slow_msg); > + } > + > + if (likely(distributing_slaves_count > 0)) { > + /* Populate slaves mbuf with the packets which are to be sent on it */ > + for (i = 0; i < nb_pkts; i++) { > + /* Select output slave using hash based on xmit policy */ > + op_slave_idx = xmit_slave_hash(bufs[i], distributing_slaves_count, > + internals->balance_xmit_policy); > + > + /* Populate slave mbuf arrays with mbufs for that slave. Use only > + * slaves that are currently distributing. */ > + uint8_t slave_offset = distributing_offsets[op_slave_idx]; > + uint16_t pkt_pos = slave_nb_pkts[slave_offset]; > + slave_nb_pkts[slave_offset]++; > + > + slave_bufs[slave_offset][pkt_pos] = bufs[i]; > + } > + } > + > + /* Send packet burst on each slave device */ > + for (i = 0; i < num_of_slaves; i++) { > + if (slave_nb_pkts[i] > 0) { > + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, > + slave_bufs[i], slave_nb_pkts[i]); > + > + /* if tx burst fails move packets to end of bufs */ > + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { > + uint16_t slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; > + > + /* Free slow packet if it exists and not send. */ > + if (slave_slow_packets[i] != 0 && num_tx_slave == 0) { > + rte_pktmbuf_free(slave_bufs[i][0]); > + slave_tx_fail_count--; > + } > + > + tx_fail_total += slave_tx_fail_count; > + memcpy(bufs[nb_pkts - tx_fail_total], > + slave_bufs[i][num_tx_slave], > + slave_tx_fail_count); > + } > + > + if (num_tx_slave > 0) > + num_tx_slave -= slave_slow_packets[i]; > + > + num_tx_total += num_tx_slave; > + } > + } > + > + return num_tx_total; > +} > + > +static uint16_t > bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, > uint16_t nb_pkts) > { > @@ -448,6 +619,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link, > } > > int > +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) > +{ > + struct ether_addr *mac_addr; > + > + mac_addr = eth_dev->data->mac_addrs; > + > + if (eth_dev == NULL) { > + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); > + return -1; > + } > + > + if (dst_mac_addr == NULL) { > + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); > + return -1; > + } > + > + ether_addr_copy(mac_addr, dst_mac_addr); > + return 0; > +} > + > +int > mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) > { > struct ether_addr *mac_addr; > @@ -455,7 +647,7 @@ mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr) > mac_addr = eth_dev->data->mac_addrs; > > if (eth_dev == NULL) { > - RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); > + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); > return -1; > } > > @@ -494,6 +686,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) > } > } > break; > + case BONDING_MODE_8023AD: > + break; > case BONDING_MODE_ACTIVE_BACKUP: > default: > for (i = 0; i < internals->slave_count; i++) { > @@ -544,6 +738,13 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) > eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast; > eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; > break; > + case BONDING_MODE_8023AD: > + if (bond_mode_8023ad_init(eth_dev) != 0) > + return -1; > + > + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; > + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; > + break; > default: > return -1; > } > @@ -751,6 +952,8 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev) > if (internals->user_defined_primary_port) > bond_ethdev_primary_set(internals, internals->primary_port); > > + if (internals->mode == BONDING_MODE_8023AD) > + bond_mode_8023ad_start(eth_dev); > > if (internals->link_status_polling_enabled) > rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, > @@ -765,6 +968,25 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) > { > struct bond_dev_private *internals = eth_dev->data->dev_private; > > + if (internals->mode == BONDING_MODE_8023AD) { > + struct mode8023ad_data *data = &internals->mode4; > + struct slow_protocol_msg *msg; > + > + bond_mode_8023ad_stop(eth_dev); > + data->distibuting_slaves_count = 0; > + > + /* Discard all messages to/from mode 4 state machines */ > + while (rte_ring_dequeue(data->rx_ring, (void **)&msg) != -ENOENT) { > + rte_pktmbuf_free(msg->pkt); > + rte_ring_enqueue(data->free_ring, msg); > + } > + > + while (rte_ring_dequeue(data->tx_ring, (void **)&msg) != -ENOENT) { > + rte_pktmbuf_free(msg->pkt); > + rte_ring_enqueue(data->free_ring, msg); > + } > + } > + > internals->active_slave_count = 0; > internals->link_status_polling_enabled = 0; > > @@ -832,7 +1054,7 @@ bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, > 0, dev->pci_dev->numa_node); > > if (bd_tx_q == NULL) > - return -1; > + return -1; > > bd_tx_q->queue_id = tx_queue_id; > bd_tx_q->dev_private = dev->data->dev_private; > @@ -863,7 +1085,6 @@ bond_ethdev_tx_queue_release(void *queue) > rte_free(queue); > } > > - > static void > bond_ethdev_slave_link_status_change_monitor(void *cb_arg) > { > @@ -884,7 +1105,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg) > > /* If device is currently being configured then don't check slaves link > * status, wait until next period */ > - if (rte_spinlock_trylock(&internals->lock)){ > + if (rte_spinlock_trylock(&internals->lock)) { > for (i = 0; i < internals->slave_count; i++) { > if (internals->slaves[i].link_status_polling_enabled) { > slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; > @@ -1002,11 +1223,13 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) > for (i = 0; i < internals->slave_count; i++) > rte_eth_promiscuous_enable(internals->slaves[i].port_id); > break; > + /* In mode4 promiscus mode is managed when slave is added/removed */ > + case BONDING_MODE_8023AD: > + break; > /* Promiscuous mode is propagated only to primary slave */ > case BONDING_MODE_ACTIVE_BACKUP: > default: > rte_eth_promiscuous_enable(internals->current_primary_port); > - > } > } > > @@ -1017,7 +1240,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) > int i; > > internals->promiscuous_en = 0; > - > + > switch (internals->mode) { > /* Promiscuous mode is propagated to all slaves */ > case BONDING_MODE_ROUND_ROBIN: > @@ -1026,6 +1249,9 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) > for (i = 0; i < internals->slave_count; i++) > rte_eth_promiscuous_disable(internals->slaves[i].port_id); > break; > + /* In mode4 promiscus mode is set managed when slave is added/removed */ > + case BONDING_MODE_8023AD: > + break; > /* Promiscuous mode is propagated only to primary slave */ > case BONDING_MODE_ACTIVE_BACKUP: > default: > @@ -1051,7 +1277,8 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, > struct bond_dev_private *internals; > struct rte_eth_link link; > > - int i, valid_slave = 0, active_pos = -1; > + int i, valid_slave = 0; > + uint8_t active_pos; > uint8_t lsc_flag = 0; > > if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) > @@ -1081,16 +1308,12 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, > return; > > /* Search for port in active port list */ > - for (i = 0; i < internals->active_slave_count; i++) { > - if (port_id == internals->active_slaves[i]) { > - active_pos = i; > - break; > - } > - } > + active_pos = find_slave_by_id(internals->active_slaves, > + internals->active_slave_count, port_id); > > rte_eth_link_get_nowait(port_id, &link); > if (link.link_status) { > - if (active_pos >= 0) > + if (active_pos < internals->active_slave_count) > return; > > /* if no active slave ports then set this port to be primary port */ > @@ -1104,21 +1327,19 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, > link_properties_set(bonded_eth_dev, > &(slave_eth_dev->data->dev_link)); > } > - internals->active_slaves[internals->active_slave_count++] = port_id; > + > + activate_slave(bonded_eth_dev, port_id); > > /* If user has defined the primary port then default to using it */ > if (internals->user_defined_primary_port && > internals->primary_port == port_id) > bond_ethdev_primary_set(internals, port_id); > } else { > - if (active_pos < 0) > + if (active_pos == internals->active_slave_count) > return; > > /* Remove from active slave list */ > - for (i = active_pos; i < (internals->active_slave_count - 1); i++) > - internals->active_slaves[i] = internals->active_slaves[i+1]; > - > - internals->active_slave_count--; > + deactivate_slave(bonded_eth_dev, active_pos); > > /* No active slaves, change link status to down and reset other > * link properties */ > diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h > index 6db5144..77f7bb0 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_private.h > +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h > @@ -42,6 +42,7 @@ extern "C" { > #include <rte_spinlock.h> > > #include "rte_eth_bond.h" > +#include "rte_eth_bond_8023ad.h" > > #define PMD_BOND_SLAVE_PORT_KVARG ("slave") > #define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary") > @@ -60,6 +61,8 @@ extern "C" { > #define RTE_BOND_LOG(lvl, msg, ...) \ > RTE_LOG(lvl, PMD, "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__); > > +#define BONDING_MODE_INVALID 0xFF > + > extern const char *pmd_bond_init_valid_arguments[]; > > extern const char *driver_name; > @@ -89,7 +92,13 @@ struct bond_tx_queue { > /**< Copy of TX configuration structure for queue */ > }; > > - > +/** Persisted Slave Configuration Structure */ > +struct slave_conf { > + uint8_t port_id; > + /**< Port Id of slave eth_dev */ > + struct ether_addr mac_addr; > + /**< Slave eth_dev original MAC address */ > +}; > /** Bonded slave devices structure */ > struct bond_ethdev_slave_ports { > uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */ > @@ -124,7 +133,7 @@ struct bond_dev_private { > uint8_t user_defined_mac; > /**< Flag for whether MAC address is user defined or not */ > uint8_t promiscuous_en; > - /**< Enabled/disable promiscuous mode on slave devices */ > + /**< Enabled/disable promiscuous mode on bonding device */ > uint8_t link_props_set; > /**< flag to denote if the link properties are set */ > > @@ -143,6 +152,9 @@ struct bond_dev_private { > uint8_t slave_count; /**< Number of bonded slaves */ > struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; > /**< Arary of bonded slaves details */ > + > + struct mode8023ad_data mode4; > + /**< Mode 4 private data */ > }; > > extern struct eth_dev_ops default_dev_ops; > @@ -150,6 +162,21 @@ extern struct eth_dev_ops default_dev_ops; > int > valid_bonded_ethdev(struct rte_eth_dev *eth_dev); > > +/* Search given slave array to find possition of given id. > + * Return slave pos or slaves_count if not found. */ > +static inline uint8_t > +find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, > + uint8_t slave_id ) { > + > + uint8_t pos; > + for (pos = 0; pos < slaves_count; pos++) { > + if (slave_id == slaves[pos]) > + break; > + } > + > + return pos; > +} > + > int > valid_port_id(uint8_t port_id); > > @@ -160,6 +187,14 @@ int > valid_slave_port_id(uint8_t port_id); > > void > +deactivate_slave(struct rte_eth_dev *eth_dev, > + uint8_t slave_pos ); > + > +void > +activate_slave(struct rte_eth_dev *eth_dev, > + uint8_t port_id ); > + > +void > link_properties_set(struct rte_eth_dev *bonded_eth_dev, > struct rte_eth_link *slave_dev_link); > void > @@ -173,6 +208,9 @@ int > mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr); > > int > +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr); > + > +int > mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev); > > uint8_t > -- > 1.7.9.5 > >