From: Declan Doherty
To: dev@dpdk.org
Cc: ferruh.yigit@intel.com, Declan Doherty, Keith Wiles
Date: Tue, 9 Jan 2018 11:34:09 +0000
Message-Id: <20180109113409.27699-1-declan.doherty@intel.com>
X-Mailer: git-send-email 2.9.4
In-Reply-To: <20171201000405.27870-1-declan.doherty@intel.com>
References: <20171201000405.27870-1-declan.doherty@intel.com>
Subject: [dpdk-dev] [PATCH v2] net/bond: burst mode hash calculation

Change the xmit_hash functions to handle bursts of packets instead of a
single packet at a time, and update the affected tx_burst functions.

Signed-off-by: Declan Doherty
Signed-off-by: Keith Wiles
---
V2: rebased to HEAD of next-net

 drivers/net/bonding/rte_eth_bond_api.c     |   3 -
 drivers/net/bonding/rte_eth_bond_pmd.c     | 580 ++++++++++++++++++-----------
 drivers/net/bonding/rte_eth_bond_private.h |  22 +-
 3 files changed, 369 insertions(+), 236 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index 703bb39..ed880cd 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -694,15 +694,12 @@ rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy)
 	switch (policy) {
 	case BALANCE_XMIT_POLICY_LAYER2:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l2_hash;
 		break;
 	case BALANCE_XMIT_POLICY_LAYER23:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l23_hash;
 		break;
 	case BALANCE_XMIT_POLICY_LAYER34:
 		internals->balance_xmit_policy = policy;
-		internals->xmit_hash = xmit_l34_hash;
 		break;
 	default:

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index d1d3663..f66861e 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -309,87 +309,114 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 static uint16_t
 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
-		uint16_t nb_pkts)
+		uint16_t nb_bufs)
 {
-	struct bond_dev_private *internals;
-	struct bond_tx_queue *bd_tx_q;
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;

-	uint16_t num_of_slaves;
-	uint16_t slaves[RTE_MAX_ETHPORTS];
-	/* positions in slaves, not ID */
-	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
-	uint8_t distributing_count;
+	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t slave_count;

-	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
-	uint16_t i, op_slave_idx;
+	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
+	uint16_t dist_slave_count;

-	struct rte_mbuf
*slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + /* 2-D array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - if (unlikely(nb_pkts == 0)) - return 0; + uint16_t i, j; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * - num_of_slaves); + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); + + + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; } - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], - distributing_count); + if (unlikely(dist_slave_count < 1)) + return 0; - /* Populate slave mbuf arrays with mbufs for that slave. - * Use only slaves that are currently distributing. - */ - uint8_t slave_offset = - distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = - bufs[i]; - slave_nb_pkts[slave_offset]++; - } + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. 
*/ + uint8_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; } + /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) + for (i = 0; i < dist_slave_count; i++) { + if (slave_nb_bufs[i] == 0) continue; - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); - num_tx_total += num_tx_slave; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + total_tx_count += slave_tx_count; /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, - num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow reordering + * later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) { + slave_bufs[i][j] = + slave_bufs[i][(slave_tx_count - 1) + j]; + } } } - return num_tx_total; + /* + * If there are tx burst failures we move packets to end of bufs to + * preserve expected PMD behaviour of all failed transmitted being + * at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; j < slave_tx_fail_count[i]; j++) + bufs[bufs_idx++] = slave_bufs[i][j]; + } + } + } + + return total_tx_count; } @@ -788,96 +815,129 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr) (word_src_addr[3] ^ word_dst_addr[3]); } -uint16_t -xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count) + +void +burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); + struct ether_hdr *eth_hdr; + uint32_t hash; + int i; - uint32_t hash = ether_hash(eth_hdr); + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + + hash = ether_hash(eth_hdr); - return (hash ^= hash >> 8) % slave_count; + slaves[i++] = (hash ^= hash >> 8) % slave_count; + } } -uint16_t -xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count) +void +burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); - uint32_t hash, l3hash = 0; + uint16_t i; + struct ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + uint32_t hash, l3hash; - hash = ether_hash(eth_hdr); + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + l3hash = 0; - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv4_hash(ipv4_hdr); + proto = eth_hdr->ether_type; + hash = ether_hash(eth_hdr); - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - } + vlan_offset = get_vlan_offset(eth_hdr, 
&proto); - hash = hash ^ l3hash; - hash ^= hash >> 16; - hash ^= hash >> 8; + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv4_hash(ipv4_hdr); - return hash % slave_count; -} + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + } -uint16_t -xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count) -{ - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); + hash = hash ^ l3hash; + hash ^= hash >> 16; + hash ^= hash >> 8; - struct udp_hdr *udp_hdr = NULL; - struct tcp_hdr *tcp_hdr = NULL; - uint32_t hash, l3hash = 0, l4hash = 0; + slaves[i++] = hash % slave_count; + } +} - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - size_t ip_hdr_offset; +void +burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) +{ + struct ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + int i; - l3hash = ipv4_hash(ipv4_hdr); + struct udp_hdr *udp_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t hash, l3hash, l4hash; - /* there is no L4 header in fragmented packet */ - if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) { - ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + proto = eth_hdr->ether_type; + vlan_offset = get_vlan_offset(eth_hdr, &proto); + l3hash = 0; + l4hash = 0; + + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + size_t ip_hdr_offset; + + l3hash = ipv4_hash(ipv4_hdr); + + /* there is no L4 header in fragmented packet */ + if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) + == 0)) { + ip_hdr_offset = (ipv4_hdr->version_ihl + & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; - if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv4_hdr->next_proto_id == + IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(udp_hdr); + } + } + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + + if (ipv6_hdr->proto == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); + } else if (ipv6_hdr->proto == IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); l4hash = HASH_L4_PORTS(udp_hdr); } } - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - - if (ipv6_hdr->proto == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv6_hdr->proto == IPPROTO_UDP) { - 
udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(udp_hdr); - } - } - hash = l3hash ^ l4hash; - hash ^= hash >> 16; - hash ^= hash >> 8; + hash = l3hash ^ l4hash; + hash ^= hash >> 16; + hash ^= hash >> 8; - return hash % slave_count; + slaves[i++] = hash % slave_count; + } } struct bwg_slave { @@ -1185,167 +1245,239 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) static uint16_t bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0; + /* Array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - int i, op_slave_id; + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t i, j; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * num_of_slaves); + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - if (num_of_slaves < 1) - return num_tx_total; + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves); + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. 
*/ + uint8_t slave_idx = bufs_slave_port_idxs[i]; - /* Populate slave mbuf arrays with mbufs for that slave */ - slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i]; + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; } /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] > 0) { - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + for (i = 0; i < slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; - /* if tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); - tx_fail_total += slave_tx_fail_count; - memcpy(&bufs[nb_pkts - tx_fail_total], - &slave_bufs[i][num_tx_slave], - slave_tx_fail_count * sizeof(bufs[0])); + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow reordering + * later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) { + slave_bufs[i][j] = + slave_bufs[i][(slave_tx_count - 1) + j]; } + } + } - num_tx_total += num_tx_slave; + /* + * If there are tx burst failures we move packets to end of bufs to + * preserve expected PMD behaviour of all failed transmitted being + * at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; j < slave_tx_fail_count[i]; j++) + bufs[bufs_idx++] = slave_bufs[i][j]; + } } } - return num_tx_total; + return total_tx_count; } static uint16_t bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; - /* positions in slaves, not ID */ - uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; - uint8_t distributing_count; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; - uint16_t i, op_slave_idx; + uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t dist_slave_count; - /* Allocate additional packets in case 8023AD mode. 
*/ - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + /* 2-D array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ - uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + uint16_t i, j; + + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; } - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], distributing_count); + if (likely(dist_slave_count > 1)) { - /* Populate slave mbuf arrays with mbufs for that slave. Use only - * slaves that are currently distributing. 
*/ - uint8_t slave_offset = distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i]; - slave_nb_pkts[slave_offset]++; - } - } + /* + * Populate slaves mbuf with the packets which are to be sent + * on it, selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, + bufs_slave_port_idxs); - /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) - continue; + for (i = 0; i < nb_bufs; i++) { + /* + * Populate slave mbuf arrays with mbufs for that + * slave + */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = + bufs[i]; + } - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); - /* If tx burst fails drop slow packets */ - for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++) - rte_pktmbuf_free(slave_bufs[i][num_tx_slave]); + /* Send packet burst on each slave device */ + for (i = 0; i < dist_slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; - num_tx_total += num_tx_slave - slave_slow_nb_pkts[i]; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + slave_tx_count = rte_eth_tx_burst( + dist_slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); + + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow + * reordering later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) + slave_bufs[i][j] = + slave_bufs[i] + [(slave_tx_count - 1) + + j]; + } + } - /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; + /* + * If there are tx burst failures we move packets to end of + * bufs to preserve expected PMD behaviour of all failed + * transmitted being at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; + j < slave_tx_fail_count[i]; + j++) { + bufs[bufs_idx++] = + slave_bufs[i][j]; + } + } + } } } /* Check for LACP control packets and send if available */ - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; struct rte_mbuf *ctrl_pkt = NULL; - int pkt_avail = rte_ring_dequeue(port->tx_ring, - (void **)&ctrl_pkt); + if (likely(rte_ring_empty(port->tx_ring))) + continue; + + rte_ring_dequeue(port->tx_ring, (void **)&ctrl_pkt); - if (unlikely(pkt_avail == 0)) { - num_tx_slave = rte_eth_tx_burst(slaves[i], + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], bd_tx_q->queue_id, &ctrl_pkt, 1); - /* - * re-enqueue LAG control plane packets to buffering - * ring if transmission fails so the packet isn't lost. - */ - if (num_tx_slave != nb_pkts) - rte_ring_enqueue(port->tx_ring, ctrl_pkt); - } + /* + * re-enqueue LAG control plane packets to buffering + * ring if transmission fails so the packet isn't lost. 
+		 */
+		if (slave_tx_count != 1)
+			rte_ring_enqueue(port->tx_ring, ctrl_pkt);
 	}

-	return num_tx_total;
+	return total_tx_count;
 }

 static uint16_t
@@ -2780,7 +2912,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
 	internals->mode = BONDING_MODE_INVALID;
 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
-	internals->xmit_hash = xmit_l2_hash;
+	internals->burst_xmit_hash = burst_xmit_l2_hash;
 	internals->user_defined_mac = 0;
 	internals->link_status_polling_enabled = 0;

diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index a5cfa6a..3ee8edc 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -109,8 +109,8 @@ struct bond_slave_details {
 	uint16_t reta_size;
 };

-
-typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count);
+typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);

 /** Link Bonding PMD device private configuration Structure */
 struct bond_dev_private {
@@ -126,7 +126,7 @@ struct bond_dev_private {
 	uint8_t balance_xmit_policy;
 	/**< Transmit policy - l2 / l23 / l34 for operation in balance mode */

-	xmit_hash_t xmit_hash;
+	burst_xmit_hash_t burst_xmit_hash;
 	/**< Transmit policy hash function */

 	uint8_t user_defined_mac;
@@ -248,14 +248,18 @@ void
 slave_add(struct bond_dev_private *internals,
 		struct rte_eth_dev *slave_eth_dev);

-uint16_t
-xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);

-uint16_t
-xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count);
+void
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);
+
+void
+burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
+		uint8_t slave_count, uint16_t *slaves);

-uint16_t
-xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);

 void
 bond_ethdev_primary_set(struct bond_dev_private *internals,
-- 
2.9.4
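
[Editor's note] For readers unfamiliar with the burst-mode pattern this patch
introduces, below is a minimal standalone sketch in plain C. It is not the
patch's code: there are no DPDK types, the per-packet hashes are toy values
standing in for the real L2/L23/L34 header hashes, and the bucket arrays only
mirror the role of slave_bufs[][] / slave_nb_bufs[]. It shows the contract of
the new burst_xmit_hash callback (one call fills a slave index per packet for
the whole burst) and the grouping step the tx_burst functions then perform.

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_SLAVES 4
    #define BURST_SIZE 8

    /* Stand-in for burst_xmit_l2_hash(): map every packet of the burst to a
     * slave index in a single call instead of one call per packet. */
    static void
    burst_hash(const uint32_t *pkt_hashes, uint16_t nb_pkts,
               uint16_t slave_count, uint16_t *slaves)
    {
        uint16_t i;

        for (i = 0; i < nb_pkts; i++) {
            uint32_t hash = pkt_hashes[i];

            hash ^= hash >> 16;
            hash ^= hash >> 8;
            slaves[i] = hash % slave_count;
        }
    }

    int
    main(void)
    {
        /* Toy per-packet hashes; in the driver these are derived from the
         * Ethernet/IP/L4 headers of each mbuf in the burst. */
        uint32_t pkt_hashes[BURST_SIZE] = {
            0x1111, 0x2222, 0x3333, 0x4444,
            0x5555, 0x6666, 0x7777, 0x8888
        };
        uint16_t slaves[BURST_SIZE];
        uint16_t slave_count = 3;

        /* Per-slave buckets, mirroring slave_bufs[][] and slave_nb_bufs[] */
        uint16_t slave_pkts[MAX_SLAVES][BURST_SIZE];
        uint16_t slave_nb_pkts[MAX_SLAVES] = { 0 };
        uint16_t i;

        /* One hash call for the whole burst */
        burst_hash(pkt_hashes, BURST_SIZE, slave_count, slaves);

        /* Group packet indices by the slave they hash to */
        for (i = 0; i < BURST_SIZE; i++) {
            uint16_t s = slaves[i];

            slave_pkts[s][slave_nb_pkts[s]++] = i;
        }

        for (i = 0; i < slave_count; i++)
            printf("slave %u gets %u packets\n", i, slave_nb_pkts[i]);

        return 0;
    }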