* [dpdk-dev] [PATCH] net/bnxt: optimizations for Tx completion handling
@ 2021-03-06 15:19 Lance Richardson
2021-03-12 15:23 ` Ajit Khaparde
0 siblings, 1 reply; 2+ messages in thread
From: Lance Richardson @ 2021-03-06 15:19 UTC (permalink / raw)
To: Ajit Khaparde, Somnath Kotur, Jerin Jacob, Ruifeng Wang,
Bruce Richardson, Konstantin Ananyev
Cc: dev
[-- Attachment #1: Type: text/plain, Size: 14643 bytes --]
Avoid copying mbuf pointers to a separate array for bulk
mbuf free when handling transmit completions in vector
mode transmit.
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
---
drivers/net/bnxt/bnxt_ethdev.c | 4 +-
drivers/net/bnxt/bnxt_ring.c | 2 +-
drivers/net/bnxt/bnxt_rxtx_vec_common.h | 89 +++++++++++++++----------
drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 5 +-
drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 7 +-
drivers/net/bnxt/bnxt_txq.c | 8 +--
drivers/net/bnxt/bnxt_txr.c | 68 ++++++++++---------
drivers/net/bnxt/bnxt_txr.h | 7 +-
8 files changed, 106 insertions(+), 84 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 88da345034..d4028e2bb2 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3186,7 +3186,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
struct bnxt_tx_ring_info *txr;
struct bnxt_cp_ring_info *cpr;
- struct bnxt_sw_tx_bd *tx_buf;
+ struct rte_mbuf **tx_buf;
struct tx_pkt_cmpl *txcmp;
uint32_t cons, cp_cons;
int rc;
@@ -3216,7 +3216,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
return RTE_ETH_TX_DESC_UNAVAIL;
}
tx_buf = &txr->tx_buf_ring[cons];
- if (tx_buf->mbuf == NULL)
+ if (*tx_buf == NULL)
return RTE_ETH_TX_DESC_DONE;
return RTE_ETH_TX_DESC_FULL;
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 997dcdc28b..e4055fa49b 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -230,7 +230,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
tx_ring->vmem =
(void **)((char *)mz->addr + tx_vmem_start);
tx_ring_info->tx_buf_ring =
- (struct bnxt_sw_tx_bd *)tx_ring->vmem;
+ (struct rte_mbuf **)tx_ring->vmem;
}
}
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index 91ff6736b1..9b9489a695 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -100,57 +100,78 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
* is enabled.
*/
static inline void
-bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
{
struct bnxt_tx_ring_info *txr = txq->tx_ring;
- struct rte_mbuf **free = txq->free;
uint16_t cons, raw_cons = txr->tx_raw_cons;
- unsigned int blk = 0;
- uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
-
- while (nr_pkts--) {
- struct bnxt_sw_tx_bd *tx_buf;
-
- cons = raw_cons++ & ring_mask;
- tx_buf = &txr->tx_buf_ring[cons];
- free[blk++] = tx_buf->mbuf;
- tx_buf->mbuf = NULL;
+ uint32_t ring_mask, ring_size, num;
+ struct rte_mempool *pool;
+
+ ring_mask = txr->tx_ring_struct->ring_mask;
+ ring_size = txr->tx_ring_struct->ring_size;
+
+ cons = raw_cons & ring_mask;
+ num = RTE_MIN(nr_pkts, ring_size - cons);
+ pool = txr->tx_buf_ring[cons]->pool;
+
+ rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
+ memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
+ raw_cons += num;
+ num = nr_pkts - num;
+ if (num) {
+ cons = raw_cons & ring_mask;
+ rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
+ num);
+ memset(&txr->tx_buf_ring[cons], 0,
+ num * sizeof(struct rte_mbuf *));
+ raw_cons += num;
}
- if (blk)
- rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
txr->tx_raw_cons = raw_cons;
}
static inline void
-bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
{
struct bnxt_tx_ring_info *txr = txq->tx_ring;
- struct rte_mbuf **free = txq->free;
uint16_t cons, raw_cons = txr->tx_raw_cons;
- unsigned int blk = 0;
- uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
+ uint32_t ring_mask, ring_size, num, blk;
+ struct rte_mempool *pool;
- while (nr_pkts--) {
- struct bnxt_sw_tx_bd *tx_buf;
- struct rte_mbuf *mbuf;
+ ring_mask = txr->tx_ring_struct->ring_mask;
+ ring_size = txr->tx_ring_struct->ring_size;
- cons = raw_cons++ & ring_mask;
- tx_buf = &txr->tx_buf_ring[cons];
- mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
- if (unlikely(mbuf == NULL))
- continue;
- tx_buf->mbuf = NULL;
+ while (nr_pkts) {
+ struct rte_mbuf *mbuf;
- if (blk && mbuf->pool != free[0]->pool) {
- rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
- blk = 0;
+ cons = raw_cons & ring_mask;
+ num = RTE_MIN(nr_pkts, ring_size - cons);
+ pool = txr->tx_buf_ring[cons]->pool;
+
+ blk = 0;
+ do {
+ mbuf = txr->tx_buf_ring[cons + blk];
+ mbuf = rte_pktmbuf_prefree_seg(mbuf);
+ if (!mbuf || mbuf->pool != pool)
+ break;
+ blk++;
+ } while (blk < num);
+
+ if (blk) {
+ rte_mempool_put_bulk(pool,
+ (void **)&txr->tx_buf_ring[cons],
+ blk);
+ memset(&txr->tx_buf_ring[cons], 0,
+ blk * sizeof(struct rte_mbuf *));
+ raw_cons += blk;
+ nr_pkts -= blk;
+ }
+ if (!mbuf) {
+ /* Skip freeing mbufs with non-zero reference count. */
+ raw_cons++;
+ nr_pkts--;
}
- free[blk++] = mbuf;
}
- if (blk)
- rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-
txr->tx_raw_cons = raw_cons;
}
#endif /* _BNXT_RXTX_VEC_COMMON_H_ */
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 3d54d9d59d..bc2e96ec38 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -346,7 +346,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
struct rte_mbuf *tx_mbuf;
struct tx_bd_long *txbd = NULL;
- struct bnxt_sw_tx_bd *tx_buf;
+ struct rte_mbuf **tx_buf;
uint16_t to_send;
nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
@@ -362,8 +362,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
tx_buf = &txr->tx_buf_ring[tx_prod];
- tx_buf->mbuf = tx_mbuf;
- tx_buf->nr_bds = 1;
+ *tx_buf = tx_mbuf;
txbd = &txr->tx_desc_ring[tx_prod];
txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 7a58434ce9..7ec04797b7 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -321,12 +321,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
static inline void
bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
- struct bnxt_sw_tx_bd *tx_buf)
+ struct rte_mbuf **tx_buf)
{
__m128i desc;
- tx_buf->mbuf = mbuf;
- tx_buf->nr_bds = 1;
+ *tx_buf = mbuf;
desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
bnxt_xmit_flags_len(mbuf->data_len,
@@ -343,7 +342,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
struct bnxt_tx_ring_info *txr = txq->tx_ring;
uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
struct tx_bd_long *txbd;
- struct bnxt_sw_tx_bd *tx_buf;
+ struct rte_mbuf **tx_buf;
uint16_t to_send;
tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 8679ac91e7..d95e1f7526 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -24,7 +24,7 @@ void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
{
- struct bnxt_sw_tx_bd *sw_ring;
+ struct rte_mbuf **sw_ring;
uint16_t i;
if (!txq || !txq->tx_ring)
@@ -33,9 +33,9 @@ static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
sw_ring = txq->tx_ring->tx_buf_ring;
if (sw_ring) {
for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
- if (sw_ring[i].mbuf) {
- rte_pktmbuf_free_seg(sw_ring[i].mbuf);
- sw_ring[i].mbuf = NULL;
+ if (sw_ring[i]) {
+ rte_pktmbuf_free_seg(sw_ring[i]);
+ sw_ring[i] = NULL;
}
}
}
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index 2810906a3a..68fbd3f582 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -76,7 +76,7 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
ring->ring_mask = ring->ring_size - 1;
ring->bd = (void *)txr->tx_desc_ring;
ring->bd_dma = txr->tx_desc_mapping;
- ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
+ ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
ring->vmem = (void **)&txr->tx_buf_ring;
ring->fw_ring_id = INVALID_HW_RING_ID;
@@ -104,6 +104,21 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
return 0;
}
+static bool
+bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
+{
+ if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
+ PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
+ PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
+ PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
+ PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
+ PKT_TX_QINQ_PKT) ||
+ (BNXT_TRUFLOW_EN(txq->bp) &&
+ (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
+ return true;
+ return false;
+}
+
static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
struct bnxt_tx_queue *txq,
uint16_t *coal_pkts,
@@ -116,10 +131,10 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
struct tx_bd_long_hi *txbd1 = NULL;
uint32_t vlan_tag_flags;
bool long_bd = false;
- unsigned short nr_bds = 0;
+ unsigned short nr_bds;
uint16_t prod;
struct rte_mbuf *m_seg;
- struct bnxt_sw_tx_bd *tx_buf;
+ struct rte_mbuf **tx_buf;
static const uint32_t lhint_arr[4] = {
TX_BD_LONG_FLAGS_LHINT_LT512,
TX_BD_LONG_FLAGS_LHINT_LT1K,
@@ -130,17 +145,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
if (unlikely(is_bnxt_in_error(txq->bp)))
return -EIO;
- if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
- PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
- PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
- PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
- PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
- PKT_TX_QINQ_PKT) ||
- (BNXT_TRUFLOW_EN(txq->bp) &&
- (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
- long_bd = true;
-
+ long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
nr_bds = long_bd + tx_pkt->nb_segs;
+
if (unlikely(bnxt_tx_avail(txq) < nr_bds))
return -ENOMEM;
@@ -172,8 +179,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
prod = RING_IDX(ring, txr->tx_raw_prod);
tx_buf = &txr->tx_buf_ring[prod];
- tx_buf->mbuf = tx_pkt;
- tx_buf->nr_bds = nr_bds;
+ *tx_buf = tx_pkt;
txbd = &txr->tx_desc_ring[prod];
txbd->opaque = *coal_pkts;
@@ -185,7 +191,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
else
txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
- txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
+ txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
*last_txbd = txbd;
if (long_bd) {
@@ -193,18 +199,18 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
vlan_tag_flags = 0;
/* HW can accelerate only outer vlan in QinQ mode */
- if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
+ if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
- tx_buf->mbuf->vlan_tci_outer;
+ tx_pkt->vlan_tci_outer;
outer_tpid_bd = txq->bp->outer_tpid_bd &
BNXT_OUTER_TPID_BD_MASK;
vlan_tag_flags |= outer_tpid_bd;
- } else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+ } else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
/* shurd: Should this mask at
* TX_BD_LONG_CFA_META_VLAN_VID_MASK?
*/
vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
- tx_buf->mbuf->vlan_tci;
+ tx_pkt->vlan_tci;
/* Currently supports 8021Q, 8021AD vlan offloads
* QINQ1, QINQ2, QINQ3 vlan headers are deprecated
*/
@@ -325,7 +331,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
prod = RING_IDX(ring, txr->tx_raw_prod);
tx_buf = &txr->tx_buf_ring[prod];
- tx_buf->mbuf = m_seg;
+ *tx_buf = m_seg;
txbd = &txr->tx_desc_ring[prod];
txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
@@ -356,16 +362,17 @@ static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
int i, j;
for (i = 0; i < nr_pkts; i++) {
- struct bnxt_sw_tx_bd *tx_buf;
+ struct rte_mbuf **tx_buf;
unsigned short nr_bds;
tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
- nr_bds = tx_buf->nr_bds;
+ nr_bds = (*tx_buf)->nb_segs +
+ bnxt_xmit_need_long_bd(*tx_buf, txq);
for (j = 0; j < nr_bds; j++) {
- if (tx_buf->mbuf) {
+ if (*tx_buf) {
/* Add mbuf to the bulk free array */
- free[blk++] = tx_buf->mbuf;
- tx_buf->mbuf = NULL;
+ free[blk++] = *tx_buf;
+ *tx_buf = NULL;
}
raw_cons = RING_NEXT(raw_cons);
tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
@@ -389,14 +396,15 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
for (i = 0; i < nr_pkts; i++) {
struct rte_mbuf *mbuf;
- struct bnxt_sw_tx_bd *tx_buf;
+ struct rte_mbuf **tx_buf;
unsigned short nr_bds;
tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
- nr_bds = tx_buf->nr_bds;
+ nr_bds = (*tx_buf)->nb_segs +
+ bnxt_xmit_need_long_bd(*tx_buf, txq);
for (j = 0; j < nr_bds; j++) {
- mbuf = tx_buf->mbuf;
- tx_buf->mbuf = NULL;
+ mbuf = *tx_buf;
+ *tx_buf = NULL;
raw_cons = RING_NEXT(raw_cons);
tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
if (!mbuf) /* long_bd's tx_buf ? */
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 281a3e23c5..238be1d190 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -17,18 +17,13 @@ struct bnxt_tx_ring_info {
struct bnxt_db_info tx_db;
struct tx_bd_long *tx_desc_ring;
- struct bnxt_sw_tx_bd *tx_buf_ring;
+ struct rte_mbuf **tx_buf_ring;
rte_iova_t tx_desc_mapping;
struct bnxt_ring *tx_ring_struct;
};
-struct bnxt_sw_tx_bd {
- struct rte_mbuf *mbuf; /* mbuf associated with TX descriptor */
- unsigned short nr_bds;
-};
-
static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
{
return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &
--
2.25.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [dpdk-dev] [PATCH] net/bnxt: optimizations for Tx completion handling
2021-03-06 15:19 [dpdk-dev] [PATCH] net/bnxt: optimizations for Tx completion handling Lance Richardson
@ 2021-03-12 15:23 ` Ajit Khaparde
0 siblings, 0 replies; 2+ messages in thread
From: Ajit Khaparde @ 2021-03-12 15:23 UTC (permalink / raw)
To: Lance Richardson
Cc: Somnath Kotur, Jerin Jacob, Ruifeng Wang, Bruce Richardson,
Konstantin Ananyev, dpdk-dev
[-- Attachment #1: Type: text/plain, Size: 19228 bytes --]
On Sat, Mar 6, 2021 at 7:19 AM Lance Richardson
<lance.richardson@broadcom.com> wrote:
>
> Avoid copying mbuf pointers to a separate array for bulk
> mbuf free when handling transmit completions in vector
> mode transmit.
>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Patch applied to dpdk-next-net-brcm. Thanks
> ---
> drivers/net/bnxt/bnxt_ethdev.c | 4 +-
> drivers/net/bnxt/bnxt_ring.c | 2 +-
> drivers/net/bnxt/bnxt_rxtx_vec_common.h | 89 +++++++++++++++----------
> drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 5 +-
> drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 7 +-
> drivers/net/bnxt/bnxt_txq.c | 8 +--
> drivers/net/bnxt/bnxt_txr.c | 68 ++++++++++---------
> drivers/net/bnxt/bnxt_txr.h | 7 +-
> 8 files changed, 106 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index 88da345034..d4028e2bb2 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -3186,7 +3186,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
> struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
> struct bnxt_tx_ring_info *txr;
> struct bnxt_cp_ring_info *cpr;
> - struct bnxt_sw_tx_bd *tx_buf;
> + struct rte_mbuf **tx_buf;
> struct tx_pkt_cmpl *txcmp;
> uint32_t cons, cp_cons;
> int rc;
> @@ -3216,7 +3216,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
> return RTE_ETH_TX_DESC_UNAVAIL;
> }
> tx_buf = &txr->tx_buf_ring[cons];
> - if (tx_buf->mbuf == NULL)
> + if (*tx_buf == NULL)
> return RTE_ETH_TX_DESC_DONE;
>
> return RTE_ETH_TX_DESC_FULL;
> diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
> index 997dcdc28b..e4055fa49b 100644
> --- a/drivers/net/bnxt/bnxt_ring.c
> +++ b/drivers/net/bnxt/bnxt_ring.c
> @@ -230,7 +230,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
> tx_ring->vmem =
> (void **)((char *)mz->addr + tx_vmem_start);
> tx_ring_info->tx_buf_ring =
> - (struct bnxt_sw_tx_bd *)tx_ring->vmem;
> + (struct rte_mbuf **)tx_ring->vmem;
> }
> }
>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> index 91ff6736b1..9b9489a695 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> @@ -100,57 +100,78 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
> * is enabled.
> */
> static inline void
> -bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
> +bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
> {
> struct bnxt_tx_ring_info *txr = txq->tx_ring;
> - struct rte_mbuf **free = txq->free;
> uint16_t cons, raw_cons = txr->tx_raw_cons;
> - unsigned int blk = 0;
> - uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
> -
> - while (nr_pkts--) {
> - struct bnxt_sw_tx_bd *tx_buf;
> -
> - cons = raw_cons++ & ring_mask;
> - tx_buf = &txr->tx_buf_ring[cons];
> - free[blk++] = tx_buf->mbuf;
> - tx_buf->mbuf = NULL;
> + uint32_t ring_mask, ring_size, num;
> + struct rte_mempool *pool;
> +
> + ring_mask = txr->tx_ring_struct->ring_mask;
> + ring_size = txr->tx_ring_struct->ring_size;
> +
> + cons = raw_cons & ring_mask;
> + num = RTE_MIN(nr_pkts, ring_size - cons);
> + pool = txr->tx_buf_ring[cons]->pool;
> +
> + rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
> + memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
> + raw_cons += num;
> + num = nr_pkts - num;
> + if (num) {
> + cons = raw_cons & ring_mask;
> + rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
> + num);
> + memset(&txr->tx_buf_ring[cons], 0,
> + num * sizeof(struct rte_mbuf *));
> + raw_cons += num;
> }
> - if (blk)
> - rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
>
> txr->tx_raw_cons = raw_cons;
> }
>
> static inline void
> -bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
> +bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
> {
> struct bnxt_tx_ring_info *txr = txq->tx_ring;
> - struct rte_mbuf **free = txq->free;
> uint16_t cons, raw_cons = txr->tx_raw_cons;
> - unsigned int blk = 0;
> - uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
> + uint32_t ring_mask, ring_size, num, blk;
> + struct rte_mempool *pool;
>
> - while (nr_pkts--) {
> - struct bnxt_sw_tx_bd *tx_buf;
> - struct rte_mbuf *mbuf;
> + ring_mask = txr->tx_ring_struct->ring_mask;
> + ring_size = txr->tx_ring_struct->ring_size;
>
> - cons = raw_cons++ & ring_mask;
> - tx_buf = &txr->tx_buf_ring[cons];
> - mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
> - if (unlikely(mbuf == NULL))
> - continue;
> - tx_buf->mbuf = NULL;
> + while (nr_pkts) {
> + struct rte_mbuf *mbuf;
>
> - if (blk && mbuf->pool != free[0]->pool) {
> - rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
> - blk = 0;
> + cons = raw_cons & ring_mask;
> + num = RTE_MIN(nr_pkts, ring_size - cons);
> + pool = txr->tx_buf_ring[cons]->pool;
> +
> + blk = 0;
> + do {
> + mbuf = txr->tx_buf_ring[cons + blk];
> + mbuf = rte_pktmbuf_prefree_seg(mbuf);
> + if (!mbuf || mbuf->pool != pool)
> + break;
> + blk++;
> + } while (blk < num);
> +
> + if (blk) {
> + rte_mempool_put_bulk(pool,
> + (void **)&txr->tx_buf_ring[cons],
> + blk);
> + memset(&txr->tx_buf_ring[cons], 0,
> + blk * sizeof(struct rte_mbuf *));
> + raw_cons += blk;
> + nr_pkts -= blk;
> + }
> + if (!mbuf) {
> + /* Skip freeing mbufs with non-zero reference count. */
> + raw_cons++;
> + nr_pkts--;
> }
> - free[blk++] = mbuf;
> }
> - if (blk)
> - rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
> -
> txr->tx_raw_cons = raw_cons;
> }
> #endif /* _BNXT_RXTX_VEC_COMMON_H_ */
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index 3d54d9d59d..bc2e96ec38 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -346,7 +346,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
> struct rte_mbuf *tx_mbuf;
> struct tx_bd_long *txbd = NULL;
> - struct bnxt_sw_tx_bd *tx_buf;
> + struct rte_mbuf **tx_buf;
> uint16_t to_send;
>
> nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
> @@ -362,8 +362,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>
> tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
> tx_buf = &txr->tx_buf_ring[tx_prod];
> - tx_buf->mbuf = tx_mbuf;
> - tx_buf->nr_bds = 1;
> + *tx_buf = tx_mbuf;
>
> txbd = &txr->tx_desc_ring[tx_prod];
> txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index 7a58434ce9..7ec04797b7 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -321,12 +321,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
>
> static inline void
> bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
> - struct bnxt_sw_tx_bd *tx_buf)
> + struct rte_mbuf **tx_buf)
> {
> __m128i desc;
>
> - tx_buf->mbuf = mbuf;
> - tx_buf->nr_bds = 1;
> + *tx_buf = mbuf;
>
> desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
> bnxt_xmit_flags_len(mbuf->data_len,
> @@ -343,7 +342,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
> struct bnxt_tx_ring_info *txr = txq->tx_ring;
> uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
> struct tx_bd_long *txbd;
> - struct bnxt_sw_tx_bd *tx_buf;
> + struct rte_mbuf **tx_buf;
> uint16_t to_send;
>
> tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
> diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
> index 8679ac91e7..d95e1f7526 100644
> --- a/drivers/net/bnxt/bnxt_txq.c
> +++ b/drivers/net/bnxt/bnxt_txq.c
> @@ -24,7 +24,7 @@ void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
>
> static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
> {
> - struct bnxt_sw_tx_bd *sw_ring;
> + struct rte_mbuf **sw_ring;
> uint16_t i;
>
> if (!txq || !txq->tx_ring)
> @@ -33,9 +33,9 @@ static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
> sw_ring = txq->tx_ring->tx_buf_ring;
> if (sw_ring) {
> for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
> - if (sw_ring[i].mbuf) {
> - rte_pktmbuf_free_seg(sw_ring[i].mbuf);
> - sw_ring[i].mbuf = NULL;
> + if (sw_ring[i]) {
> + rte_pktmbuf_free_seg(sw_ring[i]);
> + sw_ring[i] = NULL;
> }
> }
> }
> diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
> index 2810906a3a..68fbd3f582 100644
> --- a/drivers/net/bnxt/bnxt_txr.c
> +++ b/drivers/net/bnxt/bnxt_txr.c
> @@ -76,7 +76,7 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
> ring->ring_mask = ring->ring_size - 1;
> ring->bd = (void *)txr->tx_desc_ring;
> ring->bd_dma = txr->tx_desc_mapping;
> - ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
> + ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
> ring->vmem = (void **)&txr->tx_buf_ring;
> ring->fw_ring_id = INVALID_HW_RING_ID;
>
> @@ -104,6 +104,21 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
> return 0;
> }
>
> +static bool
> +bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
> +{
> + if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
> + PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
> + PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
> + PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
> + PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
> + PKT_TX_QINQ_PKT) ||
> + (BNXT_TRUFLOW_EN(txq->bp) &&
> + (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
> + return true;
> + return false;
> +}
> +
> static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
> struct bnxt_tx_queue *txq,
> uint16_t *coal_pkts,
> @@ -116,10 +131,10 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
> struct tx_bd_long_hi *txbd1 = NULL;
> uint32_t vlan_tag_flags;
> bool long_bd = false;
> - unsigned short nr_bds = 0;
> + unsigned short nr_bds;
> uint16_t prod;
> struct rte_mbuf *m_seg;
> - struct bnxt_sw_tx_bd *tx_buf;
> + struct rte_mbuf **tx_buf;
> static const uint32_t lhint_arr[4] = {
> TX_BD_LONG_FLAGS_LHINT_LT512,
> TX_BD_LONG_FLAGS_LHINT_LT1K,
> @@ -130,17 +145,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
> if (unlikely(is_bnxt_in_error(txq->bp)))
> return -EIO;
>
> - if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
> - PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
> - PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
> - PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
> - PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
> - PKT_TX_QINQ_PKT) ||
> - (BNXT_TRUFLOW_EN(txq->bp) &&
> - (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
> - long_bd = true;
> -
> + long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
> nr_bds = long_bd + tx_pkt->nb_segs;
> +
> if (unlikely(bnxt_tx_avail(txq) < nr_bds))
> return -ENOMEM;
>
> @@ -172,8 +179,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>
> prod = RING_IDX(ring, txr->tx_raw_prod);
> tx_buf = &txr->tx_buf_ring[prod];
> - tx_buf->mbuf = tx_pkt;
> - tx_buf->nr_bds = nr_bds;
> + *tx_buf = tx_pkt;
>
> txbd = &txr->tx_desc_ring[prod];
> txbd->opaque = *coal_pkts;
> @@ -185,7 +191,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
> txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
> else
> txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
> - txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
> + txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
> *last_txbd = txbd;
>
> if (long_bd) {
> @@ -193,18 +199,18 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
> vlan_tag_flags = 0;
>
> /* HW can accelerate only outer vlan in QinQ mode */
> - if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
> + if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
> vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
> - tx_buf->mbuf->vlan_tci_outer;
> + tx_pkt->vlan_tci_outer;
> outer_tpid_bd = txq->bp->outer_tpid_bd &
> BNXT_OUTER_TPID_BD_MASK;
> vlan_tag_flags |= outer_tpid_bd;
> - } else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
> + } else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
> /* shurd: Should this mask at
> * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
> */
> vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
> - tx_buf->mbuf->vlan_tci;
> + tx_pkt->vlan_tci;
> /* Currently supports 8021Q, 8021AD vlan offloads
> * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
> */
> @@ -325,7 +331,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>
> prod = RING_IDX(ring, txr->tx_raw_prod);
> tx_buf = &txr->tx_buf_ring[prod];
> - tx_buf->mbuf = m_seg;
> + *tx_buf = m_seg;
>
> txbd = &txr->tx_desc_ring[prod];
> txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
> @@ -356,16 +362,17 @@ static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
> int i, j;
>
> for (i = 0; i < nr_pkts; i++) {
> - struct bnxt_sw_tx_bd *tx_buf;
> + struct rte_mbuf **tx_buf;
> unsigned short nr_bds;
>
> tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> - nr_bds = tx_buf->nr_bds;
> + nr_bds = (*tx_buf)->nb_segs +
> + bnxt_xmit_need_long_bd(*tx_buf, txq);
> for (j = 0; j < nr_bds; j++) {
> - if (tx_buf->mbuf) {
> + if (*tx_buf) {
> /* Add mbuf to the bulk free array */
> - free[blk++] = tx_buf->mbuf;
> - tx_buf->mbuf = NULL;
> + free[blk++] = *tx_buf;
> + *tx_buf = NULL;
> }
> raw_cons = RING_NEXT(raw_cons);
> tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> @@ -389,14 +396,15 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
>
> for (i = 0; i < nr_pkts; i++) {
> struct rte_mbuf *mbuf;
> - struct bnxt_sw_tx_bd *tx_buf;
> + struct rte_mbuf **tx_buf;
> unsigned short nr_bds;
>
> tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> - nr_bds = tx_buf->nr_bds;
> + nr_bds = (*tx_buf)->nb_segs +
> + bnxt_xmit_need_long_bd(*tx_buf, txq);
> for (j = 0; j < nr_bds; j++) {
> - mbuf = tx_buf->mbuf;
> - tx_buf->mbuf = NULL;
> + mbuf = *tx_buf;
> + *tx_buf = NULL;
> raw_cons = RING_NEXT(raw_cons);
> tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> if (!mbuf) /* long_bd's tx_buf ? */
> diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
> index 281a3e23c5..238be1d190 100644
> --- a/drivers/net/bnxt/bnxt_txr.h
> +++ b/drivers/net/bnxt/bnxt_txr.h
> @@ -17,18 +17,13 @@ struct bnxt_tx_ring_info {
> struct bnxt_db_info tx_db;
>
> struct tx_bd_long *tx_desc_ring;
> - struct bnxt_sw_tx_bd *tx_buf_ring;
> + struct rte_mbuf **tx_buf_ring;
>
> rte_iova_t tx_desc_mapping;
>
> struct bnxt_ring *tx_ring_struct;
> };
>
> -struct bnxt_sw_tx_bd {
> - struct rte_mbuf *mbuf; /* mbuf associated with TX descriptor */
> - unsigned short nr_bds;
> -};
> -
> static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
> {
> return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-03-12 15:23 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-06 15:19 [dpdk-dev] [PATCH] net/bnxt: optimizations for Tx completion handling Lance Richardson
2021-03-12 15:23 ` Ajit Khaparde
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).