Subject: [dpdk-dev] [PATCH 2/3] bnx2x: Tx performance improvement fixes
From: Rasesh Mody @ 2016-03-11 1:47 UTC
To: dev
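
Rework the Tx fast path for performance: bnx2x_tx_encap() now
encapsulates a single mbuf and returns the number of Tx BDs it used,
BD availability is checked once per burst (worst case BDS_PER_TX_PKT
BDs per packet) instead of per packet, and the doorbell is rung once
per burst from bnx2x_xmit_pkts() rather than once per packet. Completed
Tx mbufs are freed per segment with rte_pktmbuf_free_seg(). The stats
path now reports imissed (BRB drops and truncates plus rx_nombuf) and
counts Rx mbuf allocation failures in rx_mbuf_alloc_failed.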
Signed-off-by: Harish Patil <harish.patil@qlogic.com>
Signed-off-by: Rasesh Mody <rasesh.mody@qlogic.com>
---
drivers/net/bnx2x/bnx2x.c | 207 +++++++++++++++++---------------------
drivers/net/bnx2x/bnx2x.h | 4 +-
drivers/net/bnx2x/bnx2x_ethdev.c | 53 ++++++++++
drivers/net/bnx2x/bnx2x_rxtx.c | 47 +++++----
4 files changed, 173 insertions(+), 138 deletions(-)
diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index 9d640da..0f49fef 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -1293,7 +1293,7 @@ bnx2x_free_tx_pkt(__rte_unused struct bnx2x_fastpath *fp, struct bnx2x_tx_queue
struct rte_mbuf *tx_mbuf = txq->sw_ring[TX_BD(pkt_idx, txq)];
if (likely(tx_mbuf != NULL)) {
- rte_pktmbuf_free(tx_mbuf);
+ rte_pktmbuf_free_seg(tx_mbuf);
} else {
PMD_RX_LOG(ERR, "fp[%02d] lost mbuf %lu",
fp->index, (unsigned long)TX_BD(pkt_idx, txq));
@@ -2113,147 +2113,128 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link
* the mbuf and return to the caller.
*
* Returns:
- * 0 = Success, !0 = Failure
+ * Number of TX BDs used for the mbuf
* Note the side effect that an mbuf may be freed if it causes a problem.
*/
-int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head, int m_pkts)
+uint32_t
+bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0)
{
- struct rte_mbuf *m0;
struct eth_tx_start_bd *tx_start_bd;
uint16_t bd_prod, pkt_prod;
- int m_tx;
struct bnx2x_softc *sc;
uint32_t nbds = 0;
- struct bnx2x_fastpath *fp;
sc = txq->sc;
- fp = &sc->fp[txq->queue_id];
bd_prod = txq->tx_bd_tail;
pkt_prod = txq->tx_pkt_tail;
- for (m_tx = 0; m_tx < m_pkts; m_tx++) {
+ txq->sw_ring[TX_BD(pkt_prod, txq)] = m0;
- m0 = *m_head++;
+ tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd;
- if (unlikely(txq->nb_tx_avail < 3)) {
- PMD_TX_LOG(ERR, "no enough bds %d/%d",
- bd_prod, txq->nb_tx_avail);
- return -ENOMEM;
- }
+ tx_start_bd->addr =
+ rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0));
+ tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len);
+ tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
+ tx_start_bd->general_data =
+ (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);
- txq->sw_ring[TX_BD(pkt_prod, txq)] = m0;
+ tx_start_bd->nbd = rte_cpu_to_le_16(2);
- tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd;
-
- tx_start_bd->addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0));
- tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len);
- tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
- tx_start_bd->general_data =
- (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT);
-
- tx_start_bd->nbd = rte_cpu_to_le_16(2);
+ if (m0->ol_flags & PKT_TX_VLAN_PKT) {
+ tx_start_bd->vlan_or_ethertype =
+ rte_cpu_to_le_16(m0->vlan_tci);
+ tx_start_bd->bd_flags.as_bitfield |=
+ (X_ETH_OUTBAND_VLAN <<
+ ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
+ } else {
+ if (IS_PF(sc))
+ tx_start_bd->vlan_or_ethertype =
+ rte_cpu_to_le_16(pkt_prod);
+ else {
+ struct ether_hdr *eh =
+ rte_pktmbuf_mtod(m0, struct ether_hdr *);
- if (m0->ol_flags & PKT_TX_VLAN_PKT) {
tx_start_bd->vlan_or_ethertype =
- rte_cpu_to_le_16(m0->vlan_tci);
- tx_start_bd->bd_flags.as_bitfield |=
- (X_ETH_OUTBAND_VLAN <<
- ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
- } else {
- if (IS_PF(sc))
- tx_start_bd->vlan_or_ethertype =
- rte_cpu_to_le_16(pkt_prod);
- else {
- struct ether_hdr *eh
- = rte_pktmbuf_mtod(m0, struct ether_hdr *);
-
- tx_start_bd->vlan_or_ethertype
- = rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type));
- }
+ rte_cpu_to_le_16(rte_be_to_cpu_16(eh->ether_type));
}
+ }
- bd_prod = NEXT_TX_BD(bd_prod);
- if (IS_VF(sc)) {
- struct eth_tx_parse_bd_e2 *tx_parse_bd;
- const struct ether_hdr *eh = rte_pktmbuf_mtod(m0, struct ether_hdr *);
- uint8_t mac_type = UNICAST_ADDRESS;
-
- tx_parse_bd =
- &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2;
- if (is_multicast_ether_addr(&eh->d_addr)) {
- if (is_broadcast_ether_addr(&eh->d_addr))
- mac_type = BROADCAST_ADDRESS;
- else
- mac_type = MULTICAST_ADDRESS;
- }
- tx_parse_bd->parsing_data =
- (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT);
-
- rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi,
- &eh->d_addr.addr_bytes[0], 2);
- rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid,
- &eh->d_addr.addr_bytes[2], 2);
- rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo,
- &eh->d_addr.addr_bytes[4], 2);
- rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi,
- &eh->s_addr.addr_bytes[0], 2);
- rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid,
- &eh->s_addr.addr_bytes[2], 2);
- rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo,
- &eh->s_addr.addr_bytes[4], 2);
-
- tx_parse_bd->data.mac_addr.dst_hi =
- rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi);
- tx_parse_bd->data.mac_addr.dst_mid =
- rte_cpu_to_be_16(tx_parse_bd->data.
- mac_addr.dst_mid);
- tx_parse_bd->data.mac_addr.dst_lo =
- rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo);
- tx_parse_bd->data.mac_addr.src_hi =
- rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi);
- tx_parse_bd->data.mac_addr.src_mid =
- rte_cpu_to_be_16(tx_parse_bd->data.
- mac_addr.src_mid);
- tx_parse_bd->data.mac_addr.src_lo =
- rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo);
-
- PMD_TX_LOG(DEBUG,
- "PBD dst %x %x %x src %x %x %x p_data %x",
- tx_parse_bd->data.mac_addr.dst_hi,
- tx_parse_bd->data.mac_addr.dst_mid,
- tx_parse_bd->data.mac_addr.dst_lo,
- tx_parse_bd->data.mac_addr.src_hi,
- tx_parse_bd->data.mac_addr.src_mid,
- tx_parse_bd->data.mac_addr.src_lo,
- tx_parse_bd->parsing_data);
- }
+ bd_prod = NEXT_TX_BD(bd_prod);
+ if (IS_VF(sc)) {
+ struct eth_tx_parse_bd_e2 *tx_parse_bd;
+ const struct ether_hdr *eh =
+ rte_pktmbuf_mtod(m0, struct ether_hdr *);
+ uint8_t mac_type = UNICAST_ADDRESS;
+
+ tx_parse_bd =
+ &txq->tx_ring[TX_BD(bd_prod, txq)].parse_bd_e2;
+ if (is_multicast_ether_addr(&eh->d_addr)) {
+ if (is_broadcast_ether_addr(&eh->d_addr))
+ mac_type = BROADCAST_ADDRESS;
+ else
+ mac_type = MULTICAST_ADDRESS;
+ }
+ tx_parse_bd->parsing_data =
+ (mac_type << ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT);
+
+ rte_memcpy(&tx_parse_bd->data.mac_addr.dst_hi,
+ &eh->d_addr.addr_bytes[0], 2);
+ rte_memcpy(&tx_parse_bd->data.mac_addr.dst_mid,
+ &eh->d_addr.addr_bytes[2], 2);
+ rte_memcpy(&tx_parse_bd->data.mac_addr.dst_lo,
+ &eh->d_addr.addr_bytes[4], 2);
+ rte_memcpy(&tx_parse_bd->data.mac_addr.src_hi,
+ &eh->s_addr.addr_bytes[0], 2);
+ rte_memcpy(&tx_parse_bd->data.mac_addr.src_mid,
+ &eh->s_addr.addr_bytes[2], 2);
+ rte_memcpy(&tx_parse_bd->data.mac_addr.src_lo,
+ &eh->s_addr.addr_bytes[4], 2);
+
+ tx_parse_bd->data.mac_addr.dst_hi =
+ rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_hi);
+ tx_parse_bd->data.mac_addr.dst_mid =
+ rte_cpu_to_be_16(tx_parse_bd->data.
+ mac_addr.dst_mid);
+ tx_parse_bd->data.mac_addr.dst_lo =
+ rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.dst_lo);
+ tx_parse_bd->data.mac_addr.src_hi =
+ rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_hi);
+ tx_parse_bd->data.mac_addr.src_mid =
+ rte_cpu_to_be_16(tx_parse_bd->data.
+ mac_addr.src_mid);
+ tx_parse_bd->data.mac_addr.src_lo =
+ rte_cpu_to_be_16(tx_parse_bd->data.mac_addr.src_lo);
PMD_TX_LOG(DEBUG,
- "start bd: nbytes %d flags %x vlan %x\n",
- tx_start_bd->nbytes,
- tx_start_bd->bd_flags.as_bitfield,
- tx_start_bd->vlan_or_ethertype);
+ "PBD dst %x %x %x src %x %x %x p_data %x",
+ tx_parse_bd->data.mac_addr.dst_hi,
+ tx_parse_bd->data.mac_addr.dst_mid,
+ tx_parse_bd->data.mac_addr.dst_lo,
+ tx_parse_bd->data.mac_addr.src_hi,
+ tx_parse_bd->data.mac_addr.src_mid,
+ tx_parse_bd->data.mac_addr.src_lo,
+ tx_parse_bd->parsing_data);
+ }
- bd_prod = NEXT_TX_BD(bd_prod);
- pkt_prod++;
+ PMD_TX_LOG(DEBUG,
+ "start bd: nbytes %d flags %x vlan %x\n",
+ tx_start_bd->nbytes,
+ tx_start_bd->bd_flags.as_bitfield,
+ tx_start_bd->vlan_or_ethertype);
- if (TX_IDX(bd_prod) < 2) {
- nbds++;
- }
- }
+ bd_prod = NEXT_TX_BD(bd_prod);
+ pkt_prod++;
+
+ if (TX_IDX(bd_prod) < 2)
+ nbds++;
- txq->nb_tx_avail -= m_pkts << 1;
+ txq->nb_tx_avail -= 1 << 1;
txq->tx_bd_tail = bd_prod;
txq->tx_pkt_tail = pkt_prod;
- mb();
- fp->tx_db.data.prod += (m_pkts << 1) + nbds;
- DOORBELL(sc, txq->queue_id, fp->tx_db.raw);
- mb();
-
- return 0;
+ return nbds + 2;
}
static uint16_t bnx2x_cid_ilt_lines(struct bnx2x_softc *sc)
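
For reference, a minimal sketch of the per-packet BD accounting behind
the new return value (illustrative only, not part of the patch; it uses
the TX_IDX() macro from bnx2x.h and a hypothetical helper name, and
assumes NEXT_TX_BD() skips the per-page link BD):

/* BDs rung on the doorbell for one single-segment packet: a start BD
 * and a parse BD always, plus one next-page pointer BD when the
 * producer wraps into a new BD page (the TX_IDX() < 2 test above).
 * nb_tx_avail is only debited by 2, since the usable BDs per page
 * already exclude the page-link BD. */
static inline uint32_t
bds_rung_for_pkt(uint16_t bd_prod_after_pkt)
{
	uint32_t nbds = 2;			/* start BD + parse BD */

	if (TX_IDX(bd_prod_after_pkt) < 2)	/* wrapped to a new page */
		nbds++;				/* next-page pointer BD */

	return nbds;
}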
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 5f52450..13a7d3a 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -184,6 +184,8 @@ struct bnx2x_device_type {
#define TX_PAGE(x) (((x) & ~USABLE_TX_BD_PER_PAGE) >> 8)
#define TX_IDX(x) ((x) & USABLE_TX_BD_PER_PAGE)
+#define BDS_PER_TX_PKT (3)
+
/*
* Trigger pending transmits when the number of available BDs is greater
* than 1/8 of the total number of usable BDs.
@@ -1882,7 +1884,7 @@ int bnx2x_alloc_hsi_mem(struct bnx2x_softc *sc);
int bnx2x_alloc_ilt_mem(struct bnx2x_softc *sc);
void bnx2x_free_ilt_mem(struct bnx2x_softc *sc);
void bnx2x_dump_tx_chain(struct bnx2x_fastpath * fp, int bd_prod, int count);
-int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf **m_head, int m_pkts);
+uint32_t bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0);
uint8_t bnx2x_txeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp);
void bnx2x_print_adapter_info(struct bnx2x_softc *sc);
int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp);
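
BDS_PER_TX_PKT is the worst-case BD budget for one packet: a start BD,
a parse BD, and possibly one next-page pointer BD. A minimal sketch of
bounding a burst up front with it, mirroring the bnx2x_rxtx.c hunk
below (hypothetical helper name, not part of the patch):

/* Admit only as many packets as the free BDs can cover in the worst
 * case, so the encap loop never runs out of BDs mid-burst. */
static inline uint16_t
max_tx_pkts(uint16_t nb_pkts, uint16_t nb_tx_avail)
{
	return RTE_MIN(nb_pkts, nb_tx_avail / BDS_PER_TX_PKT);
}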
diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c
index 69df02e..ac0617a 100644
--- a/drivers/net/bnx2x/bnx2x_ethdev.c
+++ b/drivers/net/bnx2x/bnx2x_ethdev.c
@@ -296,6 +296,17 @@ static void
bnx2x_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct bnx2x_softc *sc = dev->data->dev_private;
+ uint32_t brb_truncate_discard;
+ uint64_t brb_drops;
+ uint64_t brb_truncates;
+#ifdef RTE_LIBRTE_BNX2X_DEBUG
+ uint32_t mf_tag_discard;
+ uint32_t mac_filter_discard;
+ uint64_t rx_pause;
+ uint64_t tx_pause;
+ uint64_t rx_pfc;
+ uint64_t tx_pfc;
+#endif
PMD_INIT_FUNC_TRACE();
@@ -336,6 +347,48 @@ bnx2x_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->rx_nombuf =
HILO_U64(sc->eth_stats.no_buff_discard_hi,
sc->eth_stats.no_buff_discard_lo);
+ brb_drops =
+ HILO_U64(sc->eth_stats.brb_drop_hi,
+ sc->eth_stats.brb_drop_lo);
+
+ brb_truncates =
+ HILO_U64(sc->eth_stats.brb_truncate_hi,
+ sc->eth_stats.brb_truncate_lo);
+
+ brb_truncate_discard = sc->eth_stats.brb_truncate_discard;
+
+ stats->imissed = brb_drops + brb_truncates +
+ brb_truncate_discard + stats->rx_nombuf;
+
+#ifdef RTE_LIBRTE_BNX2X_DEBUG
+ mac_filter_discard = sc->eth_stats.mac_filter_discard;
+
+ mf_tag_discard = sc->eth_stats.mf_tag_discard;
+
+ tx_pause = HILO_U64(sc->eth_stats.pause_frames_sent_hi,
+ sc->eth_stats.pause_frames_sent_lo);
+
+ rx_pause = HILO_U64(sc->eth_stats.pause_frames_received_hi,
+ sc->eth_stats.pause_frames_received_lo);
+
+ tx_pfc = HILO_U64(sc->eth_stats.pfc_frames_sent_hi,
+ sc->eth_stats.pfc_frames_sent_lo);
+
+ rx_pfc = HILO_U64(sc->eth_stats.pfc_frames_received_hi,
+ sc->eth_stats.pfc_frames_received_lo);
+
+ PMD_DRV_LOG(DEBUG, "port=%u, nobuff=%"PRIu64", brb_drops=%"PRIu64""
+ "brb_truncates=%"PRIu64", brb_truncate_discard=%"PRIu32""
+ "mac_filter_discard=%"PRIu32", mf_tag_discard=%"PRIu32"",
+ dev->data->port_id, stats->rx_nombuf, brb_drops,
+ brb_truncates, brb_truncate_discard,
+ mac_filter_discard, mf_tag_discard);
+
+ PMD_DRV_LOG(DEBUG, "port=%u, rx_pause=%"PRIu64", tx_pause=%"PRIu64""
+ "rx_pfc=%"PRIu64", tx_pfc=%"PRIu64"",
+ dev->data->port_id, rx_pause, tx_pause,
+ rx_pfc, tx_pfc);
+#endif
}
static void
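
For reference, a minimal sketch of reading the new counters through the
standard ethdev API (illustrative, not part of the patch; assumes an
initialized and started port):

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

static void
print_drop_stats(uint8_t port_id)
{
	struct rte_eth_stats stats;

	/* rx_nombuf reflects mbuf allocation failures; imissed now also
	 * folds in the BRB drop/truncate counters added above. */
	rte_eth_stats_get(port_id, &stats);
	printf("port %u: imissed=%" PRIu64 " rx_nombuf=%" PRIu64 "\n",
	       port_id, stats.imissed, stats.rx_nombuf);
}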
diff --git a/drivers/net/bnx2x/bnx2x_rxtx.c b/drivers/net/bnx2x/bnx2x_rxtx.c
index 752a5e8..60bd08b 100644
--- a/drivers/net/bnx2x/bnx2x_rxtx.c
+++ b/drivers/net/bnx2x/bnx2x_rxtx.c
@@ -222,40 +222,37 @@ bnx2x_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
struct bnx2x_tx_queue *txq;
struct bnx2x_softc *sc;
struct bnx2x_fastpath *fp;
- uint32_t burst, nb_tx;
- struct rte_mbuf **m = tx_pkts;
- int ret;
+ uint32_t ret;
+ uint16_t nb_tx_pkts;
+ uint16_t nb_pkt_sent = 0;
txq = p_txq;
sc = txq->sc;
fp = &sc->fp[txq->queue_id];
- nb_tx = nb_pkts;
+ if ((unlikely((txq->nb_tx_desc - txq->nb_tx_avail) >
+ txq->tx_free_thresh)))
+ bnx2x_txeof(sc, fp);
- do {
- burst = RTE_MIN(nb_pkts, RTE_PMD_BNX2X_TX_MAX_BURST);
+ nb_tx_pkts = RTE_MIN(nb_pkts, txq->nb_tx_avail / BDS_PER_TX_PKT);
- ret = bnx2x_tx_encap(txq, m, burst);
- if (unlikely(ret)) {
- PMD_TX_LOG(ERR, "tx_encap failed!");
- }
-
- bnx2x_update_fp_sb_idx(fp);
-
- if ((txq->nb_tx_desc - txq->nb_tx_avail) > txq->tx_free_thresh) {
- bnx2x_txeof(sc, fp);
- }
-
- if (unlikely(ret == -ENOMEM)) {
- break;
- }
+ if (unlikely(nb_tx_pkts == 0))
+ return 0;
- m += burst;
- nb_pkts -= burst;
+ while (nb_tx_pkts--) {
+ struct rte_mbuf *m = *tx_pkts++;
+ assert(m != NULL);
+ ret = bnx2x_tx_encap(txq, m);
+ fp->tx_db.data.prod += ret;
+ nb_pkt_sent++;
+ }
- } while (nb_pkts);
+ bnx2x_update_fp_sb_idx(fp);
+ mb();
+ DOORBELL(sc, txq->queue_id, fp->tx_db.raw);
+ mb();
- return nb_tx - nb_pkts;
+ return nb_pkt_sent;
}
int
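
With this change bnx2x_xmit_pkts() queues at most
nb_tx_avail / BDS_PER_TX_PKT packets and returns how many it actually
sent, so callers should retry from the unsent tail. A minimal usage
sketch (standard rte_eth_tx_burst() pattern; port_id, queue_id, pkts[]
and nb_pkts are assumed set up by the caller, and the busy retry loop
is illustrative only):

uint16_t sent = 0;

while (sent < nb_pkts)
	sent += rte_eth_tx_burst(port_id, queue_id, &pkts[sent],
				 nb_pkts - sent);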
@@ -408,6 +405,8 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
new_mb = bnx2x_rxmbuf_alloc(rxq->mb_pool);
if (unlikely(!new_mb)) {
PMD_RX_LOG(ERR, "mbuf alloc fail fp[%02d]", fp->index);
+ rte_eth_devices[rxq->port_id].data->
+ rx_mbuf_alloc_failed++;
goto next_rx;
}
--
1.7.10.3