From: Shahed Shaikh <shshaikh@marvell.com>
To: <dev@dpdk.org>
Cc: <rmody@marvell.com>, <jerinj@marvell.com>,
<GR-Everest-DPDK-Dev@marvell.com>
Subject: [dpdk-dev] [PATCH 2/2] net/qede: enhance transmit data path CPU utilization
Date: Thu, 2 Jan 2020 09:59:03 -0800
Message-ID: <20200102175903.9556-2-shshaikh@marvell.com>
In-Reply-To: <20200102175903.9556-1-shshaikh@marvell.com>
Use a lightweight transmit handler which handles the non-offloaded Tx data path.
This improves CPU utilization by ~8%.
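
For reference, a minimal sketch (not part of this patch; the port id, queue
counts and helper name are illustrative) of an application-side configuration
that leaves TSO, VLAN insertion and outer IPv4 checksum offloads disabled, so
the lightweight handler is the one assigned:

    #include <rte_ethdev.h>

    /* Illustrative: configure a port with 1 Rx/1 Tx queue and no
     * TSO / VLAN-insert / outer-IPv4-csum Tx offloads, so the PMD
     * assigns qede_xmit_pkts_regular().
     */
    static int
    configure_non_offload_tx(uint16_t port_id)
    {
        struct rte_eth_conf port_conf = { 0 };

        /* Plain L3/L4 checksum offloads still use the lightweight path */
        port_conf.txmode.offloads = DEV_TX_OFFLOAD_IPV4_CKSUM |
                                    DEV_TX_OFFLOAD_TCP_CKSUM;

        return rte_eth_dev_configure(port_id, 1, 1, &port_conf);
    }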
Signed-off-by: Shahed Shaikh <shshaikh@marvell.com>
---
drivers/net/qede/qede_ethdev.c | 15 +++-
drivers/net/qede/qede_rxtx.c | 125 +++++++++++++++++++++++++++++++++
drivers/net/qede/qede_rxtx.h | 2 +
3 files changed, 141 insertions(+), 1 deletion(-)
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 47e90096a..055f046e2 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -270,8 +270,10 @@ qede_interrupt_handler(void *param)
static void
qede_assign_rxtx_handlers(struct rte_eth_dev *dev)
{
+ uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
struct qede_dev *qdev = dev->data->dev_private;
struct ecore_dev *edev = &qdev->edev;
+ bool use_tx_offload = false;
if (ECORE_IS_CMT(edev)) {
dev->rx_pkt_burst = qede_recv_pkts_cmt;
@@ -287,7 +289,18 @@ qede_assign_rxtx_handlers(struct rte_eth_dev *dev)
dev->rx_pkt_burst = qede_recv_pkts_regular;
}
- dev->tx_pkt_burst = qede_xmit_pkts;
+ use_tx_offload = !!(tx_offloads &
+ (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | /* tunnel */
+ DEV_TX_OFFLOAD_TCP_TSO | /* tso */
+ DEV_TX_OFFLOAD_VLAN_INSERT)); /* vlan insert */
+
+ if (use_tx_offload) {
+ DP_INFO(edev, "Assigning qede_xmit_pkts\n");
+ dev->tx_pkt_burst = qede_xmit_pkts;
+ } else {
+ DP_INFO(edev, "Assigning qede_xmit_pkts_regular\n");
+ dev->tx_pkt_burst = qede_xmit_pkts_regular;
+ }
}
static void
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 3b486a0a4..985e49f1c 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -2234,6 +2234,131 @@ qede_mpls_tunn_tx_sanity_check(struct rte_mbuf *mbuf,
}
#endif
+uint16_t
+qede_xmit_pkts_regular(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct qede_tx_queue *txq = p_txq;
+ struct qede_dev *qdev = txq->qdev;
+ struct ecore_dev *edev = &qdev->edev;
+ struct eth_tx_1st_bd *bd1;
+ struct eth_tx_2nd_bd *bd2;
+ struct eth_tx_3rd_bd *bd3;
+ struct rte_mbuf *m_seg = NULL;
+ struct rte_mbuf *mbuf;
+ struct qede_tx_entry *sw_tx_ring;
+ uint16_t nb_tx_pkts;
+ uint16_t bd_prod;
+ uint16_t idx;
+ uint16_t nb_frags = 0;
+ uint16_t nb_pkt_sent = 0;
+ uint8_t nbds;
+ uint64_t tx_ol_flags;
+ /* BD1 */
+ uint16_t bd1_bf;
+ uint8_t bd1_bd_flags_bf;
+
+ if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
+ PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
+ nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
+ qede_process_tx_compl(edev, txq);
+ }
+
+ nb_tx_pkts = nb_pkts;
+ bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
+ sw_tx_ring = txq->sw_tx_ring;
+
+ while (nb_tx_pkts--) {
+ /* Init flags/values */
+ nbds = 0;
+ bd1 = NULL;
+ bd2 = NULL;
+ bd3 = NULL;
+ bd1_bf = 0;
+ bd1_bd_flags_bf = 0;
+ nb_frags = 0;
+
+ mbuf = *tx_pkts++;
+ assert(mbuf);
+
+
+ /* Check minimum TX BDS availability against available BDs */
+ if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
+ break;
+
+ tx_ol_flags = mbuf->ol_flags;
+ bd1_bd_flags_bf |= 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
+
+ if (unlikely(txq->nb_tx_avail <
+ ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
+ break;
+ bd1_bf |=
+ (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
+ << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
+
+ /* Offload the IP checksum in the hardware */
+ if (tx_ol_flags & PKT_TX_IP_CKSUM)
+ bd1_bd_flags_bf |=
+ 1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
+
+ /* L4 checksum offload (tcp or udp) */
+ if ((tx_ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) &&
+ (tx_ol_flags & (PKT_TX_UDP_CKSUM | PKT_TX_TCP_CKSUM)))
+ bd1_bd_flags_bf |=
+ 1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
+
+ /* Fill the entry in the SW ring and the BDs in the FW ring */
+ idx = TX_PROD(txq);
+ sw_tx_ring[idx].mbuf = mbuf;
+
+ /* BD1 */
+ bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
+ memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
+ nbds++;
+
+ /* Map MBUF linear data for DMA and set in the BD1 */
+ QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
+ mbuf->data_len);
+ bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
+ bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
+
+ /* Handle fragmented MBUF */
+ if (unlikely(mbuf->nb_segs > 1)) {
+ m_seg = mbuf->next;
+
+ /* Encode scatter gather buffer descriptors */
+ nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3,
+ nbds - 1);
+ }
+
+ bd1->data.nbds = nbds + nb_frags;
+
+ txq->nb_tx_avail -= bd1->data.nbds;
+ txq->sw_tx_prod++;
+ bd_prod =
+ rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
+#ifdef RTE_LIBRTE_QEDE_DEBUG_TX
+ print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
+#endif
+ nb_pkt_sent++;
+ txq->xmit_pkts++;
+ }
+
+ /* Write value of prod idx into bd_prod */
+ txq->tx_db.data.bd_prod = bd_prod;
+ rte_wmb();
+ rte_compiler_barrier();
+ DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
+ rte_wmb();
+
+ /* Check again for Tx completions */
+ qede_process_tx_compl(edev, txq);
+
+ PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
+ nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
+
+ return nb_pkt_sent;
+}
+
uint16_t
qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index a4c634e88..d7ff870b2 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -275,6 +275,8 @@ uint16_t qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
uint16_t qede_xmit_pkts_cmt(void *p_txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t qede_xmit_pkts_regular(void *p_txq, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
uint16_t qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
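
For context, the assigned handler is reached through the usual burst API; a
hedged sketch (port/queue ids and the packet source are illustrative) of the
application-side call that ends up in qede_xmit_pkts_regular() when no heavy
Tx offloads are configured:

    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    /* Illustrative: transmit a burst on port 0, Tx queue 0 and free
     * whatever the driver could not accept on this call.
     */
    static void
    send_burst(struct rte_mbuf **pkts, uint16_t n)
    {
        uint16_t sent = rte_eth_tx_burst(0, 0, pkts, n);

        while (sent < n)
            rte_pktmbuf_free(pkts[sent++]);
    }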
--
2.17.1