DPDK patches and discussions
 help / color / mirror / Atom feed
From: Bruce Richardson <bruce.richardson@intel.com>
To: dev@dpdk.org
Cc: Bruce Richardson <bruce.richardson@intel.com>,
	Anatoly Burakov <anatoly.burakov@intel.com>,
	Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Subject: [RFC PATCH 25/27] net/ixgbe: use separate array for desc status tracking
Date: Fri, 19 Dec 2025 17:25:42 +0000	[thread overview]
Message-ID: <20251219172548.2660777-26-bruce.richardson@intel.com> (raw)
In-Reply-To: <20251219172548.2660777-1-bruce.richardson@intel.com>

Due to significant differences in the ixgbe transmit descriptors, the
ixgbe driver does not use the common scalar Tx functionality. Update the
driver directly so its use of the rs_last_id array matches that of the
common Tx code.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/intel/ixgbe/ixgbe_rxtx.c | 86 +++++++++++++++-------------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
index a7583c178a..3eeec220fd 100644
--- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c
@@ -43,6 +43,7 @@
 #include <rte_ip.h>
 #include <rte_net.h>
 #include <rte_vect.h>
+#include <rte_bitops.h>
 
 #include "ixgbe_logs.h"
 #include "base/ixgbe_api.h"
@@ -571,57 +572,35 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
 static inline int
 ixgbe_xmit_cleanup(struct ci_tx_queue *txq)
 {
-	struct ci_tx_entry *sw_ring = txq->sw_ring;
 	volatile union ixgbe_adv_tx_desc *txr = txq->ixgbe_tx_ring;
-	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
-	uint16_t nb_tx_desc = txq->nb_tx_desc;
-	uint16_t desc_to_clean_to;
-	uint16_t nb_tx_to_clean;
-	uint32_t status;
+	const uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+	const uint16_t nb_tx_desc = txq->nb_tx_desc;
 
-	/* Determine the last descriptor needing to be cleaned */
-	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
-	if (desc_to_clean_to >= nb_tx_desc)
-		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+	const uint16_t rs_idx = (last_desc_cleaned == nb_tx_desc - 1) ?
+			0 :
+			(last_desc_cleaned + 1) >> txq->log2_rs_thresh;
+	uint16_t desc_to_clean_to = (rs_idx << txq->log2_rs_thresh) + (txq->tx_rs_thresh - 1);
 
-	/* Check to make sure the last descriptor to clean is done */
-	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
-	status = txr[desc_to_clean_to].wb.status;
+	uint32_t status = txr[txq->rs_last_id[rs_idx]].wb.status;
 	if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
 		PMD_TX_LOG(DEBUG,
 			   "TX descriptor %4u is not done"
 			   "(port=%d queue=%d)",
-			   desc_to_clean_to,
+			   txq->rs_last_id[rs_idx],
 			   txq->port_id, txq->queue_id);
 		/* Failed to clean any descriptors, better luck next time */
 		return -(1);
 	}
 
-	/* Figure out how many descriptors will be cleaned */
-	if (last_desc_cleaned > desc_to_clean_to)
-		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
-							desc_to_clean_to);
-	else
-		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
-						last_desc_cleaned);
-
 	PMD_TX_LOG(DEBUG,
 		   "Cleaning %4u TX descriptors: %4u to %4u "
 		   "(port=%d queue=%d)",
-		   nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
+		   txq->tx_rs_thresh, last_desc_cleaned, desc_to_clean_to,
 		   txq->port_id, txq->queue_id);
 
-	/*
-	 * The last descriptor to clean is done, so that means all the
-	 * descriptors from the last descriptor that was cleaned
-	 * up to the last descriptor with the RS bit set
-	 * are done. Only reset the threshold descriptor.
-	 */
-	txr[desc_to_clean_to].wb.status = 0;
-
 	/* Update the txq to reflect the last descriptor that was cleaned */
 	txq->last_desc_cleaned = desc_to_clean_to;
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
 
 	/* No Error */
 	return 0;
@@ -749,6 +728,9 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		if (tx_last >= txq->nb_tx_desc)
 			tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
 
+		/* Track the RS threshold bucket at packet start */
+		uint16_t pkt_rs_idx = (uint16_t)(tx_id >> txq->log2_rs_thresh);
+
 		PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
 			   " tx_first=%u tx_last=%u",
 			   (unsigned) txq->port_id,
@@ -876,7 +858,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 					tx_offload,
 					rte_security_dynfield(tx_pkt));
 
-				txe->last_id = tx_last;
 				tx_id = txe->next_id;
 				txe = txn;
 			}
@@ -922,7 +903,6 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 				rte_cpu_to_le_32(cmd_type_len | slen);
 			txd->read.olinfo_status =
 				rte_cpu_to_le_32(olinfo_status);
-			txe->last_id = tx_last;
 			tx_id = txe->next_id;
 			txe = txn;
 			m_seg = m_seg->next;
@@ -935,8 +915,18 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
 		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
 
-		/* Set RS bit only on threshold packets' last descriptor */
-		if (txq->nb_tx_used >= txq->tx_rs_thresh) {
+		/*
+		 * Check if packet crosses into a new RS threshold bucket.
+		 * The RS bit is set on the last descriptor when we move from one bucket to another.
+		 * For example, with tx_rs_thresh=32 and a 5-descriptor packet using slots 30-34:
+		 *   - pkt_rs_idx = 30 >> 5 = 0 (started in bucket 0)
+		 *   - tx_last = 34, so 35 >> 5 = 1 (next packet is in bucket 1)
+		 *   - Since 0 != 1, set RS bit on descriptor 34, and record rs_last_id[0] = 34
+		 */
+		uint16_t next_rs_idx = ((tx_last + 1) >> txq->log2_rs_thresh);
+
+		if (next_rs_idx != pkt_rs_idx) {
+			/* Packet crossed into a new bucket - set RS bit on last descriptor */
 			PMD_TX_LOG(DEBUG,
 				   "Setting RS bit on TXD id="
 				   "%4u (port=%d queue=%d)",
@@ -944,9 +934,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 			cmd_type_len |= IXGBE_TXD_CMD_RS;
 
-			/* Update txq RS bit counters */
-			txq->nb_tx_used = 0;
-			txp = NULL;
+			/* Record the last descriptor ID for the bucket we're leaving */
+			txq->rs_last_id[pkt_rs_idx] = tx_last;
 		} else
 			txp = txd;
 
@@ -2521,6 +2510,7 @@ ixgbe_tx_queue_release(struct ci_tx_queue *txq)
 	if (txq != NULL && txq->ops != NULL) {
 		ci_txq_release_all_mbufs(txq, false);
 		txq->ops->free_swring(txq);
+		rte_free(txq->rs_last_id);
 		rte_memzone_free(txq->mz);
 		rte_free(txq);
 	}
@@ -2825,6 +2815,13 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 			     (int)dev->data->port_id, (int)queue_idx);
 		return -(EINVAL);
 	}
+	if (!rte_is_power_of_2(tx_rs_thresh)) {
+		PMD_INIT_LOG(ERR, "tx_rs_thresh must be a power of 2. (tx_rs_thresh=%u port=%d queue=%d)",
+			     (unsigned int)tx_rs_thresh,
+			     (int)dev->data->port_id,
+			     (int)queue_idx);
+		return -(EINVAL);
+	}
 
 	/*
 	 * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
@@ -2870,6 +2867,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	txq->mz = tz;
 	txq->nb_tx_desc = nb_desc;
 	txq->tx_rs_thresh = tx_rs_thresh;
+	txq->log2_rs_thresh = rte_log2_u32(tx_rs_thresh);
 	txq->tx_free_thresh = tx_free_thresh;
 	txq->pthresh = tx_conf->tx_thresh.pthresh;
 	txq->hthresh = tx_conf->tx_thresh.hthresh;
@@ -2911,6 +2909,16 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
 		     txq->sw_ring, txq->ixgbe_tx_ring, txq->tx_ring_dma);
 
+	/* Allocate RS last_id tracking array */
+	uint16_t num_rs_buckets = nb_desc / tx_rs_thresh;
+	txq->rs_last_id = rte_zmalloc_socket(NULL, sizeof(txq->rs_last_id[0]) * num_rs_buckets,
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq->rs_last_id == NULL) {
+		ixgbe_tx_queue_release(txq);
+		PMD_DRV_LOG(ERR, "Failed to allocate memory for RS last_id array");
+		return -ENOMEM;
+	}
+
 	/* set up vector or scalar TX function as appropriate */
 	ixgbe_set_tx_function(dev, txq);
 
-- 
2.51.0


  parent reply	other threads:[~2025-12-19 17:28 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-19 17:25 [RFC PATCH 00/27] combine multiple Intel scalar Tx paths Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 01/27] net/intel: create common Tx descriptor structure Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 02/27] net/intel: use common tx ring structure Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 03/27] net/intel: create common post-Tx cleanup function Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 04/27] net/intel: consolidate definitions for Tx desc fields Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 05/27] net/intel: create separate header for Tx scalar fns Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 06/27] net/intel: add common fn to calculate needed descriptors Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 07/27] net/ice: refactor context descriptor handling Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 08/27] net/i40e: " Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 09/27] net/idpf: " Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 10/27] net/intel: consolidate checksum mask definition Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 11/27] net/intel: create common checksum Tx offload function Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 12/27] net/intel: create a common scalar Tx function Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 13/27] net/i40e: use " Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 14/27] net/intel: add IPSec hooks to common " Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 15/27] net/intel: support configurable VLAN tag insertion on Tx Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 16/27] net/iavf: use common scalar Tx function Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 17/27] net/i40e: document requirement for QinQ support Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 18/27] net/idpf: use common scalar Tx function Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 19/27] net/intel: avoid writing the final pkt descriptor twice Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 20/27] net/intel: write descriptors using non-volatile pointers Bruce Richardson
2025-12-20  8:43   ` Morten Brørup
2025-12-19 17:25 ` [RFC PATCH 21/27] net/intel: remove unnecessary flag clearing Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 22/27] net/intel: mark mid-burst ring cleanup as unlikely Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 23/27] net/intel: add special handling for single desc packets Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 24/27] net/intel: use separate array for desc status tracking Bruce Richardson
2025-12-19 17:25 ` Bruce Richardson [this message]
2025-12-19 17:25 ` [RFC PATCH 26/27] net/intel: drop unused Tx queue used count Bruce Richardson
2025-12-19 17:25 ` [RFC PATCH 27/27] net/intel: remove index for tracking end of packet Bruce Richardson
2025-12-20  9:05   ` Morten Brørup

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251219172548.2660777-26-bruce.richardson@intel.com \
    --to=bruce.richardson@intel.com \
    --cc=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    --cc=vladimir.medvedkin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).