DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH v1] net/ice: improve performance of RX timestamp offload
@ 2022-02-22  5:16 Wenjun Wu
  2022-02-22  5:50 ` [PATCH v2] " Wenjun Wu
  0 siblings, 1 reply; 12+ messages in thread
From: Wenjun Wu @ 2022-02-22  5:16 UTC (permalink / raw)
  To: dev, qiming.yang, qi.z.zhang; +Cc: harry.van.haaren, simei.su, Wenjun Wu

Previously, every time a burst of packets was received, SW read the HW
register and combined it with the timestamp from the descriptor to
get the complete 64-bit timestamp.

This patch optimizes the algorithm. SW now only needs to check the
monotonicity of the low 32 bits of the timestamp to detect wrap-around.
Before receiving each burst of packets, SW checks the time elapsed
between the current time and the last update time, so that the low
32 bits of the timestamp cannot wrap around twice unnoticed.

Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
---
 drivers/net/ice/ice_ethdev.h |   3 +
 drivers/net/ice/ice_rxtx.c   | 126 ++++++++++++++++++++++++++---------
 2 files changed, 98 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ed580d438..6778941d7d 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -554,6 +554,9 @@ struct ice_adapter {
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
 	uint64_t time_hw;
+	uint32_t hw_time_high; /* high 32 bits of timestamp */
+	uint32_t hw_time_low; /* low 32 bits of timestamp */
+	uint64_t hw_time_update; /* SW time of HW record updating */
 	struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS];
 	struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
 	/* True if DCF state of the associated PF is on */
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 4f218bcd0d..33dd2195e1 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1576,7 +1576,6 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1588,8 +1587,15 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
 
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
@@ -1625,14 +1631,25 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 			if (ice_timestamp_dynflag > 0) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-					rxq->hw_register_set,
-					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				rxq->hw_register_set = 0;
+				rxq->time_high =
+				rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high);
 				*RTE_MBUF_DYNFIELD(mb,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				mb->ol_flags |= ice_timestamp_dynflag;
+						   ice_timestamp_dynfield_offset,
+						   uint32_t *) = rxq->time_high;
+				if (rxq->time_high > ad->hw_time_low)
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+				else {
+					ad->hw_time_high += 1;
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+					ad->hw_time_update =
+						rte_get_timer_cycles() /
+						(rte_get_timer_hz() / 1000);
+				}
+				pkt_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1657,6 +1674,11 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			break;
 	}
 
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rxq->rx_stage[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+
 	/* Clear software ring entries */
 	for (i = 0; i < nb_rx; i++)
 		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
@@ -1833,12 +1855,18 @@ ice_recv_scattered_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1951,14 +1979,25 @@ ice_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(first_seg,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			first_seg->ol_flags |= ice_timestamp_dynflag;
+			rxq->time_high =
+			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
+			*RTE_MBUF_DYNFIELD(rxm,
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low)
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -1977,6 +2016,11 @@ ice_recv_scattered_pkts(void *rx_queue,
 		first_seg = NULL;
 	}
 
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+
 	/* Record index of the next RX descriptor to probe. */
 	rxq->rx_tail = rx_id;
 	rxq->pkt_first_seg = first_seg;
@@ -2327,13 +2371,18 @@ ice_recv_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
 
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2386,14 +2435,24 @@ ice_recv_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
+			rxq->time_high = rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
 			*RTE_MBUF_DYNFIELD(rxm,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= ice_timestamp_dynflag;
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low)
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
@@ -2408,6 +2467,11 @@ ice_recv_pkts(void *rx_queue,
 		/* copy old mbuf to rx_pkts */
 		rx_pkts[nb_rx++] = rxm;
 	}
+
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
 	rxq->rx_tail = rx_id;
 	/**
 	 * If the number of free RX descriptors is greater than the RX free
-- 
2.25.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-03-24 13:57 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-22  5:16 [PATCH v1] net/ice: improve performance of RX timestamp offload Wenjun Wu
2022-02-22  5:50 ` [PATCH v2] " Wenjun Wu
2022-02-22  6:26   ` [PATCH v3] " Wenjun Wu
2022-02-28  7:36     ` [PATCH v4] " Wenjun Wu
2022-03-01 11:07       ` Zhang, Qi Z
2022-03-24  9:09       ` Zhang, Qi Z
2022-03-24 11:16         ` Kevin Traynor
2022-03-24 11:51           ` Zhang, Qi Z
2022-03-24 12:17             ` Kevin Traynor
2022-03-24 13:05               ` Zhang, Qi Z
2022-03-24 13:44                 ` Kevin Traynor
2022-03-24 13:57                   ` Zhang, Qi Z

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).