DPDK patches and discussions
* [PATCH v1] net/ice: improve performance of RX timestamp offload
@ 2022-02-22  5:16 Wenjun Wu
  2022-02-22  5:50 ` [PATCH v2] " Wenjun Wu
  0 siblings, 1 reply; 12+ messages in thread
From: Wenjun Wu @ 2022-02-22  5:16 UTC (permalink / raw)
  To: dev, qiming.yang, qi.z.zhang; +Cc: harry.van.haaren, simei.su, Wenjun Wu

Previously, each time a burst of packets was received, SW read the HW
register and combined it with the timestamp from the descriptor to
obtain the complete 64-bit timestamp.

This patch optimizes the algorithm. SW now only needs to check the
monotonicity of the low 32-bit timestamp to detect wrap-around.
Before each burst of packets is received, SW checks the time elapsed
since the last update to ensure the low 32-bit timestamp cannot have
wrapped around twice in between.
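
For illustration, here is a minimal, hedged sketch of the idea (the struct
and function names are hypothetical, not the driver code in the patch below):
the adapter caches the high 32 bits read from the register and the last low
32 bits seen in a descriptor; when a descriptor's low 32 bits go backwards,
the cached high half is incremented instead of re-reading the register.

/*
 * Illustrative sketch only; names are placeholders and the logic is
 * simplified relative to the patch below.
 */
#include <stdint.h>

struct ts_cache {
	uint32_t hw_time_high;   /* cached high 32 bits (from HW register) */
	uint32_t hw_time_low;    /* last low 32 bits seen in a descriptor */
	uint64_t hw_time_update; /* SW time (ms) of the last update */
};

/* Rebuild a 64-bit timestamp from the 32-bit value in an RX descriptor. */
static uint64_t
ts_cache_reassemble(struct ts_cache *c, uint32_t desc_low, uint64_t now_ms)
{
	/* The low half went backwards: it wrapped, so carry into the high half. */
	if (desc_low < c->hw_time_low)
		c->hw_time_high++;

	c->hw_time_low = desc_low;
	c->hw_time_update = now_ms;

	return ((uint64_t)c->hw_time_high << 32) | desc_low;
}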

Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
---
 drivers/net/ice/ice_ethdev.h |   3 +
 drivers/net/ice/ice_rxtx.c   | 126 ++++++++++++++++++++++++++---------
 2 files changed, 98 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ed580d438..6778941d7d 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -554,6 +554,9 @@ struct ice_adapter {
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
 	uint64_t time_hw;
+	uint32_t hw_time_high; /* high 32 bits of timestamp */
+	uint32_t hw_time_low; /* low 32 bits of timestamp */
+	uint64_t hw_time_update; /* SW time of HW record updating */
 	struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS];
 	struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
 	/* True if DCF state of the associated PF is on */
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 4f218bcd0d..33dd2195e1 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1576,7 +1576,6 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1588,8 +1587,15 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
 
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
@@ -1625,14 +1631,25 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 			if (ice_timestamp_dynflag > 0) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-					rxq->hw_register_set,
-					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				rxq->hw_register_set = 0;
+				rxq->time_high =
+				rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high);
 				*RTE_MBUF_DYNFIELD(mb,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				mb->ol_flags |= ice_timestamp_dynflag;
+						   ice_timestamp_dynfield_offset,
+						   uint32_t *) = rxq->time_high;
+				if (rxq->time_high > ad->hw_time_low)
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+				else {
+					ad->hw_time_high += 1;
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+					ad->hw_time_update =
+						rte_get_timer_cycles() /
+						(rte_get_timer_hz() / 1000);
+				}
+				pkt_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1657,6 +1674,11 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			break;
 	}
 
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rxq->rx_stage[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+
 	/* Clear software ring entries */
 	for (i = 0; i < nb_rx; i++)
 		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
@@ -1833,12 +1855,18 @@ ice_recv_scattered_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1951,14 +1979,25 @@ ice_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(first_seg,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			first_seg->ol_flags |= ice_timestamp_dynflag;
+			rxq->time_high =
+			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
+			*RTE_MBUF_DYNFIELD(rxm,
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low)
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -1977,6 +2016,11 @@ ice_recv_scattered_pkts(void *rx_queue,
 		first_seg = NULL;
 	}
 
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+
 	/* Record index of the next RX descriptor to probe. */
 	rxq->rx_tail = rx_id;
 	rxq->pkt_first_seg = first_seg;
@@ -2327,13 +2371,18 @@ ice_recv_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
 
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2386,14 +2435,24 @@ ice_recv_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
+			rxq->time_high = rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
 			*RTE_MBUF_DYNFIELD(rxm,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= ice_timestamp_dynflag;
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low)
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
@@ -2408,6 +2467,11 @@ ice_recv_pkts(void *rx_queue,
 		/* copy old mbuf to rx_pkts */
 		rx_pkts[nb_rx++] = rxm;
 	}
+
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
 	rxq->rx_tail = rx_id;
 	/**
 	 * If the number of free RX descriptors is greater than the RX free
-- 
2.25.1



* [PATCH v2] net/ice: improve performance of RX timestamp offload
  2022-02-22  5:16 [PATCH v1] net/ice: improve performance of RX timestamp offload Wenjun Wu
@ 2022-02-22  5:50 ` Wenjun Wu
  2022-02-22  6:26   ` [PATCH v3] " Wenjun Wu
  0 siblings, 1 reply; 12+ messages in thread
From: Wenjun Wu @ 2022-02-22  5:50 UTC (permalink / raw)
  To: dev, qiming.yang, qi.z.zhang; +Cc: harry.van.haaren, simei.su, Wenjun Wu

Previously, each time a burst of packets was received, SW read the HW
register and combined it with the timestamp from the descriptor to
obtain the complete 64-bit timestamp.

This patch optimizes the algorithm. SW now only needs to check the
monotonicity of the low 32-bit timestamp to detect wrap-around.
Before each burst of packets is received, SW checks the time elapsed
since the last update to ensure the low 32-bit timestamp cannot have
wrapped around twice in between.

Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>

---
v2: add conditional compilation
---
 drivers/net/ice/ice_ethdev.h |   3 +
 drivers/net/ice/ice_rxtx.c   | 133 ++++++++++++++++++++++++++---------
 2 files changed, 103 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ed580d438..6778941d7d 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -554,6 +554,9 @@ struct ice_adapter {
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
 	uint64_t time_hw;
+	uint32_t hw_time_high; /* high 32 bits of timestamp */
+	uint32_t hw_time_low; /* low 32 bits of timestamp */
+	uint64_t hw_time_update; /* SW time of HW record updating */
 	struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS];
 	struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
 	/* True if DCF state of the associated PF is on */
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 4f218bcd0d..6bb15ee825 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1576,7 +1576,6 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1588,8 +1587,17 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
+#endif
 
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
@@ -1625,14 +1633,25 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 			if (ice_timestamp_dynflag > 0) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-					rxq->hw_register_set,
-					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				rxq->hw_register_set = 0;
+				rxq->time_high =
+				rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high);
 				*RTE_MBUF_DYNFIELD(mb,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				mb->ol_flags |= ice_timestamp_dynflag;
+						   ice_timestamp_dynfield_offset,
+						   uint32_t *) = rxq->time_high;
+				if (rxq->time_high > ad->hw_time_low)
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+				else {
+					ad->hw_time_high += 1;
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+					ad->hw_time_update =
+						rte_get_timer_cycles() /
+						(rte_get_timer_hz() / 1000);
+				}
+				pkt_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1657,6 +1676,11 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			break;
 	}
 
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rxq->rx_stage[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+
 	/* Clear software ring entries */
 	for (i = 0; i < nb_rx; i++)
 		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
@@ -1833,12 +1857,18 @@ ice_recv_scattered_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
-#endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
+#endif
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1951,14 +1981,25 @@ ice_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(first_seg,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			first_seg->ol_flags |= ice_timestamp_dynflag;
+			rxq->time_high =
+			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
+			*RTE_MBUF_DYNFIELD(rxm,
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low)
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -1977,6 +2018,11 @@ ice_recv_scattered_pkts(void *rx_queue,
 		first_seg = NULL;
 	}
 
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+
 	/* Record index of the next RX descriptor to probe. */
 	rxq->rx_tail = rx_id;
 	rxq->pkt_first_seg = first_seg;
@@ -2327,12 +2373,18 @@ ice_recv_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
-#endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
+#endif
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -2386,14 +2438,24 @@ ice_recv_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
+			rxq->time_high = rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
 			*RTE_MBUF_DYNFIELD(rxm,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= ice_timestamp_dynflag;
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low)
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
@@ -2408,6 +2470,11 @@ ice_recv_pkts(void *rx_queue,
 		/* copy old mbuf to rx_pkts */
 		rx_pkts[nb_rx++] = rxm;
 	}
+
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
 	rxq->rx_tail = rx_id;
 	/**
 	 * If the number of free RX descriptors is greater than the RX free
-- 
2.25.1



* [PATCH v3] net/ice: improve performance of RX timestamp offload
  2022-02-22  5:50 ` [PATCH v2] " Wenjun Wu
@ 2022-02-22  6:26   ` Wenjun Wu
  2022-02-28  7:36     ` [PATCH v4] " Wenjun Wu
  0 siblings, 1 reply; 12+ messages in thread
From: Wenjun Wu @ 2022-02-22  6:26 UTC (permalink / raw)
  To: dev, qiming.yang, qi.z.zhang; +Cc: harry.van.haaren, simei.su, Wenjun Wu

Previously, each time a burst of packets was received, SW read the HW
register and combined it with the timestamp from the descriptor to
obtain the complete 64-bit timestamp.

This patch optimizes the algorithm. SW now only needs to check the
monotonicity of the low 32-bit timestamp to detect wrap-around.
Before each burst of packets is received, SW checks the time elapsed
since the last update to ensure the low 32-bit timestamp cannot have
wrapped around twice in between.

Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>

---
v3: add missing conditional compilation
v2: add conditional compilation
---
 drivers/net/ice/ice_ethdev.h |   3 +
 drivers/net/ice/ice_rxtx.c   | 140 ++++++++++++++++++++++++++---------
 2 files changed, 110 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ed580d438..6778941d7d 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -554,6 +554,9 @@ struct ice_adapter {
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
 	uint64_t time_hw;
+	uint32_t hw_time_high; /* high 32 bits of timestamp */
+	uint32_t hw_time_low; /* low 32 bits of timestamp */
+	uint64_t hw_time_update; /* SW time of HW record updating */
 	struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS];
 	struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
 	/* True if DCF state of the associated PF is on */
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 4f218bcd0d..981347b618 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1576,7 +1576,6 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1588,8 +1587,17 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
+#endif
 
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
@@ -1625,14 +1633,25 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 			if (ice_timestamp_dynflag > 0) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-					rxq->hw_register_set,
-					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				rxq->hw_register_set = 0;
+				rxq->time_high =
+				rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high);
 				*RTE_MBUF_DYNFIELD(mb,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				mb->ol_flags |= ice_timestamp_dynflag;
+						   ice_timestamp_dynfield_offset,
+						   uint32_t *) = rxq->time_high;
+				if (rxq->time_high > ad->hw_time_low) {
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+				} else {
+					ad->hw_time_high += 1;
+					*RTE_MBUF_DYNFIELD(mb,
+						(ice_timestamp_dynfield_offset + 4),
+						uint32_t *) = ad->hw_time_high;
+					ad->hw_time_update =
+						rte_get_timer_cycles() /
+						(rte_get_timer_hz() / 1000);
+				}
+				pkt_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1657,6 +1676,13 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			break;
 	}
 
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rxq->rx_stage[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+#endif
+
 	/* Clear software ring entries */
 	for (i = 0; i < nb_rx; i++)
 		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
@@ -1833,12 +1859,18 @@ ice_recv_scattered_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
-#endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
+#endif
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1951,14 +1983,25 @@ ice_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(first_seg,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			first_seg->ol_flags |= ice_timestamp_dynflag;
+			rxq->time_high =
+			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
+			*RTE_MBUF_DYNFIELD(rxm,
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low) {
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			} else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -1977,6 +2020,13 @@ ice_recv_scattered_pkts(void *rx_queue,
 		first_seg = NULL;
 	}
 
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+#endif
+
 	/* Record index of the next RX descriptor to probe. */
 	rxq->rx_tail = rx_id;
 	rxq->pkt_first_seg = first_seg;
@@ -2327,12 +2377,18 @@ ice_recv_pkts(void *rx_queue,
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
-#endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (sw_cur_time - ad->hw_time_update > 4) {
+			ad->hw_time_high = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+			ad->hw_time_low = 0;
+			ad->hw_time_update = sw_cur_time;
+		}
+	}
+#endif
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -2386,14 +2442,24 @@ ice_recv_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
+			rxq->time_high = rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
 			*RTE_MBUF_DYNFIELD(rxm,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= ice_timestamp_dynflag;
+					   ice_timestamp_dynfield_offset,
+					   uint32_t *) = rxq->time_high;
+			if (rxq->time_high > ad->hw_time_low) {
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+			} else {
+				ad->hw_time_high += 1;
+				*RTE_MBUF_DYNFIELD(rxm,
+					(ice_timestamp_dynfield_offset + 4),
+					uint32_t *) = ad->hw_time_high;
+				ad->hw_time_update =
+					rte_get_timer_cycles() /
+					(rte_get_timer_hz() / 1000);
+			}
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
@@ -2408,6 +2474,14 @@ ice_recv_pkts(void *rx_queue,
 		/* copy old mbuf to rx_pkts */
 		rx_pkts[nb_rx++] = rxm;
 	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (nb_rx > 0 && rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		ad->hw_time_low = *RTE_MBUF_DYNFIELD(rx_pkts[nb_rx - 1],
+					ice_timestamp_dynfield_offset,
+					uint32_t *);
+#endif
+
 	rxq->rx_tail = rx_id;
 	/**
 	 * If the number of free RX descriptors is greater than the RX free
-- 
2.25.1



* [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-02-22  6:26   ` [PATCH v3] " Wenjun Wu
@ 2022-02-28  7:36     ` Wenjun Wu
  2022-03-01 11:07       ` Zhang, Qi Z
  2022-03-24  9:09       ` Zhang, Qi Z
  0 siblings, 2 replies; 12+ messages in thread
From: Wenjun Wu @ 2022-02-28  7:36 UTC (permalink / raw)
  To: dev, qi.z.zhang, qiming.yang; +Cc: harry.van.haaren, simei.su, Wenjun Wu

Previously, each time a burst of packets was received, SW read the HW
register and combined it with the timestamp from the descriptor to
obtain the complete 64-bit timestamp.

This patch optimizes the algorithm. SW now only needs to check the
monotonicity of the low 32-bit timestamp to detect wrap-around.
Before each burst of packets is received, SW checks the time elapsed
since the last update to ensure the low 32-bit timestamp cannot have
wrapped around twice in between.
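
As a rough, hedged sketch of the v4 flow (ts_state, ts_state_is_stale and
slow_convert_32b_64b are placeholder names, not the driver's API): before
each burst the SW clock is compared with the time of the last update; only
if the cached state is stale does the first timestamped descriptor take the
original slow path, and every later descriptor reuses the cached high half
with a wrap-around carry.

/*
 * Illustrative sketch only; slow_convert_32b_64b() is a stub standing in
 * for the driver's full register read (ice_tstamp_convert_32b_64b()).
 */
#include <stdbool.h>
#include <stdint.h>

struct ts_state {
	uint32_t hi;         /* cached high 32 bits */
	uint32_t lo;         /* last low 32 bits seen */
	uint64_t updated_ms; /* SW time (ms) of the last update */
};

static uint64_t
slow_convert_32b_64b(uint32_t desc_lo)
{
	/* In the driver this reads the HW time registers; stubbed here. */
	return (uint64_t)desc_lo;
}

/* Called once per burst: is the cached state too old to trust? */
static bool
ts_state_is_stale(const struct ts_state *s, uint64_t now_ms)
{
	return (now_ms - s->updated_ms) > 4;
}

/* Called per timestamped descriptor within the burst. */
static uint64_t
ts_state_update(struct ts_state *s, uint32_t desc_lo, bool *is_tsinit,
		uint64_t now_ms)
{
	uint64_t ts;

	if (*is_tsinit) {
		ts = slow_convert_32b_64b(desc_lo);	/* full re-sync */
		s->hi = (uint32_t)(ts >> 32);
		s->lo = (uint32_t)ts;
		*is_tsinit = false;
	} else {
		if (desc_lo < s->lo)			/* low half wrapped */
			s->hi++;
		ts = ((uint64_t)s->hi << 32) | desc_lo;
		s->lo = desc_lo;
	}
	s->updated_ms = now_ms;
	return ts;
}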

Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>

---
v4: rework initialization behavior
v3: add missing conditional compilation
v2: add conditional compilation
---
 drivers/net/ice/ice_ethdev.h |   3 +
 drivers/net/ice/ice_rxtx.c   | 118 +++++++++++++++++++++++++----------
 2 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ed580d438..6778941d7d 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -554,6 +554,9 @@ struct ice_adapter {
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
 	uint64_t time_hw;
+	uint32_t hw_time_high; /* high 32 bits of timestamp */
+	uint32_t hw_time_low; /* low 32 bits of timestamp */
+	uint64_t hw_time_update; /* SW time of HW record updating */
 	struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS];
 	struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
 	/* True if DCF state of the associated PF is on */
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 4f218bcd0d..4b0bcd4863 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1574,9 +1574,10 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	uint64_t pkt_flags = 0;
 	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	bool is_tsinit = false;
+	uint64_t ts_ns;
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
 	rxdp = &rxq->rx_ring[rxq->rx_tail];
@@ -1588,8 +1589,14 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (unlikely(sw_cur_time - ad->hw_time_update > 4))
+			is_tsinit = 1;
+	}
+#endif
 
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
@@ -1625,14 +1632,26 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 			if (ice_timestamp_dynflag > 0) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-					rxq->hw_register_set,
-					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				rxq->hw_register_set = 0;
+				rxq->time_high =
+				rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high);
+				if (unlikely(is_tsinit)) {
+					ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1,
+									   rxq->time_high);
+					ad->hw_time_low = (uint32_t)ts_ns;
+					ad->hw_time_high = (uint32_t)(ts_ns >> 32);
+					is_tsinit = false;
+				} else {
+					if (rxq->time_high < ad->hw_time_low)
+						ad->hw_time_high += 1;
+					ts_ns = (uint64_t)ad->hw_time_high << 32 | rxq->time_high;
+					ad->hw_time_low = rxq->time_high;
+				}
+				ad->hw_time_update = rte_get_timer_cycles() /
+						     (rte_get_timer_hz() / 1000);
 				*RTE_MBUF_DYNFIELD(mb,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				mb->ol_flags |= ice_timestamp_dynflag;
+						   ice_timestamp_dynfield_offset,
+						   rte_mbuf_timestamp_t *) = ts_ns;
+				pkt_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1831,14 +1850,19 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint64_t pkt_flags;
 	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	bool is_tsinit = false;
+	uint64_t ts_ns;
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
-#endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (unlikely(sw_cur_time - ad->hw_time_update > 4))
+			is_tsinit = true;
+	}
+#endif
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -1951,14 +1975,25 @@ ice_recv_scattered_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
-			*RTE_MBUF_DYNFIELD(first_seg,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			first_seg->ol_flags |= ice_timestamp_dynflag;
+			rxq->time_high =
+			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
+			if (unlikely(is_tsinit)) {
+				ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, rxq->time_high);
+				ad->hw_time_low = (uint32_t)ts_ns;
+				ad->hw_time_high = (uint32_t)(ts_ns >> 32);
+				is_tsinit = false;
+			} else {
+				if (rxq->time_high < ad->hw_time_low)
+					ad->hw_time_high += 1;
+				ts_ns = (uint64_t)ad->hw_time_high << 32 | rxq->time_high;
+				ad->hw_time_low = rxq->time_high;
+			}
+			ad->hw_time_update = rte_get_timer_cycles() /
+					     (rte_get_timer_hz() / 1000);
+			*RTE_MBUF_DYNFIELD(rxm,
+					   (ice_timestamp_dynfield_offset),
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -2325,14 +2360,19 @@ ice_recv_pkts(void *rx_queue,
 	uint64_t pkt_flags;
 	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	bool is_tsinit = false;
+	uint64_t ts_ns;
 	struct ice_vsi *vsi = rxq->vsi;
 	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
-	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
-#endif
 
-	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-		rxq->hw_register_set = 1;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (unlikely(sw_cur_time - ad->hw_time_update > 4))
+			is_tsinit = 1;
+	}
+#endif
 
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
@@ -2386,14 +2426,25 @@ ice_recv_pkts(void *rx_queue,
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
 		if (ice_timestamp_dynflag > 0) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
-				rxq->hw_register_set,
-				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			rxq->hw_register_set = 0;
+			rxq->time_high =
+			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
+			if (unlikely(is_tsinit)) {
+				ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, rxq->time_high);
+				ad->hw_time_low = (uint32_t)ts_ns;
+				ad->hw_time_high = (uint32_t)(ts_ns >> 32);
+				is_tsinit = false;
+			} else {
+				if (rxq->time_high < ad->hw_time_low)
+					ad->hw_time_high += 1;
+				ts_ns = (uint64_t)ad->hw_time_high << 32 | rxq->time_high;
+				ad->hw_time_low = rxq->time_high;
+			}
+			ad->hw_time_update = rte_get_timer_cycles() /
+					     (rte_get_timer_hz() / 1000);
 			*RTE_MBUF_DYNFIELD(rxm,
-				ice_timestamp_dynfield_offset,
-				rte_mbuf_timestamp_t *) = ts_ns;
-			rxm->ol_flags |= ice_timestamp_dynflag;
+					   (ice_timestamp_dynfield_offset),
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			pkt_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
@@ -2408,6 +2459,7 @@ ice_recv_pkts(void *rx_queue,
 		/* copy old mbuf to rx_pkts */
 		rx_pkts[nb_rx++] = rxm;
 	}
+
 	rxq->rx_tail = rx_id;
 	/**
 	 * If the number of free RX descriptors is greater than the RX free
-- 
2.25.1



* RE: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-02-28  7:36     ` [PATCH v4] " Wenjun Wu
@ 2022-03-01 11:07       ` Zhang, Qi Z
  2022-03-24  9:09       ` Zhang, Qi Z
  1 sibling, 0 replies; 12+ messages in thread
From: Zhang, Qi Z @ 2022-03-01 11:07 UTC (permalink / raw)
  To: Wu, Wenjun1, dev, Yang, Qiming; +Cc: Van Haaren, Harry, Su, Simei



> -----Original Message-----
> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> Sent: Monday, February 28, 2022 3:36 PM
> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
> <qiming.yang@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
> Subject: [PATCH v4] net/ice: improve performance of RX timestamp offload
> 
> Previously, each time a burst of packets is received, SW reads HW register
> and assembles it and the timestamp from descriptor together to get the
> complete 64 bits timestamp.
> 
> This patch optimizes the algorithm. The SW only needs to check the
> monotonicity of the low 32bits timestamp to avoid crossing borders.
> Each time before SW receives a burst of packets, it should check the time
> difference between current time and last update time to avoid the low 32
> bits timestamp cycling twice.

Overall, the patch looks good to me and we can cc stable for LTS,
but I'd like to defer this to the next release, as we are close to the release date and don't want to take the risk of merging complex changes at this moment.

Regards
Qi
> 
> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
> 
> ---
> v4: rework initialization behavior
> v3: add missing conditional compilation
> v2: add conditional compilation
> ---
>  drivers/net/ice/ice_ethdev.h |   3 +
>  drivers/net/ice/ice_rxtx.c   | 118 +++++++++++++++++++++++++----------
>  2 files changed, 88 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h index
> 3ed580d438..6778941d7d 100644
> --- a/drivers/net/ice/ice_ethdev.h
> +++ b/drivers/net/ice/ice_ethdev.h
> @@ -554,6 +554,9 @@ struct ice_adapter {
>  	struct rte_timecounter tx_tstamp_tc;
>  	bool ptp_ena;
>  	uint64_t time_hw;
> +	uint32_t hw_time_high; /* high 32 bits of timestamp */
> +	uint32_t hw_time_low; /* low 32 bits of timestamp */
> +	uint64_t hw_time_update; /* SW time of HW record updating */
>  	struct ice_fdir_prof_info fdir_prof_info[ICE_MAX_PTGS];
>  	struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
>  	/* True if DCF state of the associated PF is on */ diff --git
> a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index
> 4f218bcd0d..4b0bcd4863 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -1574,9 +1574,10 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>  	uint64_t pkt_flags = 0;
>  	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;  #ifndef
> RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +	bool is_tsinit = false;
> +	uint64_t ts_ns;
>  	struct ice_vsi *vsi = rxq->vsi;
>  	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
> -	uint64_t ts_ns;
>  	struct ice_adapter *ad = rxq->vsi->adapter;  #endif
>  	rxdp = &rxq->rx_ring[rxq->rx_tail];
> @@ -1588,8 +1589,14 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>  	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
>  		return 0;
> 
> -	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> -		rxq->hw_register_set = 1;
> +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> +		uint64_t sw_cur_time = rte_get_timer_cycles() /
> (rte_get_timer_hz() /
> +1000);
> +
> +		if (unlikely(sw_cur_time - ad->hw_time_update > 4))
> +			is_tsinit = 1;
> +	}
> +#endif
> 
>  	/**
>  	 * Scan LOOK_AHEAD descriptors at a time to determine which @@ -
> 1625,14 +1632,26 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>  			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
> #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
>  			if (ice_timestamp_dynflag > 0) {
> -				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> -					rxq->hw_register_set,
> -
> 	rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
> -				rxq->hw_register_set = 0;
> +				rxq->time_high =
> +				rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high);
> +				if (unlikely(is_tsinit)) {
> +					ts_ns =
> ice_tstamp_convert_32b_64b(hw, ad, 1,
> +									   rxq-
> >time_high);
> +					ad->hw_time_low = (uint32_t)ts_ns;
> +					ad->hw_time_high =
> (uint32_t)(ts_ns >> 32);
> +					is_tsinit = false;
> +				} else {
> +					if (rxq->time_high < ad-
> >hw_time_low)
> +						ad->hw_time_high += 1;
> +					ts_ns = (uint64_t)ad->hw_time_high
> << 32 | rxq->time_high;
> +					ad->hw_time_low = rxq->time_high;
> +				}
> +				ad->hw_time_update = rte_get_timer_cycles()
> /
> +						     (rte_get_timer_hz() /
> 1000);
>  				*RTE_MBUF_DYNFIELD(mb,
> -					ice_timestamp_dynfield_offset,
> -					rte_mbuf_timestamp_t *) = ts_ns;
> -				mb->ol_flags |= ice_timestamp_dynflag;
> +
> ice_timestamp_dynfield_offset,
> +						   rte_mbuf_timestamp_t *) =
> ts_ns;
> +				pkt_flags |= ice_timestamp_dynflag;
>  			}
> 
>  			if (ad->ptp_ena && ((mb->packet_type & @@ -
> 1831,14 +1850,19 @@ ice_recv_scattered_pkts(void *rx_queue,
>  	uint64_t pkt_flags;
>  	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;  #ifndef
> RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +	bool is_tsinit = false;
> +	uint64_t ts_ns;
>  	struct ice_vsi *vsi = rxq->vsi;
>  	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
> -	uint64_t ts_ns;
>  	struct ice_adapter *ad = rxq->vsi->adapter; -#endif
> 
> -	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> -		rxq->hw_register_set = 1;
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> +		uint64_t sw_cur_time = rte_get_timer_cycles() /
> (rte_get_timer_hz() /
> +1000);
> +
> +		if (unlikely(sw_cur_time - ad->hw_time_update > 4))
> +			is_tsinit = true;
> +	}
> +#endif
> 
>  	while (nb_rx < nb_pkts) {
>  		rxdp = &rx_ring[rx_id];
> @@ -1951,14 +1975,25 @@ ice_recv_scattered_pkts(void *rx_queue,
>  		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
>  		if (ice_timestamp_dynflag > 0) {
> -			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> -				rxq->hw_register_set,
> -				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
> -			rxq->hw_register_set = 0;
> -			*RTE_MBUF_DYNFIELD(first_seg,
> -				ice_timestamp_dynfield_offset,
> -				rte_mbuf_timestamp_t *) = ts_ns;
> -			first_seg->ol_flags |= ice_timestamp_dynflag;
> +			rxq->time_high =
> +			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
> +			if (unlikely(is_tsinit)) {
> +				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> 1, rxq->time_high);
> +				ad->hw_time_low = (uint32_t)ts_ns;
> +				ad->hw_time_high = (uint32_t)(ts_ns >> 32);
> +				is_tsinit = false;
> +			} else {
> +				if (rxq->time_high < ad->hw_time_low)
> +					ad->hw_time_high += 1;
> +				ts_ns = (uint64_t)ad->hw_time_high << 32 |
> rxq->time_high;
> +				ad->hw_time_low = rxq->time_high;
> +			}
> +			ad->hw_time_update = rte_get_timer_cycles() /
> +					     (rte_get_timer_hz() / 1000);
> +			*RTE_MBUF_DYNFIELD(rxm,
> +					   (ice_timestamp_dynfield_offset),
> +					   rte_mbuf_timestamp_t *) = ts_ns;
> +			pkt_flags |= ice_timestamp_dynflag;
>  		}
> 
>  		if (ad->ptp_ena && ((first_seg->packet_type &
> RTE_PTYPE_L2_MASK) @@ -2325,14 +2360,19 @@ ice_recv_pkts(void
> *rx_queue,
>  	uint64_t pkt_flags;
>  	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;  #ifndef
> RTE_LIBRTE_ICE_16BYTE_RX_DESC
> +	bool is_tsinit = false;
> +	uint64_t ts_ns;
>  	struct ice_vsi *vsi = rxq->vsi;
>  	struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
> -	uint64_t ts_ns;
>  	struct ice_adapter *ad = rxq->vsi->adapter; -#endif
> 
> -	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> -		rxq->hw_register_set = 1;
> +	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> +		uint64_t sw_cur_time = rte_get_timer_cycles() /
> (rte_get_timer_hz() /
> +1000);
> +
> +		if (unlikely(sw_cur_time - ad->hw_time_update > 4))
> +			is_tsinit = 1;
> +	}
> +#endif
> 
>  	while (nb_rx < nb_pkts) {
>  		rxdp = &rx_ring[rx_id];
> @@ -2386,14 +2426,25 @@ ice_recv_pkts(void *rx_queue,
>  		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
>  		if (ice_timestamp_dynflag > 0) {
> -			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> -				rxq->hw_register_set,
> -				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
> -			rxq->hw_register_set = 0;
> +			rxq->time_high =
> +			   rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
> +			if (unlikely(is_tsinit)) {
> +				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> 1, rxq->time_high);
> +				ad->hw_time_low = (uint32_t)ts_ns;
> +				ad->hw_time_high = (uint32_t)(ts_ns >> 32);
> +				is_tsinit = false;
> +			} else {
> +				if (rxq->time_high < ad->hw_time_low)
> +					ad->hw_time_high += 1;
> +				ts_ns = (uint64_t)ad->hw_time_high << 32 |
> rxq->time_high;
> +				ad->hw_time_low = rxq->time_high;
> +			}
> +			ad->hw_time_update = rte_get_timer_cycles() /
> +					     (rte_get_timer_hz() / 1000);
>  			*RTE_MBUF_DYNFIELD(rxm,
> -				ice_timestamp_dynfield_offset,
> -				rte_mbuf_timestamp_t *) = ts_ns;
> -			rxm->ol_flags |= ice_timestamp_dynflag;
> +					   (ice_timestamp_dynfield_offset),
> +					   rte_mbuf_timestamp_t *) = ts_ns;
> +			pkt_flags |= ice_timestamp_dynflag;
>  		}
> 
>  		if (ad->ptp_ena && ((rxm->packet_type &
> RTE_PTYPE_L2_MASK) == @@ -2408,6 +2459,7 @@ ice_recv_pkts(void
> *rx_queue,
>  		/* copy old mbuf to rx_pkts */
>  		rx_pkts[nb_rx++] = rxm;
>  	}
> +
>  	rxq->rx_tail = rx_id;
>  	/**
>  	 * If the number of free RX descriptors is greater than the RX free
> --
> 2.25.1



* RE: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-02-28  7:36     ` [PATCH v4] " Wenjun Wu
  2022-03-01 11:07       ` Zhang, Qi Z
@ 2022-03-24  9:09       ` Zhang, Qi Z
  2022-03-24 11:16         ` Kevin Traynor
  1 sibling, 1 reply; 12+ messages in thread
From: Zhang, Qi Z @ 2022-03-24  9:09 UTC (permalink / raw)
  To: Wu, Wenjun1, dev, Yang, Qiming; +Cc: Van Haaren, Harry, Su, Simei



> -----Original Message-----
> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> Sent: Monday, February 28, 2022 3:36 PM
> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
> <qiming.yang@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
> Subject: [PATCH v4] net/ice: improve performance of RX timestamp offload
> 
> Previously, each time a burst of packets is received, SW reads HW register
> and assembles it and the timestamp from descriptor together to get the
> complete 64 bits timestamp.
> 
> This patch optimizes the algorithm. The SW only needs to check the
> monotonicity of the low 32bits timestamp to avoid crossing borders.
> Each time before SW receives a burst of packets, it should check the time
> difference between current time and last update time to avoid the low 32
> bits timestamp cycling twice.
> 
> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>

Added cc stable

Acked-by: Qi Zhang <qi.z.zhang@intel.com>

Applied to dpdk-next-net-intel.

Thanks
Qi



* Re: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-03-24  9:09       ` Zhang, Qi Z
@ 2022-03-24 11:16         ` Kevin Traynor
  2022-03-24 11:51           ` Zhang, Qi Z
  0 siblings, 1 reply; 12+ messages in thread
From: Kevin Traynor @ 2022-03-24 11:16 UTC (permalink / raw)
  To: Zhang, Qi Z, Wu, Wenjun1, dev, Yang, Qiming
  Cc: Van Haaren, Harry, Su, Simei, Luca Boccassi, Christian Ehrhardt

On 24/03/2022 09:09, Zhang, Qi Z wrote:
> 
> 
>> -----Original Message-----
>> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
>> Sent: Monday, February 28, 2022 3:36 PM
>> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
>> <qiming.yang@intel.com>
>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
>> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
>> Subject: [PATCH v4] net/ice: improve performance of RX timestamp offload
>>
>> Previously, each time a burst of packets is received, SW reads HW register
>> and assembles it and the timestamp from descriptor together to get the
>> complete 64 bits timestamp.
>>
>> This patch optimizes the algorithm. The SW only needs to check the
>> monotonicity of the low 32bits timestamp to avoid crossing borders.
>> Each time before SW receives a burst of packets, it should check the time
>> difference between current time and last update time to avoid the low 32
>> bits timestamp cycling twice.
>>
>> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
> 
> Added cc stable
> 

Hi Qi. The DPDK documentation has guidance about what should be 
backported to LTS [0] and distinguishes between fixes and performance 
improvements. Please try and stick with this when applying patches or 
let LTS maintainers know if there is a debatable case.

thanks,
Kevin.

[0] 
http://doc.dpdk.org/guides-21.11/contributing/stable.html#what-changes-should-be-backported

> Acked-by: Qi Zhang <qi.z.zhang@intel.com>
> 
> Applied to dpdk-next-net-intel.
> 
> Thanks
> Qi
> 




* RE: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-03-24 11:16         ` Kevin Traynor
@ 2022-03-24 11:51           ` Zhang, Qi Z
  2022-03-24 12:17             ` Kevin Traynor
  0 siblings, 1 reply; 12+ messages in thread
From: Zhang, Qi Z @ 2022-03-24 11:51 UTC (permalink / raw)
  To: Kevin Traynor, Wu, Wenjun1, dev, Yang, Qiming
  Cc: Van Haaren, Harry, Su, Simei, Luca Boccassi, Christian Ehrhardt



> -----Original Message-----
> From: Kevin Traynor <ktraynor@redhat.com>
> Sent: Thursday, March 24, 2022 7:17 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
> <qiming.yang@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian Ehrhardt
> <christian.ehrhardt@canonical.com>
> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
> offload
> 
> On 24/03/2022 09:09, Zhang, Qi Z wrote:
> >
> >
> >> -----Original Message-----
> >> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> >> Sent: Monday, February 28, 2022 3:36 PM
> >> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
> >> <qiming.yang@intel.com>
> >> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> >> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
> >> Subject: [PATCH v4] net/ice: improve performance of RX timestamp
> >> offload
> >>
> >> Previously, each time a burst of packets is received, SW reads HW
> >> register and assembles it and the timestamp from descriptor together
> >> to get the complete 64 bits timestamp.
> >>
> >> This patch optimizes the algorithm. The SW only needs to check the
> >> monotonicity of the low 32bits timestamp to avoid crossing borders.
> >> Each time before SW receives a burst of packets, it should check the
> >> time difference between current time and last update time to avoid
> >> the low 32 bits timestamp cycling twice.
> >>
> >> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
> >
> > Added cc stable
> >
> 
> Hi Qi. The DPDK documentation has guidance about what should be
> backported to LTS [0] and distinguishes between fixes and performance
> improvements. Please try and stick with this when applying patches or let LTS
> maintainers know if there is a debatable case.

Thanks for the comments
Yes, actually this is about a 50% ~ 70% performance improvement, which may be critical for some performance-sensitive use cases (e.g. network forensics).
So I'd like to defend it with the case below:

An existing feature in LTS is not usable as intended without it.

Thanks
Qi

> 
> thanks,
> Kevin.
> 
> [0]
> http://doc.dpdk.org/guides-21.11/contributing/stable.html#what-changes-
> should-be-backported

> 
> > Acked-by: Qi Zhang <qi.z.zhang@intel.com>
> >
> > Applied to dpdk-next-net-intel.
> >
> > Thanks
> > Qi
> >
> 



* Re: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-03-24 11:51           ` Zhang, Qi Z
@ 2022-03-24 12:17             ` Kevin Traynor
  2022-03-24 13:05               ` Zhang, Qi Z
  0 siblings, 1 reply; 12+ messages in thread
From: Kevin Traynor @ 2022-03-24 12:17 UTC (permalink / raw)
  To: Zhang, Qi Z, Wu, Wenjun1, dev, Yang, Qiming
  Cc: Van Haaren, Harry, Su, Simei, Luca Boccassi, Christian Ehrhardt

On 24/03/2022 11:51, Zhang, Qi Z wrote:
> 
> 
>> -----Original Message-----
>> From: Kevin Traynor <ktraynor@redhat.com>
>> Sent: Thursday, March 24, 2022 7:17 PM
>> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
>> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
>> <qiming.yang@intel.com>
>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
>> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian Ehrhardt
>> <christian.ehrhardt@canonical.com>
>> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
>> offload
>>
>> On 24/03/2022 09:09, Zhang, Qi Z wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
>>>> Sent: Monday, February 28, 2022 3:36 PM
>>>> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
>>>> <qiming.yang@intel.com>
>>>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
>>>> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
>>>> Subject: [PATCH v4] net/ice: improve performance of RX timestamp
>>>> offload
>>>>
>>>> Previously, each time a burst of packets is received, SW reads HW
>>>> register and assembles it and the timestamp from descriptor together
>>>> to get the complete 64 bits timestamp.
>>>>
>>>> This patch optimizes the algorithm. The SW only needs to check the
>>>> monotonicity of the low 32bits timestamp to avoid crossing borders.
>>>> Each time before SW receives a burst of packets, it should check the
>>>> time difference between current time and last update time to avoid
>>>> the low 32 bits timestamp cycling twice.
>>>>
>>>> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
>>>
>>> Added cc stable
>>>
>>
>> Hi Qi. The DPDK documentation has guidance about what should be
>> backported to LTS [0] and distinguishes between fixes and performance
>> improvements. Please try and stick with this when applying patches or let LTS
>> maintainers know if there is a debatable case.
> 
> Thanks for the comments
> Yes, actually this is about a 50% ~ 70% performance improvement, which maybe critical for some performance sensitive use cases.(e.g. network forensics)
> So I'd like to defend with below case
> 
> An existing feature in LTS is not usable as intended without it.
> 

If that is the case, then I think the commit log should be rewritten. It
just talks about the code changes; there's nothing about an impact to an
existing use case that was unusable and is now fixed.

> Thanks
> Qi
> 
>>
>> thanks,
>> Kevin.
>>
>> [0]
>> http://doc.dpdk.org/guides-21.11/contributing/stable.html#what-changes-
>> should-be-backported
> 
>>
>>> Acked-by: Qi Zhang <qi.z.zhang@intel.com>
>>>
>>> Applied to dpdk-next-net-intel.
>>>
>>> Thanks
>>> Qi
>>>
>>
> 



* RE: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-03-24 12:17             ` Kevin Traynor
@ 2022-03-24 13:05               ` Zhang, Qi Z
  2022-03-24 13:44                 ` Kevin Traynor
  0 siblings, 1 reply; 12+ messages in thread
From: Zhang, Qi Z @ 2022-03-24 13:05 UTC (permalink / raw)
  To: Kevin Traynor, Wu, Wenjun1, dev, Yang, Qiming
  Cc: Van Haaren, Harry, Su, Simei, Luca Boccassi, Christian Ehrhardt



> -----Original Message-----
> From: Kevin Traynor <ktraynor@redhat.com>
> Sent: Thursday, March 24, 2022 8:18 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
> <qiming.yang@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian Ehrhardt
> <christian.ehrhardt@canonical.com>
> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
> offload
> 
> On 24/03/2022 11:51, Zhang, Qi Z wrote:
> >
> >
> >> -----Original Message-----
> >> From: Kevin Traynor <ktraynor@redhat.com>
> >> Sent: Thursday, March 24, 2022 7:17 PM
> >> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
> >> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
> >> <qiming.yang@intel.com>
> >> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> >> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian
> >> Ehrhardt <christian.ehrhardt@canonical.com>
> >> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
> >> offload
> >>
> >> On 24/03/2022 09:09, Zhang, Qi Z wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> >>>> Sent: Monday, February 28, 2022 3:36 PM
> >>>> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
> >>>> <qiming.yang@intel.com>
> >>>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> >>>> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
> >>>> Subject: [PATCH v4] net/ice: improve performance of RX timestamp
> >>>> offload
> >>>>
> >>>> Previously, each time a burst of packets is received, SW reads HW
> >>>> register and assembles it and the timestamp from descriptor
> >>>> together to get the complete 64 bits timestamp.
> >>>>
> >>>> This patch optimizes the algorithm. The SW only needs to check the
> >>>> monotonicity of the low 32bits timestamp to avoid crossing borders.
> >>>> Each time before SW receives a burst of packets, it should check
> >>>> the time difference between current time and last update time to
> >>>> avoid the low 32 bits timestamp cycling twice.
> >>>>
> >>>> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
> >>>
> >>> Added cc stable
> >>>
> >>
> >> Hi Qi. The DPDK documentation has guidance about what should be
> >> backported to LTS [0] and distinguishes between fixes and performance
> >> improvements. Please try and stick with this when applying patches or
> >> let LTS maintainers know if there is a debatable case.
> >
> > Thanks for the comments
> > Yes, actually this is about a 50% ~ 70% performance improvement, which
> > may be critical for some performance-sensitive use cases (e.g. network
> > forensics). So I'd like to defend it with the case below:
> >
> > An existing feature in LTS is not usable as intended without it.
> >
> 
> If that is the case, then I think the commit log should be re-written. It just
> talks about the code changes; there's nothing about the impact on an existing
> use case that was unusable and is now fixed.

OK, I have updated the commit log in dpdk-next-net-intel as below.
Kevin, Wenjun, let me know if I missed anything.

    Previously, each time a burst of packets is received, SW reads HW
    register and assembles it and the timestamp from descriptor together to
    get the complete 64 bits timestamp.

    This patch optimizes the algorithm. The SW only needs to check the
    monotonicity of the low 32bits timestamp to avoid crossing borders.
    Each time before SW receives a burst of packets, it should check the
    time difference between current time and last update time to avoid
    the low 32 bits timestamp cycling twice.

    The patch proved a 50% ~ 70% single-core performance improvement on a
    mainstream Xeon server, and it is necessary to backport it to the LTS release,
    as it fixes the performance gap for some use cases.
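
For reference, the extension logic described in the commit log above can be
sketched roughly as below (simplified C; the names ts_state, ts_refresh and
ts_extend and the resync constant are illustrative only, not the exact driver
code, which keeps this state in the adapter structure and reads the
GLTSYN_TIME_H register):

#include <stdint.h>

/* Illustrative resync period in ms; the real driver derives its check
 * from rte_get_timer_cycles() and a fixed threshold. */
#define TS_RESYNC_MS 4

struct ts_state {
        uint32_t hw_time_high;   /* cached high 32 bits from the HW register */
        uint32_t hw_time_low;    /* last low 32 bits seen in a descriptor */
        uint64_t hw_time_update; /* SW time (ms) of the last HW read */
};

/* Once per RX burst: re-read the high word from HW only if enough SW
 * time has passed that the low 32 bits could otherwise wrap twice. */
static void
ts_refresh(struct ts_state *s, uint64_t sw_now_ms,
           uint32_t (*read_time_high)(void))
{
        if (sw_now_ms - s->hw_time_update > TS_RESYNC_MS) {
                s->hw_time_high = read_time_high();
                s->hw_time_low = 0;
                s->hw_time_update = sw_now_ms;
        }
}

/* Per packet: extend the 32-bit descriptor timestamp with the cached
 * high word, bumping the high word when the low word goes backwards,
 * i.e. it wrapped since the previous packet. */
static uint64_t
ts_extend(struct ts_state *s, uint32_t desc_low32)
{
        if (desc_low32 < s->hw_time_low)
                s->hw_time_high++;
        s->hw_time_low = desc_low32;
        return ((uint64_t)s->hw_time_high << 32) | desc_low32;
}

Here ts_refresh() corresponds to the per-burst check and ts_extend() to the
per-descriptor handling, so the HW register is touched at most once every few
milliseconds instead of once per RX burst.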

Thanks
Qi

> 
> > Thanks
> > Qi
> >
> >>
> >> thanks,
> >> Kevin.
> >>
> >> [0]
> >> http://doc.dpdk.org/guides-21.11/contributing/stable.html#what-change
> >> s-
> >> should-be-backported
> >
> >>
> >>> Acked-by: Qi Zhang <qi.z.zhang@intel.com>
> >>>
> >>> Applied to dpdk-next-net-intel.
> >>>
> >>> Thanks
> >>> Qi
> >>>
> >>
> >


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-03-24 13:05               ` Zhang, Qi Z
@ 2022-03-24 13:44                 ` Kevin Traynor
  2022-03-24 13:57                   ` Zhang, Qi Z
  0 siblings, 1 reply; 12+ messages in thread
From: Kevin Traynor @ 2022-03-24 13:44 UTC (permalink / raw)
  To: Zhang, Qi Z, Wu, Wenjun1, dev, Yang, Qiming
  Cc: Van Haaren, Harry, Su, Simei, Luca Boccassi, Christian Ehrhardt

On 24/03/2022 13:05, Zhang, Qi Z wrote:
> 
> 
>> -----Original Message-----
>> From: Kevin Traynor <ktraynor@redhat.com>
>> Sent: Thursday, March 24, 2022 8:18 PM
>> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
>> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
>> <qiming.yang@intel.com>
>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
>> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian Ehrhardt
>> <christian.ehrhardt@canonical.com>
>> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
>> offload
>>
>> On 24/03/2022 11:51, Zhang, Qi Z wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Kevin Traynor <ktraynor@redhat.com>
>>>> Sent: Thursday, March 24, 2022 7:17 PM
>>>> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
>>>> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
>>>> <qiming.yang@intel.com>
>>>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
>>>> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian
>>>> Ehrhardt <christian.ehrhardt@canonical.com>
>>>> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
>>>> offload
>>>>
>>>> On 24/03/2022 09:09, Zhang, Qi Z wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
>>>>>> Sent: Monday, February 28, 2022 3:36 PM
>>>>>> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang, Qiming
>>>>>> <qiming.yang@intel.com>
>>>>>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
>>>>>> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
>>>>>> Subject: [PATCH v4] net/ice: improve performance of RX timestamp
>>>>>> offload
>>>>>>
>>>>>> Previously, each time a burst of packets is received, SW reads HW
>>>>>> register and assembles it and the timestamp from descriptor
>>>>>> together to get the complete 64 bits timestamp.
>>>>>>
>>>>>> This patch optimizes the algorithm. The SW only needs to check the
>>>>>> monotonicity of the low 32bits timestamp to avoid crossing borders.
>>>>>> Each time before SW receives a burst of packets, it should check
>>>>>> the time difference between current time and last update time to
>>>>>> avoid the low 32 bits timestamp cycling twice.
>>>>>>
>>>>>> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
>>>>>
>>>>> Added cc stable
>>>>>
>>>>
>>>> Hi Qi. The DPDK documentation has guidance about what should be
>>>> backported to LTS [0] and distinguishes between fixes and performance
>>>> improvements. Please try and stick with this when applying patches or
>>>> let LTS maintainers know if there is a debatable case.
>>>
>>> Thanks for the comments
>>> Yes, actually this is about a 50% ~ 70% performance improvement, which
>>> may be critical for some performance-sensitive use cases (e.g. network
>>> forensics). So I'd like to defend it with the case below:
>>>
>>> An existing feature in LTS is not usable as intended without it.
>>>
>>
>> If that is the case, then I think the commit log should be re-written. It just
>> talks about the code changes; there's nothing about the impact on an existing
>> use case that was unusable and is now fixed.
> 
> OK, I have updated the commit log in dpdk-next-net-intel as below.
> Kevin, Wenjun, let me know if I missed anything.
> 
>      Previously, each time a burst of packets is received, SW reads HW
>      register and assembles it and the timestamp from descriptor together to
>      get the complete 64 bits timestamp.
> 
>      This patch optimizes the algorithm. The SW only needs to check the
>      monotonicity of the low 32bits timestamp to avoid crossing borders.
>      Each time before SW receives a burst of packets, it should check the
>      time difference between current time and last update time to avoid
>      the low 32 bits timestamp cycling twice.
> 
>      The patch proved a 50% ~ 70% single-core performance improvement on a
>      mainstream Xeon server, and it is necessary to backport it to the LTS release,
>      as it fixes the performance gap for some use cases.
> 

The comment about backport to LTS can be removed. That is an operational 
comment, so not really relevant to a future reader trying to understand 
the commit.

thanks,
Kevin.

> Thanks
> Qi
> 
>>
>>> Thanks
>>> Qi
>>>
>>>>
>>>> thanks,
>>>> Kevin.
>>>>
>>>> [0]
>>>> http://doc.dpdk.org/guides-21.11/contributing/stable.html#what-change
>>>> s-
>>>> should-be-backported
>>>
>>>>
>>>>> Acked-by: Qi Zhang <qi.z.zhang@intel.com>
>>>>>
>>>>> Applied to dpdk-next-net-intel.
>>>>>
>>>>> Thanks
>>>>> Qi
>>>>>
>>>>
>>>
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v4] net/ice: improve performance of RX timestamp offload
  2022-03-24 13:44                 ` Kevin Traynor
@ 2022-03-24 13:57                   ` Zhang, Qi Z
  0 siblings, 0 replies; 12+ messages in thread
From: Zhang, Qi Z @ 2022-03-24 13:57 UTC (permalink / raw)
  To: Kevin Traynor, Wu, Wenjun1, dev, Yang, Qiming
  Cc: Van Haaren, Harry, Su, Simei, Luca Boccassi, Christian Ehrhardt



> -----Original Message-----
> From: Kevin Traynor <ktraynor@redhat.com>
> Sent: Thursday, March 24, 2022 9:45 PM
> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
> <qiming.yang@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian Ehrhardt
> <christian.ehrhardt@canonical.com>
> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
> offload
> 
> On 24/03/2022 13:05, Zhang, Qi Z wrote:
> >
> >
> >> -----Original Message-----
> >> From: Kevin Traynor <ktraynor@redhat.com>
> >> Sent: Thursday, March 24, 2022 8:18 PM
> >> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
> >> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
> >> <qiming.yang@intel.com>
> >> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> >> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian
> >> Ehrhardt <christian.ehrhardt@canonical.com>
> >> Subject: Re: [PATCH v4] net/ice: improve performance of RX timestamp
> >> offload
> >>
> >> On 24/03/2022 11:51, Zhang, Qi Z wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Kevin Traynor <ktraynor@redhat.com>
> >>>> Sent: Thursday, March 24, 2022 7:17 PM
> >>>> To: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Wenjun1
> >>>> <wenjun1.wu@intel.com>; dev@dpdk.org; Yang, Qiming
> >>>> <qiming.yang@intel.com>
> >>>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> >>>> <simei.su@intel.com>; Luca Boccassi <bluca@debian.org>; Christian
> >>>> Ehrhardt <christian.ehrhardt@canonical.com>
> >>>> Subject: Re: [PATCH v4] net/ice: improve performance of RX
> >>>> timestamp offload
> >>>>
> >>>> On 24/03/2022 09:09, Zhang, Qi Z wrote:
> >>>>>
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Wu, Wenjun1 <wenjun1.wu@intel.com>
> >>>>>> Sent: Monday, February 28, 2022 3:36 PM
> >>>>>> To: dev@dpdk.org; Zhang, Qi Z <qi.z.zhang@intel.com>; Yang,
> >>>>>> Qiming <qiming.yang@intel.com>
> >>>>>> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Su, Simei
> >>>>>> <simei.su@intel.com>; Wu, Wenjun1 <wenjun1.wu@intel.com>
> >>>>>> Subject: [PATCH v4] net/ice: improve performance of RX timestamp
> >>>>>> offload
> >>>>>>
> >>>>>> Previously, each time a burst of packets is received, SW reads HW
> >>>>>> register and assembles it and the timestamp from descriptor
> >>>>>> together to get the complete 64 bits timestamp.
> >>>>>>
> >>>>>> This patch optimizes the algorithm. The SW only needs to check
> >>>>>> the monotonicity of the low 32bits timestamp to avoid crossing
> borders.
> >>>>>> Each time before SW receives a burst of packets, it should check
> >>>>>> the time difference between current time and last update time to
> >>>>>> avoid the low 32 bits timestamp cycling twice.
> >>>>>>
> >>>>>> Signed-off-by: Wenjun Wu <wenjun1.wu@intel.com>
> >>>>>
> >>>>> Added cc stable
> >>>>>
> >>>>
> >>>> Hi Qi. The DPDK documentation has guidance about what should be
> >>>> backported to LTS [0] and distinguishes between fixes and
> >>>> performance improvements. Please try and stick with this when
> >>>> applying patches or let LTS maintainers know if there is a debatable
> case.
> >>>
> >>> Thanks for the comments
> >>> Yes, actually this is about a 50% ~ 70% performance improvement,
> >>> which may be critical for some performance-sensitive use cases
> >>> (e.g. network forensics). So I'd like to defend it with the case below:
> >>>
> >>> An existing feature in LTS is not usable as intended without it.
> >>>
> >>
> >> If that is the case, then I think the commit log should be re-written.
> >> It just talks about the code changes; there's nothing about the impact
> >> on an existing use case that was unusable and is now fixed.
> >
> > OK, I have updated the commit log in dpdk-next-net-intel as below.
> > Kevin, Wenjun, let me know if I missed anything.
> >
> >      Previously, each time a burst of packets is received, SW reads HW
> >      register and assembles it and the timestamp from descriptor together to
> >      get the complete 64 bits timestamp.
> >
> >      This patch optimizes the algorithm. The SW only needs to check the
> >      monotonicity of the low 32bits timestamp to avoid crossing borders.
> >      Each time before SW receives a burst of packets, it should check the
> >      time difference between current time and last update time to avoid
> >      the low 32 bits timestamp cycling twice.
> >
> >      The patch proved a 50% ~ 70% single-core performance improvement on a
> >      mainstream Xeon server, and it is necessary to backport it to the LTS
> >      release, as it fixes the performance gap for some use cases.
> >
> 
> The comment about backport to LTS can be removed. That is an operational
> comment, so not really relevant to a future reader trying to understand the
> commit.

Yes, I removed the unnecessary comment.

The patch proved a 50% ~ 70% single-core performance improvement on a
mainstream Xeon server; this fixes the performance gap for some use cases.

> 
> thanks,
> Kevin.
> 
> > Thanks
> > Qi
> >
> >>
> >>> Thanks
> >>> Qi
> >>>
> >>>>
> >>>> thanks,
> >>>> Kevin.
> >>>>
> >>>> [0]
> >>>> http://doc.dpdk.org/guides-21.11/contributing/stable.html#what-chan
> >>>> ge
> >>>> s-
> >>>> should-be-backported
> >>>
> >>>>
> >>>>> Acked-by: Qi Zhang <qi.z.zhang@intel.com>
> >>>>>
> >>>>> Applied to dpdk-next-net-intel.
> >>>>>
> >>>>> Thanks
> >>>>> Qi
> >>>>>
> >>>>
> >>>
> >


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-03-24 13:57 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-22  5:16 [PATCH v1] net/ice: improve performance of RX timestamp offload Wenjun Wu
2022-02-22  5:50 ` [PATCH v2] " Wenjun Wu
2022-02-22  6:26   ` [PATCH v3] " Wenjun Wu
2022-02-28  7:36     ` [PATCH v4] " Wenjun Wu
2022-03-01 11:07       ` Zhang, Qi Z
2022-03-24  9:09       ` Zhang, Qi Z
2022-03-24 11:16         ` Kevin Traynor
2022-03-24 11:51           ` Zhang, Qi Z
2022-03-24 12:17             ` Kevin Traynor
2022-03-24 13:05               ` Zhang, Qi Z
2022-03-24 13:44                 ` Kevin Traynor
2022-03-24 13:57                   ` Zhang, Qi Z

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).