DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/ice: fix performance issue for Rx timestamp
@ 2021-10-28  7:58 Simei Su
  2021-10-28  8:49 ` Zhang, Qi Z
  2021-10-28 14:42 ` [dpdk-dev] [PATCH v2] " Simei Su
  0 siblings, 2 replies; 7+ messages in thread
From: Simei Su @ 2021-10-28  7:58 UTC (permalink / raw)
  To: qi.z.zhang; +Cc: dev, harry.van.haaren, wenjun1.wu, Simei Su

In Rx data path, it reads hardware registers per packet, resulting in
big performance drop. This patch improves performance from two aspects:
(1) replace per packet hardware register read by per burst.
(2) reduce hardware register read time from 3 to 2 when the low value of
    time is not close to overflow.

Meanwhile, this patch refines "ice_timesync_read_rx_timestamp" and
"ice_timesync_read_tx_timestamp" API in which "ice_tstamp_convert_32b_64b"
is also used.

Fixes: 953e74e6b73a ("net/ice: enable Rx timestamp on flex descriptor")
Fixes: 646dcbe6c701 ("net/ice: support IEEE 1588 PTP")

Suggested-by: Harry van Haaren <harry.van.haaren@intel.com>
Signed-off-by: Simei Su <simei.su@intel.com>
---
 drivers/net/ice/ice_ethdev.c |  4 +--
 drivers/net/ice/ice_ethdev.h |  1 +
 drivers/net/ice/ice_rxtx.c   | 59 ++++++++++++++++++++++++++------------------
 drivers/net/ice/ice_rxtx.h   | 34 +++++++++++++++----------
 4 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index ef6ee1c..13a7a97 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -5560,7 +5560,7 @@ ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	rxq = dev->data->rx_queues[flags];
 
 	ts_high = rxq->time_high;
-	ts_ns = ice_tstamp_convert_32b_64b(hw, ts_high);
+	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, ts_high);
 	ns = rte_timecounter_update(&ad->rx_tstamp_tc, ts_ns);
 	*timestamp = rte_ns_to_timespec(ns);
 
@@ -5587,7 +5587,7 @@ ice_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
 		return -1;
 	}
 
-	ts_ns = ice_tstamp_convert_32b_64b(hw, (tstamp >> 8) & mask);
+	ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, (tstamp >> 8) & mask);
 	ns = rte_timecounter_update(&ad->tx_tstamp_tc, ts_ns);
 	*timestamp = rte_ns_to_timespec(ns);
 
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 599e002..0e42c4c 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -509,6 +509,7 @@ struct ice_adapter {
 	struct rte_timecounter rx_tstamp_tc;
 	struct rte_timecounter tx_tstamp_tc;
 	bool ptp_ena;
+	uint64_t time_hw;
 #ifdef RTE_ARCH_X86
 	bool rx_use_avx2;
 	bool rx_use_avx512;
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index c3cad2f..2d771ea 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1581,6 +1581,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 	if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+
 	/**
 	 * Scan LOOK_AHEAD descriptors at a time to determine which
 	 * descriptors reference packets that are ready to be received.
@@ -1614,15 +1617,15 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
 			ice_rxd_to_vlan_tci(mb, &rxdp[j]);
 			rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-			if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
-				ts_ns = ice_tstamp_convert_32b_64b(hw,
+			if (ice_timestamp_dynflag > 0) {
+				ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
+					rxq->hw_register_set,
 					rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
-				if (ice_timestamp_dynflag > 0) {
-					*RTE_MBUF_DYNFIELD(mb,
-						ice_timestamp_dynfield_offset,
-						rte_mbuf_timestamp_t *) = ts_ns;
-					mb->ol_flags |= ice_timestamp_dynflag;
-				}
+				rxq->hw_register_set = 0;
+				*RTE_MBUF_DYNFIELD(mb,
+					ice_timestamp_dynfield_offset,
+					rte_mbuf_timestamp_t *) = ts_ns;
+				mb->ol_flags |= ice_timestamp_dynflag;
 			}
 
 			if (ad->ptp_ena && ((mb->packet_type &
@@ -1822,6 +1825,10 @@ ice_recv_scattered_pkts(void *rx_queue,
 	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
+
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -1932,15 +1939,15 @@ ice_recv_scattered_pkts(void *rx_queue,
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw,
+		if (ice_timestamp_dynflag > 0) {
+			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
+				rxq->hw_register_set,
 				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			if (ice_timestamp_dynflag > 0) {
-				*RTE_MBUF_DYNFIELD(first_seg,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				first_seg->ol_flags |= ice_timestamp_dynflag;
-			}
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(first_seg,
+				ice_timestamp_dynfield_offset,
+				rte_mbuf_timestamp_t *) = ts_ns;
+			first_seg->ol_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
@@ -2312,6 +2319,10 @@ ice_recv_pkts(void *rx_queue,
 	uint64_t ts_ns;
 	struct ice_adapter *ad = rxq->vsi->adapter;
 #endif
+
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
@@ -2363,15 +2374,15 @@ ice_recv_pkts(void *rx_queue,
 		rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
 		pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
 #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
-		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
-			ts_ns = ice_tstamp_convert_32b_64b(hw,
+		if (ice_timestamp_dynflag > 0) {
+			ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
+				rxq->hw_register_set,
 				rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
-			if (ice_timestamp_dynflag > 0) {
-				*RTE_MBUF_DYNFIELD(rxm,
-					ice_timestamp_dynfield_offset,
-					rte_mbuf_timestamp_t *) = ts_ns;
-				rxm->ol_flags |= ice_timestamp_dynflag;
-			}
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+				ice_timestamp_dynfield_offset,
+				rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= ice_timestamp_dynflag;
 		}
 
 		if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 146dc1f..ef58c0d 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -93,6 +93,7 @@ struct ice_rx_queue {
 	ice_rx_release_mbufs_t rx_rel_mbufs;
 	uint64_t offloads;
 	uint32_t time_high;
+	uint32_t hw_register_set;
 	const struct rte_memzone *mz;
 };
 
@@ -321,29 +322,36 @@ void ice_fdir_rx_parsing_enable(struct ice_adapter *ad, bool on)
 
 /* Helper function to convert a 32b nanoseconds timestamp to 64b. */
 static inline
-uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, uint32_t in_timestamp)
+uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, struct ice_adapter *ad,
+				    uint32_t flag, uint32_t in_timestamp)
 {
 	const uint64_t mask = 0xFFFFFFFF;
 	uint32_t hi, lo, lo2, delta;
-	uint64_t time, ns;
+	uint64_t ns;
 
-	lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
-	hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
-	lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
-
-	if (lo2 < lo) {
+	if (flag) {
 		lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
 		hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
-	}
 
-	time = ((uint64_t)hi << 32) | lo;
+		if (lo > UINT32_MAX)
+			lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
+		else
+			lo2 = lo;
+
+		if (lo2 < lo) {
+			lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
+			hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
+		}
+
+		ad->time_hw = ((uint64_t)hi << 32) | lo;
+	}
 
-	delta = (in_timestamp - (uint32_t)(time & mask));
+	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
 	if (delta > (mask / 2)) {
-		delta = ((uint32_t)(time & mask) - in_timestamp);
-		ns = time - delta;
+		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
+		ns = ad->time_hw - delta;
 	} else {
-		ns = time + delta;
+		ns = ad->time_hw + delta;
 	}
 
 	return ns;
-- 
2.9.5


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-11-01  1:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-28  7:58 [dpdk-dev] [PATCH] net/ice: fix performance issue for Rx timestamp Simei Su
2021-10-28  8:49 ` Zhang, Qi Z
2021-10-28 14:42 ` [dpdk-dev] [PATCH v2] " Simei Su
2021-10-29  2:24   ` Zhang, Qi Z
2021-10-29 10:15   ` [dpdk-dev] [PATCH v3] " Simei Su
2021-10-29 12:56     ` [dpdk-dev] [PATCH v4] " Simei Su
2021-11-01  1:20       ` Zhang, Qi Z

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).