DPDK patches and discussions
 help / color / mirror / Atom feed
From: Stephen Hemminger <stephen@networkplumber.org>
To: dev@dpdk.org
Cc: Stephen Hemminger <stephen@networkplumber.org>,
	Reshma Pattan <reshma.pattan@intel.com>,
	Quentin Armitage <quentin@armitage.org.uk>
Subject: [PATCH 4/4] pcapng: move timestamp calculation into pdump
Date: Wed, 20 Sep 2023 21:23:49 -0700	[thread overview]
Message-ID: <20230921042349.104150-5-stephen@networkplumber.org> (raw)
In-Reply-To: <20230921042349.104150-1-stephen@networkplumber.org>

The computation of timestamp is more easily done in pdump
than pcapng. The initialization is easier and makes the pcapng
library have no global state.

It also makes it easier to add HW timestamp support later.

Simplify the computation of nanoseconds from TSC to a two
step process which avoids numeric overflow issues. The previous
code was not thread safe as well.

Fixes: c882eb544842 ("pcapng: fix timestamp wrapping in output files")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/pcapng/rte_pcapng.c | 71 ++---------------------------------------
 lib/pcapng/rte_pcapng.h |  2 +-
 lib/pdump/rte_pdump.c   | 56 +++++++++++++++++++++++++++++---
 3 files changed, 55 insertions(+), 74 deletions(-)

diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index ddce7bc87141..f6b3bd0ca718 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -25,7 +25,6 @@
 #include <rte_mbuf.h>
 #include <rte_os_shim.h>
 #include <rte_pcapng.h>
-#include <rte_reciprocal.h>
 #include <rte_time.h>
 
 #include "pcapng_proto.h"
@@ -43,15 +42,6 @@ struct rte_pcapng {
 	uint32_t port_index[RTE_MAX_ETHPORTS];
 };
 
-/* For converting TSC cycles to PCAPNG ns format */
-static struct pcapng_time {
-	uint64_t ns;
-	uint64_t cycles;
-	uint64_t tsc_hz;
-	struct rte_reciprocal_u64 tsc_hz_inverse;
-} pcapng_time;
-
-
 #ifdef RTE_EXEC_ENV_WINDOWS
 /*
  * Windows does not have writev() call.
@@ -102,58 +92,6 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
 #define if_indextoname(ifindex, ifname) NULL
 #endif
 
-static inline void
-pcapng_init(void)
-{
-	struct timespec ts;
-
-	pcapng_time.cycles = rte_get_tsc_cycles();
-	clock_gettime(CLOCK_REALTIME, &ts);
-	pcapng_time.cycles = (pcapng_time.cycles + rte_get_tsc_cycles()) / 2;
-	pcapng_time.ns = rte_timespec_to_ns(&ts);
-
-	pcapng_time.tsc_hz = rte_get_tsc_hz();
-	pcapng_time.tsc_hz_inverse = rte_reciprocal_value_u64(pcapng_time.tsc_hz);
-}
-
-/* PCAPNG timestamps are in nanoseconds */
-static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
-{
-	uint64_t delta, secs;
-
-	if (!pcapng_time.tsc_hz)
-		pcapng_init();
-
-	/* In essence the calculation is:
-	 *   delta = (cycles - pcapng_time.cycles) * NSEC_PRE_SEC / rte_get_tsc_hz()
-	 * but this overflows within 4 to 8 seconds depending on TSC frequency.
-	 * Instead, if delta >= pcapng_time.tsc_hz:
-	 *   Increase pcapng_time.ns and pcapng_time.cycles by the number of
-	 *   whole seconds in delta and reduce delta accordingly.
-	 * delta will therefore always lie in the interval [0, pcapng_time.tsc_hz),
-	 * which will not overflow when multiplied by NSEC_PER_SEC provided the
-	 * TSC frequency < approx 18.4GHz.
-	 *
-	 * Currently all TSCs operate below 5GHz.
-	 */
-	delta = cycles - pcapng_time.cycles;
-	if (unlikely(delta >= pcapng_time.tsc_hz)) {
-		if (likely(delta < pcapng_time.tsc_hz * 2)) {
-			delta -= pcapng_time.tsc_hz;
-			pcapng_time.cycles += pcapng_time.tsc_hz;
-			pcapng_time.ns += NSEC_PER_SEC;
-		} else {
-			secs = rte_reciprocal_divide_u64(delta, &pcapng_time.tsc_hz_inverse);
-			delta -= secs * pcapng_time.tsc_hz;
-			pcapng_time.cycles += secs * pcapng_time.tsc_hz;
-			pcapng_time.ns += secs * NSEC_PER_SEC;
-		}
-	}
-
-	return pcapng_time.ns + rte_reciprocal_divide_u64(delta * NSEC_PER_SEC,
-							  &pcapng_time.tsc_hz_inverse);
-}
-
 /* length of option including padding */
 static uint16_t pcapng_optlen(uint16_t len)
 {
@@ -518,7 +456,7 @@ struct rte_mbuf *
 rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 		const struct rte_mbuf *md,
 		struct rte_mempool *mp,
-		uint32_t length, uint64_t cycles,
+		uint32_t length, uint64_t timestamp,
 		enum rte_pcapng_direction direction,
 		const char *comment)
 {
@@ -527,14 +465,11 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 	struct pcapng_option *opt;
 	uint16_t optlen;
 	struct rte_mbuf *mc;
-	uint64_t ns;
 	bool rss_hash;
 
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
 #endif
-	ns = pcapng_tsc_to_ns(cycles);
-
 	orig_len = rte_pktmbuf_pkt_len(md);
 
 	/* Take snapshot of the data */
@@ -639,8 +574,8 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 	/* Interface index is filled in later during write */
 	mc->port = port_id;
 
-	epb->timestamp_hi = ns >> 32;
-	epb->timestamp_lo = (uint32_t)ns;
+	epb->timestamp_hi = timestamp >> 32;
+	epb->timestamp_lo = (uint32_t)timestamp;
 	epb->capture_length = data_len;
 	epb->original_length = orig_len;
 
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
index 1225ed5536ff..b9a9ee23ad1d 100644
--- a/lib/pcapng/rte_pcapng.h
+++ b/lib/pcapng/rte_pcapng.h
@@ -122,7 +122,7 @@ enum rte_pcapng_direction {
  *   The upper limit on bytes to copy.  Passing UINT32_MAX
  *   means all data (after offset).
  * @param timestamp
- *   The timestamp in TSC cycles.
+ *   The timestamp in nanoseconds since 1/1/1970.
  * @param direction
  *   The direction of the packer: receive, transmit or unknown.
  * @param comment
diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c
index a70085bd0211..384abf5e27ad 100644
--- a/lib/pdump/rte_pdump.c
+++ b/lib/pdump/rte_pdump.c
@@ -10,7 +10,9 @@
 #include <rte_log.h>
 #include <rte_memzone.h>
 #include <rte_errno.h>
+#include <rte_reciprocal.h>
 #include <rte_string_fns.h>
+#include <rte_time.h>
 #include <rte_pcapng.h>
 
 #include "rte_pdump.h"
@@ -78,6 +80,33 @@ static struct {
 	const struct rte_memzone *mz;
 } *pdump_stats;
 
+/* Time conversion values */
+static struct {
+	uint64_t offset_ns;	/* ns since 1/1/1970 when initialized */
+	uint64_t tsc_base;	/* TSC when initialized */
+	uint64_t tsc_hz;	/* copy of rte_tsc_hz() */
+	struct rte_reciprocal_u64 tsc_hz_inverse; /* inverse of tsc_hz */
+} pdump_time;
+
+/* Convert from TSC (CPU cycles) to nanoseconds */
+static uint64_t pdump_timestamp(void)
+{
+	uint64_t delta, secs, ns;
+
+	delta = rte_get_tsc_cycles() - pdump_time.tsc_base;
+
+	/* Avoid numeric wraparound by computing seconds first */
+	secs = rte_reciprocal_divide_u64(delta, &pdump_time.tsc_hz_inverse);
+
+	/* Remove the seconds portion */
+	delta -= secs * pdump_time.tsc_hz;
+	ns = rte_reciprocal_divide_u64(delta * NS_PER_S,
+				       &pdump_time.tsc_hz_inverse);
+
+	return secs * NS_PER_S + ns + pdump_time.offset_ns;
+}
+
+
 /* Create a clone of mbuf to be placed into ring. */
 static void
 pdump_copy(uint16_t port_id, uint16_t queue,
@@ -90,7 +119,7 @@ pdump_copy(uint16_t port_id, uint16_t queue,
 	int ring_enq;
 	uint16_t d_pkts = 0;
 	struct rte_mbuf *dup_bufs[nb_pkts];
-	uint64_t ts;
+	uint64_t timestamp = 0;
 	struct rte_ring *ring;
 	struct rte_mempool *mp;
 	struct rte_mbuf *p;
@@ -99,7 +128,6 @@ pdump_copy(uint16_t port_id, uint16_t queue,
 	if (cbs->filter)
 		rte_bpf_exec_burst(cbs->filter, (void **)pkts, rcs, nb_pkts);
 
-	ts = rte_get_tsc_cycles();
 	ring = cbs->ring;
 	mp = cbs->mp;
 	for (i = 0; i < nb_pkts; i++) {
@@ -119,12 +147,17 @@ pdump_copy(uint16_t port_id, uint16_t queue,
 		 * If using pcapng then want to wrap packets
 		 * otherwise a simple copy.
 		 */
-		if (cbs->ver == V2)
+		if (cbs->ver == V2) {
+			/* calculate timestamp on first packet */
+			if (timestamp == 0)
+				timestamp = pdump_timestamp();
+
 			p = rte_pcapng_copy(port_id, queue,
 					    pkts[i], mp, cbs->snaplen,
-					    ts, direction, NULL);
-		else
+					    timestamp, direction, NULL);
+		} else {
 			p = rte_pktmbuf_copy(pkts[i], mp, 0, cbs->snaplen);
+		}
 
 		if (unlikely(p == NULL))
 			__atomic_fetch_add(&stats->nombuf, 1, __ATOMIC_RELAXED);
@@ -421,8 +454,21 @@ int
 rte_pdump_init(void)
 {
 	const struct rte_memzone *mz;
+	struct timespec ts;
+	uint64_t cycles;
 	int ret;
 
+	/* Compute time base offsets */
+	cycles = rte_get_tsc_cycles();
+	clock_gettime(CLOCK_REALTIME, &ts);
+
+	/* put initial TSC value in middle of clock_gettime() call */
+	pdump_time.tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+	pdump_time.offset_ns = rte_timespec_to_ns(&ts);
+
+	pdump_time.tsc_hz = rte_get_tsc_hz();
+	pdump_time.tsc_hz_inverse = rte_reciprocal_value_u64(pdump_time.tsc_hz);
+
 	mz = rte_memzone_reserve(MZ_RTE_PDUMP_STATS, sizeof(*pdump_stats),
 				 rte_socket_id(), 0);
 	if (mz == NULL) {
-- 
2.39.2


  parent reply	other threads:[~2023-09-21  4:24 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-21  4:23 [PATCH 0/4] pcapng fixes Stephen Hemminger
2023-09-21  4:23 ` [PATCH 1/4] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-09-21  4:23 ` [PATCH 2/4] dumpcap: allow multiple invocations Stephen Hemminger
2023-09-21  6:22   ` Morten Brørup
2023-09-21  7:10     ` Isaac Boukris
2023-11-07  2:34     ` Stephen Hemminger
2023-09-21  4:23 ` [PATCH 3/4] pcapng: change timestamp argument to write_stats Stephen Hemminger
2023-09-21  4:23 ` Stephen Hemminger [this message]
2023-10-02  8:15   ` [PATCH 4/4] pcapng: move timestamp calculation into pdump David Marchand
2023-10-04 17:13     ` Stephen Hemminger
2023-10-06  9:10       ` David Marchand
2023-10-06 14:59         ` Kevin Traynor
2023-10-05 23:06 ` [PATCH v2 0/4] dumpcap and pcapng fixes Stephen Hemminger
2023-10-05 23:06   ` [PATCH v2 1/4] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-10-05 23:06   ` [PATCH v2 2/4] dumpcap: allow multiple invocations Stephen Hemminger
2023-10-05 23:06   ` [PATCH v2 3/4] pcapng: modify timestamp calculation Stephen Hemminger
2023-10-05 23:06   ` [PATCH v2 4/4] test: cleanups to pcapng test Stephen Hemminger
2023-11-08 18:35 ` [PATCH v3 0/5] dumpcap and pcapng fixes Stephen Hemminger
2023-11-08 18:35   ` [PATCH v3 1/5] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-11-09  7:34     ` Morten Brørup
2023-11-08 18:35   ` [PATCH v3 2/5] dumpcap: allow multiple invocations Stephen Hemminger
2023-11-09  7:50     ` Morten Brørup
2023-11-09 15:40       ` Stephen Hemminger
2023-11-09 16:00         ` Morten Brørup
2023-11-09 17:16       ` Stephen Hemminger
2023-11-09 18:22         ` Morten Brørup
2023-11-08 18:35   ` [PATCH v3 3/5] pcapng: modify timestamp calculation Stephen Hemminger
2023-11-09  7:57     ` Morten Brørup
2023-11-08 18:35   ` [PATCH v3 4/5] pcapng: avoid using alloca() Stephen Hemminger
2023-11-09  8:21     ` Morten Brørup
2023-11-09 15:44       ` Stephen Hemminger
2023-11-09 16:25         ` Morten Brørup
2023-11-08 18:35   ` [PATCH v3 5/5] test: cleanups to pcapng test Stephen Hemminger
2023-11-09 17:34 ` [PATCH v4 0/5] dumpcap and pcapng fixes Stephen Hemminger
2023-11-09 17:34   ` [PATCH v4 1/5] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-11-09 17:34   ` [PATCH v4 2/5] dumpcap: allow multiple invocations Stephen Hemminger
2023-11-09 18:30     ` Morten Brørup
2023-11-09 17:34   ` [PATCH v4 3/5] pcapng: modify timestamp calculation Stephen Hemminger
2023-11-09 17:34   ` [PATCH v4 4/5] pcapng: avoid using alloca() Stephen Hemminger
2023-11-09 17:34   ` [PATCH v4 5/5] test: cleanups to pcapng test Stephen Hemminger
2023-11-09 19:45 ` [PATCH v5 0/5] dumpcap and pcapng fixes Stephen Hemminger
2023-11-09 19:45   ` [PATCH v5 1/5] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-11-09 19:45   ` [PATCH v5 2/5] dumpcap: allow multiple invocations Stephen Hemminger
2023-11-09 20:09     ` Morten Brørup
2023-11-09 19:45   ` [PATCH v5 3/5] pcapng: modify timestamp calculation Stephen Hemminger
2023-11-12 14:22     ` Thomas Monjalon
2023-11-09 19:45   ` [PATCH v5 4/5] pcapng: avoid using alloca() Stephen Hemminger
2023-11-09 19:45   ` [PATCH v5 5/5] test: cleanups to pcapng test Stephen Hemminger
2023-11-13 16:15 ` [PATCH v6 0/5] dumpcap and pcapng fixes Stephen Hemminger
2023-11-13 16:15   ` [PATCH v6 1/5] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-11-13 16:15   ` [PATCH v6 2/5] dumpcap: allow multiple invocations Stephen Hemminger
2023-11-13 16:15   ` [PATCH v6 3/5] pcapng: modify timestamp calculation Stephen Hemminger
2023-11-13 16:15   ` [PATCH v6 4/5] pcapng: avoid using alloca() Stephen Hemminger
2023-11-13 16:15   ` [PATCH v6 5/5] test: cleanups to pcapng test Stephen Hemminger
2023-11-17 16:35 ` [PATCH v7 0/5] dumpcap and pcapng fixes Stephen Hemminger
2023-11-17 16:35   ` [PATCH v7 1/5] pdump: fix setting rte_errno on mp error Stephen Hemminger
2023-11-17 16:35   ` [PATCH v7 2/5] dumpcap: allow multiple invocations Stephen Hemminger
2023-11-17 16:35   ` [PATCH v7 3/5] pcapng: modify timestamp calculation Stephen Hemminger
2023-11-17 16:35   ` [PATCH v7 4/5] pcapng: avoid using alloca() Stephen Hemminger
2023-11-17 16:35   ` [PATCH v7 5/5] test: cleanups to pcapng test Stephen Hemminger
2023-11-22 22:42   ` [PATCH v7 0/5] dumpcap and pcapng fixes Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230921042349.104150-5-stephen@networkplumber.org \
    --to=stephen@networkplumber.org \
    --cc=dev@dpdk.org \
    --cc=quentin@armitage.org.uk \
    --cc=reshma.pattan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).