* [RFC] pcapng: improve performance of timestamping
@ 2025-11-26 5:12 Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
0 siblings, 1 reply; 8+ messages in thread
From: Stephen Hemminger @ 2025-11-26 5:12 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
Avoid doing expensive divide operations when converting
timestamps from cycles (TSC) to pcapng scaled value (ns).
This logic was derived from the math used by Linux kernel
virtual system call with help from AI.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/pcapng/rte_pcapng.c | 61 ++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 7c3c400c71..283962fa2d 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -38,8 +38,14 @@
struct rte_pcapng {
int outfd; /* output file */
unsigned int ports; /* number of interfaces added */
- uint64_t offset_ns; /* ns since 1/1/1970 when initialized */
- uint64_t tsc_base; /* TSC when started */
+
+ struct pcapng_time_conv {
+ uint64_t tsc_base; /* TSC when started */
+ uint64_t ns_base; /* ns since 1/1/1970 when initialized */
+ uint64_t mult; /* scaling factor relative to TSC hz */
+ uint32_t shift; /* shift for scaling (24) */
+ uint64_t mask; /* mask of bits used (56) */
+ } tc;
/* DPDK port id to interface index in file */
uint32_t port_index[RTE_MAX_ETHPORTS];
@@ -95,21 +101,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
#define if_indextoname(ifindex, ifname) NULL
#endif
+/* Initialize time conversion based on logic similar to rte_cyclecounter */
+static void
+pcapng_timestamp_init(struct pcapng_time_conv *tc)
+{
+ struct timespec ts;
+ uint64_t cycles = rte_get_tsc_cycles();
+
+ /* record start time in ns since 1/1/1970 */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Compute baseline TSC which occured during clock_gettime */
+ tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+ tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+
+ /* Set conversion factors for reasonabl prescision with no overflow */
+ uint64_t tsc_hz = rte_get_tsc_hz();
+ tc->shift = 24;
+ tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz;
+ tc->mask = RTE_BIT64(56) - 1;
+}
+
/* Convert from TSC (CPU cycles) to nanoseconds */
static uint64_t
-pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
+pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles)
{
- uint64_t delta, rem, secs, ns;
- const uint64_t hz = rte_get_tsc_hz();
-
- delta = cycles - self->tsc_base;
+ /* Compute TSC delta with mask to avoid wraparound */
+ uint64_t delta = (cycles - tc->tsc_base) & tc->mask;
- /* Avoid numeric wraparound by computing seconds first */
- secs = delta / hz;
- rem = delta % hz;
- ns = (rem * NS_PER_S) / hz;
+ /* Convert TSC delta to nanoseconds (no division) */
+ uint64_t ns_delta = (delta * tc->mult) >> tc->shift;
- return secs * NS_PER_S + ns + self->offset_ns;
+ return tc->ns_base + ns_delta;
}
/* length of option including padding */
@@ -329,7 +352,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
{
struct pcapng_statistics *hdr;
struct pcapng_option *opt;
- uint64_t start_time = self->offset_ns;
+ uint64_t start_time = self->tc.ns_base;
uint64_t sample_time;
uint32_t optlen, len;
uint32_t *buf;
@@ -379,7 +402,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
hdr->block_length = len;
hdr->interface_id = self->port_index[port_id];
- sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
+ sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles());
hdr->timestamp_hi = sample_time >> 32;
hdr->timestamp_lo = (uint32_t)sample_time;
@@ -658,7 +681,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
/* adjust timestamp recorded in packet */
cycles = (uint64_t)epb->timestamp_hi << 32;
cycles += epb->timestamp_lo;
- timestamp = pcapng_timestamp(self, cycles);
+ timestamp = pcapng_timestamp(&self->tc, cycles);
epb->timestamp_hi = timestamp >> 32;
epb->timestamp_lo = (uint32_t)timestamp;
@@ -704,8 +727,6 @@ rte_pcapng_fdopen(int fd,
{
unsigned int i;
rte_pcapng_t *self;
- struct timespec ts;
- uint64_t cycles;
self = malloc(sizeof(*self));
if (!self) {
@@ -716,11 +737,7 @@ rte_pcapng_fdopen(int fd,
self->outfd = fd;
self->ports = 0;
- /* record start time in ns since 1/1/1970 */
- cycles = rte_get_tsc_cycles();
- clock_gettime(CLOCK_REALTIME, &ts);
- self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
- self->offset_ns = rte_timespec_to_ns(&ts);
+ pcapng_timestamp_init(&self->tc);
for (i = 0; i < RTE_MAX_ETHPORTS; i++)
self->port_index[i] = UINT32_MAX;
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 0/6] pcapng: timestamping and comment fixes
2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger
` (5 more replies)
0 siblings, 6 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
Improve performance and tests for pcapng.
Stephen Hemminger (6):
pcapng: use alloca instead of fixed buffer
pcapng: add additional mbuf if space required on copy
test: add more tests for comments in pcapng
test: vary size of packets in pcapng test
test: increase gap in pcapng test
pcapng: improve performance of timestamping
app/test/test_pcapng.c | 134 +++++++++++++++++++++++++++++-----------
lib/pcapng/rte_pcapng.c | 101 +++++++++++++++++++-----------
2 files changed, 163 insertions(+), 72 deletions(-)
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy Stephen Hemminger
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, Reshma Pattan
This is an API that accepts strings as options, and a user could
potentially ask for a very large string as a comment.
The dynamic way to fix this is to use alloca() to allocate the buffer
used to hold options.
Bugzilla ID: 1820
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/pcapng/rte_pcapng.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 21bc94cea1..3067033e89 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -34,9 +34,6 @@
/* conversion from DPDK speed to PCAPNG */
#define PCAPNG_MBPS_SPEED 1000000ull
-/* upper bound for section, stats and interface blocks (in uint32_t) */
-#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t))
-
/* Format of the capture file handle */
struct rte_pcapng {
int outfd; /* output file */
@@ -145,7 +142,7 @@ pcapng_section_block(rte_pcapng_t *self,
{
struct pcapng_section_header *hdr;
struct pcapng_option *opt;
- uint32_t buf[PCAPNG_BLKSIZ];
+ uint32_t *buf;
uint32_t len;
len = sizeof(*hdr);
@@ -162,7 +159,8 @@ pcapng_section_block(rte_pcapng_t *self,
len += pcapng_optlen(0);
len += sizeof(uint32_t);
- if (len > sizeof(buf))
+ buf = alloca(len);
+ if (buf == NULL)
return -1;
hdr = (struct pcapng_section_header *)buf;
@@ -214,7 +212,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type,
struct pcapng_option *opt;
const uint8_t tsresol = 9; /* nanosecond resolution */
uint32_t len;
- uint32_t buf[PCAPNG_BLKSIZ];
+ uint32_t *buf;
char ifname_buf[IF_NAMESIZE];
char ifhw[256];
uint64_t speed = 0;
@@ -268,7 +266,8 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type,
len += pcapng_optlen(0);
len += sizeof(uint32_t);
- if (len > sizeof(buf))
+ buf = alloca(len);
+ if (buf == NULL)
return -1;
hdr = (struct pcapng_interface_block *)buf;
@@ -333,7 +332,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
uint64_t start_time = self->offset_ns;
uint64_t sample_time;
uint32_t optlen, len;
- uint32_t buf[PCAPNG_BLKSIZ];
+ uint32_t *buf;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -353,7 +352,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
optlen += pcapng_optlen(0);
len = sizeof(*hdr) + optlen + sizeof(uint32_t);
- if (len > sizeof(buf))
+
+ buf = alloca(len);
+ if (buf == NULL)
return -1;
hdr = (struct pcapng_statistics *)buf;
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 3/6] test: add more tests for comments in pcapng Stephen Hemminger
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, Reshma Pattan
If a comment is passed to rte_pcapng_copy(), additional space
may be needed at the end of the mbuf. To handle this case, generate
a segmented mbuf with additional space for the options.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/pcapng/rte_pcapng.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 3067033e89..7c3c400c71 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -540,11 +540,24 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue,
if (comment)
optlen += pcapng_optlen(strlen(comment));
- /* reserve trailing options and block length */
+ /*
+ * Try to put options at the end of this mbuf.
+ * If not use an mbuf chain.
+ */
opt = (struct pcapng_option *)
rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
- if (unlikely(opt == NULL))
- goto fail;
+ if (unlikely(opt == NULL)) {
+ struct rte_mbuf *ml = rte_pktmbuf_alloc(mp);
+
+ if (unlikely(ml == NULL))
+ goto fail;
+
+ opt = (struct pcapng_option *)rte_pktmbuf_append(ml, optlen + sizeof(uint32_t));
+ if (unlikely(opt == NULL || rte_pktmbuf_chain(mc, ml) != 0)) {
+ rte_pktmbuf_free(ml);
+ goto fail;
+ }
+ }
switch (direction) {
case RTE_PCAPNG_DIRECTION_IN:
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 3/6] test: add more tests for comments in pcapng
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 4/6] test: vary size of packets in pcapng test Stephen Hemminger
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, Reshma Pattan
Add some more cases where comment is set in pcapng file.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
app/test/test_pcapng.c | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index bcf99724fa..f2b49c31c7 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -125,8 +125,7 @@ test_setup(void)
/* Make a pool for cloned packets */
mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool",
- MAX_BURST * 32, 0, 0,
- rte_pcapng_mbuf_size(pkt_len) + 128,
+ MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len),
SOCKET_ID_ANY, "ring_mp_sc");
if (mp == NULL) {
fprintf(stderr, "Cannot create mempool\n");
@@ -149,6 +148,13 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
unsigned int burst_size;
unsigned int count;
ssize_t len;
+ const char *examples[] = {
+ "EAL init complete. May the cores be ever in your favor.",
+ "No packets were harmed in the making of this burst.",
+ "rte_eth_dev_start(): crossing fingers and enabling queues...",
+ "Congratulations, you’ve reached the end of the RX path. Please collect your free cache miss.",
+ "Lockless and fearless — that’s how we roll in userspace."
+ };
/* make a dummy packet */
mbuf1_prepare(&mbfs, pkt_len);
@@ -162,9 +168,14 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
burst_size = rte_rand_max(MAX_BURST) + 1;
for (i = 0; i < burst_size; i++) {
struct rte_mbuf *mc;
+ const char *comment = NULL;
+
+ /* Put comment on occasional packets */
+ if ((count + i) % 42 == 0)
+ comment = examples[rte_rand_max(RTE_DIM(examples))];
mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig),
- RTE_PCAPNG_DIRECTION_IN, NULL);
+ RTE_PCAPNG_DIRECTION_IN, comment);
if (mc == NULL) {
fprintf(stderr, "Cannot copy packet\n");
return -1;
@@ -386,7 +397,7 @@ static int
test_write_packets(void)
{
char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng";
- static rte_pcapng_t *pcapng;
+ rte_pcapng_t *pcapng = NULL;
int ret, tmp_fd, count;
uint64_t now = current_timestamp();
@@ -413,6 +424,13 @@ test_write_packets(void)
goto fail;
}
+ /* write a statistics block */
+ ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL);
+ if (ret <= 0) {
+ fprintf(stderr, "Write of statistics failed\n");
+ goto fail;
+ }
+
count = fill_pcapng_file(pcapng, TOTAL_PACKETS);
if (count < 0)
goto fail;
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 4/6] test: vary size of packets in pcapng test
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
` (2 preceding siblings ...)
2025-12-29 23:01 ` [PATCH v2 3/6] test: add more tests for comments in pcapng Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 5/6] test: increase gap " Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 6/6] pcapng: improve performance of timestamping Stephen Hemminger
5 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, Reshma Pattan
In order to exercise more logic in pcapng, vary the size
of the packets.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
app/test/test_pcapng.c | 94 +++++++++++++++++++++++++++++-------------
1 file changed, 65 insertions(+), 29 deletions(-)
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index f2b49c31c7..5010355df5 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -28,10 +28,9 @@
#define TOTAL_PACKETS 4096
#define MAX_BURST 64
#define MAX_GAP_US 100000
-#define DUMMY_MBUF_NUM 3
+#define DUMMY_MBUF_NUM 2
static struct rte_mempool *mp;
-static const uint32_t pkt_len = 200;
static uint16_t port_id;
static const char null_dev[] = "net_null0";
@@ -41,13 +40,36 @@ struct dummy_mbuf {
uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE];
};
+#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM)
+
+/* RFC 864 chargen pattern used for comment testing */
+#define FILL_LINE_LENGTH 72
+#define FILL_START 0x21 /* ! */
+#define FILL_END 0x7e /* ~ */
+#define FILL_RANGE (FILL_END - FILL_START)
+
static void
-dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
- uint32_t data_len)
+fill_mbuf(struct rte_mbuf *mb)
{
- uint32_t i;
- uint8_t *db;
+ unsigned int len = rte_pktmbuf_tailroom(mb);
+ char *buf = rte_pktmbuf_append(mb, len);
+ unsigned int n = 0;
+
+ while (n < len - 1) {
+ char ch = FILL_START + (n % FILL_LINE_LENGTH) % FILL_RANGE;
+ for (unsigned int i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) {
+ buf[n++] = ch;
+ if (++ch == FILL_END)
+ ch = FILL_START;
+ }
+ if (n < len - 1)
+ buf[n++] = '\n';
+ }
+}
+static void
+dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len)
+{
mb->buf_addr = buf;
rte_mbuf_iova_set(mb, (uintptr_t)buf);
mb->buf_len = buf_len;
@@ -57,15 +79,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len,
mb->pool = (void *)buf;
rte_pktmbuf_reset(mb);
- db = (uint8_t *)rte_pktmbuf_append(mb, data_len);
-
- for (i = 0; i != data_len; i++)
- db[i] = i;
}
/* Make an IP packet consisting of chain of one packets */
static void
-mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen)
+mbuf1_prepare(struct dummy_mbuf *dm)
{
struct {
struct rte_ether_hdr eth;
@@ -84,32 +102,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen)
.dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST),
},
.udp = {
+ .src_port = rte_cpu_to_be_16(19), /* Chargen port */
.dst_port = rte_cpu_to_be_16(9), /* Discard port */
},
};
memset(dm, 0, sizeof(*dm));
- dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen);
+ dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]));
+ dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]));
rte_eth_random_addr(pkt.eth.src_addr.addr_bytes);
- plen -= sizeof(struct rte_ether_hdr);
+ memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt));
+
+ fill_mbuf(&dm->mb[1]);
+ rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]);
+
+ rte_mbuf_sanity_check(&dm->mb[0], 1);
+ rte_mbuf_sanity_check(&dm->mb[1], 0);
+}
- pkt.ip.total_length = rte_cpu_to_be_16(plen);
- pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip);
+static void
+mbuf1_resize(struct dummy_mbuf *dm, uint16_t len)
+{
+ struct {
+ struct rte_ether_hdr eth;
+ struct rte_ipv4_hdr ip;
+ struct rte_udp_hdr udp;
+ } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *);
- plen -= sizeof(struct rte_ipv4_hdr);
- pkt.udp.src_port = rte_rand();
- pkt.udp.dgram_len = rte_cpu_to_be_16(plen);
+ dm->mb[1].data_len = len;
+ dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len;
- memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt));
+ len += sizeof(struct rte_udp_hdr);
+ pkt->udp.dgram_len = rte_cpu_to_be_16(len);
- /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be
- * compressed into single mbuf to properly test store of chained mbufs
- */
- dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len);
- dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len);
- rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]);
- rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]);
+ len += sizeof(struct rte_ipv4_hdr);
+ pkt->ip.total_length = rte_cpu_to_be_16(len);
+ pkt->ip.hdr_checksum = 0;
+ pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip);
+
+ rte_mbuf_sanity_check(&dm->mb[0], 1);
+ rte_mbuf_sanity_check(&dm->mb[1], 0);
}
static int
@@ -125,7 +158,8 @@ test_setup(void)
/* Make a pool for cloned packets */
mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool",
- MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len),
+ MAX_BURST * 32, 0, 0,
+ rte_pcapng_mbuf_size(MAX_DATA_SIZE),
SOCKET_ID_ANY, "ring_mp_sc");
if (mp == NULL) {
fprintf(stderr, "Cannot create mempool\n");
@@ -156,8 +190,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
"Lockless and fearless — that’s how we roll in userspace."
};
- /* make a dummy packet */
- mbuf1_prepare(&mbfs, pkt_len);
+ mbuf1_prepare(&mbfs);
orig = &mbfs.mb[0];
for (count = 0; count < num_packets; count += burst_size) {
@@ -174,6 +207,9 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
if ((count + i) % 42 == 0)
comment = examples[rte_rand_max(RTE_DIM(examples))];
+ /* Vary the size of the packets */
+ mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE));
+
mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig),
RTE_PCAPNG_DIRECTION_IN, comment);
if (mc == NULL) {
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 5/6] test: increase gap in pcapng test
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
` (3 preceding siblings ...)
2025-12-29 23:01 ` [PATCH v2 4/6] test: vary size of packets in pcapng test Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 6/6] pcapng: improve performance of timestamping Stephen Hemminger
5 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, Reshma Pattan
Make sure that the test takes long enough that the 32-bit
counter wraps around.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
app/test/test_pcapng.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c
index 5010355df5..73557eb2f1 100644
--- a/app/test/test_pcapng.c
+++ b/app/test/test_pcapng.c
@@ -27,7 +27,6 @@
#define TOTAL_PACKETS 4096
#define MAX_BURST 64
-#define MAX_GAP_US 100000
#define DUMMY_MBUF_NUM 2
static struct rte_mempool *mp;
@@ -175,7 +174,7 @@ test_setup(void)
}
static int
-fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
+fill_pcapng_file(rte_pcapng_t *pcapng)
{
struct dummy_mbuf mbfs;
struct rte_mbuf *orig;
@@ -193,7 +192,15 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
mbuf1_prepare(&mbfs);
orig = &mbfs.mb[0];
- for (count = 0; count < num_packets; count += burst_size) {
+ /* How many microseconds does it take TSC to wrap around 32 bits */
+ const unsigned wrap_us
+ = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz();
+
+ /* Want overall test to take to wraparound at least twice. */
+ const unsigned int avg_gap = (2 * wrap_us)
+ / (TOTAL_PACKETS / (MAX_BURST / 2));
+
+ for (count = 0; count < TOTAL_PACKETS; count += burst_size) {
struct rte_mbuf *clones[MAX_BURST];
unsigned int i;
@@ -229,8 +236,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets)
return -1;
}
- /* Leave a small gap between packets to test for time wrap */
- usleep(rte_rand_max(MAX_GAP_US));
+ rte_delay_us_block(rte_rand_max(2 * avg_gap));
}
return count;
@@ -467,7 +473,7 @@ test_write_packets(void)
goto fail;
}
- count = fill_pcapng_file(pcapng, TOTAL_PACKETS);
+ count = fill_pcapng_file(pcapng);
if (count < 0)
goto fail;
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 6/6] pcapng: improve performance of timestamping
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
` (4 preceding siblings ...)
2025-12-29 23:01 ` [PATCH v2 5/6] test: increase gap " Stephen Hemminger
@ 2025-12-29 23:01 ` Stephen Hemminger
5 siblings, 0 replies; 8+ messages in thread
From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, Reshma Pattan
Avoid doing expensive divide operations when converting
timestamps from cycles (TSC) to pcapng scaled value (ns).
This logic was derived from the math used by Linux kernel
virtual system call with help from AI.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/pcapng/rte_pcapng.c | 63 ++++++++++++++++++++++++++---------------
1 file changed, 40 insertions(+), 23 deletions(-)
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 7c3c400c71..b12814e305 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -38,8 +38,14 @@
struct rte_pcapng {
int outfd; /* output file */
unsigned int ports; /* number of interfaces added */
- uint64_t offset_ns; /* ns since 1/1/1970 when initialized */
- uint64_t tsc_base; /* TSC when started */
+
+ struct pcapng_time_conv {
+ uint64_t tsc_base; /* TSC when started */
+ uint64_t ns_base; /* ns since 1/1/1970 when initialized */
+ uint64_t mult; /* scaling factor relative to TSC hz */
+ uint32_t shift; /* shift for scaling (24) */
+ uint64_t mask; /* mask of bits used (56) */
+ } tc;
/* DPDK port id to interface index in file */
uint32_t port_index[RTE_MAX_ETHPORTS];
@@ -95,21 +101,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
#define if_indextoname(ifindex, ifname) NULL
#endif
+/* Initialize time conversion based on logic similar to rte_cyclecounter */
+static void
+pcapng_timestamp_init(struct pcapng_time_conv *tc)
+{
+ struct timespec ts;
+ uint64_t cycles = rte_get_tsc_cycles();
+
+ /* record start time in ns since 1/1/1970 */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Compute baseline TSC which occurred during clock_gettime */
+ tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+ tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+
+ /* Set conversion factors for reasonable precision with no overflow */
+ uint64_t tsc_hz = rte_get_tsc_hz();
+ tc->shift = 24;
+ tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz;
+ tc->mask = RTE_BIT64(56) - 1;
+}
+
/* Convert from TSC (CPU cycles) to nanoseconds */
static uint64_t
-pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
+pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles)
{
- uint64_t delta, rem, secs, ns;
- const uint64_t hz = rte_get_tsc_hz();
-
- delta = cycles - self->tsc_base;
+ /* Compute TSC delta with mask to avoid wraparound */
+ uint64_t delta = (cycles - tc->tsc_base) & tc->mask;
- /* Avoid numeric wraparound by computing seconds first */
- secs = delta / hz;
- rem = delta % hz;
- ns = (rem * NS_PER_S) / hz;
+ /* Convert TSC delta to nanoseconds (no division) */
+ uint64_t ns_delta = (delta * tc->mult) >> tc->shift;
- return secs * NS_PER_S + ns + self->offset_ns;
+ return tc->ns_base + ns_delta;
}
/* length of option including padding */
@@ -309,7 +332,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type,
opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
- /* clone block_length after optionsa */
+ /* clone block_length after options */
memcpy(opt, &hdr->block_length, sizeof(uint32_t));
/* remember the file index */
@@ -329,7 +352,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
{
struct pcapng_statistics *hdr;
struct pcapng_option *opt;
- uint64_t start_time = self->offset_ns;
+ uint64_t start_time = self->tc.ns_base;
uint64_t sample_time;
uint32_t optlen, len;
uint32_t *buf;
@@ -379,7 +402,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
hdr->block_length = len;
hdr->interface_id = self->port_index[port_id];
- sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
+ sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles());
hdr->timestamp_hi = sample_time >> 32;
hdr->timestamp_lo = (uint32_t)sample_time;
@@ -658,7 +681,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
/* adjust timestamp recorded in packet */
cycles = (uint64_t)epb->timestamp_hi << 32;
cycles += epb->timestamp_lo;
- timestamp = pcapng_timestamp(self, cycles);
+ timestamp = pcapng_timestamp(&self->tc, cycles);
epb->timestamp_hi = timestamp >> 32;
epb->timestamp_lo = (uint32_t)timestamp;
@@ -704,8 +727,6 @@ rte_pcapng_fdopen(int fd,
{
unsigned int i;
rte_pcapng_t *self;
- struct timespec ts;
- uint64_t cycles;
self = malloc(sizeof(*self));
if (!self) {
@@ -716,11 +737,7 @@ rte_pcapng_fdopen(int fd,
self->outfd = fd;
self->ports = 0;
- /* record start time in ns since 1/1/1970 */
- cycles = rte_get_tsc_cycles();
- clock_gettime(CLOCK_REALTIME, &ts);
- self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
- self->offset_ns = rte_timespec_to_ns(&ts);
+ pcapng_timestamp_init(&self->tc);
for (i = 0; i < RTE_MAX_ETHPORTS; i++)
self->port_index[i] = UINT32_MAX;
--
2.51.0
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2025-12-29 23:03 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 3/6] test: add more tests for comments in pcapng Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 4/6] test: vary size of packets in pcapng test Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 5/6] test: increase gap " Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 6/6] pcapng: improve performance of timestamping Stephen Hemminger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).