From: "Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
To: <dev@dpdk.org>
Cc: "Erik Gabriel Carrillo" <erik.g.carrillo@intel.com>,
"David Marchand" <david.marchand@redhat.com>,
maria.lingemark@ericsson.com,
"Stefan Sundkvist" <stefan.sundkvist@ericsson.com>,
"Mattias Rönnblom" <mattias.ronnblom@ericsson.com>
Subject: [RFC 2/2] eal: add high-performance timer facility
Date: Tue, 28 Feb 2023 10:39:16 +0100 [thread overview]
Message-ID: <20230228093916.87206-3-mattias.ronnblom@ericsson.com> (raw)
In-Reply-To: <20230228093916.87206-1-mattias.ronnblom@ericsson.com>
The htimer library attempts to provide a timer facility with roughly
the same functionality as, but less overhead and better scalability
than, the DPDK timer library.
The htimer library employs per-lcore hierarchical timer wheels and a
message-based synchronization/MT-safety scheme.
Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
app/test/meson.build | 8 +-
app/test/test_htimer_mgr.c | 674 +++++++++++++++++++++++++++++++
app/test/test_htimer_mgr_perf.c | 324 +++++++++++++++
app/test/test_htw.c | 478 ++++++++++++++++++++++
app/test/test_htw_perf.c | 181 +++++++++
doc/api/doxy-api-index.md | 5 +-
doc/api/doxy-api.conf.in | 1 +
lib/htimer/meson.build | 7 +
lib/htimer/rte_htimer.h | 65 +++
lib/htimer/rte_htimer_mgr.c | 488 ++++++++++++++++++++++
lib/htimer/rte_htimer_mgr.h | 497 +++++++++++++++++++++++
lib/htimer/rte_htimer_msg.h | 44 ++
lib/htimer/rte_htimer_msg_ring.c | 18 +
lib/htimer/rte_htimer_msg_ring.h | 49 +++
lib/htimer/rte_htw.c | 437 ++++++++++++++++++++
lib/htimer/rte_htw.h | 49 +++
lib/htimer/version.map | 17 +
lib/meson.build | 1 +
18 files changed, 3341 insertions(+), 2 deletions(-)
create mode 100644 app/test/test_htimer_mgr.c
create mode 100644 app/test/test_htimer_mgr_perf.c
create mode 100644 app/test/test_htw.c
create mode 100644 app/test/test_htw_perf.c
create mode 100644 lib/htimer/meson.build
create mode 100644 lib/htimer/rte_htimer.h
create mode 100644 lib/htimer/rte_htimer_mgr.c
create mode 100644 lib/htimer/rte_htimer_mgr.h
create mode 100644 lib/htimer/rte_htimer_msg.h
create mode 100644 lib/htimer/rte_htimer_msg_ring.c
create mode 100644 lib/htimer/rte_htimer_msg_ring.h
create mode 100644 lib/htimer/rte_htw.c
create mode 100644 lib/htimer/rte_htw.h
create mode 100644 lib/htimer/version.map
diff --git a/app/test/meson.build b/app/test/meson.build
index 03811ff692..5a48775a60 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -143,6 +143,10 @@ test_sources = files(
'test_timer_perf.c',
'test_timer_racecond.c',
'test_timer_secondary.c',
+ 'test_htw.c',
+ 'test_htw_perf.c',
+ 'test_htimer_mgr_perf.c',
+ 'test_htimer_mgr.c',
'test_ticketlock.c',
'test_trace.c',
'test_trace_register.c',
@@ -165,7 +169,6 @@ fast_tests = [
['bpf_autotest', true, true],
['bpf_convert_autotest', true, true],
['bitops_autotest', true, true],
- ['bitset_autotest', true, true],
['byteorder_autotest', true, true],
['cksum_autotest', true, true],
['cmdline_autotest', true, true],
@@ -193,6 +196,7 @@ fast_tests = [
['fib6_autotest', true, true],
['func_reentrancy_autotest', false, true],
['hash_autotest', true, true],
+ ['htimer_mgr_autotest', true, true],
['interrupt_autotest', true, true],
['ipfrag_autotest', false, true],
['lcores_autotest', true, true],
@@ -265,6 +269,8 @@ perf_test_names = [
'memcpy_perf_autotest',
'hash_perf_autotest',
'timer_perf_autotest',
+ 'htimer_mgr_perf_autotest',
+ 'htw_perf_autotest',
'reciprocal_division',
'reciprocal_division_perf',
'lpm_perf_autotest',
diff --git a/app/test/test_htimer_mgr.c b/app/test/test_htimer_mgr.c
new file mode 100644
index 0000000000..4d82a5e8b0
--- /dev/null
+++ b/app/test/test_htimer_mgr.c
@@ -0,0 +1,674 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_htimer_mgr.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_random.h>
+
+/*
+ * Worker lcore entry point: keep calling rte_htimer_mgr_manage() until
+ * the bool pointed to by 'arg' is observed to be true.
+ */
+static int
+timer_lcore(void *arg)
+{
+	bool *stop_flag = arg;
+
+	for (;;) {
+		if (__atomic_load_n(stop_flag, __ATOMIC_RELAXED))
+			break;
+
+		rte_htimer_mgr_manage();
+	}
+
+	return 0;
+}
+
+/* Timer callback: atomically increment the unsigned int passed as 'arg'. */
+static void
+count_timer_cb(struct rte_htimer *timer __rte_unused, void *arg)
+{
+	__atomic_fetch_add((unsigned int *)arg, 1, __ATOMIC_RELAXED);
+}
+
+/*
+ * Async completion callback: atomically increment the unsigned int
+ * passed as 'cb_arg', but only for operations reported as ADDED.
+ */
+static void
+count_async_cb(struct rte_htimer *timer __rte_unused, int result,
+	       void *cb_arg)
+{
+	unsigned int *added = cb_arg;
+
+	if (result != RTE_HTIMER_MGR_ASYNC_RESULT_ADDED)
+		return;
+
+	__atomic_fetch_add(added, 1, __ATOMIC_RELAXED);
+}
+
+/* Convert a duration in seconds to a number of TSC cycles. */
+static uint64_t s_to_tsc(double s)
+{
+	double cycles = s * rte_get_tsc_hz();
+
+	return cycles;
+}
+
+#define ASYNC_ADD_TEST_EXPIRATION_TIME 0.25 /* s */
+#define ASYNC_TEST_TICK s_to_tsc(1e-6)
+
+/*
+ * Launch timer_lcore() on every worker lcore, then asynchronously add
+ * 'num_timers_per_lcore' timers to each worker from the calling lcore.
+ * The function then spins in rte_htimer_mgr_manage() until both the
+ * add-completion counter and the timeout counter equal the total number
+ * of timers, stops the workers and tears the manager down.
+ *
+ * Always returns TEST_SUCCESS; a lost completion or timeout shows up as
+ * a hang in the wait loop rather than as a failure code.
+ */
+static int
+test_htimer_mgr_async_add(unsigned int num_timers_per_lcore)
+{
+ struct rte_htimer *timers;
+ unsigned int timer_idx;
+ unsigned int lcore_id;
+ bool stop = false;
+ unsigned int timeout_count = 0;
+ unsigned int async_count = 0;
+ unsigned int num_workers = 0;
+ uint64_t expiration_time;
+ unsigned int num_total_timers;
+
+ /* NOTE(review): the return value of the init call is not checked. */
+ rte_htimer_mgr_init(ASYNC_TEST_TICK);
+
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {
+ if (rte_eal_remote_launch(timer_lcore, &stop, lcore_id) != 0)
+ rte_panic("Unable to launch timer lcore\n");
+ num_workers++;
+ }
+
+ num_total_timers = num_workers * num_timers_per_lcore;
+
+ timers = malloc(num_total_timers * sizeof(struct rte_htimer));
+ timer_idx = 0;
+
+ if (timers == NULL)
+ rte_panic("Unable to allocate heap memory\n");
+
+ /* 0.25 s expressed in TSC cycles; passed with flags == 0 below */
+ expiration_time = rte_get_tsc_hz() * ASYNC_ADD_TEST_EXPIRATION_TIME;
+
+ RTE_LCORE_FOREACH_WORKER(lcore_id) {
+ unsigned int i;
+
+ for (i = 0; i < num_timers_per_lcore; i++) {
+ struct rte_htimer *timer = &timers[timer_idx++];
+
+ /* retry until the request is accepted */
+ for (;;) {
+ int rc;
+
+ rc = rte_htimer_mgr_async_add(timer, lcore_id,
+ expiration_time,
+ 0,
+ count_timer_cb,
+ &timeout_count, 0,
+ count_async_cb,
+ &async_count);
+ /*
+ * NOTE(review): -EBUSY presumably means the request
+ * could not be queued right now; processing pending
+ * messages before retrying is assumed to free space.
+ */
+ if (unlikely(rc == -EBUSY))
+ rte_htimer_mgr_process();
+ else
+ break;
+ }
+ }
+ }
+
+ /* wait until every add has been acknowledged and every timer fired */
+ while (__atomic_load_n(&async_count, __ATOMIC_RELAXED) !=
+ num_total_timers ||
+ __atomic_load_n(&timeout_count, __ATOMIC_RELAXED) !=
+ num_total_timers)
+ rte_htimer_mgr_manage();
+
+ __atomic_store_n(&stop, true, __ATOMIC_RELAXED);
+
+ rte_eal_mp_wait_lcore();
+
+ rte_htimer_mgr_deinit();
+
+ free(timers);
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Per-timer bookkeeping for the async add/cancel test. Each flag records
+ * that the corresponding callback has been invoked for the timer.
+ */
+struct async_recorder_state {
+ bool timer_cb_run; /* set by log_timer_expiry_cb() */
+ bool async_add_cb_run; /* set by record_async_add_cb() */
+ bool async_cancel_cb_run; /* set by record_async_cancel_cb() */
+ bool failed; /* set when a callback observes an inconsistency */
+};
+
+/*
+ * Async add completion callback for the add/cancel test.
+ *
+ * Marks the recorder state (passed as 'cb_arg') as failed if the
+ * callback is invoked more than once for the same timer, or if the
+ * operation did not complete with RTE_HTIMER_MGR_ASYNC_RESULT_ADDED.
+ * The two failure causes are reported with distinct messages. Once a
+ * state is marked failed, further callbacks leave it untouched.
+ */
+static void
+record_async_add_cb(struct rte_htimer *timer __rte_unused,
+		    int result, void *cb_arg)
+{
+	struct async_recorder_state *state = cb_arg;
+
+	if (state->failed)
+		return;
+
+	if (state->async_add_cb_run) {
+		puts("async add run already");
+		state->failed = true;
+	} else if (result != RTE_HTIMER_MGR_ASYNC_RESULT_ADDED) {
+		printf("async add completed with unexpected result %d\n",
+		       result);
+		state->failed = true;
+	}
+
+	state->async_add_cb_run = true;
+}
+
+/*
+ * Async cancel completion callback for the add/cancel test.
+ *
+ * Fails the recorder state (passed as 'cb_arg') when the callback runs
+ * twice for one timer, when EXPIRED is reported although the timer
+ * callback has not run, when CANCELED is reported although it has, or
+ * when ALREADY_CANCELED is reported. Any other result code is accepted
+ * as-is.
+ */
+static void
+record_async_cancel_cb(struct rte_htimer *timer __rte_unused,
+		       int result, void *cb_arg)
+{
+	struct async_recorder_state *state = cb_arg;
+	bool inconsistent = false;
+
+	if (state->failed)
+		return;
+
+	if (state->async_cancel_cb_run) {
+		state->failed = true;
+		return;
+	}
+
+	if (result == RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED)
+		inconsistent = !state->timer_cb_run;
+	else if (result == RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED)
+		inconsistent = state->timer_cb_run;
+	else if (result == RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED)
+		inconsistent = true;
+
+	if (inconsistent)
+		state->failed = true;
+
+	state->async_cancel_cb_run = true;
+}
+
+/*
+ * Summarize one recorder state: -1 if it has been marked failed,
+ * 1 if its cancel completion callback has run, 0 otherwise.
+ */
+static int
+record_check_consistency(struct async_recorder_state *state)
+{
+	if (state->failed)
+		return -1;
+
+	if (state->async_cancel_cb_run)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Walk 'num_states' recorder states. Returns -1 as soon as a failed
+ * state is found; otherwise the number of states whose cancel
+ * completion callback has run.
+ */
+static int
+records_check_consistency(struct async_recorder_state *states,
+			  unsigned int num_states)
+{
+	struct async_recorder_state *state;
+	int completed = 0;
+
+	for (state = states; state != states + num_states; state++) {
+		int rc = record_check_consistency(state);
+
+		if (rc < 0)
+			return -1;
+
+		completed += rc;
+	}
+
+	return completed;
+}
+
+/* Timer callback: set the bool passed as 'arg' to record the expiry. */
+static void
+log_timer_expiry_cb(struct rte_htimer *timer __rte_unused,
+		    void *arg)
+{
+	*(bool *)arg = true;
+}
+
+
+#define ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX 10e-3 /* s */
+
+/*
+ * Asynchronously add 'num_timers_per_lcore' timers to every worker
+ * lcore, each with a random expiration time at most
+ * ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX seconds from now. Roughly
+ * half of the timers are then asynchronously canceled. The recorder
+ * callbacks check that every cancel completion is consistent with
+ * whether the timer callback has run.
+ *
+ * The expiration times are absolute TSC values (now + offset), so they
+ * are passed with RTE_HTIMER_FLAG_ABSOLUTE_TIME.
+ *
+ * Returns TEST_SUCCESS, or TEST_FAILED if an inconsistency was
+ * recorded. The workers are stopped, the manager is deinitialized and
+ * all memory is released on both paths.
+ */
+static int
+test_htimer_mgr_async_add_cancel(unsigned int num_timers_per_lcore)
+{
+	struct rte_htimer *timers;
+	struct async_recorder_state *recorder_states;
+	unsigned int timer_idx = 0;
+	unsigned int lcore_id;
+	uint64_t now;
+	unsigned int num_workers = 0;
+	bool stop = false;
+	uint64_t max_expiration_time =
+		s_to_tsc(ASYNC_ADD_CANCEL_TEST_EXPIRATION_TIME_MAX);
+	unsigned int num_total_timers;
+	int canceled = 0;
+	int result = TEST_SUCCESS;
+
+	rte_htimer_mgr_init(ASYNC_TEST_TICK);
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		if (rte_eal_remote_launch(timer_lcore, &stop, lcore_id) != 0)
+			rte_panic("Unable to launch timer lcore\n");
+		num_workers++;
+	}
+
+	num_total_timers = num_workers * num_timers_per_lcore;
+
+	timers = malloc(num_total_timers * sizeof(struct rte_htimer));
+	recorder_states =
+		malloc(num_total_timers * sizeof(struct async_recorder_state));
+
+	if (timers == NULL || recorder_states == NULL)
+		rte_panic("Unable to allocate heap memory\n");
+
+	now = rte_get_tsc_cycles();
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			struct rte_htimer *timer = &timers[timer_idx];
+			struct async_recorder_state *state =
+				&recorder_states[timer_idx];
+
+			timer_idx++;
+
+			*state = (struct async_recorder_state) {};
+
+			uint64_t expiration_time =
+				now + rte_rand_max(max_expiration_time);
+
+			/* retry until the add request is accepted */
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_add(timer, lcore_id,
+						expiration_time,
+						0,
+						log_timer_expiry_cb,
+						&state->timer_cb_run,
+						RTE_HTIMER_FLAG_ABSOLUTE_TIME,
+						record_async_add_cb,
+						state);
+
+				if (unlikely(rc == -EBUSY))
+					rte_htimer_mgr_process();
+				else
+					break;
+			}
+		}
+	}
+
+	timer_idx = 0;
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			struct rte_htimer *timer = &timers[timer_idx];
+			struct async_recorder_state *state =
+				&recorder_states[timer_idx];
+
+			timer_idx++;
+
+			/* cancel roughly half of the timers */
+			if (rte_rand_max(2) == 0)
+				continue;
+
+			/* retry until the cancel request is accepted */
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_cancel(timer,
+						record_async_cancel_cb,
+						state);
+
+				if (unlikely(rc == -EBUSY)) {
+					puts("busy");
+					rte_htimer_mgr_process();
+				} else
+					break;
+			}
+
+			canceled++;
+		}
+	}
+
+	/* wait for all cancel completions, or the first inconsistency */
+	for (;;) {
+		int cancel_completed;
+
+		cancel_completed = records_check_consistency(recorder_states,
+							     num_total_timers);
+
+		if (cancel_completed < 0) {
+			puts("Inconsistency found");
+			result = TEST_FAILED;
+			break;
+		}
+
+		if (cancel_completed == canceled)
+			break;
+
+		rte_htimer_mgr_process();
+	}
+
+	/*
+	 * Common teardown, also on failure: the worker lcores must be
+	 * stopped and joined so that later test cases can launch on them.
+	 */
+	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	rte_htimer_mgr_deinit();
+
+	free(timers);
+	free(recorder_states);
+
+	return result;
+}
+
+/*
+ * This is a test case where one thread asynchronously adds two timers,
+ * with the same expiration time; one on the local lcore and one on a
+ * remote lcore. This creates a tricky situation for the timer
+ * manager, and for the application as well, if the htimer struct is
+ * dynamically allocated.
+ */
+
+/*
+ * Heap-allocated, reference-counted timer used by the ref count test.
+ * The object is freed by test_timer_dec_ref_cnt() when the count
+ * reaches zero.
+ */
+struct test_timer {
+ uint32_t ref_cnt; /* updated with atomic builtins only */
+ uint64_t expiration_time; /* in TSC, not tick */
+ uint32_t *timeout_count; /* incremented by test_timer_cb() */
+ bool *failure_occured; /* set by test_timer_cb() on early expiry */
+ struct rte_htimer htimer; /* container_of() anchor for callbacks */
+};
+
+
+/*
+ * Allocate a test_timer holding a single reference. The embedded htimer
+ * is left uninitialized; it is set up when the timer is added.
+ * Panics if memory cannot be allocated.
+ */
+static struct test_timer *
+test_timer_create(uint64_t expiration_time, uint32_t *timeout_count,
+		  bool *failure_occured)
+{
+	struct test_timer *timer = malloc(sizeof(*timer));
+
+	if (timer == NULL)
+		rte_panic("Unable to allocate timer memory\n");
+
+	timer->failure_occured = failure_occured;
+	timer->timeout_count = timeout_count;
+	timer->expiration_time = expiration_time;
+	timer->ref_cnt = 1;
+
+	return timer;
+}
+
+/* Take one additional reference on 'timer'. */
+static void
+test_timer_inc_ref_cnt(struct test_timer *timer)
+{
+	uint32_t *ref_cnt = &timer->ref_cnt;
+
+	(void)__atomic_fetch_add(ref_cnt, 1, __ATOMIC_RELEASE);
+}
+
+/*
+ * Drop one reference on 'timer' (NULL is accepted and ignored) and free
+ * the object when the last reference is gone.
+ *
+ * The decrement uses acquire-release ordering: release so that this
+ * thread's earlier accesses to the object are published, and acquire so
+ * that the thread which ends up freeing it has observed all accesses
+ * made by the other reference holders before they dropped theirs.
+ */
+static void
+test_timer_dec_ref_cnt(struct test_timer *timer)
+{
+	uint32_t cnt;
+
+	if (timer == NULL)
+		return;
+
+	cnt = __atomic_sub_fetch(&timer->ref_cnt, 1, __ATOMIC_ACQ_REL);
+
+	if (cnt == 0)
+		free(timer);
+}
+
+/*
+ * Expiry callback for a test_timer: flag a failure if the current TSC
+ * is below the stored expiration time, count the timeout and drop one
+ * reference on the timer.
+ *
+ * NOTE(review): the creator in this file stores a duration rather than
+ * an absolute TSC value in expiration_time, so this check looks like it
+ * can never trigger - confirm the intended unit.
+ */
+static void
+test_timer_cb(struct rte_htimer *timer, void *arg __rte_unused)
+{
+	struct test_timer *owner =
+		container_of(timer, struct test_timer, htimer);
+
+	if (rte_get_tsc_cycles() < owner->expiration_time)
+		*owner->failure_occured = true;
+
+	__atomic_fetch_add(owner->timeout_count, 1, __ATOMIC_RELAXED);
+
+	test_timer_dec_ref_cnt(owner);
+}
+
+/*
+ * Worker lcore entry point for the ref count test: run the timer
+ * manager until the bool pointed to by 'arg' becomes true.
+ */
+static int
+worker_lcore(void *arg)
+{
+	bool *stop = arg;
+	bool done = __atomic_load_n(stop, __ATOMIC_RELAXED);
+
+	while (!done) {
+		rte_htimer_mgr_manage();
+		done = __atomic_load_n(stop, __ATOMIC_RELAXED);
+	}
+
+	return 0;
+}
+
+/*
+ * Heap-allocated helper timer which, when it fires, optionally issues
+ * an asynchronous cancel of 'target_timer' (see cancel_timer_cb()).
+ */
+struct cancel_timer {
+ bool cancel; /* whether a cancel is attempted at all */
+ struct rte_htimer *target_timer; /* timer to cancel */
+ uint32_t *cancel_count; /* incremented on a CANCELED result */
+ uint32_t *expired_count; /* incremented on an EXPIRED result */
+ bool *failure_occured; /* set on an unexpected completion */
+ struct rte_htimer htimer; /* container_of() anchor for the callback */
+};
+
+/*
+ * Allocate a cancel_timer and fill in everything but the embedded
+ * htimer. Panics if memory cannot be allocated.
+ */
+static struct cancel_timer *
+cancel_timer_create(bool cancel, struct rte_htimer *target_timer,
+		    uint32_t *cancel_count, uint32_t *expired_count,
+		    bool *failure_occured)
+{
+	struct cancel_timer *timer = malloc(sizeof(*timer));
+
+	if (timer == NULL)
+		rte_panic("Unable to allocate timer memory\n");
+
+	timer->failure_occured = failure_occured;
+	timer->expired_count = expired_count;
+	timer->cancel_count = cancel_count;
+	timer->target_timer = target_timer;
+	timer->cancel = cancel;
+
+	return timer;
+}
+
+/*
+ * Completion callback for the async cancel issued by cancel_timer_cb().
+ * 'timer' is the htimer embedded in the targeted test_timer and
+ * 'cb_arg' is the cancel_timer which requested the cancel; the latter
+ * is freed here.
+ */
+static void
+async_cancel_cb(struct rte_htimer *timer, int result, void *cb_arg)
+{
+ struct test_timer *test_timer =
+ container_of(timer, struct test_timer, htimer);
+ struct cancel_timer *cancel_timer = cb_arg;
+ bool *failure_occured = cancel_timer->failure_occured;
+
+ /* the completion must belong to a cancel this object asked for */
+ if (!cancel_timer->cancel || cancel_timer->target_timer != timer)
+ *failure_occured = true;
+
+ if (result == RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED) {
+ uint32_t *cancel_count = cancel_timer->cancel_count;
+
+ /*
+ * The timer callback will not run for a canceled timer, so
+ * the reference it would have dropped is dropped here.
+ */
+ /* decrease target lcore's ref count */
+ test_timer_dec_ref_cnt(test_timer);
+ (*cancel_count)++;
+ } else if (result == RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED) {
+ uint32_t *expired_count = cancel_timer->expired_count;
+
+ (*expired_count)++;
+ } else
+ *failure_occured = true;
+
+ /* source lcore's ref count */
+ test_timer_dec_ref_cnt(test_timer);
+
+ free(cancel_timer);
+}
+
+/*
+ * Expiry callback of a cancel_timer. If the object is not configured to
+ * cancel anything it is simply freed. Otherwise an asynchronous cancel
+ * of the target timer is requested; on -EBUSY this timer is re-armed
+ * with a zero timeout so that the request is attempted again.
+ */
+static void
+cancel_timer_cb(struct rte_htimer *timer, void *arg __rte_unused)
+{
+	struct cancel_timer *self =
+		container_of(timer, struct cancel_timer, htimer);
+	int rc;
+
+	if (!self->cancel) {
+		free(self);
+		return;
+	}
+
+	rc = rte_htimer_mgr_async_cancel(self->target_timer,
+					 async_cancel_cb, self);
+
+	if (rc == -EBUSY)
+		rte_htimer_mgr_add(timer, 0, 0, cancel_timer_cb,
+				   NULL, 0);
+}
+
+#define REF_CNT_TEST_TICK s_to_tsc(10e-9)
+#define REF_CNT_AVG_EXPIRATION_TIME (50e-6)
+#define REF_CNT_MAX_EXPIRATION_TIME (2 * REF_CNT_AVG_EXPIRATION_TIME)
+#define REF_CNT_CANCEL_FUZZ(expiration_time) \
+ ((uint64_t)((expiration_time) * (rte_drand()/10 + 0.95)))
+
+/*
+ * Reference counting stress test.
+ *
+ * For every worker lcore, 'num_timers_per_lcore' heap-allocated
+ * test_timers are asynchronously added to that worker. For each of
+ * them a local cancel_timer is armed at roughly the same time
+ * (expiration time fuzzed by REF_CNT_CANCEL_FUZZ); about half of those
+ * attempt an asynchronous cancel of their target when they fire.
+ *
+ * After the deadline the workers are stopped and the counters are
+ * checked: no callback flagged a failure, every cancel attempt
+ * completed as either canceled or expired, and every timer that was
+ * not canceled has timed out.
+ *
+ * Returns TEST_SUCCESS or TEST_FAILED. The manager is deinitialized
+ * and the bookkeeping array is freed on both paths.
+ *
+ * NOTE(review): for timers without a cancel attempt, the reference
+ * taken at creation is never dropped, so those test_timer objects are
+ * not freed by this test.
+ */
+static int
+test_htimer_mgr_ref_cnt_timers(unsigned int num_timers_per_lcore)
+{
+	unsigned int lcore_id;
+	bool stop = false;
+	unsigned int num_workers = 0;
+	struct test_timer **timers;
+	unsigned int num_timers;
+	uint32_t timeout_count = 0;
+	uint32_t cancel_count = 0;
+	uint32_t expired_count = 0;
+	bool failure_occured = false;
+	unsigned int timer_idx;
+	unsigned int expected_cancel_attempts;
+	uint64_t deadline;
+	uint64_t now;
+	int result;
+
+	rte_htimer_mgr_init(REF_CNT_TEST_TICK);
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		if (rte_eal_remote_launch(worker_lcore, &stop, lcore_id) != 0)
+			rte_panic("Unable to launch timer lcore\n");
+		num_workers++;
+	}
+
+	/* give the workers a chance to get going */
+	rte_delay_us_block(10*1000);
+
+	num_timers = num_timers_per_lcore * num_workers;
+
+	timers = malloc(sizeof(struct test_timer *) * num_timers);
+
+	/* malloc(0) may legitimately return NULL when there are no workers */
+	if (timers == NULL && num_timers > 0)
+		rte_panic("Unable to allocate memory\n");
+
+	timer_idx = 0;
+	expected_cancel_attempts = 0;
+
+	RTE_LCORE_FOREACH_WORKER(lcore_id) {
+		unsigned int i;
+
+		for (i = 0; i < num_timers_per_lcore; i++) {
+			uint64_t expiration_time;
+			struct test_timer *timer;
+			struct rte_htimer *htimer;
+			bool cancel;
+			struct cancel_timer *cancel_timer;
+			uint64_t cancel_expiration_time;
+
+			expiration_time =
+				s_to_tsc(REF_CNT_MAX_EXPIRATION_TIME *
+					 rte_drand());
+
+			timer = test_timer_create(expiration_time,
+						  &timeout_count,
+						  &failure_occured);
+			htimer = &timer->htimer;
+
+			timers[timer_idx++] = timer;
+
+			/* for the target lcore's usage of this timer */
+			test_timer_inc_ref_cnt(timer);
+
+			/* retry until the add request is accepted */
+			for (;;) {
+				int rc;
+
+				rc = rte_htimer_mgr_async_add(htimer, lcore_id,
+							      expiration_time,
+							      0, test_timer_cb,
+							      NULL, 0, NULL,
+							      NULL);
+				if (unlikely(rc == -EBUSY))
+					rte_htimer_mgr_process();
+				else
+					break;
+			}
+
+			cancel = rte_rand_max(2);
+
+			cancel_timer =
+				cancel_timer_create(cancel, &timer->htimer,
+						    &cancel_count,
+						    &expired_count,
+						    &failure_occured);
+
+			cancel_expiration_time =
+				REF_CNT_CANCEL_FUZZ(expiration_time);
+
+			rte_htimer_mgr_add(&cancel_timer->htimer,
+					   cancel_expiration_time, 0,
+					   cancel_timer_cb, NULL, 0);
+
+			if (cancel)
+				expected_cancel_attempts++;
+		}
+	}
+
+	/*
+	 * REF_CNT_MAX_EXPIRATION_TIME is expressed in seconds and must be
+	 * converted to TSC cycles before being added to a TSC timestamp.
+	 */
+	deadline = rte_get_tsc_cycles() +
+		s_to_tsc(REF_CNT_MAX_EXPIRATION_TIME) + s_to_tsc(0.25);
+
+	do {
+		now = rte_get_tsc_cycles();
+
+		rte_htimer_mgr_manage_time(now);
+
+	} while (now < deadline);
+
+	__atomic_store_n(&stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	if (failure_occured)
+		result = TEST_FAILED;
+	else if ((cancel_count + expired_count) != expected_cancel_attempts)
+		result = TEST_FAILED;
+	else if (timeout_count != (num_timers - cancel_count))
+		result = TEST_FAILED;
+	else
+		result = TEST_SUCCESS;
+
+	/* tear down regardless of the verdict so later tests start clean */
+	rte_htimer_mgr_deinit();
+
+	/* only the pointer array is owned here; see the ref count note */
+	free(timers);
+
+	return result;
+}
+
+/*
+ * Test suite entry point: run the htimer manager test cases in order
+ * and stop at the first one which does not return TEST_SUCCESS,
+ * returning that test case's result.
+ */
+static int
+test_htimer_mgr(void)
+{
+	int rc;
+
+	rc = test_htimer_mgr_async_add(1);
+
+	if (rc == TEST_SUCCESS)
+		rc = test_htimer_mgr_async_add(100000);
+
+	if (rc == TEST_SUCCESS)
+		rc = test_htimer_mgr_async_add_cancel(100);
+
+	if (rc == TEST_SUCCESS)
+		rc = test_htimer_mgr_ref_cnt_timers(10);
+
+	if (rc == TEST_SUCCESS)
+		rc = test_htimer_mgr_ref_cnt_timers(10000);
+
+	return rc;
+}
+
+REGISTER_TEST_COMMAND(htimer_mgr_autotest, test_htimer_mgr);
diff --git a/app/test/test_htimer_mgr_perf.c b/app/test/test_htimer_mgr_perf.c
new file mode 100644
index 0000000000..179b0ba6e1
--- /dev/null
+++ b/app/test/test_htimer_mgr_perf.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_htimer_mgr.h>
+#include <rte_launch.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+
+/*
+ * Timer callback which does nothing. The parameters are named and
+ * marked unused, since unnamed parameters in a function definition are
+ * not valid C prior to C23.
+ */
+static void
+nop_cb(struct rte_htimer *timer __rte_unused, void *arg __rte_unused)
+{
+}
+
+/*
+ * Add 'num' timers from 'timers' to the calling lcore's timer manager,
+ * with random absolute expiration times in the range
+ * [timeout_start + 1, timeout_start + max_timeout - 1].
+ *
+ * The expiration times are generated up front so that the cost of the
+ * random number generator is not included in the measurement. They are
+ * kept on the heap, since 'num' is caller-controlled and an array of
+ * that size may not fit on the stack.
+ *
+ * Returns the number of TSC cycles spent adding the timers, including
+ * the rte_htimer_mgr_process() call that follows the adds.
+ */
+static uint64_t
+add_rand_timers(struct rte_htimer *timers, uint64_t num,
+		uint64_t timeout_start, uint64_t max_timeout)
+{
+	uint64_t i;
+	uint64_t *expiration_times;
+	uint64_t start_ts;
+	uint64_t end_ts;
+
+	expiration_times = malloc(num * sizeof(*expiration_times));
+
+	/* malloc(0) may legitimately return NULL */
+	if (expiration_times == NULL && num > 0)
+		rte_panic("Unable to allocate memory\n");
+
+	for (i = 0; i < num; i++)
+		expiration_times[i] =
+			1 + timeout_start + rte_rand_max(max_timeout - 1);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (i = 0; i < num; i++)
+		rte_htimer_mgr_add(&timers[i], expiration_times[i], 0, nop_cb,
+				   NULL, RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+
+	/* make sure the timers are actually scheduled in the wheel */
+	rte_htimer_mgr_process();
+
+	end_ts = rte_get_tsc_cycles();
+
+	free(expiration_times);
+
+	return end_ts - start_ts;
+}
+
+#define TIME_STEP 16
+
+/*
+ * Measure the cost of adding 'num_timers' timers with random timeouts
+ * spread over 'timespan' time units, and of then advancing time across
+ * that span in TIME_STEP increments. The configuration and the results
+ * are printed under the heading 'scenario_name'.
+ */
+static void
+test_add_manage_perf(const char *scenario_name, uint64_t num_timers,
+ uint64_t timespan)
+{
+ uint64_t manage_calls;
+ struct rte_htimer *timers;
+ uint64_t start;
+ uint64_t now;
+ uint64_t start_ts;
+ uint64_t end_ts;
+ uint64_t add_latency;
+ uint64_t manage_latency;
+
+ rte_htimer_mgr_init(1);
+
+ /* NOTE(review): zero if timespan < TIME_STEP; used as a divisor below */
+ manage_calls = timespan / TIME_STEP;
+
+ printf("Scenario: %s\n", scenario_name);
+ printf(" Configuration:\n");
+ printf(" Timers: %"PRIu64"\n", num_timers);
+ printf(" Max timeout: %"PRIu64" ticks\n", timespan);
+ printf(" Average timeouts/manage call: %.3f\n",
+ num_timers / (double)manage_calls);
+ printf(" Time advance per manage call: %d\n", TIME_STEP);
+
+ printf(" Results:\n");
+
+ timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers,
+ 0);
+
+ if (timers == NULL)
+ rte_panic("Unable to allocate memory\n");
+
+ /* random, non-zero starting point for the simulated time */
+ start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+ /* bring the manager's notion of time to just before 'start' */
+ rte_htimer_mgr_manage_time(start - 1);
+
+ add_latency = add_rand_timers(timers, num_timers, start, timespan);
+
+ start_ts = rte_get_tsc_cycles();
+
+ /* timed section: step the simulated time across the whole span */
+ for (now = start; now < (start + timespan); now += TIME_STEP)
+ rte_htimer_mgr_manage_time(now);
+
+ end_ts = rte_get_tsc_cycles();
+
+ manage_latency = end_ts - start_ts;
+
+ printf(" %.0f TSC cycles / add op\n",
+ (double)add_latency / num_timers);
+ printf(" %.0f TSC cycles / manage call\n",
+ (double)manage_latency / manage_calls);
+ printf(" %.1f TSC cycles / tick\n",
+ (double)manage_latency / timespan);
+
+ rte_htimer_mgr_deinit();
+
+ rte_free(timers);
+}
+
+#define ITERATIONS 500
+
+/*
+ * Measure the cost of canceling scheduled timers. In each of
+ * ITERATIONS rounds, 'num_timers' timers with random timeouts within
+ * 'timespan' are added and scheduled, and then all of them are
+ * canceled while the time taken is accumulated.
+ *
+ * Returns TEST_SUCCESS, or TEST_FAILED if a cancel operation reports
+ * an error. The manager is deinitialized and the timer memory released
+ * on both paths.
+ */
+static int
+test_del_perf(uint64_t num_timers, uint64_t timespan)
+{
+	struct rte_htimer *timers;
+	uint64_t start;
+	uint64_t i, j;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	uint64_t latency = 0;
+	int result = TEST_FAILED;
+
+	rte_htimer_mgr_init(1);
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers,
+			    0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	for (i = 0; i < ITERATIONS; i++) {
+		rte_htimer_mgr_manage_time(start - 1);
+
+		add_rand_timers(timers, num_timers, start, timespan);
+
+		/* A manage (or process) call is required to get all
+		 * timers scheduled, which may in turn make them a
+		 * little more expensive to remove.
+		 */
+		rte_htimer_mgr_manage_time(start);
+
+		start_ts = rte_get_tsc_cycles();
+
+		for (j = 0; j < num_timers; j++)
+			if (rte_htimer_mgr_cancel(&timers[j]) < 0)
+				goto out;
+
+		end_ts = rte_get_tsc_cycles();
+
+		latency += (end_ts - start_ts);
+
+		/* move past the previous round's time window */
+		start += (timespan + 1);
+	}
+
+	printf("Timer delete: %.0f TSC cycles / call\n",
+	       (double)latency / (double)ITERATIONS / (double)num_timers);
+
+	result = TEST_SUCCESS;
+
+out:
+	rte_htimer_mgr_deinit();
+
+	rte_free(timers);
+
+	return result;
+}
+
+/*
+ * Entry point of the lcore receiving the async add requests: run the
+ * timer manager until the bool pointed to by 'arg' becomes true.
+ */
+static int
+target_lcore(void *arg)
+{
+	bool *stop_flag = arg;
+
+	for (;;) {
+		if (__atomic_load_n(stop_flag, __ATOMIC_RELAXED))
+			break;
+
+		rte_htimer_mgr_manage();
+	}
+
+	return 0;
+}
+
+/*
+ * Async completion callback: increment the unsigned int passed as
+ * 'cb_arg' for every operation reported as ADDED. The increment is a
+ * plain (non-atomic) one.
+ */
+static void
+count_async_cb(struct rte_htimer *timer __rte_unused, int result,
+	       void *cb_arg)
+{
+	unsigned int *added = cb_arg;
+
+	if (result != RTE_HTIMER_MGR_ASYNC_RESULT_ADDED)
+		return;
+
+	*added += 1;
+}
+
+/* Convert a duration in seconds to a number of TSC cycles. */
+static uint64_t
+s_to_tsc(double s)
+{
+	double cycles = s * rte_get_tsc_hz();
+
+	return cycles;
+}
+
+/* Convert a number of TSC cycles to microseconds (truncated). */
+static uint64_t
+tsc_to_us(uint64_t tsc)
+{
+	double seconds = (double)tsc / (double)rte_get_tsc_hz();
+
+	return seconds * 1e6;
+}
+
+#define ASYNC_ADD_TEST_TICK s_to_tsc(500e-9)
+/*
+ * The number of test timers must be kept less than size of the
+ * htimer-internal message ring for this test case to work.
+ */
+#define ASYNC_ADD_TEST_NUM_TIMERS 1000
+#define ASYNC_ADD_TEST_MIN_TIMEOUT (ASYNC_ADD_TEST_NUM_TIMERS * s_to_tsc(1e-6))
+#define ASYNC_ADD_TEST_MAX_TIMEOUT (2 * ASYNC_ADD_TEST_MIN_TIMEOUT)
+
+/*
+ * Measure the cost of asynchronously adding timers to another lcore.
+ *
+ * A worker lcore runs target_lcore() while the calling lcore, in each
+ * of ITERATIONS rounds, sends ASYNC_ADD_TEST_NUM_TIMERS add requests
+ * and then processes the responses. The time spent issuing the
+ * requests and the time spent processing the responses are accumulated
+ * separately and reported per operation.
+ *
+ * A request which cannot be sent results in a panic, since the missing
+ * response would otherwise make the response loop spin forever.
+ */
+static void
+test_async_add_perf(void)
+{
+	uint64_t max_timeout = ASYNC_ADD_TEST_MAX_TIMEOUT;
+	uint64_t min_timeout = ASYNC_ADD_TEST_MIN_TIMEOUT;
+	unsigned int num_timers = ASYNC_ADD_TEST_NUM_TIMERS;
+	struct rte_htimer *timers;
+	bool *stop;
+	unsigned int lcore_id = rte_lcore_id();
+	unsigned int target_lcore_id =
+		rte_get_next_lcore(lcore_id, true, true);
+	uint64_t now;
+	uint64_t request_latency = 0;
+	uint64_t response_latency = 0;
+	unsigned int i;
+
+	rte_htimer_mgr_init(ASYNC_ADD_TEST_TICK);
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers,
+			    RTE_CACHE_LINE_SIZE);
+	stop = rte_malloc(NULL, sizeof(bool), RTE_CACHE_LINE_SIZE);
+
+	if (timers == NULL || stop == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	*stop = false;
+
+	if (rte_eal_remote_launch(target_lcore, stop, target_lcore_id) != 0)
+		rte_panic("Unable to launch worker lcore\n");
+
+	/* wait for launch to complete */
+	rte_delay_us_block(100);
+
+	for (i = 0; i < ITERATIONS; i++) {
+		uint64_t expiration_times[num_timers];
+		unsigned int j;
+		uint64_t start_ts;
+		uint64_t end_ts;
+		unsigned int count = 0;
+
+		now = rte_get_tsc_cycles();
+
+		/* generate the timeouts outside of the timed section */
+		for (j = 0; j < num_timers; j++)
+			expiration_times[j] = now + min_timeout +
+				rte_rand_max(max_timeout - min_timeout);
+
+		start_ts = rte_get_tsc_cycles();
+
+		for (j = 0; j < num_timers; j++) {
+			int rc;
+
+			rc = rte_htimer_mgr_async_add(&timers[j],
+						target_lcore_id,
+						expiration_times[j], 0,
+						nop_cb, NULL,
+						RTE_HTIMER_FLAG_ABSOLUTE_TIME,
+						count_async_cb, &count);
+			if (rc < 0)
+				rte_panic("Unable to send async add "
+					  "request\n");
+		}
+
+		end_ts = rte_get_tsc_cycles();
+
+		request_latency += (end_ts - start_ts);
+
+		/* wait long enough for the target lcore to have answered */
+		rte_delay_us_block(1 * num_timers);
+
+		start_ts = rte_get_tsc_cycles();
+
+		while (count != num_timers)
+			rte_htimer_mgr_process();
+
+		end_ts = rte_get_tsc_cycles();
+
+		response_latency += (end_ts - start_ts);
+
+		/* wait until all timeouts have fired */
+		rte_delay_us_block(tsc_to_us(max_timeout));
+	}
+
+	__atomic_store_n(stop, true, __ATOMIC_RELAXED);
+
+	rte_eal_mp_wait_lcore();
+
+	rte_free(timers);
+	/* the worker has been joined, so the flag is no longer in use */
+	rte_free(stop);
+
+	rte_htimer_mgr_deinit();
+
+	printf("Timer async add:\n");
+	printf(" Configuration:\n");
+	printf(" Timers: %d\n", ASYNC_ADD_TEST_NUM_TIMERS);
+	printf(" Results:\n");
+	printf(" Source lcore cost: %.0f TSC cycles / add request\n",
+	       (double)request_latency / (double)ITERATIONS / num_timers);
+	printf(" %.0f TSC cycles / async add "
+	       "response\n",
+	       (double)response_latency / (double)ITERATIONS / num_timers);
+}
+
+/*
+ * Performance suite entry point: after a short initial delay, run the
+ * add/manage scenarios, the delete benchmark and the async add
+ * benchmark. Returns TEST_FAILED only if the delete benchmark fails.
+ */
+static int
+test_htimer_mgr_perf(void)
+{
+	int del_rc;
+
+	rte_delay_us_block(10000);
+
+	test_add_manage_perf("Sparse", 100000, 10000000);
+	test_add_manage_perf("Dense", 100000, 200000);
+	test_add_manage_perf("Idle", 10, 100000);
+	test_add_manage_perf("Small", 1000, 100000);
+
+	del_rc = test_del_perf(100000, 100000);
+
+	if (del_rc != TEST_SUCCESS)
+		return TEST_FAILED;
+
+	test_async_add_perf();
+
+	return TEST_SUCCESS;
+}
+
+REGISTER_TEST_COMMAND(htimer_mgr_perf_autotest, test_htimer_mgr_perf);
diff --git a/app/test/test_htw.c b/app/test/test_htw.c
new file mode 100644
index 0000000000..3cddfaed7f
--- /dev/null
+++ b/app/test/test_htw.c
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_htw.h>
+#include <rte_random.h>
+
+/* Collects the timers for which recorder_cb() has been invoked. */
+struct recorder {
+ struct rte_htimer_list timeout_list; /* timers seen by recorder_cb() */
+ uint64_t num_timeouts; /* number of recorder_cb() invocations */
+};
+
+/* Reset 'recorder' to an empty list and a zero timeout count. */
+static void
+recorder_init(struct recorder *recorder)
+{
+	LIST_INIT(&recorder->timeout_list);
+	recorder->num_timeouts = 0;
+}
+
+/*
+ * Timer callback: put the expired timer at the head of the recorder's
+ * list and count the timeout. 'arg' is the struct recorder.
+ */
+static void
+recorder_cb(struct rte_htimer *timer, void *arg)
+{
+	struct recorder *rec = arg;
+
+	LIST_INSERT_HEAD(&rec->timeout_list, timer, entry);
+
+	rec->num_timeouts += 1;
+}
+
+/*
+ * Check that every recorded timer has an expiration time within
+ * [min_expiry, max_expiry]. Returns TEST_SUCCESS if so (including for
+ * an empty list), TEST_FAILED otherwise.
+ */
+static int
+recorder_verify(struct recorder *recorder, uint64_t min_expiry,
+		uint64_t max_expiry)
+{
+	struct rte_htimer *timer;
+
+	LIST_FOREACH(timer, &recorder->timeout_list, entry) {
+		uint64_t expiry = timer->expiration_time;
+
+		if (expiry > max_expiry || expiry < min_expiry)
+			return TEST_FAILED;
+	}
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Add 'num' timers from 'timers' to 'htw', each expiring at a random
+ * point in [timeout_start, timeout_start + max_timeout). For every
+ * timer it is randomly decided whether the time is given in absolute
+ * form, or relative to the wheel's current time (clamped to zero if
+ * the chosen time is already in the past).
+ */
+static void
+add_rand_timers(struct rte_htw *htw, struct rte_htimer *timers,
+		uint64_t num, uint64_t timeout_start, uint64_t max_timeout,
+		rte_htimer_cb_t cb, void *cb_arg)
+{
+	uint64_t idx;
+
+	for (idx = 0; idx < num; idx++) {
+		bool absolute = rte_rand() & 1;
+		uint64_t when = timeout_start + rte_rand_max(max_timeout);
+		unsigned int flags;
+
+		if (absolute) {
+			flags = RTE_HTIMER_FLAG_ABSOLUTE_TIME;
+		} else {
+			uint64_t wheel_time = rte_htw_current_time(htw);
+
+			flags = 0;
+			/* convert to a relative time, never negative */
+			when = when < wheel_time ? 0 : when - wheel_time;
+		}
+
+		rte_htw_add(htw, &timers[idx], when, 0, cb, cb_arg, flags);
+	}
+}
+
+#define ADVANCE_TIME_MAX_STEP 16
+
+/*
+ * Randomized timer wheel test.
+ *
+ * Keeps 'in_flight_timers' timers with random timeouts (up to
+ * 'max_timeout') scheduled while advancing time in random steps of
+ * less than ADVANCE_TIME_MAX_STEP for 'runtime' time units. After each
+ * step, all timers that fired must have an expiration time within the
+ * window just passed; they are then re-added. At the end, the
+ * remaining timers are flushed, and the number of timeouts must match
+ * the number of adds.
+ *
+ * Returns TEST_SUCCESS or TEST_FAILED. The timer wheel and the timer
+ * memory are released on all paths.
+ * NOTE(review): on failure the wheel is destroyed with timers still
+ * scheduled; this assumes rte_htw_destroy() tolerates that - confirm.
+ */
+static int
+test_rand_timers(uint64_t in_flight_timers, uint64_t max_timeout,
+		 uint64_t runtime)
+{
+	struct recorder recorder;
+	struct rte_htimer *timers;
+	uint64_t fired = 0;
+	uint64_t start;
+	uint64_t now;
+	struct rte_htw *htw;
+	uint64_t added;
+	int result = TEST_FAILED;
+
+	recorder_init(&recorder);
+
+	timers = malloc(sizeof(struct rte_htimer) * in_flight_timers);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate heap memory\n");
+
+	start = rte_rand_max(UINT64_MAX - max_timeout);
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		goto out_free;
+
+	added = in_flight_timers;
+	add_rand_timers(htw, timers, added, start + 1, max_timeout,
+			recorder_cb, &recorder);
+
+	for (now = start; now < (start + runtime); ) {
+		uint64_t advance;
+
+		advance = rte_rand_max(ADVANCE_TIME_MAX_STEP);
+
+		now += advance;
+
+		rte_htw_manage(htw, now);
+
+		if (recorder.num_timeouts > 0) {
+			struct rte_htimer *timer;
+
+			/* nothing may fire unless time has moved */
+			if (advance == 0)
+				goto out_destroy;
+
+			if (recorder_verify(&recorder, now - advance + 1, now)
+			    != TEST_SUCCESS)
+				goto out_destroy;
+
+			/* re-arm every timer that just fired */
+			while ((timer = LIST_FIRST(&recorder.timeout_list))
+			       != NULL) {
+				LIST_REMOVE(timer, entry);
+
+				add_rand_timers(htw, timer, 1,
+						now + 1, max_timeout,
+						recorder_cb, &recorder);
+				added++;
+				fired++;
+			}
+
+			recorder.num_timeouts = 0;
+		}
+	}
+
+	/* finish the remaining timeouts */
+
+	rte_htw_manage(htw, now + max_timeout);
+
+	if (recorder_verify(&recorder, now, now + max_timeout) != TEST_SUCCESS)
+		goto out_destroy;
+	fired += recorder.num_timeouts;
+
+	if (fired != added)
+		goto out_destroy;
+
+	result = TEST_SUCCESS;
+
+out_destroy:
+	rte_htw_destroy(htw);
+out_free:
+	free(timers);
+
+	return result;
+}
+
+/* Argument to count_timeouts_cb(). */
+struct counter_state {
+ int calls; /* number of times the callback has run */
+ struct rte_htw *htw; /* wheel to cancel on; only read if 'cancel' is set */
+ bool cancel; /* if set, the callback cancels the timer it was called for */
+};
+
+/*
+ * Timer callback: count the invocation in the counter_state passed as
+ * 'arg' and, if that state asks for it, cancel the timer from within
+ * its own callback.
+ *
+ * 'timer' is not marked __rte_unused, since it is used on the cancel
+ * path.
+ */
+static void
+count_timeouts_cb(struct rte_htimer *timer, void *arg)
+{
+	struct counter_state *state = arg;
+
+	state->calls++;
+
+	if (state->cancel)
+		rte_htw_cancel(state->htw, timer);
+}
+
+/*
+ * Verify that a single one-shot timer, scheduled 'distance' time units
+ * after 'now', fires exactly once: not before now + distance, exactly
+ * at now + distance, and never again afterwards. 'use_absolute'
+ * selects whether the expiration time is given in absolute or relative
+ * form.
+ *
+ * Returns TEST_SUCCESS or TEST_FAILED (also when the timer wheel
+ * cannot be created). The wheel is destroyed on all paths.
+ */
+static int
+test_single_timeout_type(uint64_t now, uint64_t distance, bool use_absolute)
+{
+	struct rte_htw *htw;
+	struct counter_state cstate = {};
+	struct rte_htimer timer;
+	uint64_t expiration_time;
+	unsigned int flags = 0;
+	int result = TEST_FAILED;
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	rte_htw_manage(htw, now);
+
+	if (use_absolute) {
+		expiration_time = now + distance;
+		flags |= RTE_HTIMER_FLAG_ABSOLUTE_TIME;
+	} else
+		expiration_time = distance;
+
+	rte_htw_add(htw, &timer, expiration_time, 0, count_timeouts_cb,
+		    &cstate, flags);
+
+	/* no time has passed since the add */
+	rte_htw_manage(htw, now);
+
+	if (cstate.calls != 0)
+		goto out;
+
+	/* one time unit short of the expiration time */
+	rte_htw_manage(htw, now + distance - 1);
+
+	if (cstate.calls != 0)
+		goto out;
+
+	/* exactly at the expiration time */
+	rte_htw_manage(htw, now + distance);
+
+	if (cstate.calls != 1)
+		goto out;
+
+	/* a repeated call for the same time must not fire again */
+	rte_htw_manage(htw, now + distance);
+
+	if (cstate.calls != 1)
+		goto out;
+
+	/* nor must a later one */
+	rte_htw_manage(htw, now + distance + 1);
+
+	if (cstate.calls != 1)
+		goto out;
+
+	result = TEST_SUCCESS;
+
+out:
+	rte_htw_destroy(htw);
+
+	return result;
+}
+
+/*
+ * Run the single timeout test with both absolute and relative
+ * expiration times. Returns the result of the first variant which does
+ * not succeed, or TEST_SUCCESS.
+ *
+ * Results are compared against TEST_SUCCESS, in the same way as the
+ * other test drivers do, rather than relying on failure codes being
+ * negative.
+ */
+static int
+test_single_timeout(uint64_t now, uint64_t distance)
+{
+	int rc;
+
+	rc = test_single_timeout_type(now, distance, true);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	rc = test_single_timeout_type(now, distance, false);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Verify a periodical timer: first expiry 'start' time units after
+ * 'now', then one expiry every 'period'. On the third expiry the
+ * callback cancels the timer, after which it must not fire again.
+ *
+ * Returns TEST_SUCCESS or TEST_FAILED (also when the timer wheel
+ * cannot be created). The wheel is destroyed on all paths.
+ * NOTE(review): on failure the wheel may be destroyed with the timer
+ * still scheduled; this assumes rte_htw_destroy() tolerates that.
+ */
+static int
+test_periodical_timer(uint64_t now, uint64_t start, uint64_t period)
+{
+	struct rte_htw *htw;
+	struct counter_state cstate;
+	struct rte_htimer timer;
+	int result = TEST_FAILED;
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	cstate = (struct counter_state) {
+		.htw = htw
+	};
+
+	rte_htw_manage(htw, now);
+
+	rte_htw_add(htw, &timer, start, period, count_timeouts_cb,
+		    &cstate, RTE_HTIMER_FLAG_PERIODICAL);
+
+	rte_htw_manage(htw, now);
+
+	if (cstate.calls != 0)
+		goto out;
+
+	rte_htw_manage(htw, now + start - 1);
+
+	if (cstate.calls != 0)
+		goto out;
+
+	/* first expiry */
+	rte_htw_manage(htw, now + start);
+
+	if (cstate.calls != 1)
+		goto out;
+
+	rte_htw_manage(htw, now + start + 1);
+
+	if (cstate.calls != 1)
+		goto out;
+
+	/* second expiry, one period later */
+	rte_htw_manage(htw, now + start + period);
+
+	if (cstate.calls != 2)
+		goto out;
+
+	/* make the callback cancel the timer on its next invocation */
+	cstate.cancel = true;
+
+	rte_htw_manage(htw, now + start + 2 * period);
+
+	if (cstate.calls != 3)
+		goto out;
+
+	/* canceled: no further expiry */
+	rte_htw_manage(htw, now + start + 3 * period);
+
+	if (cstate.calls != 3)
+		goto out;
+
+	result = TEST_SUCCESS;
+
+out:
+	rte_htw_destroy(htw);
+
+	return result;
+}
+
+#define CANCEL_ITERATIONS 1000
+#define CANCEL_NUM_TIMERS 1000
+#define CANCEL_MAX_DISTANCE 10000
+
+/*
+ * For CANCEL_ITERATIONS rounds: add CANCEL_NUM_TIMERS one-shot timers
+ * with random expiration times no more than CANCEL_MAX_DISTANCE
+ * ticks into the future, cancel one randomly chosen timer, advance
+ * time by CANCEL_MAX_DISTANCE ticks, and verify that every timer
+ * except the canceled one has fired exactly once.
+ *
+ * Returns TEST_SUCCESS or TEST_FAILED. The timer wheel is destroyed
+ * on all paths.
+ */
+static int
+test_cancel_timer(void)
+{
+	uint64_t now;
+	struct rte_htw *htw;
+	int i;
+	struct rte_htimer timers[CANCEL_NUM_TIMERS];
+	struct counter_state timeouts[CANCEL_NUM_TIMERS];
+	int rc = TEST_FAILED;
+
+	now = rte_rand_max(UINT64_MAX / 2);
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	for (i = 0; i < CANCEL_ITERATIONS; i++) {
+		int j;
+		int target;
+
+		for (j = 0; j < CANCEL_NUM_TIMERS; j++) {
+			struct rte_htimer *timer = &timers[j];
+			uint64_t expiration_time;
+
+			timeouts[j] = (struct counter_state) {};
+
+			expiration_time = now + 1 +
+				rte_rand_max(CANCEL_MAX_DISTANCE);
+
+			rte_htw_add(htw, timer, expiration_time, 0,
+				    count_timeouts_cb, &timeouts[j],
+				    RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+		}
+
+		target = rte_rand_max(CANCEL_NUM_TIMERS);
+
+		rte_htw_cancel(htw, &timers[target]);
+
+		/* move past the expiration time of all timers */
+		now += CANCEL_MAX_DISTANCE;
+
+		rte_htw_manage(htw, now);
+
+		for (j = 0; j < CANCEL_NUM_TIMERS; j++) {
+			if (j != target) {
+				if (timeouts[j].calls != 1)
+					goto out;
+			} else {
+				if (timeouts[j].calls > 0)
+					goto out;
+			}
+		}
+	}
+
+	rc = TEST_SUCCESS;
+
+out:
+	rte_htw_destroy(htw);
+
+	return rc;
+}
+
+/* Timer callback which ignores both its arguments and does nothing. */
+static void
+nop_cb(struct rte_htimer *timer __rte_unused, void *arg __rte_unused)
+{
+	/* intentionally empty */
+}
+
+#define NEXT_NUM_TIMERS 1000
+#define NEXT_MAX_DISTANCE 10000
+
+/*
+ * Verify rte_htw_next_timeout(): on an empty wheel the supplied upper
+ * bound is expected back. Then NEXT_NUM_TIMERS timers are added, each
+ * expiring no later than the previous one, and after each add the
+ * most recently added timer's expiration time is expected back,
+ * unless the supplied upper bound is lower, in which case the upper
+ * bound is expected.
+ *
+ * Returns TEST_SUCCESS or TEST_FAILED. The timer wheel is destroyed
+ * on all paths.
+ */
+static int
+test_next_timeout(void)
+{
+	uint64_t now;
+	struct rte_htw *htw;
+	int i;
+	struct rte_htimer timers[NEXT_NUM_TIMERS];
+	uint64_t last_expiration;
+	int rc = TEST_FAILED;
+
+	now = rte_rand_max(NEXT_MAX_DISTANCE);
+
+	htw = rte_htw_create();
+
+	if (htw == NULL)
+		return TEST_FAILED;
+
+	if (rte_htw_next_timeout(htw, UINT64_MAX) != UINT64_MAX)
+		goto out;
+	if (rte_htw_next_timeout(htw, now + 1) != (now + 1))
+		goto out;
+
+	rte_htw_manage(htw, now);
+
+	last_expiration = now + NEXT_MAX_DISTANCE * NEXT_NUM_TIMERS;
+
+	for (i = 0; i < NEXT_NUM_TIMERS; i++) {
+		struct rte_htimer *timer = &timers[i];
+		uint64_t expiration;
+		uint64_t upper_bound;
+
+		/* add timers, each new one closer than the last */
+
+		expiration = last_expiration - rte_rand_max(NEXT_MAX_DISTANCE);
+
+		rte_htw_add(htw, timer, expiration, 0, nop_cb, NULL,
+			    RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+
+		if (rte_htw_next_timeout(htw, UINT64_MAX) != expiration)
+			goto out;
+
+		/* an upper bound at or beyond the next expiration */
+		upper_bound = expiration + rte_rand_max(100000);
+
+		if (rte_htw_next_timeout(htw, upper_bound) != expiration)
+			goto out;
+
+		/* an upper bound at or before the next expiration */
+		upper_bound = expiration - rte_rand_max(expiration);
+
+		if (rte_htw_next_timeout(htw, upper_bound) != upper_bound)
+			goto out;
+
+		last_expiration = expiration;
+	}
+
+	rc = TEST_SUCCESS;
+
+out:
+	rte_htw_destroy(htw);
+
+	return rc;
+}
+
+/*
+ * Entry point of the htw_autotest command. The sub-tests are run in
+ * order; the first one failing ends the run with TEST_FAILED.
+ */
+static int
+test_htw(void)
+{
+	/* && short-circuits, so no sub-test runs after a failed one */
+	bool all_passed =
+		test_single_timeout(0, 10) == TEST_SUCCESS &&
+		test_single_timeout(0, 254) == TEST_SUCCESS &&
+		test_single_timeout(0, 255) == TEST_SUCCESS &&
+		test_single_timeout(255, 1) == TEST_SUCCESS &&
+		test_single_timeout(254, 2) == TEST_SUCCESS &&
+		test_periodical_timer(10000, 500, 2) == TEST_SUCCESS &&
+		test_periodical_timer(1234567, 12345, 100000) ==
+			TEST_SUCCESS &&
+		test_cancel_timer() == TEST_SUCCESS &&
+		test_rand_timers(1000, 100000, 100000000) == TEST_SUCCESS &&
+		test_rand_timers(100000, 100000, 1000000) == TEST_SUCCESS &&
+		test_next_timeout() == TEST_SUCCESS;
+
+	return all_passed ? TEST_SUCCESS : TEST_FAILED;
+}
+
+REGISTER_TEST_COMMAND(htw_autotest, test_htw);
diff --git a/app/test/test_htw_perf.c b/app/test/test_htw_perf.c
new file mode 100644
index 0000000000..65901f0874
--- /dev/null
+++ b/app/test/test_htw_perf.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include "test.h"
+
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <rte_cycles.h>
+#include <rte_htw.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+
+/* Timer callback which ignores both its arguments and does nothing. */
+static void
+nop_cb(struct rte_htimer *timer __rte_unused, void *arg __rte_unused)
+{
+	/* intentionally empty */
+}
+
+/*
+ * Add 'num' one-shot timers from the 'timers' array to 'htw', with
+ * absolute expiration times randomly chosen from 'timeout_start' and
+ * less than 'max_timeout' ticks onwards, and print the average number
+ * of TSC cycles spent per add operation.
+ *
+ * The expiration times are generated up front, to keep the random
+ * number generation out of the measured interval. They are kept on
+ * the heap, since 'num' may be too large for a stack-allocated array.
+ */
+static void
+add_rand_timers(struct rte_htw *htw, struct rte_htimer *timers,
+		uint64_t num, uint64_t timeout_start, uint64_t max_timeout)
+{
+	uint64_t i;
+	uint64_t *expiration_times;
+	uint64_t start_ts;
+	uint64_t end_ts;
+
+	expiration_times =
+		rte_malloc(NULL, sizeof(*expiration_times) * num, 0);
+
+	if (expiration_times == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	for (i = 0; i < num; i++)
+		expiration_times[i] = timeout_start + rte_rand_max(max_timeout);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (i = 0; i < num; i++) {
+		struct rte_htimer *timer = &timers[i];
+
+		rte_htw_add(htw, timer, expiration_times[i], 0, nop_cb, NULL,
+			    RTE_HTIMER_FLAG_ABSOLUTE_TIME);
+	}
+
+	/* actually install the timers */
+	rte_htw_process(htw);
+
+	end_ts = rte_get_tsc_cycles();
+
+	/* released only after the measurement has ended */
+	rte_free(expiration_times);
+
+	printf(" %.0f TSC cycles / add op\n",
+	       (double)(end_ts - start_ts) / num);
+}
+
+#define TIME_STEP 16
+
+/*
+ * Measure the cost of adding 'num_timers' timers with random
+ * expiration times spread out over 'timespan' ticks, and thereafter
+ * of advancing the timer wheel across that time span, TIME_STEP ticks
+ * per rte_htw_manage() call. The results are printed.
+ *
+ * Returns TEST_SUCCESS, or TEST_FAILED if the timer wheel could not
+ * be created. Panics in case the timer array cannot be allocated.
+ */
+static int
+test_add_manage_perf(const char *scenario_name, uint64_t num_timers,
+		     uint64_t timespan)
+{
+	uint64_t manage_calls;
+	struct rte_htimer *timers;
+	uint64_t start;
+	uint64_t now;
+	struct rte_htw *htw;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	double latency;
+
+	manage_calls = timespan / TIME_STEP;
+
+	printf("Scenario: %s\n", scenario_name);
+	printf(" Configuration:\n");
+	printf(" Timers: %"PRIu64"\n", num_timers);
+	printf(" Max timeout: %"PRIu64" ticks\n", timespan);
+	printf(" Average timeouts/manage call: %.3f\n",
+	       num_timers / (double)manage_calls);
+	printf(" Time advance per manage call: %d\n", TIME_STEP);
+
+	printf(" Results:\n");
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) *
+			    num_timers, 0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	htw = rte_htw_create();
+
+	if (htw == NULL) {
+		/* don't leak the timer array on failure */
+		rte_free(timers);
+		return TEST_FAILED;
+	}
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	/* set the wheel's time to just before the first possible timeout */
+	rte_htw_manage(htw, start - 1);
+
+	add_rand_timers(htw, timers, num_timers, start, timespan);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (now = start; now < (start + timespan); now += TIME_STEP)
+		rte_htw_manage(htw, now);
+
+	end_ts = rte_get_tsc_cycles();
+
+	latency = end_ts - start_ts;
+
+	printf(" %.0f TSC cycles / manage call\n",
+	       latency / manage_calls);
+	printf(" %.1f TSC cycles / tick\n", latency / timespan);
+
+	rte_htw_destroy(htw);
+
+	rte_free(timers);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Measure the cost of canceling timers: 'num_timers' timers with
+ * random expiration times spread out over 'timespan' ticks are added,
+ * and then canceled one by one. The average number of TSC cycles per
+ * cancel call is printed.
+ *
+ * Returns TEST_SUCCESS, or TEST_FAILED if the timer wheel could not
+ * be created. Panics in case the timer array cannot be allocated.
+ */
+static int
+test_cancel_perf(uint64_t num_timers, uint64_t timespan)
+{
+	struct rte_htimer *timers;
+	uint64_t start;
+	struct rte_htw *htw;
+	uint64_t i;
+	uint64_t start_ts;
+	uint64_t end_ts;
+	double latency;
+
+	timers = rte_malloc(NULL, sizeof(struct rte_htimer) * num_timers, 0);
+
+	if (timers == NULL)
+		rte_panic("Unable to allocate memory\n");
+
+	htw = rte_htw_create();
+
+	if (htw == NULL) {
+		/* don't leak the timer array on failure */
+		rte_free(timers);
+		return TEST_FAILED;
+	}
+
+	start = 1 + rte_rand_max(UINT64_MAX / 2);
+
+	/* set the wheel's time to just before the first possible timeout */
+	rte_htw_manage(htw, start - 1);
+
+	add_rand_timers(htw, timers, num_timers, start, timespan);
+
+	start_ts = rte_get_tsc_cycles();
+
+	for (i = 0; i < num_timers; i++)
+		rte_htw_cancel(htw, &timers[i]);
+
+	end_ts = rte_get_tsc_cycles();
+
+	latency = end_ts - start_ts;
+
+	printf("Timer delete: %.0f TSC cycles / call\n",
+	       latency / num_timers);
+
+	rte_htw_destroy(htw);
+
+	rte_free(timers);
+
+	return TEST_SUCCESS;
+}
+
+/*
+ * Entry point of the htw_perf_autotest command. After a short
+ * blocking delay, the add/manage scenarios and the cancel scenario
+ * are run in order; the first one failing ends the run with
+ * TEST_FAILED.
+ */
+static int
+test_htw_perf(void)
+{
+	bool all_passed;
+
+	rte_delay_us_block(100);
+
+	/* && short-circuits, so no scenario runs after a failed one */
+	all_passed =
+		test_add_manage_perf("Sparse", 100000, 10000000) ==
+			TEST_SUCCESS &&
+		test_add_manage_perf("Dense", 100000, 200000) ==
+			TEST_SUCCESS &&
+		test_add_manage_perf("Idle", 10, 100000) == TEST_SUCCESS &&
+		test_cancel_perf(100000, 100000) == TEST_SUCCESS;
+
+	return all_passed ? TEST_SUCCESS : TEST_FAILED;
+}
+
+REGISTER_TEST_COMMAND(htw_perf_autotest, test_htw_perf);
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 2deec7ea19..5ea1dfa262 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -67,6 +67,8 @@ The public API headers are grouped by topics:
- **timers**:
[cycles](@ref rte_cycles.h),
[timer](@ref rte_timer.h),
+ [htimer_mgr](@ref rte_htimer_mgr.h),
+ [htimer](@ref rte_htimer.h),
[alarm](@ref rte_alarm.h)
- **locks**:
@@ -163,7 +165,8 @@ The public API headers are grouped by topics:
[ring](@ref rte_ring.h),
[stack](@ref rte_stack.h),
[tailq](@ref rte_tailq.h),
- [bitmap](@ref rte_bitmap.h)
+ [bitmap](@ref rte_bitmap.h),
+ [bitset](@ref rte_bitset.h)
- **packet framework**:
* [port](@ref rte_port.h):
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index e859426099..c0cd64db34 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -45,6 +45,7 @@ INPUT = @TOPDIR@/doc/api/doxy-api-index.md \
@TOPDIR@/lib/gro \
@TOPDIR@/lib/gso \
@TOPDIR@/lib/hash \
+ @TOPDIR@/lib/htimer \
@TOPDIR@/lib/ip_frag \
@TOPDIR@/lib/ipsec \
@TOPDIR@/lib/jobstats \
diff --git a/lib/htimer/meson.build b/lib/htimer/meson.build
new file mode 100644
index 0000000000..2dd5d6a24b
--- /dev/null
+++ b/lib/htimer/meson.build
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 Ericsson AB
+
+sources = files('rte_htw.c', 'rte_htimer_msg_ring.c', 'rte_htimer_mgr.c')
+headers = files('rte_htimer_mgr.h', 'rte_htimer.h')
+
+deps += ['ring']
diff --git a/lib/htimer/rte_htimer.h b/lib/htimer/rte_htimer.h
new file mode 100644
index 0000000000..e245b30c65
--- /dev/null
+++ b/lib/htimer/rte_htimer.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_H_
+#define _RTE_HTIMER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+struct rte_htimer;
+
+typedef void (*rte_htimer_cb_t)(struct rte_htimer *, void *);
+
+/**
+ * Timer state. The memory is supplied by the application.
+ */
+struct rte_htimer {
+ /**
+ * Absolute timer expiration time (in ticks).
+ */
+ uint64_t expiration_time;
+ /**
+ * Time between expirations (in ticks). Zero for one-shot timers.
+ */
+ uint64_t period;
+ /**
+ * Id of the lcore owning this timer. May safely be read from
+ * any thread.
+ */
+ uint32_t owner_lcore_id;
+ /**
+ * The current state of the timer (one of RTE_HTIMER_STATE_*).
+ */
+ uint32_t state:4;
+ /**
+ * Flags set on this timer (RTE_HTIMER_FLAG_*).
+ */
+ uint32_t flags:28;
+ /**
+ * User-specified callback function pointer.
+ */
+ rte_htimer_cb_t cb;
+ /**
+ * Argument for user callback.
+ */
+ void *cb_arg;
+ /**
+ * Pointers used to add timer to various internal lists.
+ */
+ LIST_ENTRY(rte_htimer) entry;
+};
+
+/**
+ * The expiration time is given as an absolute time, as opposed to a
+ * time relative to the current time.
+ */
+#define RTE_HTIMER_FLAG_ABSOLUTE_TIME (UINT32_C(1) << 0)
+/**
+ * The timer is periodical, as opposed to one-shot.
+ */
+#define RTE_HTIMER_FLAG_PERIODICAL (UINT32_C(1) << 1)
+
+/*
+ * Values of the rte_htimer state field. A pending timer may be
+ * canceled. NOTE(review): the exact points at which the state
+ * transitions occur are decided by the timer wheel implementation.
+ */
+#define RTE_HTIMER_STATE_PENDING 1
+#define RTE_HTIMER_STATE_EXPIRED 2
+#define RTE_HTIMER_STATE_CANCELED 3
+
+/* List head type for lists of timers, linked by the 'entry' field. */
+LIST_HEAD(rte_htimer_list, rte_htimer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HTIMER_H_ */
diff --git a/lib/htimer/rte_htimer_mgr.c b/lib/htimer/rte_htimer_mgr.c
new file mode 100644
index 0000000000..7bb1630680
--- /dev/null
+++ b/lib/htimer/rte_htimer_mgr.c
@@ -0,0 +1,488 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#include <stdbool.h>
+#include <sys/queue.h>
+#include <inttypes.h>
+#include <unistd.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_htw.h>
+#include <rte_prefetch.h>
+#include <rte_ring_elem.h>
+
+#include "rte_htimer_mgr.h"
+#include "rte_htimer_msg.h"
+#include "rte_htimer_msg_ring.h"
+
+/* Maximum number of messages dequeued from a message ring in one burst. */
+#define MAX_MSG_BATCH_SIZE 16
+
+/* Per-lcore-id timer manager state. */
+struct htimer_mgr {
+ /* Ring on which requests and responses to this manager arrive. */
+ struct rte_htimer_msg_ring *msg_ring;
+ /* Timer wheel holding the timers of this manager. */
+ struct rte_htw *htw;
+
+ /* Index in async_msgs of the next message to be processed. */
+ unsigned int async_msgs_idx __rte_cache_aligned;
+ /* Number of dequeued messages which remain to be processed. */
+ unsigned int num_async_msgs;
+ /* Messages dequeued from msg_ring, awaiting processing. */
+ struct rte_htimer_msg async_msgs[MAX_MSG_BATCH_SIZE];
+} __rte_cache_aligned;
+
+/* Tick length, in time source units; set by rte_htimer_mgr_init(). */
+static uint64_t tsc_per_tick;
+
+/*
+ * Timer managers, indexed by lcore id.
+ * NOTE(review): rte_htimer_mgr_init() only sets up the first
+ * RTE_MAX_LCORE entries; the purpose of the extra entry is not
+ * evident from this file.
+ */
+static struct htimer_mgr mgrs[RTE_MAX_LCORE + 1];
+
+/* Size passed when creating each manager's message ring. */
+#define MAX_ASYNC_TRANSACTIONS 1024
+#define MSG_RING_SIZE MAX_ASYNC_TRANSACTIONS
+
+/* Convert a time in TSC to ticks, rounding down. */
+static inline uint64_t
+tsc_to_tick(uint64_t tsc)
+{
+	uint64_t tick = tsc / tsc_per_tick;
+
+	return tick;
+}
+
+/* Convert a time in TSC to ticks, rounding up to the next whole tick. */
+static inline uint64_t
+tsc_to_tick_round_up(uint64_t tsc)
+{
+	const uint64_t whole_ticks = tsc / tsc_per_tick;
+
+	/* a partial tick counts as a full one */
+	if (likely(tsc % tsc_per_tick > 0))
+		return whole_ticks + 1;
+
+	return whole_ticks;
+}
+
+/* Convert a time in ticks to TSC. */
+static uint64_t
+tick_to_tsc(uint64_t tick)
+{
+	uint64_t tsc = tsc_per_tick * tick;
+
+	return tsc;
+}
+
+/* Look up the timer manager of the lcore with the given id. */
+static struct htimer_mgr *
+mgr_get(unsigned int lcore_id)
+{
+	return mgrs + lcore_id;
+}
+
+/*
+ * Set up the timer manager of the specified lcore id: create its
+ * message ring and its timer wheel, and reset the message batch
+ * bookkeeping.
+ *
+ * Returns 0 on success, and -ENOMEM if either the ring or the timer
+ * wheel could not be created, in which case nothing is left
+ * allocated.
+ */
+static int
+mgr_init(unsigned int lcore_id)
+{
+	char ring_name[RTE_RING_NAMESIZE];
+	unsigned int socket_id;
+	struct htimer_mgr *mgr = &mgrs[lcore_id];
+
+	socket_id = rte_lcore_to_socket_id(lcore_id);
+
+	/* lcore_id is unsigned, so %u is the matching conversion */
+	snprintf(ring_name, sizeof(ring_name), "htimer_%u", lcore_id);
+
+	mgr->msg_ring =
+		rte_htimer_msg_ring_create(ring_name, MSG_RING_SIZE, socket_id,
+					   RING_F_SC_DEQ);
+
+	if (mgr->msg_ring == NULL)
+		goto err;
+
+	mgr->htw = rte_htw_create();
+
+	if (mgr->htw == NULL)
+		goto err_free_ring;
+
+	mgr->async_msgs_idx = 0;
+	mgr->num_async_msgs = 0;
+
+	return 0;
+
+err_free_ring:
+	rte_htimer_msg_ring_free(mgr->msg_ring);
+err:
+	return -ENOMEM;
+}
+
+/*
+ * Tear down the timer manager of the specified lcore id; first its
+ * timer wheel, then its message ring.
+ */
+static void
+mgr_deinit(unsigned int lcore_id)
+{
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+
+	rte_htw_destroy(mgr->htw);
+	rte_htimer_msg_ring_free(mgr->msg_ring);
+}
+
+/* Set by rte_htimer_mgr_init(), and cleared by rte_htimer_mgr_deinit(). */
+static volatile bool initialized;
+
+/* Assert (by means of RTE_ASSERT()) that the library is initialized. */
+static void
+assure_initialized(void)
+{
+ RTE_ASSERT(initialized);
+}
+
+/*
+ * Initialize the library: record the tick length and set up one
+ * timer manager for each of the RTE_MAX_LCORE possible lcore ids.
+ *
+ * Returns 0 on success. Returns -EINVAL if the tick length is zero,
+ * since all TSC-to-tick conversions divide by it. If setting up a
+ * manager fails, the managers already set up are torn down again,
+ * and the error code from mgr_init() is returned.
+ *
+ * Calling this function on an already-initialized library is a fatal
+ * error.
+ */
+int
+rte_htimer_mgr_init(uint64_t _tsc_per_tick)
+{
+	unsigned int lcore_id;
+
+	RTE_VERIFY(!initialized);
+
+	/* a zero-length tick would cause division by zero later on */
+	if (_tsc_per_tick == 0)
+		return -EINVAL;
+
+	tsc_per_tick = _tsc_per_tick;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		int rc;
+
+		rc = mgr_init(lcore_id);
+
+		if (rc < 0) {
+			unsigned int deinit_lcore_id;
+
+			/* roll back the managers set up so far */
+			for (deinit_lcore_id = 0; deinit_lcore_id < lcore_id;
+			     deinit_lcore_id++)
+				mgr_deinit(deinit_lcore_id);
+
+			return rc;
+		}
+	}
+
+	initialized = true;
+
+	return 0;
+}
+
+/*
+ * Deinitialize the library: tear down the timer managers of all
+ * RTE_MAX_LCORE lcore ids, and mark the library as uninitialized.
+ */
+void
+rte_htimer_mgr_deinit(void)
+{
+	unsigned int lcore_id = 0;
+
+	assure_initialized();
+
+	while (lcore_id < RTE_MAX_LCORE) {
+		mgr_deinit(lcore_id);
+		lcore_id++;
+	}
+
+	initialized = false;
+}
+
+/*
+ * Add a timer to the calling lcore's timer wheel.
+ *
+ * The expiration time and the period are given in TSC, and are
+ * converted to ticks (rounding up) before being handed to the timer
+ * wheel. The calling lcore is recorded as the timer's owner.
+ *
+ * The initialization check is made before any time conversion, since
+ * the conversion divides by the tick length, which is zero until the
+ * library has been initialized.
+ */
+void
+rte_htimer_mgr_add(struct rte_htimer *timer, uint64_t expiration_time_tsc,
+		   uint64_t period_tsc, rte_htimer_cb_t timer_cb,
+		   void *timer_cb_arg, uint32_t flags)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+	uint64_t expiration_time_tick;
+	uint64_t period_tick;
+
+	assure_initialized();
+
+	expiration_time_tick = tsc_to_tick_round_up(expiration_time_tsc);
+	period_tick = tsc_to_tick_round_up(period_tsc);
+
+	rte_htw_add(mgr->htw, timer, expiration_time_tick, period_tick,
+		    timer_cb, timer_cb_arg, flags);
+
+	timer->owner_lcore_id = lcore_id;
+}
+
+/*
+ * Cancel a timer owned by the calling lcore.
+ *
+ * Returns 0 if the timer was pending and has been canceled, -ETIME
+ * if it had already expired, and -ENOENT otherwise (in which case
+ * the timer is asserted to be in the canceled state).
+ */
+int
+rte_htimer_mgr_cancel(struct rte_htimer *timer)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+
+	assure_initialized();
+
+	RTE_ASSERT(timer->owner_lcore_id == lcore_id);
+
+	if (timer->state == RTE_HTIMER_STATE_PENDING) {
+		rte_htw_cancel(mgr->htw, timer);
+		return 0;
+	}
+
+	if (timer->state == RTE_HTIMER_STATE_EXPIRED)
+		return -ETIME;
+
+	RTE_ASSERT(timer->state == RTE_HTIMER_STATE_CANCELED);
+
+	return -ENOENT;
+}
+
+/*
+ * Build a message and enqueue it on the receiving lcore's message
+ * ring.
+ *
+ * If 'request' is non-NULL, its contents are used as the message
+ * payload; otherwise the contents of 'response' are used. Thus, at
+ * least one of the two must be non-NULL.
+ *
+ * Returns the result of the ring enqueue operation.
+ */
+static int
+send_msg(unsigned int receiver_lcore_id, enum rte_htimer_msg_type msg_type,
+	 struct rte_htimer *timer, rte_htimer_mgr_async_op_cb_t async_cb,
+	 void *async_cb_arg, const struct rte_htimer_msg_request *request,
+	 const struct rte_htimer_msg_response *response)
+{
+	struct rte_htimer_msg msg = {
+		.msg_type = msg_type,
+		.timer = timer,
+		.async_cb = async_cb,
+		.async_cb_arg = async_cb_arg
+	};
+
+	if (request == NULL)
+		msg.response = *response;
+	else
+		msg.request = *request;
+
+	return rte_htimer_msg_ring_enqueue(
+		mgr_get(receiver_lcore_id)->msg_ring, &msg);
+}
+
+/*
+ * Send a request message to the specified lcore, with the calling
+ * lcore recorded as the source. Returns the result of send_msg().
+ */
+static int
+send_request(unsigned int receiver_lcore_id, enum rte_htimer_msg_type msg_type,
+	     struct rte_htimer *timer,
+	     rte_htimer_mgr_async_op_cb_t async_cb, void *async_cb_arg)
+{
+	const struct rte_htimer_msg_request req = {
+		.source_lcore_id = rte_lcore_id()
+	};
+
+	return send_msg(receiver_lcore_id, msg_type, timer, async_cb,
+			async_cb_arg, &req, NULL);
+}
+
+/*
+ * Send a response message carrying 'result' to the specified lcore.
+ * Returns the result of send_msg().
+ */
+static int
+send_response(unsigned int receiver_lcore_id, enum rte_htimer_msg_type msg_type,
+	      struct rte_htimer *timer,
+	      rte_htimer_mgr_async_op_cb_t async_cb, void *async_cb_arg,
+	      int result)
+{
+	const struct rte_htimer_msg_response rsp = {
+		.result = result
+	};
+
+	return send_msg(receiver_lcore_id, msg_type, timer, async_cb,
+			async_cb_arg, NULL, &rsp);
+}
+
+/*
+ * Request that a timer be added on the target lcore.
+ *
+ * The timer parameters are stored in the timer struct itself, and an
+ * add request referring to the timer is sent to the target lcore.
+ *
+ * Returns 0 if the request was sent, and -EBUSY if it could not be.
+ */
+int
+rte_htimer_mgr_async_add(struct rte_htimer *timer,
+			 unsigned int target_lcore_id,
+			 uint64_t expiration_time, uint64_t period,
+			 rte_htimer_cb_t timer_cb, void *timer_cb_arg,
+			 uint32_t flags,
+			 rte_htimer_mgr_async_op_cb_t async_cb,
+			 void *async_cb_arg)
+{
+	int rc;
+
+	/* fields not listed (e.g., the state) are zeroed */
+	*timer = (struct rte_htimer) {
+		.expiration_time = expiration_time,
+		.period = period,
+		.owner_lcore_id = target_lcore_id,
+		.flags = flags,
+		.cb = timer_cb,
+		.cb_arg = timer_cb_arg
+	};
+
+	assure_initialized();
+
+	rc = send_request(target_lcore_id, rte_htimer_msg_type_add_request,
+			  timer, async_cb, async_cb_arg);
+
+	return rc < 0 ? -EBUSY : 0;
+}
+
+/*
+ * Request that a timer be canceled by the lcore owning it.
+ *
+ * A cancel request referring to the timer is sent to the lcore
+ * recorded as the timer's owner.
+ *
+ * Returns 0 if the request was sent, and -EBUSY if it could not be.
+ */
+int
+rte_htimer_mgr_async_cancel(struct rte_htimer *timer,
+			    rte_htimer_mgr_async_op_cb_t async_cb,
+			    void *async_cb_arg)
+{
+	/* same precondition check as the other public entry points */
+	assure_initialized();
+
+	if (send_request(timer->owner_lcore_id,
+			 rte_htimer_msg_type_cancel_request,
+			 timer, async_cb, async_cb_arg) < 0)
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Handle an add request.
+ *
+ * If the requester supplied a completion callback, an "added"
+ * response is first sent to the source lcore. If that fails, -EBUSY
+ * is returned, and the timer is not added. Otherwise, the timer is
+ * added to the calling lcore's timer wheel, using the parameters
+ * stored in the timer struct, and 0 is returned.
+ */
+static int
+process_add_request(struct rte_htimer_msg *request)
+{
+ struct rte_htimer *timer = request->timer;
+
+ /*
+ * NOTE(review): presumably the response is sent before the timer
+ * is added so that nothing needs to be undone in case the send
+ * fails; process_async() leaves a failed message in place for a
+ * later attempt.
+ */
+ if (request->async_cb != NULL &&
+ send_response(request->request.source_lcore_id,
+ rte_htimer_msg_type_add_response, timer,
+ request->async_cb, request->async_cb_arg,
+ RTE_HTIMER_MGR_ASYNC_RESULT_ADDED) < 0)
+ return -EBUSY;
+
+ rte_htimer_mgr_add(timer, timer->expiration_time, timer->period,
+ timer->cb, timer->cb_arg, timer->flags);
+
+ return 0;
+}
+
+/*
+ * Handle a cancel request.
+ *
+ * The result reported back to the requester is derived from the
+ * timer's state at the time of processing. If the requester supplied
+ * a completion callback, a response carrying that result is first
+ * sent to the source lcore. If that fails, -EBUSY is returned, and
+ * the timer is left untouched. Otherwise, the timer is canceled in
+ * case it was pending, and 0 is returned.
+ */
+static int
+process_cancel_request(struct rte_htimer_msg *request)
+{
+ unsigned int lcore_id = rte_lcore_id();
+ struct htimer_mgr *mgr = mgr_get(lcore_id);
+ struct rte_htimer *timer = request->timer;
+ int result;
+
+ /* map the timer's current state to an async result code */
+ switch (timer->state) {
+ case RTE_HTIMER_STATE_PENDING:
+ result = RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED;
+ break;
+ case RTE_HTIMER_STATE_CANCELED:
+ result = RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED;
+ break;
+ case RTE_HTIMER_STATE_EXPIRED:
+ result = RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED;
+ break;
+ default:
+ /* no other state is expected here */
+ RTE_ASSERT(0);
+ result = -1;
+ }
+
+ /* the response goes out before the timer is touched */
+ if (request->async_cb != NULL &&
+ send_response(request->request.source_lcore_id,
+ rte_htimer_msg_type_cancel_response, timer,
+ request->async_cb, request->async_cb_arg,
+ result) < 0)
+ return -EBUSY;
+
+ if (timer->state == RTE_HTIMER_STATE_PENDING)
+ rte_htw_cancel(mgr->htw, timer);
+
+ return 0;
+}
+
+/*
+ * Handle a response to an earlier request, by running the completion
+ * callback (if any) with the result carried in the response. Always
+ * returns 0.
+ */
+static int
+process_response(struct rte_htimer_msg *msg)
+{
+	if (msg->async_cb == NULL)
+		return 0;
+
+	msg->async_cb(msg->timer, msg->response.result, msg->async_cb_arg);
+
+	return 0;
+}
+
+/*
+ * Dispatch a message to the handler for its type. Returns the
+ * handler's return code. An unknown message type triggers an
+ * assertion, and -EBUSY is returned.
+ */
+static int
+process_msg(struct rte_htimer_msg *msg)
+{
+	if (msg->msg_type == rte_htimer_msg_type_add_request)
+		return process_add_request(msg);
+
+	if (msg->msg_type == rte_htimer_msg_type_cancel_request)
+		return process_cancel_request(msg);
+
+	/* both kinds of responses are handled the same way */
+	if (msg->msg_type == rte_htimer_msg_type_add_response ||
+	    msg->msg_type == rte_htimer_msg_type_cancel_response)
+		return process_response(msg);
+
+	RTE_ASSERT(0);
+
+	return -EBUSY;
+}
+
+/*
+ * Fetch a new burst of up to MAX_MSG_BATCH_SIZE messages from the
+ * manager's message ring, unless messages from an earlier burst
+ * remain to be processed. The timers referred to by the fetched
+ * messages are prefetched.
+ */
+static void
+dequeue_async_msgs(struct htimer_mgr *mgr)
+{
+	unsigned int msg_idx;
+
+	/* the current batch must be consumed before fetching a new one */
+	if (mgr->num_async_msgs != 0)
+		return;
+
+	mgr->async_msgs_idx = 0;
+
+	mgr->num_async_msgs =
+		rte_htimer_msg_ring_dequeue_burst(mgr->msg_ring,
+						  mgr->async_msgs,
+						  MAX_MSG_BATCH_SIZE);
+
+	for (msg_idx = 0; msg_idx < mgr->num_async_msgs; msg_idx++)
+		rte_prefetch1(mgr->async_msgs[msg_idx].timer);
+}
+
+/*
+ * Process the messages pending for this manager, one at a time,
+ * fetching new bursts from the message ring as needed.
+ *
+ * Processing ends when there are no more messages, or when a message
+ * could not be processed. In the latter case, the message is left in
+ * place, and is thus attempted again on the next call.
+ */
+static void
+process_async(struct htimer_mgr *mgr)
+{
+ for (;;) {
+ struct rte_htimer_msg *msg;
+
+ /* only fetches more when the current batch is used up */
+ dequeue_async_msgs(mgr);
+
+ if (mgr->num_async_msgs == 0)
+ break;
+
+ msg = &mgr->async_msgs[mgr->async_msgs_idx];
+
+ /* on failure, the batch bookkeeping is left unchanged */
+ if (process_msg(msg) < 0)
+ break;
+
+ mgr->num_async_msgs--;
+ mgr->async_msgs_idx++;
+ }
+}
+
+/*
+ * Process pending asynchronous messages, and then advance the calling
+ * lcore's timer wheel to the supplied time (given in TSC, and rounded
+ * down to whole ticks).
+ */
+void
+rte_htimer_mgr_manage_time(uint64_t current_time)
+{
+	struct htimer_mgr *mgr = mgr_get(rte_lcore_id());
+
+	assure_initialized();
+
+	process_async(mgr);
+
+	rte_htw_manage(mgr->htw, tsc_to_tick(current_time));
+}
+
+/*
+ * Same as rte_htimer_mgr_manage_time(), with the current TSC value
+ * used as the time.
+ */
+void
+rte_htimer_mgr_manage(void)
+{
+	assure_initialized();
+
+	rte_htimer_mgr_manage_time(rte_get_tsc_cycles());
+}
+
+/*
+ * Process pending asynchronous messages and thereafter the calling
+ * lcore's timer wheel, without advancing time.
+ *
+ * The initialization check is made before the manager is used, since
+ * the manager's message ring does not exist until the library has
+ * been initialized.
+ */
+void
+rte_htimer_mgr_process(void)
+{
+	unsigned int lcore_id = rte_lcore_id();
+	struct htimer_mgr *mgr = mgr_get(lcore_id);
+
+	assure_initialized();
+
+	process_async(mgr);
+
+	rte_htw_process(mgr->htw);
+}
+
+/* Return the calling lcore's current tick, converted to TSC. */
+uint64_t
+rte_htimer_mgr_current_time(void)
+{
+	return tick_to_tsc(rte_htimer_mgr_current_tick());
+}
+
+/* Return the current time (in ticks) of the calling lcore's timer wheel. */
+uint64_t
+rte_htimer_mgr_current_tick(void)
+{
+	struct htimer_mgr *mgr = mgr_get(rte_lcore_id());
+
+	return rte_htw_current_time(mgr->htw);
+}
diff --git a/lib/htimer/rte_htimer_mgr.h b/lib/htimer/rte_htimer_mgr.h
new file mode 100644
index 0000000000..1fbd69dbf6
--- /dev/null
+++ b/lib/htimer/rte_htimer_mgr.h
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_MGR_H_
+#define _RTE_HTIMER_MGR_H_
+
+/**
+ * @file
+ *
+ * RTE High-performance Timer Manager
+ *
+ * The high-performance timer manager (htimer_mgr) API provides access
+ * to a low-overhead, scalable timer service.
+ *
+ * The functionality offered is similar to that of <rte_timer.h>, but
+ * the internals differ significantly, and there are slight
+ * differences in the programming interface as well.
+ *
+ * Core timer management is implemented by means of a hierarchical
+ * timer wheel (HWT), as per the Varghese and Lauck paper <em>Hashed
+ * and Hierarchical Timing Wheels: Data Structures for the Efficient
+ * Implementation of a Timer Facility</em>.
+ *
+ * Varghese et al's approach is further enhanced by the placement of a
+ * bitset in front of each wheel's slots. Each slot has a
+ * corresponding bit in the bitset. If a bit is clear, there are no
+ * pending timers scheduled for that slot. A set bit means there
+ * potentially are timers scheduled for that slot. This scheme reduces
+ * the overhead of the rte_htimer_mgr_manage() function, where slots
+ * of one or more of the wheels of the thread's HWT are scanned if
+ * time has progressed since last call. This improves performance in
+ * all cases, except for very densely populated timer wheels.
+ *
+ * One such HWT is instantiated for each lcore (EAL thread), and
+ * instances are also available for registered non-EAL threads.
+ *
+ * The <rte_htimer_mgr.h> API may not be called from unregistered
+ * non-EAL threads.
+ *
+ * The per-lcore-id HWT instance is private to that thread.
+ *
+ * The htimer API supports scheduling timers to a different thread
+ * (and thus, a different HWT) than the caller's. It is also possible
+ * to cancel timers managed by a "remote" timer wheel.
+ *
+ * All interaction (i.e., adding timers to or removing timers from) a
+ * remote HWT is done by sending a request, in the form of message on
+ * a DPDK ring, to that instance. Such requests are processed and, if
+ * required, acknowledged when the remote (target) thread calls
+ * rte_htimer_mgr_manage(), rte_htimer_mgr_manage_time() or
+ * rte_htimer_mgr_process().
+ *
+ * This message-based interaction avoids comparatively heavy-weight
+ * synchronization primitives such as spinlocks. Only release-acquire
+ * type synchronization on the rings is needed.
+ *
+ * Timer memory management is the responsibility of the
+ * application. After library-level initialization has completed, no
+ * more dynamic memory is allocated by the htimer library. When
+ * installing timers on remote lcores, care must be taken by the
+ * application to avoid race conditions, in particular use-after-free
+ * (or use-after-recycle) issues of the rte_htimer structure. A timer
+ * struct may only be deallocated and/or recycled if the application
+ * can guarantee that there are no cancel requests in flight.
+ *
+ * The htimer library is able to give a definitive answer to the
+ * question of whether a remote timer had expired or not, at the time
+ * of cancellation.
+ *
+ * The htimer library uses TSC as the default time source. A different
+ * time source may be used, in which case the application must
+ * explicitly provide the time using rte_htimer_mgr_manage_time().
+ * This function may also be used even if TSC is the time source, in
+ * cases where the application for some other purpose already is in
+ * possession of the current TSC time, avoiding the overhead of the
+ * `rdtsc` instruction (or its equivalent on non-x86 ISAs).
+ *
+ * The htimer supports periodic and single-shot timers.
+ *
+ * The timer tick defines a quantum of time in the htimer library. The
+ * length of a tick (quantified in TSC) is left to the application to
+ * specify. The core HWT implementation allows for all 64 bits to be
+ * used.
+ *
+ * Very fine-grained ticks increase the HWT overhead (since more slots
+ * need to be scanned). Long ticks will only allow for very
+ * coarse-grained timers, and in timer-heavy applications may cause
+ * load spikes when time advances into a new tick.
+ *
+ * A seemingly reasonable timer tick length is in the range between
+ * 100 ns and 100 us (or maybe up to as high as 1 ms), depending on
+ * the application.
+ */
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_htimer.h>
+
+/**
+ * The timer has been added to the timer manager on the target lcore.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_ADDED 1
+
+/**
+ * The timer cancellation request has completed, before the timer expired
+ * on the target lcore.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_CANCELED 2
+
+/**
+ * The timer cancellation request was denied, since the timer was
+ * already marked as canceled.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_ALREADY_CANCELED 3
+
+/**
+ * At the time the cancellation request was processed on the target
+ * lcore, the timer had already expired.
+ */
+#define RTE_HTIMER_MGR_ASYNC_RESULT_EXPIRED 4
+
+/**
+ * Completion callback for asynchronous add and cancel operations.
+ *
+ * @param timer
+ * The timer the completed operation concerned.
+ * @param result
+ * The outcome of the operation, normally one of the
+ * RTE_HTIMER_MGR_ASYNC_RESULT_* values.
+ * @param cb_arg
+ * The completion callback argument supplied with the request.
+ */
+typedef void (*rte_htimer_mgr_async_op_cb_t)(struct rte_htimer *timer,
+ int result, void *cb_arg);
+
+/**
+ * Initialize the htimer library.
+ *
+ * Instantiates per-lcore (or per-registered non-EAL thread) timer
+ * wheels and other htimer library data structures, for all current
+ * and future threads.
+ *
+ * This function must be called prior to any other <rte_htimer_mgr.h>
+ * API call.
+ *
+ * This function may not be called if the htimer library is already
+ * initialized, but may be called multiple times, provided the library
+ * is deinitialized in between rte_htimer_mgr_init() calls.
+ *
+ * For applications not using TSC as the time source, the \c tsc_per_tick
+ * parameter will denote the number of such application time-source-units
+ * per tick.
+ *
+ * This function is not multi-thread safe.
+ *
+ * @param tsc_per_tick
+ * The length (in TSC) of a HWT tick.
+ *
+ * @return
+ * - 0: Success
+ * - -ENOMEM: Unable to allocate memory needed to initialize timer
+ * subsystem
+ *
+ * @see rte_htimer_mgr_deinit()
+ * @see rte_get_tsc_hz()
+ */
+
+__rte_experimental
+int
+rte_htimer_mgr_init(uint64_t tsc_per_tick);
+
+/**
+ * Deinitialize the htimer library.
+ *
+ * This function deallocates all dynamic memory used by the library,
+ * including HWT instances used by other threads than the caller.
+ *
+ * After this call has been made, no <rte_htimer_mgr.h> API call may
+ * be made, except rte_htimer_mgr_init().
+ *
+ * This function may not be called if the htimer library has never
+ * been initialized, or has been deinitialized but not yet initialized
+ * again.
+ *
+ * This function is not multi-thread safe. In particular, no thread
+ * may call any <rte_htimer_mgr.h> functions (e.g.,
+ * rte_htimer_mgr_manage()) while (or after) the htimer library is
+ * deinitialized, unless it is initialized again.
+ *
+ * @see rte_htimer_mgr_init()
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_deinit(void);
+
+/**
+ * Adds a timer to the calling thread's timer wheel.
+ *
+ * This function schedules a timer on the calling thread's HWT.
+ *
+ * The \c timer_cb callback is called at a point when this thread
+ * calls rte_htimer_mgr_process(), rte_htimer_mgr_manage(), or
+ * rte_htimer_mgr_manage_time() and the current time (either as
+ * retrieved by rte_htimer_mgr_manage() or specified by the
+ * application in rte_htimer_mgr_manage_time()) has reached the
+ * expiration time.
+ *
+ * The HWT tracks time in units of \c ticks, which are likely more
+ * coarse-grained than the TSC resolution.
+ *
+ * The \c expiration_time is specified in units of TSC, and rounded up
+ * to the nearest tick. Thus, a timer with a certain expiration time
+ * (specified in TSC) may not expire even though this time
+ * (specified in TSC) was supplied in rte_htimer_mgr_manage_time().
+ * The maximum error is the length of one tick (not including any
+ * delays caused by infrequent manage calls).
+ *
+ * This timer may be canceled using rte_htimer_mgr_cancel() or
+ * rte_htimer_mgr_async_cancel().
+ *
+ * rte_htimer_mgr_add() is multi-thread safe, and may only be called
+ * from an EAL thread or a registered non-EAL thread.
+ *
+ * @param timer
+ * The chunk of memory used for managing this timer. This memory
+ * must not be read or written (or free'd) by the application until
+ * this timer has expired, or any cancellation attempts have
+ * completed.
+ * @param expiration_time
+ * The expiration time (measured in TSC). For periodical timers,
+ * this time represent the first expiration time.
+ * @param period
+ * The time in between periodic timer expirations (measured in TSC).
+ * Must be set to zero unless the RTE_HTIMER_FLAG_PERIODICAL flag is set,
+ * in which case it must be a positive integer.
+ * @param timer_cb
+ * The timer callback to be called upon timer expiration.
+ * @param timer_cb_arg
+ * A pointer which will be supplied back to the application in the
+ * timer callback call.
+ * @param flags
+ * RTE_HTIMER_FLAG_ABSOLUTE_TIME and/or RTE_HTIMER_FLAG_PERIODICAL.
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_add(struct rte_htimer *timer, uint64_t expiration_time,
+ uint64_t period, rte_htimer_cb_t timer_cb,
+ void *timer_cb_arg, uint32_t flags);
+
+/**
+ * Cancel a timer scheduled in the calling thread's timer wheel.
+ *
+ * This function cancels a timer scheduled on the calling thread's HWT.
+ *
+ * rte_htimer_mgr_cancel() may be called on a timer which has already
+ * (synchronously or asynchronously) been canceled, or may have expired.
+ * However, the \c rte_htimer struct pointed to by \c timer may not
+ * have been freed or recycled since.
+ *
+ * rte_htimer_mgr_cancel() may not be called for a timer that was
+ * never (or, not yet) added.
+ *
+ * A timer added using rte_htimer_mgr_async_add() may not be
+ * canceled using this function until after the add operation has
+ * completed (i.e., the completion callback has been run).
+ *
+ * rte_htimer_mgr_cancel() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ *
+ * @param timer
+ * The timer to be canceled.
+ * @return
+ * - 0: Success
+ * - -ETIME: Timer has expired, and thus could not be canceled.
+ * - -ENOENT: Timer was already canceled.
+ */
+
+__rte_experimental
+int
+rte_htimer_mgr_cancel(struct rte_htimer *timer);
+
+/**
+ * Asynchronously add a timer to the specified lcore's timer wheel.
+ *
+ * This function is the equivalent of rte_htimer_mgr_add(), but allows
+ * the calling ("source") thread to schedule a timer in a HWT other
+ * than its own. The operation is asynchronous.
+ *
+ * The timer works the same as a timer added locally. Thus, the \c
+ * timer_cb callback is called by the target thread, and it may be
+ * canceled using rte_htimer_mgr_cancel().
+ *
+ * The source thread may be the same as the target thread.
+ *
+ * Only EAL threads or registered non-EAL threads may be targeted.
+ *
+ * A successful rte_htimer_mgr_async_add() call guarantees that the
+ * timer will be scheduled on the target lcore at some future time,
+ * provided the target thread calls either rte_htimer_mgr_process(),
+ * rte_htimer_mgr_manage(), and/or rte_htimer_mgr_manage_time().
+ *
+ * The \c async_cb callback is called on the source thread as a part
+ * of its rte_htimer_mgr_process(), rte_htimer_mgr_manage(), or
+ * rte_htimer_mgr_manage_time() call, when the asynchronous add
+ * operation has completed (i.e., the timer is scheduled in the target
+ * HWT).
+ *
+ * \c async_cb may be NULL, in which case no notification is given.
+ *
+ * An asynchronously added timer may be asynchronously canceled (i.e.,
+ * using rte_htimer_mgr_async_cancel()) at any point, by any thread,
+ * after the rte_htimer_mgr_async_add() call. An asynchronously added
+ * timer may not be canceled using rte_htimer_mgr_cancel() until
+ * after the completion callback has been executed.
+ *
+ * rte_htimer_mgr_async_add() is multi-thread safe, and may only be called
+ * from an EAL thread or a registered non-EAL thread.
+ *
+ * @param timer
+ * The chunk of memory used for managing this timer. This memory
+ * must not be read or written (or free'd) by the application until
+ * this timer has expired, or any cancellation attempts have
+ * completed.
+ * @param target_lcore_id
+ * The lcore id of the thread whose HWT will manage this timer.
+ * @param expiration_time
+ * The expiration time (measured in TSC). For periodical timers,
+ * this time represents the first expiration time.
+ * @param period
+ * The time in between periodic timer expirations (measured in TSC).
+ * Must be set to zero unless the RTE_HTIMER_FLAG_PERIODICAL flag is set,
+ * in which case it must be a positive integer.
+ * @param timer_cb
+ * The timer callback to be called upon timer expiration.
+ * @param timer_cb_arg
+ * A pointer which will be supplied back to the application in the
+ * timer callback call.
+ * @param async_cb
+ * The asynchronous operation callback to be called when the
+ * add operation is completed.
+ * @param async_cb_arg
+ * A pointer which will be supplied back to the application in the
+ * \c async_cb callback call.
+ * @param flags
+ * RTE_HTIMER_FLAG_ABSOLUTE_TIME and/or RTE_HTIMER_FLAG_PERIODICAL.
+ * @return
+ * - 0: Success
+ * - -EBUSY: The maximum number of concurrently queued asynchronous
+ * operations has been reached.
+ */
+__rte_experimental
+int
+rte_htimer_mgr_async_add(struct rte_htimer *timer,
+ unsigned int target_lcore_id,
+ uint64_t expiration_time, uint64_t period,
+ rte_htimer_cb_t timer_cb, void *timer_cb_arg,
+ uint32_t flags,
+ rte_htimer_mgr_async_op_cb_t async_cb,
+ void *async_cb_arg);
+
+/**
+ * Asynchronously cancel a timer in any thread's timer wheel.
+ *
+ * This function is the equivalent of rte_htimer_mgr_cancel(), but
+ * allows the calling ("source") thread to also cancel a timer in a
+ * HWT other than its own. The operation is asynchronous.
+ *
+ * A thread may asynchronously cancel a timer scheduled on its own
+ * HWT.
+ *
+ * The \c async_cb callback is called on the source thread as a part
+ * of its rte_htimer_mgr_process(), rte_htimer_mgr_manage(), or
+ * rte_htimer_mgr_manage_time() call, when the asynchronous cancel
+ * operation has completed (i.e., the cancellation attempt has been
+ * carried out on the target HWT).
+ *
+ * \c async_cb may be NULL, in which case no notification is given.
+ *
+ * A timer may be asynchronously canceled at any point, by any thread,
+ * after it has been either synchronously or asynchronously added.
+ *
+ * rte_htimer_mgr_async_cancel() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ *
+ * @param timer
+ * The memory used for managing this timer. This memory must not be
+ * read or written (or free'd) by the application until this timer
+ * has expired, or any cancellation attempts have completed.
+ * @param async_cb
+ * The asynchronous operation callback to be called when the
+ * cancel operation is completed.
+ * @param async_cb_arg
+ * A pointer which will be supplied back to the application in the
+ * \c async_cb callback call.
+ * @return
+ * - 0: Success
+ * - -EBUSY: The maximum number of concurrently queued asynchronous
+ * operations has been reached.
+ */
+__rte_experimental
+int
+rte_htimer_mgr_async_cancel(struct rte_htimer *timer,
+ rte_htimer_mgr_async_op_cb_t async_cb,
+ void *async_cb_arg);
+
+/**
+ * Update HWT time and perform timer expiry and asynchronous operation
+ * processing.
+ *
+ * This function is the equivalent of retrieving the current TSC time,
+ * and calling rte_htimer_mgr_manage_time().
+ *
+ * rte_htimer_mgr_manage() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_manage(void);
+
+/**
+ * Progress HWT time, and perform timer expiry and asynchronous
+ * operation processing in the process.
+ *
+ * This function progresses the calling thread's HWT up to the point
+ * specified by \c current_time, calling the callbacks of any expired
+ * timers.
+ *
+ * The time source must be a monotonic clock, and thus each new \c
+ * current_time must be equal or greater than the time supplied in the
+ * previous call.
+ *
+ * The timer precision for timers scheduled on a particular thread's
+ * HWT depends on how frequently that thread calls this function.
+ *
+ * rte_htimer_mgr_manage_time() also performs asynchronous operation
+ * processing. See rte_htimer_mgr_process() for details.
+ *
+ * rte_htimer_mgr_manage_time() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ *
+ * @param current_time
+ * The current time (usually in TSC).
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_manage_time(uint64_t current_time);
+
+/**
+ * Perform asynchronous operation processing.
+ *
+ * rte_htimer_mgr_process() serves pending asynchronous add or cancel
+ * requests, and produces the necessary responses. The timer callbacks
+ * of already-expired timers added are called.
+ *
+ * This function also processes asynchronous operation response
+ * messages received, and calls the asynchronous callbacks, if such
+ * was provided by the application.
+ *
+ * rte_htimer_mgr_process() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+void
+rte_htimer_mgr_process(void);
+
+/**
+ * Return the current local HWT time in TSC.
+ *
+ * This function returns the most recent time provided by this thread,
+ * either via rte_htimer_mgr_manage_time(), or as sampled by
+ * rte_htimer_mgr_manage().
+ *
+ * The initial time, prior to any manage-calls, is 0.
+ *
+ * rte_htimer_mgr_current_time() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+uint64_t
+rte_htimer_mgr_current_time(void);
+
+/**
+ * Return the current local HWT time in ticks.
+ *
+ * This function returns the current time of the calling thread's HWT. The
+ * tick is the current time provided by the application (via
+ * rte_htimer_mgr_manage_time()), or as retrieved (using
+ * rte_timer_get_tsc_cycles() in rte_htimer_mgr_manage()), divided by the
+ * tick length (as provided in rte_htimer_mgr_init()).
+ *
+ * The initial time, prior to any manage-calls, is 0.
+ *
+ * rte_htimer_mgr_current_tick() is multi-thread safe, and may only be
+ * called from an EAL thread or a registered non-EAL thread.
+ */
+
+__rte_experimental
+uint64_t
+rte_htimer_mgr_current_tick(void);
+
+#endif
diff --git a/lib/htimer/rte_htimer_msg.h b/lib/htimer/rte_htimer_msg.h
new file mode 100644
index 0000000000..ceb106e263
--- /dev/null
+++ b/lib/htimer/rte_htimer_msg.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_MSG_
+#define _RTE_HTIMER_MSG_
+
+#include <rte_htimer.h>
+
+/*
+ * Signature of the completion callback carried in a message: it is
+ * handed the timer, an integer result and an opaque argument.
+ */
+typedef void (*rte_htimer_msg_async_op_cb_t)(struct rte_htimer *timer,
+ int result, void *cb_arg);
+
+/*
+ * Alias of the callback type above.
+ * NOTE(review): this short, unprefixed name ends up in the global
+ * namespace and is not used as a type in the visible part of this
+ * patch - confirm whether it is needed.
+ */
+typedef rte_htimer_msg_async_op_cb_t async_cb;
+
+/*
+ * Message kinds: a request and a response for each of the two
+ * operations (add and cancel).
+ */
+enum rte_htimer_msg_type {
+ rte_htimer_msg_type_add_request,
+ rte_htimer_msg_type_add_response,
+ rte_htimer_msg_type_cancel_request,
+ rte_htimer_msg_type_cancel_response
+};
+
+/*
+ * Request-specific part of a message.
+ * NOTE(review): source_lcore_id is presumably the lcore that issued the
+ * request, i.e. where the response is to be sent - confirm against the
+ * sender/receiver code.
+ */
+struct rte_htimer_msg_request {
+ unsigned int source_lcore_id;
+};
+
+/*
+ * Response-specific part of a message.
+ * NOTE(review): result presumably is the value passed on as the
+ * 'result' argument of the completion callback - confirm.
+ */
+struct rte_htimer_msg_response {
+ int result;
+};
+
+/*
+ * A message, as stored by value in a struct rte_htimer_msg_ring.
+ */
+struct rte_htimer_msg {
+ /* selects the operation and the direction (request/response) */
+ enum rte_htimer_msg_type msg_type;
+
+ /* the timer the operation concerns */
+ struct rte_htimer *timer;
+
+ /* completion callback and its opaque argument */
+ rte_htimer_msg_async_op_cb_t async_cb;
+ void *async_cb_arg;
+
+ /*
+ * Type-specific payload. Presumably 'request' is valid for the
+ * *_request message types and 'response' for the *_response
+ * types - TODO confirm.
+ */
+ union {
+ struct rte_htimer_msg_request request;
+ struct rte_htimer_msg_response response;
+ };
+};
+
+#endif
diff --git a/lib/htimer/rte_htimer_msg_ring.c b/lib/htimer/rte_htimer_msg_ring.c
new file mode 100644
index 0000000000..4019b7819a
--- /dev/null
+++ b/lib/htimer/rte_htimer_msg_ring.c
@@ -0,0 +1,18 @@
+#include "rte_htimer_msg_ring.h"
+
+/*
+ * Create a ring whose elements are struct rte_htimer_msg instances
+ * (stored by value).
+ *
+ * The arguments are handed unchanged to rte_ring_create_elem(), and the
+ * pointer it returns is handed back as a message ring pointer.
+ */
+struct rte_htimer_msg_ring *
+rte_htimer_msg_ring_create(const char *name, unsigned int count, int socket_id,
+			   unsigned int flags)
+{
+	const unsigned int msg_size = sizeof(struct rte_htimer_msg);
+
+	return (struct rte_htimer_msg_ring *)
+		rte_ring_create_elem(name, msg_size, count, socket_id, flags);
+}
+
+/*
+ * Release a message ring. The pointer is handed to rte_ring_free() as
+ * the underlying struct rte_ring.
+ */
+void
+rte_htimer_msg_ring_free(struct rte_htimer_msg_ring *msg_ring)
+{
+ rte_ring_free((struct rte_ring *)msg_ring);
+}
diff --git a/lib/htimer/rte_htimer_msg_ring.h b/lib/htimer/rte_htimer_msg_ring.h
new file mode 100644
index 0000000000..0e408991d1
--- /dev/null
+++ b/lib/htimer/rte_htimer_msg_ring.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTIMER_MSG_RING_
+#define _RTE_HTIMER_MSG_RING_
+
+#include <rte_ring.h>
+
+#include "rte_htimer_msg.h"
+
+/*
+ * Typed wrapper around a struct rte_ring holding struct rte_htimer_msg
+ * elements. The ring is the first and only member; the create and free
+ * functions cast between the two pointer types.
+ */
+struct rte_htimer_msg_ring {
+ struct rte_ring ring;
+};
+
+struct rte_htimer_msg_ring *
+rte_htimer_msg_ring_create(const char *name, unsigned int count, int socket_id,
+ unsigned int flags);
+
+void
+rte_htimer_msg_ring_free(struct rte_htimer_msg_ring *msg_ring);
+
+/*
+ * Dequeue up to 'n' messages from the ring into the 'msgs' array.
+ *
+ * Returns the value of rte_ring_dequeue_burst_elem(), i.e., the number
+ * of messages actually dequeued.
+ */
+static inline unsigned int
+rte_htimer_msg_ring_dequeue_burst(struct rte_htimer_msg_ring *msg_ring,
+				  struct rte_htimer_msg *msgs,
+				  unsigned int n)
+{
+	struct rte_ring *r = &msg_ring->ring;
+
+	return rte_ring_dequeue_burst_elem(r, msgs,
+					   sizeof(struct rte_htimer_msg),
+					   n, NULL);
+}
+
+/*
+ * Enqueue (by value) a single message on the ring.
+ *
+ * Returns the result of rte_ring_enqueue_elem(): 0 on success, or a
+ * negative errno value in case the message could not be enqueued.
+ *
+ * The return type is signed, since an unsigned return type would turn
+ * the negative error code into a large positive number, making
+ * "< 0"-style error checks in the caller always false.
+ */
+static inline int
+rte_htimer_msg_ring_enqueue(struct rte_htimer_msg_ring *msg_ring,
+			    struct rte_htimer_msg *msg)
+{
+	return rte_ring_enqueue_elem(&msg_ring->ring, msg,
+				     sizeof(struct rte_htimer_msg));
+}
+
+#endif
diff --git a/lib/htimer/rte_htw.c b/lib/htimer/rte_htw.c
new file mode 100644
index 0000000000..0104dced34
--- /dev/null
+++ b/lib/htimer/rte_htw.c
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+/*
+ * This is an implementation of a hierarchical timer wheel based on
+ * Hashed and Hierarchical Timing Wheels: Data Structures
+ * for the Efficient Implementation of a Timer Facility by Varghese and
+ * Lauck.
+ *
+ * To improve efficiency when the slots are sparsely populated (i.e.,
+ * many ticks do not have any timers), each slot is represented by a
+ * bit in a separately-managed, per-wheel, bitset. This allows for
+ * very efficient scanning. The cost of managing this bitset is small.
+ */
+
+/* XXX: remove */
+#include <inttypes.h>
+
+#include <rte_bitset.h>
+#include <rte_branch_prediction.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+
+#include "rte_htw.h"
+
+#define TICK_BITS 64
+
+#define WHEEL_BITS 8
+#define WHEEL_SLOTS (1U << WHEEL_BITS)
+#define WHEEL_LEVELS (TICK_BITS / WHEEL_BITS)
+
+/*
+ * One level of the hierarchical timer wheel.
+ */
+struct wheel {
+ /* the htw time, shifted down to this level's slot granularity */
+ uint64_t wheel_time;
+ /*
+ * One bit per slot. A bit is set when a timer is put in the slot
+ * and cleared when the slot is processed. It may be set for an
+ * empty slot (see rte_htw_cancel()).
+ */
+ RTE_BITSET_DECLARE(used_slots, WHEEL_SLOTS);
+ /* the timers of each slot */
+ struct rte_htimer_list slots[WHEEL_SLOTS];
+};
+
+/*
+ * A hierarchical timer wheel instance.
+ */
+struct rte_htw {
+ /* the time most recently supplied to rte_htw_manage() */
+ uint64_t current_time;
+
+ /* one wheel per level; level 0 has the finest granularity */
+ struct wheel wheels[WHEEL_LEVELS];
+
+ /* timers added, but not yet put in a wheel (see process_added()) */
+ struct rte_htimer_list added;
+ /* timers whose callbacks are to be run (see process_expiring()) */
+ struct rte_htimer_list expiring;
+
+ /*
+ * The timer whose callback is being executed. Set to NULL by
+ * rte_htw_cancel() to signal that the callback canceled its own
+ * timer.
+ */
+ struct rte_htimer *running_timer;
+};
+
+/*
+ * Convert a time in ticks to the time of the wheel at 'level', by
+ * dropping the WHEEL_BITS bits of each lower level.
+ */
+static uint64_t
+time_to_wheel_time(uint64_t t, uint16_t level)
+{
+	const unsigned int shift = level * WHEEL_BITS;
+
+	return t >> shift;
+}
+
+/*
+ * Convert the time of the wheel at 'level' back to a time in ticks
+ * (the inverse shift of time_to_wheel_time()).
+ */
+static uint64_t
+wheel_time_to_time(uint64_t wheel_time, uint16_t level)
+{
+	const unsigned int shift = level * WHEEL_BITS;
+
+	return wheel_time << shift;
+}
+
+/*
+ * Put a wheel in its initial state: time zero, no slots marked as
+ * used, and all slot lists empty.
+ */
+static void
+wheel_init(struct wheel *wheel)
+{
+	unsigned int slot;
+
+	for (slot = 0; slot < WHEEL_SLOTS; slot++)
+		LIST_INIT(&wheel->slots[slot]);
+
+	rte_bitset_init(wheel->used_slots, WHEEL_SLOTS);
+
+	wheel->wheel_time = 0;
+}
+
+/*
+ * Find the earliest expiration time among the timers of a list.
+ *
+ * Returns UINT64_MAX for an empty list.
+ */
+static uint64_t
+list_next_timeout(struct rte_htimer_list *timers)
+{
+	uint64_t earliest = UINT64_MAX;
+	struct rte_htimer *cursor;
+
+	LIST_FOREACH(cursor, timers, entry) {
+		if (cursor->expiration_time < earliest)
+			earliest = cursor->expiration_time;
+	}
+
+	return earliest;
+}
+
+/*
+ * Map a wheel time to a slot index in the range [0, WHEEL_SLOTS).
+ */
+static uint16_t
+wheel_time_to_slot(uint64_t wheel_time)
+{
+	/* WHEEL_SLOTS is a power of two, so masking equals modulo */
+	return wheel_time & (WHEEL_SLOTS - 1);
+}
+
+/*
+ * Return the wheel's time expressed in ticks, i.e., the htw time
+ * rounded down to the slot granularity of the wheel at 'level'.
+ */
+static uint64_t
+wheel_current_slot_time(struct wheel *wheel, uint16_t level)
+{
+	return wheel_time_to_time(wheel->wheel_time, level);
+}
+
+/*
+ * Find the next timeout among the timers residing in a wheel.
+ *
+ * The slots marked as used are visited in wrap-around order, beginning
+ * with the wheel's current slot. The earliest expiration time of the
+ * first non-empty slot encountered is returned.
+ *
+ * Returns 'upper_bound' in case the wheel's current slot time already
+ * is at or beyond that bound, and UINT64_MAX if the wheel does not
+ * hold any timers.
+ */
+static uint64_t
+wheel_next_timeout(struct wheel *wheel, uint16_t level, uint64_t upper_bound)
+{
+	uint16_t start_slot;
+	ssize_t slot;
+
+	if (wheel_current_slot_time(wheel, level) >= upper_bound)
+		return upper_bound;
+
+	/*
+	 * The scan must start at a slot index. A time (in ticks)
+	 * truncated to 16 bits is not a valid index, and may well be
+	 * larger than WHEEL_SLOTS.
+	 */
+	start_slot = wheel_time_to_slot(wheel->wheel_time);
+
+	RTE_BITSET_FOREACH_SET_WRAP(slot, wheel->used_slots, WHEEL_SLOTS,
+				    start_slot, WHEEL_SLOTS) {
+		struct rte_htimer_list *timers = &wheel->slots[slot];
+		uint64_t next_timeout;
+
+		next_timeout = list_next_timeout(timers);
+
+		/* a used bit may be set for a slot emptied by cancel */
+		if (next_timeout != UINT64_MAX)
+			return next_timeout;
+	}
+
+	return UINT64_MAX;
+}
+
+/*
+ * Return the index of the slot which, in the wheel at 'level', covers
+ * the time 't'.
+ */
+static uint16_t
+get_slot(uint64_t t, uint16_t level)
+{
+	return wheel_time_to_slot(time_to_wheel_time(t, level));
+}
+
+/*
+ * Allocate and initialize a timer wheel, with the current time set to
+ * zero and no timers.
+ *
+ * Returns NULL, with rte_errno set to ENOMEM, in case the memory
+ * allocation fails.
+ */
+struct rte_htw *
+rte_htw_create(void)
+{
+	struct rte_htw *htw;
+	uint16_t level;
+
+	/* the wheels must, together, cover exactly all bits of a tick */
+	RTE_BUILD_BUG_ON((TICK_BITS % WHEEL_BITS) != 0);
+	/* a slot index must fit in a uint16_t */
+	RTE_BUILD_BUG_ON(sizeof(uint16_t) * CHAR_BIT <= WHEEL_BITS);
+
+	htw = rte_malloc(NULL, sizeof(*htw), RTE_CACHE_LINE_SIZE);
+
+	if (htw == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	htw->current_time = 0;
+
+	/*
+	 * rte_malloc() does not zero the memory, and rte_htw_cancel()
+	 * compares against this pointer, so it must not be left
+	 * uninitialized.
+	 */
+	htw->running_timer = NULL;
+
+	LIST_INIT(&htw->added);
+	LIST_INIT(&htw->expiring);
+
+	for (level = 0; level < WHEEL_LEVELS; level++)
+		wheel_init(&htw->wheels[level]);
+
+	return htw;
+}
+
+/*
+ * Free the memory of a timer wheel. The timers themselves are not
+ * touched.
+ */
+void
+rte_htw_destroy(struct rte_htw *htw)
+{
+ rte_free(htw);
+}
+
+/*
+ * Select the wheel level for a timer with 'remaining_time' ticks left
+ * until expiry. The level is determined by the position of the most
+ * significant bit set: level 0 for [1, 2^WHEEL_BITS), level 1 for
+ * [2^WHEEL_BITS, 2^(2 * WHEEL_BITS)), and so on.
+ *
+ * 'remaining_time' must be non-zero.
+ */
+static uint16_t
+get_level(uint64_t remaining_time)
+{
+	int last_set;
+
+	/* the result of __builtin_clzll() is undefined for zero */
+	RTE_ASSERT(remaining_time > 0);
+
+	last_set = 64 - __builtin_clzll(remaining_time);
+
+	return (last_set - 1) / WHEEL_BITS;
+}
+
+/*
+ * Put a timer on the list of added timers, and mark it as pending.
+ * The timer is moved to a wheel (or the expiring list) by
+ * process_added().
+ */
+static void
+mark_added(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	struct rte_htimer_list *added_timers = &htw->added;
+
+	LIST_INSERT_HEAD(added_timers, timer, entry);
+	timer->state = RTE_HTIMER_STATE_PENDING;
+}
+
+/*
+ * Initialize a timer and add it to the timer wheel.
+ *
+ * If RTE_HTIMER_FLAG_ABSOLUTE_TIME is set in 'flags', 'expiration_time'
+ * is used as-is. Otherwise, it is taken to be relative to the current
+ * time of the wheel.
+ *
+ * 'period' must be zero unless RTE_HTIMER_FLAG_PERIODICAL is set in
+ * 'flags', in which case it must be non-zero.
+ *
+ * The timer is only put on the list of added timers here; see
+ * process_added().
+ */
+void
+rte_htw_add(struct rte_htw *htw, struct rte_htimer *timer,
+	    uint64_t expiration_time, uint64_t period,
+	    rte_htimer_cb_t timer_cb, void *timer_cb_arg, uint32_t flags)
+{
+	/*
+	 * Validate against the 'flags' argument. The flags field of the
+	 * timer has not yet been assigned at this point, and thus holds
+	 * stale (or uninitialized) data.
+	 */
+	RTE_ASSERT((flags & RTE_HTIMER_FLAG_PERIODICAL) ?
+		   period > 0 : period == 0);
+
+	if (flags & RTE_HTIMER_FLAG_ABSOLUTE_TIME)
+		timer->expiration_time = expiration_time;
+	else
+		timer->expiration_time = htw->current_time + expiration_time;
+
+	timer->period = period;
+	timer->flags = flags;
+	timer->cb = timer_cb;
+	timer->cb_arg = timer_cb_arg;
+
+	mark_added(htw, timer);
+}
+
+/*
+ * Cancel a timer.
+ *
+ * A pending timer is unlinked from the list it resides on and marked
+ * as canceled. An expired timer is only affected if it is the timer
+ * whose callback is currently running, in which case it is marked as
+ * canceled and will not be re-armed.
+ */
+void
+rte_htw_cancel(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	RTE_ASSERT(timer->state == RTE_HTIMER_STATE_PENDING ||
+		   timer->state == RTE_HTIMER_STATE_EXPIRED);
+
+	if (likely(timer->state == RTE_HTIMER_STATE_PENDING)) {
+		/*
+		 * The used_slots bit of the wheel slot is left as-is,
+		 * even if this was the last timer of that slot. A
+		 * "false positive" bit is harmless from a correctness
+		 * point of view, and checking the list head and
+		 * clearing the bit here is likely more expensive than
+		 * deferring a minor cost to a future rte_htw_manage()
+		 * call.
+		 */
+		LIST_REMOVE(timer, entry);
+		timer->state = RTE_HTIMER_STATE_CANCELED;
+		return;
+	}
+
+	if (timer != htw->running_timer)
+		return;
+
+	/* a periodical timer is being canceled by its own callback */
+	RTE_ASSERT(timer->flags & RTE_HTIMER_FLAG_PERIODICAL);
+
+	/* tells process_expiring() that the running timer was canceled */
+	htw->running_timer = NULL;
+
+	timer->state = RTE_HTIMER_STATE_CANCELED;
+}
+
+/*
+ * Put a timer on the list of timers which are to have their callbacks
+ * run by process_expiring(). The timer state is not changed here.
+ */
+static void
+mark_expiring(struct rte_htw *htw, struct rte_htimer *timer)
+{
+ LIST_INSERT_HEAD(&htw->expiring, timer, entry);
+}
+
+/*
+ * Put a not-yet-expired timer in the appropriate wheel and slot.
+ *
+ * The wheel level is chosen based on the time remaining until expiry,
+ * and the slot based on the expiration time. The expiration time must
+ * be later than the current time.
+ */
+static void
+schedule_timer(struct rte_htw *htw, struct rte_htimer *timer)
+{
+	const uint64_t ticks_left = timer->expiration_time - htw->current_time;
+	const uint16_t target_level = get_level(ticks_left);
+	const uint16_t target_slot =
+		get_slot(timer->expiration_time, target_level);
+	struct wheel *target_wheel = &htw->wheels[target_level];
+
+	LIST_INSERT_HEAD(&target_wheel->slots[target_slot], timer, entry);
+
+	rte_bitset_set(target_wheel->used_slots, target_slot);
+}
+
+/*
+ * Drain the list of added timers. Timers which have already expired
+ * are moved to the expiring list, and all others are scheduled in a
+ * wheel.
+ */
+static void
+process_added(struct rte_htw *htw)
+{
+	for (;;) {
+		struct rte_htimer *timer = LIST_FIRST(&htw->added);
+
+		if (timer == NULL)
+			break;
+
+		LIST_REMOVE(timer, entry);
+
+		if (timer->expiration_time <= htw->current_time)
+			mark_expiring(htw, timer);
+		else
+			schedule_timer(htw, timer);
+	}
+}
+
+/*
+ * Run the callbacks of all timers on the expiring list.
+ *
+ * A periodical timer not canceled by its own callback has its
+ * expiration time advanced by one period, and is put back on the list
+ * of added timers.
+ */
+static void
+process_expiring(struct rte_htw *htw)
+{
+	for (;;) {
+		struct rte_htimer *timer = LIST_FIRST(&htw->expiring);
+		bool periodical;
+		bool canceled_by_cb;
+
+		if (timer == NULL)
+			break;
+
+		LIST_REMOVE(timer, entry);
+
+		/*
+		 * The timer struct cannot be safely accessed after
+		 * the callback has been called (except for
+		 * non-canceled periodical timers), since the callback
+		 * may have free'd (or reused) the memory. Thus, the
+		 * flags are sampled prior to the call.
+		 */
+		periodical = (timer->flags & RTE_HTIMER_FLAG_PERIODICAL) != 0;
+
+		timer->state = RTE_HTIMER_STATE_EXPIRED;
+
+		htw->running_timer = timer;
+
+		timer->cb(timer, timer->cb_arg);
+
+		/* rte_htw_cancel() resets running_timer on self-cancel */
+		canceled_by_cb = htw->running_timer == NULL;
+
+		htw->running_timer = NULL;
+
+		if (!periodical || canceled_by_cb)
+			continue;
+
+		timer->expiration_time += timer->period;
+		mark_added(htw, timer);
+	}
+}
+
+/*
+ * Return the current time of the timer wheel, i.e., the time most
+ * recently supplied to rte_htw_manage() (or zero, if it has not yet
+ * been called).
+ */
+uint64_t
+rte_htw_current_time(struct rte_htw *htw)
+{
+ return htw->current_time;
+}
+
+/*
+ * Return the time of the next timeout, capped at 'upper_bound'.
+ *
+ * Timers on the added list are first moved to a wheel or to the
+ * expiring list. If any timer is waiting to have its callback run,
+ * zero is returned. If there are no timers at all, 'upper_bound' is
+ * returned.
+ */
+uint64_t
+rte_htw_next_timeout(struct rte_htw *htw, uint64_t upper_bound)
+{
+	uint64_t next_timeout = upper_bound;
+	uint16_t level;
+
+	/* the wheels only account for timers already scheduled */
+	process_added(htw);
+
+	if (!LIST_EMPTY(&htw->expiring))
+		return 0;
+
+	/*
+	 * All levels must be consulted, since a timer in a
+	 * higher-level wheel may be due before every timer in the
+	 * lower-level wheels. For example, a timer added at time 0
+	 * and expiring at time 256 resides in level 1, while a timer
+	 * added at time 255 and expiring at time 510 resides in
+	 * level 0.
+	 */
+	for (level = 0; level < WHEEL_LEVELS; level++) {
+		uint64_t wheel_timeout;
+
+		/* UINT64_MAX (no timers) never lowers the minimum */
+		wheel_timeout = wheel_next_timeout(&htw->wheels[level],
+						   level, upper_bound);
+
+		next_timeout = RTE_MIN(next_timeout, wheel_timeout);
+	}
+
+	return next_timeout;
+}
+
+/*
+ * Empty a wheel slot, and clear its used_slots bit.
+ *
+ * Timers in a level 0 slot, and timers at any level which have reached
+ * their expiration time, are moved to the expiring list. All other
+ * timers are rescheduled based on their remaining time.
+ */
+static __rte_always_inline void
+process_slot(struct rte_htw *htw, uint16_t level, struct wheel *wheel,
+	     uint16_t slot)
+{
+	struct rte_htimer_list *bucket = &wheel->slots[slot];
+
+	rte_bitset_clear(wheel->used_slots, slot);
+
+	for (;;) {
+		struct rte_htimer *timer = LIST_FIRST(bucket);
+		bool due;
+
+		if (timer == NULL)
+			break;
+
+		LIST_REMOVE(timer, entry);
+
+		due = level == 0 || timer->expiration_time <= htw->current_time;
+
+		if (!due)
+			schedule_timer(htw, timer);
+		else
+			mark_expiring(htw, timer);
+	}
+}
+
+/*
+ * Process the slots marked as used, among the 'num_slots' slots
+ * beginning at 'start_slot', as iterated (with wrap-around) by
+ * RTE_BITSET_FOREACH_SET_WRAP().
+ */
+static __rte_always_inline void
+process_slots(struct rte_htw *htw, uint16_t level, struct wheel *wheel,
+	      uint16_t start_slot, uint16_t num_slots)
+{
+	ssize_t used_slot;
+
+	RTE_BITSET_FOREACH_SET_WRAP(used_slot, wheel->used_slots, WHEEL_SLOTS,
+				    start_slot, num_slots) {
+		process_slot(htw, level, wheel, used_slot);
+	}
+}
+
+/*
+ * Bring the wheels up-to-date with the current time of the htw.
+ *
+ * Starting at level 0, each wheel has its time updated, and the slots
+ * passed since its previous time (at most one full revolution) are
+ * processed. The first wheel found to have its time unchanged ends the
+ * procedure.
+ */
+static void
+advance(struct rte_htw *htw)
+{
+	uint16_t level;
+
+	for (level = 0; level < WHEEL_LEVELS; level++) {
+		struct wheel *wheel = &htw->wheels[level];
+		const uint64_t old_wheel_time = wheel->wheel_time;
+		const uint64_t new_wheel_time =
+			time_to_wheel_time(htw->current_time, level);
+		uint16_t first_slot;
+		uint16_t num_slots;
+
+		if (new_wheel_time == old_wheel_time)
+			break;
+
+		/* the slot of the old wheel time is already processed */
+		first_slot = wheel_time_to_slot(old_wheel_time + 1);
+		num_slots = RTE_MIN(new_wheel_time - old_wheel_time,
+				    WHEEL_SLOTS);
+
+		wheel->wheel_time = new_wheel_time;
+
+		process_slots(htw, level, wheel, first_slot, num_slots);
+	}
+}
+
+/*
+ * Advance the time of the timer wheel to 'new_time', and run the
+ * callbacks of the timers which have expired.
+ *
+ * 'new_time' must not be earlier than the current time of the wheel
+ * (enforced with RTE_VERIFY()).
+ */
+void
+rte_htw_manage(struct rte_htw *htw, uint64_t new_time)
+{
+ RTE_VERIFY(new_time >= htw->current_time);
+
+ /*
+ * Scheduling of added timers, core timer wheel processing, and
+ * expiry callback execution are kept as separate stages, to
+ * avoid having the core wheel traversal code deal with a
+ * situation where a timeout callback re-adds the timer.
+ * This split also results in seemingly reasonable semantics
+ * in regards to the execution of the callbacks of
+ * already-expired timeouts (e.g., with time 0) being added in
+ * a timeout callback. Instead of creating an endless loop,
+ * with rte_htw_manage() never returning, the execution of the
+ * timer is deferred until the next rte_htw_manage() call.
+ */
+
+ process_added(htw);
+
+ htw->current_time = new_time;
+
+ advance(htw);
+
+ process_expiring(htw);
+}
+
+/*
+ * Schedule the timers added since the last call, and run the callbacks
+ * of those found to have expired already, without advancing the time
+ * of the wheel.
+ */
+void
+rte_htw_process(struct rte_htw *htw)
+{
+ process_added(htw);
+ process_expiring(htw);
+}
diff --git a/lib/htimer/rte_htw.h b/lib/htimer/rte_htw.h
new file mode 100644
index 0000000000..c93358bb13
--- /dev/null
+++ b/lib/htimer/rte_htw.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Ericsson AB
+ */
+
+#ifndef _RTE_HTW_H_
+#define _RTE_HTW_H_
+
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_htimer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_htw;
+
+struct rte_htw *
+rte_htw_create(void);
+
+void
+rte_htw_destroy(struct rte_htw *htw);
+
+void
+rte_htw_add(struct rte_htw *htw, struct rte_htimer *timer,
+ uint64_t expiration_time, uint64_t period,
+ rte_htimer_cb_t cb, void *cb_arg, uint32_t flags);
+
+void
+rte_htw_cancel(struct rte_htw *htw, struct rte_htimer *timer);
+
+uint64_t
+rte_htw_current_time(struct rte_htw *htw);
+
+uint64_t
+rte_htw_next_timeout(struct rte_htw *htw, uint64_t upper_bound);
+
+void
+rte_htw_manage(struct rte_htw *htw, uint64_t new_time);
+
+void
+rte_htw_process(struct rte_htw *htw);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HTW_H_ */
diff --git a/lib/htimer/version.map b/lib/htimer/version.map
new file mode 100644
index 0000000000..0e71dc7d57
--- /dev/null
+++ b/lib/htimer/version.map
@@ -0,0 +1,17 @@
+EXPERIMENTAL {
+ global:
+
+ rte_htimer_mgr_init;
+ rte_htimer_mgr_deinit;
+ rte_htimer_mgr_add;
+ rte_htimer_mgr_cancel;
+ rte_htimer_mgr_async_add;
+ rte_htimer_mgr_async_cancel;
+ rte_htimer_mgr_manage;
+ rte_htimer_mgr_manage_time;
+ rte_htimer_mgr_process;
+ rte_htimer_mgr_current_time;
+ rte_htimer_mgr_current_tick;
+
+ local: *;
+};
diff --git a/lib/meson.build b/lib/meson.build
index 2bc0932ad5..c7c0e42ae8 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -37,6 +37,7 @@ libraries = [
'gpudev',
'gro',
'gso',
+ 'htimer',
'ip_frag',
'jobstats',
'kni',
--
2.34.1
next prev parent reply other threads:[~2023-02-28 9:45 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-28 9:39 [RFC 0/2] Add " Mattias Rönnblom
2023-02-28 9:39 ` [RFC 1/2] eal: add bitset type Mattias Rönnblom
2023-02-28 18:46 ` Tyler Retzlaff
2023-03-02 6:31 ` Mattias Rönnblom
2023-03-02 20:39 ` Tyler Retzlaff
2023-02-28 9:39 ` Mattias Rönnblom [this message]
2023-03-05 17:25 ` [RFC 2/2] eal: add high-performance timer facility Stephen Hemminger
2023-03-09 15:20 ` Mattias Rönnblom
2023-02-28 16:01 ` [RFC 0/2] Add " Morten Brørup
2023-03-01 11:18 ` Mattias Rönnblom
2023-03-01 13:31 ` Morten Brørup
2023-03-01 15:50 ` Mattias Rönnblom
2023-03-01 17:06 ` Morten Brørup
2023-03-15 17:03 ` [RFC v2 " Mattias Rönnblom
2023-03-15 17:03 ` [RFC v2 1/2] eal: add bitset type Mattias Rönnblom
2023-03-15 17:20 ` Stephen Hemminger
2023-03-15 18:27 ` Mattias Rönnblom
2023-03-15 17:03 ` [RFC v2 2/2] eal: add high-performance timer facility Mattias Rönnblom
2023-03-16 3:55 ` Tyler Retzlaff
2023-03-17 1:58 ` Stephen Hemminger
2023-03-22 12:18 ` Morten Brørup
2023-04-03 12:04 ` Mattias Rönnblom
2023-04-04 7:32 ` Morten Brørup
2023-03-24 16:00 ` Morten Brørup
2023-07-06 22:41 ` Stephen Hemminger
2023-07-12 8:58 ` Mattias Rönnblom
2024-10-03 18:36 ` [RFC v2 0/2] Add " Stephen Hemminger
2024-10-03 21:32 ` Morten Brørup
2024-10-06 13:02 ` Mattias Rönnblom
2024-10-06 13:43 ` Morten Brørup
2024-10-06 14:43 ` Mattias Rönnblom
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230228093916.87206-3-mattias.ronnblom@ericsson.com \
--to=mattias.ronnblom@ericsson.com \
--cc=david.marchand@redhat.com \
--cc=dev@dpdk.org \
--cc=erik.g.carrillo@intel.com \
--cc=maria.lingemark@ericsson.com \
--cc=stefan.sundkvist@ericsson.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).