From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Cc: Bruce Richardson <bruce.richardson@intel.com>,
konstantin.ananyev@intel.com, thomas@monjalon.net
Subject: [dpdk-dev] [PATCH 7/8] eal/bsdapp: add alarm support
Date: Fri, 15 Jun 2018 15:25:07 +0100 [thread overview]
Message-ID: <56b9f2abddf68d6007aa4f3a0176a37f0b0f91c0.1529071026.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <cover.1529071026.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1529071026.git.anatoly.burakov@intel.com>
Implement EAL alarm API support for FreeBSD. The implementation
is largely identical to that of Linux version, with one key
difference.
The alarm API is a little Linux-centric in that it is expecting
the alarm API to manage alarm timeouts without involvement of the
interrupt thread. This works on Linux because in Linux, there's
timerfd API which allows waiting for timer events on an fd.
On FreeBSD, however, there are no timerfd's, and timer events are
set up directly in kevent. There is no way to pass information from
the alarm API to the interrupt thread, so we also add a little
back-channel magic to get soonest alarm timeout from the alarm API.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
lib/librte_eal/bsdapp/eal/eal_alarm.c | 299 +++++++++++++++++-
lib/librte_eal/bsdapp/eal/eal_alarm_private.h | 19 ++
lib/librte_eal/bsdapp/eal/eal_interrupts.c | 29 +-
3 files changed, 334 insertions(+), 13 deletions(-)
create mode 100644 lib/librte_eal/bsdapp/eal/eal_alarm_private.h
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
index eb3913c97..55763e520 100644
--- a/lib/librte_eal/bsdapp/eal/eal_alarm.c
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -1,31 +1,314 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2018 Intel Corporation
*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#include <errno.h>
#include <rte_alarm.h>
+#include <rte_cycles.h>
#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct rte_intr_handle handle;
+ struct timespec time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static void eal_alarm_callback(void *arg);
int
rte_eal_alarm_init(void)
{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+
+ /* on FreeBSD, timers don't use fd's, and their identifiers are stored
+ * in separate namespace from fd's, so using any value is OK. however,
+ * EAL interrupts handler expects fd's to be unique, so use an actual fd
+ * to guarantee unique timer identifier.
+ */
+ intr_handle.fd = open("/dev/zero", O_RDONLY);
+
+ return 0;
+}
+
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+ if (now->tv_sec < at->tv_sec)
+ return -1;
+ if (now->tv_sec > at->tv_sec)
+ return 1;
+ if (now->tv_nsec < at->tv_nsec)
+ return -1;
+ if (now->tv_nsec > at->tv_nsec)
+ return 1;
+ return 0;
+}
+
+static inline uint64_t
+diff_ns(struct timespec *now, struct timespec *at)
+{
+ uint64_t now_ns, at_ns;
+
+ if (timespec_cmp(now, at) >= 0)
+ return 0;
+
+ now_ns = now->tv_sec * NS_PER_S + now->tv_nsec;
+ at_ns = at->tv_sec * NS_PER_S + at->tv_nsec;
+
+ return at_ns - now_ns;
+}
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val)
+{
+ struct alarm_entry *ap;
+ struct timespec now;
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return -1;
+
+ if (LIST_EMPTY(&alarm_list))
+ return -1;
+
+ ap = LIST_FIRST(&alarm_list);
+
+ *val = diff_ns(&now, &ap->time);
+
return 0;
}
+static int
+unregister_current_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ do {
+ ret = rte_intr_callback_unregister(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ } while (ret == -EAGAIN);
+ }
+
+ return ret;
+}
+
+static int
+register_first_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* register a new callback */
+ ret = rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ }
+ return ret;
+}
+
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ ap = LIST_FIRST(&alarm_list);
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return;
+
+ while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ free(ap);
+
+ ap = LIST_FIRST(&alarm_list);
+ }
+
+ /* timer has been deleted from the kqueue, so recreate it if needed */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
int
-rte_eal_alarm_set(uint64_t us __rte_unused,
- rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
{
- return -ENOTSUP;
+ struct alarm_entry *ap, *new_alarm;
+ struct timespec now;
+ uint64_t ns;
+ int ret = 0;
+
+ /* check parameters, also ensure us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = calloc(1, sizeof(*new_alarm));
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ ns = us * NS_PER_US;
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ /* re-register first callback just in case */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return ret;
}
int
-rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
{
- return -ENOTSUP;
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while (1) {
+ ap = LIST_FIRST(&alarm_list);
+ if (ap == NULL)
+ break;
+ if (cb_fn != ap->cb_fn)
+ break;
+ if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+ break;
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If calling from other context, mark that
+ * alarm is executing so loop can spin till it
+ * finish. Otherwise we are trying to cancel
+ * ourselves - mark it by EINPROGRESS.
+ */
+ if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 ||
+ cb_arg == ap->cb_arg)) {
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0) {
+ executing++;
+ } else {
+ err = EINPROGRESS;
+ }
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ /* unregister if no alarms left, otherwise re-register first */
+ if (LIST_EMPTY(&alarm_list))
+ unregister_current_callback();
+ else
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return count;
}
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
new file mode 100644
index 000000000..65c711518
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * FreeBSD needs a back-channel communication mechanism between interrupt and
+ * alarm thread, because on FreeBSD, timer period is set up inside the interrupt
+ * API and not inside alarm API like on Linux.
+ */
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif // EAL_ALARM_PRIVATE_H
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index 0fe068894..70ef844ff 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -16,6 +16,7 @@
#include <rte_interrupts.h>
#include "eal_private.h"
+#include "eal_alarm_private.h"
#define MAX_INTR_EVENTS 16
@@ -56,7 +57,22 @@ static volatile int kq = -1;
static int
intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
{
- ke->filter = EVFILT_READ;
+ /* alarm callbacks are special case */
+ if (ih->type == RTE_INTR_HANDLE_ALARM) {
+ uint64_t timeout_ns;
+
+ /* get soonest alarm timeout */
+ if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
+ return -1;
+
+ ke->filter = EVFILT_TIMER;
+ /* timers are one shot */
+ ke->flags |= EV_ONESHOT;
+ ke->fflags = NOTE_NSECONDS;
+ ke->data = timeout_ns;
+ } else {
+ ke->filter = EVFILT_READ;
+ }
ke->ident = ih->fd;
return 0;
@@ -122,8 +138,10 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
}
}
- /* add events to the queue */
- if (add_event) {
+ /* add events to the queue. timer events are special as we need to
+ * re-set the timer.
+ */
+ if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
struct kevent ke;
memset(&ke, 0, sizeof(ke));
@@ -218,8 +236,9 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
src->intr_handle.fd, strerror(errno));
- ret = -errno;
- goto out;
+ /* removing non-existent even is an expected condition
+ * in some circumstances (e.g. oneshot events).
+ */
}
/*walk through the callbacks and remove all that match. */
--
2.17.1
next prev parent reply other threads:[~2018-06-15 14:25 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-15 14:25 [dpdk-dev] [PATCH 0/8] Remove IPC threads Anatoly Burakov
2018-06-15 14:25 ` [dpdk-dev] [PATCH 1/8] eal/linux: use glibc malloc in alarm Anatoly Burakov
2018-06-15 14:25 ` [dpdk-dev] [PATCH 2/8] eal/linux: use glibc malloc in interrupt handling Anatoly Burakov
2018-06-15 14:25 ` [dpdk-dev] [PATCH 3/8] ipc: remove IPC thread for async requests Anatoly Burakov
2018-06-15 14:25 ` [dpdk-dev] [PATCH 4/8] eal: bring forward init of interrupt handling Anatoly Burakov
2018-06-15 14:25 ` [dpdk-dev] [PATCH 5/8] eal: add IPC type for interrupt thread Anatoly Burakov
2018-06-15 14:25 ` [dpdk-dev] [PATCH 6/8] eal/bsdapp: add " Anatoly Burakov
2018-06-15 14:25 ` Anatoly Burakov [this message]
2018-06-15 14:25 ` [dpdk-dev] [PATCH 8/8] ipc: remove main IPC thread Anatoly Burakov
2018-06-26 1:19 ` [dpdk-dev] [PATCH 0/8] Remove IPC threads Zhang, Qi Z
2018-06-26 7:03 ` Zhang, Qi Z
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 0/7] Remove asynchronous IPC thread Anatoly Burakov
2018-07-13 10:44 ` Thomas Monjalon
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 1/7] eal/linux: use glibc malloc in alarm Anatoly Burakov
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 2/7] eal/linux: use glibc malloc in interrupt handling Anatoly Burakov
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 3/7] eal/bsdapp: add interrupt thread Anatoly Burakov
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 4/7] eal/bsdapp: add alarm support Anatoly Burakov
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 5/7] eal: bring forward init of interrupt handling Anatoly Burakov
2018-07-12 22:36 ` Thomas Monjalon
2018-07-13 7:41 ` Burakov, Anatoly
2018-07-13 8:09 ` David Marchand
2018-07-13 9:10 ` Tiwei Bie
2018-07-13 11:28 ` David Marchand
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 6/7] ipc: remove IPC thread for async requests Anatoly Burakov
2018-06-26 10:53 ` [dpdk-dev] [PATCH v2 7/7] doc: document IPC callback limitations Anatoly Burakov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56b9f2abddf68d6007aa4f3a0176a37f0b0f91c0.1529071026.git.anatoly.burakov@intel.com \
--to=anatoly.burakov@intel.com \
--cc=bruce.richardson@intel.com \
--cc=dev@dpdk.org \
--cc=konstantin.ananyev@intel.com \
--cc=thomas@monjalon.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).