DPDK patches and discussions
 help / color / mirror / Atom feed
From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
To: Tomasz Duszynski <tduszynski@marvell.com>,
	Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>,
	"dev@dpdk.org" <dev@dpdk.org>
Subject: RE: [EXT] Re: [PATCH v11 1/4] lib: add generic support for reading PMU events
Date: Tue, 28 Feb 2023 11:58:44 +0000	[thread overview]
Message-ID: <48ff47d1eb994c4fa78640658b752ec6@huawei.com> (raw)
In-Reply-To: <DM4PR18MB4368F4B71A87C256AF4E3268D2AC9@DM4PR18MB4368.namprd18.prod.outlook.com>



> >> >> Add support for programming PMU counters and reading their values
> >> >> in runtime bypassing kernel completely.
> >> >>
> >> >> This is especially useful in cases where CPU cores are isolated i.e
> >> >> run dedicated tasks. In such cases one cannot use standard perf
> >> >> utility without sacrificing latency and performance.
> >> >>
> >> >> Signed-off-by: Tomasz Duszynski <tduszynski@marvell.com>
> >> >> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> >> >
> >> >Few more comments/questions below.
> >> >
> >> >
> >> >> diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c new file mode
> >> >> 100644 index 0000000000..950f999cb7
> >> >> --- /dev/null
> >> >> +++ b/lib/pmu/rte_pmu.c
> >> >> @@ -0,0 +1,460 @@
> >> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> >> + * Copyright(C) 2023 Marvell International Ltd.
> >> >> + */
> >> >> +
> >> >> +#include <ctype.h>
> >> >> +#include <dirent.h>
> >> >> +#include <errno.h>
> >> >> +#include <regex.h>
> >> >> +#include <stdlib.h>
> >> >> +#include <string.h>
> >> >> +#include <sys/ioctl.h>
> >> >> +#include <sys/mman.h>
> >> >> +#include <sys/queue.h>
> >> >> +#include <sys/syscall.h>
> >> >> +#include <unistd.h>
> >> >> +
> >> >> +#include <rte_atomic.h>
> >> >> +#include <rte_per_lcore.h>
> >> >> +#include <rte_pmu.h>
> >> >> +#include <rte_spinlock.h>
> >> >> +#include <rte_tailq.h>
> >> >> +
> >> >> +#include "pmu_private.h"
> >> >> +
> >> >> +#define EVENT_SOURCE_DEVICES_PATH "/sys/bus/event_source/devices"
> >> >> +
> >> >> +#define GENMASK_ULL(h, l) ((~0ULL - (1ULL << (l)) + 1) & (~0ULL >>
> >> >> +((64 - 1 - (h))))) #define FIELD_PREP(m, v) (((uint64_t)(v) <<
> >> >> +(__builtin_ffsll(m) - 1)) & (m))
> >> >> +
> >> >> +RTE_DEFINE_PER_LCORE(struct rte_pmu_event_group, _event_group);
> >> >> +struct rte_pmu rte_pmu;
> >> >> +
> >> >> +/*
> >> >> + * Following __rte_weak functions provide default no-op.
> >> >> +Architectures should override them if
> >> >> + * necessary.
> >> >> + */
> >> >> +
> >> >> +int
> >> >> +__rte_weak pmu_arch_init(void)
> >> >> +{
> >> >> +	return 0;
> >> >> +}
> >> >> +
> >> >> +void
> >> >> +__rte_weak pmu_arch_fini(void)
> >> >> +{
> >> >> +}
> >> >> +
> >> >> +void
> >> >> +__rte_weak pmu_arch_fixup_config(uint64_t __rte_unused config[3])
> >> >> +{ }
> >> >> +
> >> >> +static int
> >> >> +get_term_format(const char *name, int *num, uint64_t *mask) {
> >> >> +	char path[PATH_MAX];
> >> >> +	char *config = NULL;
> >> >> +	int high, low, ret;
> >> >> +	FILE *fp;
> >> >> +
> >> >> +	*num = *mask = 0;
> >> >> +	snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/format/%s", rte_pmu.name,
> >name);
> >> >> +	fp = fopen(path, "r");
> >> >> +	if (fp == NULL)
> >> >> +		return -errno;
> >> >> +
> >> >> +	errno = 0;
> >> >> +	ret = fscanf(fp, "%m[^:]:%d-%d", &config, &low, &high);
> >> >> +	if (ret < 2) {
> >> >> +		ret = -ENODATA;
> >> >> +		goto out;
> >> >> +	}
> >> >> +	if (errno) {
> >> >> +		ret = -errno;
> >> >> +		goto out;
> >> >> +	}
> >> >> +
> >> >> +	if (ret == 2)
> >> >> +		high = low;
> >> >> +
> >> >> +	*mask = GENMASK_ULL(high, low);
> >> >> +	/* Last digit should be [012]. If last digit is missing 0 is implied. */
> >> >> +	*num = config[strlen(config) - 1];
> >> >> +	*num = isdigit(*num) ? *num - '0' : 0;
> >> >> +
> >> >> +	ret = 0;
> >> >> +out:
> >> >> +	free(config);
> >> >> +	fclose(fp);
> >> >> +
> >> >> +	return ret;
> >> >> +}
> >> >> +
> >> >> +static int
> >> >> +parse_event(char *buf, uint64_t config[3]) {
> >> >> +	char *token, *term;
> >> >> +	int num, ret, val;
> >> >> +	uint64_t mask;
> >> >> +
> >> >> +	config[0] = config[1] = config[2] = 0;
> >> >> +
> >> >> +	token = strtok(buf, ",");
> >> >> +	while (token) {
> >> >> +		errno = 0;
> >> >> +		/* <term>=<value> */
> >> >> +		ret = sscanf(token, "%m[^=]=%i", &term, &val);
> >> >> +		if (ret < 1)
> >> >> +			return -ENODATA;
> >> >> +		if (errno)
> >> >> +			return -errno;
> >> >> +		if (ret == 1)
> >> >> +			val = 1;
> >> >> +
> >> >> +		ret = get_term_format(term, &num, &mask);
> >> >> +		free(term);
> >> >> +		if (ret)
> >> >> +			return ret;
> >> >> +
> >> >> +		config[num] |= FIELD_PREP(mask, val);
> >> >> +		token = strtok(NULL, ",");
> >> >> +	}
> >> >> +
> >> >> +	return 0;
> >> >> +}
> >> >> +
> >> >> +static int
> >> >> +get_event_config(const char *name, uint64_t config[3]) {
> >> >> +	char path[PATH_MAX], buf[BUFSIZ];
> >> >> +	FILE *fp;
> >> >> +	int ret;
> >> >> +
> >> >> +	snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/events/%s", rte_pmu.name,
> >name);
> >> >> +	fp = fopen(path, "r");
> >> >> +	if (fp == NULL)
> >> >> +		return -errno;
> >> >> +
> >> >> +	ret = fread(buf, 1, sizeof(buf), fp);
> >> >> +	if (ret == 0) {
> >> >> +		fclose(fp);
> >> >> +
> >> >> +		return -EINVAL;
> >> >> +	}
> >> >> +	fclose(fp);
> >> >> +	buf[ret] = '\0';
> >> >> +
> >> >> +	return parse_event(buf, config);
> >> >> +}
> >> >> +
> >> >> +static int
> >> >> +do_perf_event_open(uint64_t config[3], int group_fd) {
> >> >> +	struct perf_event_attr attr = {
> >> >> +		.size = sizeof(struct perf_event_attr),
> >> >> +		.type = PERF_TYPE_RAW,
> >> >> +		.exclude_kernel = 1,
> >> >> +		.exclude_hv = 1,
> >> >> +		.disabled = 1,
> >> >> +	};
> >> >> +
> >> >> +	pmu_arch_fixup_config(config);
> >> >> +
> >> >> +	attr.config = config[0];
> >> >> +	attr.config1 = config[1];
> >> >> +	attr.config2 = config[2];
> >> >> +
> >> >> +	return syscall(SYS_perf_event_open, &attr, 0, -1, group_fd, 0); }
> >> >> +
> >> >> +static int
> >> >> +open_events(struct rte_pmu_event_group *group) {
> >> >> +	struct rte_pmu_event *event;
> >> >> +	uint64_t config[3];
> >> >> +	int num = 0, ret;
> >> >> +
> >> >> +	/* group leader gets created first, with fd = -1 */
> >> >> +	group->fds[0] = -1;
> >> >> +
> >> >> +	TAILQ_FOREACH(event, &rte_pmu.event_list, next) {
> >> >> +		ret = get_event_config(event->name, config);
> >> >> +		if (ret)
> >> >> +			continue;
> >> >> +
> >> >> +		ret = do_perf_event_open(config, group->fds[0]);
> >> >> +		if (ret == -1) {
> >> >> +			ret = -errno;
> >> >> +			goto out;
> >> >> +		}
> >> >> +
> >> >> +		group->fds[event->index] = ret;
> >> >> +		num++;
> >> >> +	}
> >> >> +
> >> >> +	return 0;
> >> >> +out:
> >> >> +	for (--num; num >= 0; num--) {
> >> >> +		close(group->fds[num]);
> >> >> +		group->fds[num] = -1;
> >> >> +	}
> >> >> +
> >> >> +
> >> >> +	return ret;
> >> >> +}
> >> >> +
> >> >> +static int
> >> >> +mmap_events(struct rte_pmu_event_group *group) {
> >> >> +	long page_size = sysconf(_SC_PAGE_SIZE);
> >> >> +	unsigned int i;
> >> >> +	void *addr;
> >> >> +	int ret;
> >> >> +
> >> >> +	for (i = 0; i < rte_pmu.num_group_events; i++) {
> >> >> +		addr = mmap(0, page_size, PROT_READ, MAP_SHARED, group->fds[i], 0);
> >> >> +		if (addr == MAP_FAILED) {
> >> >> +			ret = -errno;
> >> >> +			goto out;
> >> >> +		}
> >> >> +
> >> >> +		group->mmap_pages[i] = addr;
> >> >> +		if (!group->mmap_pages[i]->cap_user_rdpmc) {
> >> >> +			ret = -EPERM;
> >> >> +			goto out;
> >> >> +		}
> >> >> +	}
> >> >> +
> >> >> +	return 0;
> >> >> +out:
> >> >> +	for (; i; i--) {
> >> >> +		munmap(group->mmap_pages[i - 1], page_size);
> >> >> +		group->mmap_pages[i - 1] = NULL;
> >> >> +	}
> >> >> +
> >> >> +	return ret;
> >> >> +}
> >> >> +
> >> >> +static void
> >> >> +cleanup_events(struct rte_pmu_event_group *group) {
> >> >> +	unsigned int i;
> >> >> +
> >> >> +	if (group->fds[0] != -1)
> >> >> +		ioctl(group->fds[0], PERF_EVENT_IOC_DISABLE,
> >> >> +PERF_IOC_FLAG_GROUP);
> >> >> +
> >> >> +	for (i = 0; i < rte_pmu.num_group_events; i++) {
> >> >> +		if (group->mmap_pages[i]) {
> >> >> +			munmap(group->mmap_pages[i], sysconf(_SC_PAGE_SIZE));
> >> >> +			group->mmap_pages[i] = NULL;
> >> >> +		}
> >> >> +
> >> >> +		if (group->fds[i] != -1) {
> >> >> +			close(group->fds[i]);
> >> >> +			group->fds[i] = -1;
> >> >> +		}
> >> >> +	}
> >> >> +
> >> >> +	group->enabled = false;
> >> >> +}
> >> >> +
> >> >> +int
> >> >> +__rte_pmu_enable_group(void)
> >> >> +{
> >> >> +	struct rte_pmu_event_group *group = &RTE_PER_LCORE(_event_group);
> >> >> +	int ret;
> >> >> +
> >> >> +	if (rte_pmu.num_group_events == 0)
> >> >> +		return -ENODEV;
> >> >> +
> >> >> +	ret = open_events(group);
> >> >> +	if (ret)
> >> >> +		goto out;
> >> >> +
> >> >> +	ret = mmap_events(group);
> >> >> +	if (ret)
> >> >> +		goto out;
> >> >> +
> >> >> +	if (ioctl(group->fds[0], PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
> >> >> +		ret = -errno;
> >> >> +		goto out;
> >> >> +	}
> >> >> +
> >> >> +	if (ioctl(group->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
> >> >> +		ret = -errno;
> >> >> +		goto out;
> >> >> +	}
> >> >> +
> >> >> +	rte_spinlock_lock(&rte_pmu.lock);
> >> >> +	TAILQ_INSERT_TAIL(&rte_pmu.event_group_list, group, next);
> >> >
> >> >Hmm.. so we insert pointer to TLS variable into the global list?
> >> >Wonder what would happen if that thread get terminated?
> >>
> >> Nothing special. Any pointers to that thread-local in that thread are invalided.
> >>
> >> >Can memory from its TLS block get re-used (by other thread or for other purposes)?
> >> >
> >>
> >> Why would any other thread reuse that?
> >> Eventually main thread will need that data to do the cleanup.
> >
> >I understand that main thread would need to access that data.
> >I am not sure that it would be able to.
> >Imagine thread calls rte_pmu_read(...) and then terminates, while program continues to run.
> >As I understand address of its RTE_PER_LCORE(_event_group) will still remain in
> >rte_pmu.event_group_list, even if it is probably not valid any more.
> >
> 
> Okay got your point. In DPDK that will not happen. We do not spawn/kill lcores in runtime.

Well, yes usually DPDK app doesn't do that, but in theory there is an API to register/unregister
non-eal threads as lcores: rte_thread_register()/rte_thread_unregister().
Also besides of lcores there are control threads, some house-keeping threads, plus user is free
to spawn/kill his own threads.
Are you saying that this library doesn't support any of them?
If so, then at least that should be very clearly documented.
Though I think a proper way is to handle this situation somehow -
either return an error at __rte_pmu_enable_group(), or change the code to allow it to work
properly from any thread. I don't think it is that hard.

> In other scenarios such approach will not work because once thread terminates it's per-thread-data
> becomes invalid.
> 
> >> >
> >> >> +	rte_spinlock_unlock(&rte_pmu.lock);
> >> >> +	group->enabled = true;
> >> >> +
> >> >> +	return 0;
> >> >> +
> >> >> +out:
> >> >> +	cleanup_events(group);
> >> >> +
> >> >> +	return ret;
> >> >> +}
> >> >> +
> >> >> +static int
> >> >> +scan_pmus(void)
> >> >> +{
> >> >> +	char path[PATH_MAX];
> >> >> +	struct dirent *dent;
> >> >> +	const char *name;
> >> >> +	DIR *dirp;
> >> >> +
> >> >> +	dirp = opendir(EVENT_SOURCE_DEVICES_PATH);
> >> >> +	if (dirp == NULL)
> >> >> +		return -errno;
> >> >> +
> >> >> +	while ((dent = readdir(dirp))) {
> >> >> +		name = dent->d_name;
> >> >> +		if (name[0] == '.')
> >> >> +			continue;
> >> >> +
> >> >> +		/* sysfs entry should either contain cpus or be a cpu */
> >> >> +		if (!strcmp(name, "cpu"))
> >> >> +			break;
> >> >> +
> >> >> +		snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/cpus", name);
> >> >> +		if (access(path, F_OK) == 0)
> >> >> +			break;
> >> >> +	}
> >> >> +
> >> >> +	if (dent) {
> >> >> +		rte_pmu.name = strdup(name);
> >> >> +		if (rte_pmu.name == NULL) {
> >> >> +			closedir(dirp);
> >> >> +
> >> >> +			return -ENOMEM;
> >> >> +		}
> >> >> +	}
> >> >> +
> >> >> +	closedir(dirp);
> >> >> +
> >> >> +	return rte_pmu.name ? 0 : -ENODEV; }
> >> >> +
> >> >> +static struct rte_pmu_event *
> >> >> +new_event(const char *name)
> >> >> +{
> >> >> +	struct rte_pmu_event *event;
> >> >> +
> >> >> +	event = calloc(1, sizeof(*event));
> >> >> +	if (event == NULL)
> >> >> +		goto out;
> >> >> +
> >> >> +	event->name = strdup(name);
> >> >> +	if (event->name == NULL) {
> >> >> +		free(event);
> >> >> +		event = NULL;
> >> >> +	}
> >> >> +
> >> >> +out:
> >> >> +	return event;
> >> >> +}
> >> >> +
> >> >> +static void
> >> >> +free_event(struct rte_pmu_event *event) {
> >> >> +	free(event->name);
> >> >> +	free(event);
> >> >> +}
> >> >> +
> >> >> +int
> >> >> +rte_pmu_add_event(const char *name) {
> >> >> +	struct rte_pmu_event *event;
> >> >> +	char path[PATH_MAX];
> >> >> +
> >> >> +	if (rte_pmu.name == NULL)
> >> >> +		return -ENODEV;
> >> >> +
> >> >> +	if (rte_pmu.num_group_events + 1 >= MAX_NUM_GROUP_EVENTS)
> >> >> +		return -ENOSPC;
> >> >> +
> >> >> +	snprintf(path, sizeof(path), EVENT_SOURCE_DEVICES_PATH "/%s/events/%s", rte_pmu.name,
> >name);
> >> >> +	if (access(path, R_OK))
> >> >> +		return -ENODEV;
> >> >> +
> >> >> +	TAILQ_FOREACH(event, &rte_pmu.event_list, next) {
> >> >> +		if (!strcmp(event->name, name))
> >> >> +			return event->index;
> >> >> +		continue;
> >> >> +	}
> >> >> +
> >> >> +	event = new_event(name);
> >> >> +	if (event == NULL)
> >> >> +		return -ENOMEM;
> >> >> +
> >> >> +	event->index = rte_pmu.num_group_events++;
> >> >> +	TAILQ_INSERT_TAIL(&rte_pmu.event_list, event, next);
> >> >> +
> >> >> +	return event->index;
> >> >> +}
> >> >> +
> >> >> +int
> >> >> +rte_pmu_init(void)
> >> >> +{
> >> >> +	int ret;
> >> >> +
> >> >> +	/* Allow calling init from multiple contexts within a single thread. This simplifies
> >> >> +	 * resource management a bit e.g in case fast-path tracepoint has already been enabled
> >> >> +	 * via command line but application doesn't care enough and performs init/fini again.
> >> >> +	 */
> >> >> +	if (rte_pmu.initialized != 0) {
> >> >> +		rte_pmu.initialized++;
> >> >> +		return 0;
> >> >> +	}
> >> >> +
> >> >> +	ret = scan_pmus();
> >> >> +	if (ret)
> >> >> +		goto out;
> >> >> +
> >> >> +	ret = pmu_arch_init();
> >> >> +	if (ret)
> >> >> +		goto out;
> >> >> +
> >> >> +	TAILQ_INIT(&rte_pmu.event_list);
> >> >> +	TAILQ_INIT(&rte_pmu.event_group_list);
> >> >> +	rte_spinlock_init(&rte_pmu.lock);
> >> >> +	rte_pmu.initialized = 1;
> >> >> +
> >> >> +	return 0;
> >> >> +out:
> >> >> +	free(rte_pmu.name);
> >> >> +	rte_pmu.name = NULL;
> >> >> +
> >> >> +	return ret;
> >> >> +}
> >> >> +
> >> >> +void
> >> >> +rte_pmu_fini(void)
> >> >> +{
> >> >> +	struct rte_pmu_event_group *group, *tmp_group;
> >> >> +	struct rte_pmu_event *event, *tmp_event;
> >> >> +
> >> >> +	/* cleanup once init count drops to zero */
> >> >> +	if (rte_pmu.initialized == 0 || --rte_pmu.initialized != 0)
> >> >> +		return;
> >> >> +
> >> >> +	RTE_TAILQ_FOREACH_SAFE(event, &rte_pmu.event_list, next, tmp_event) {
> >> >> +		TAILQ_REMOVE(&rte_pmu.event_list, event, next);
> >> >> +		free_event(event);
> >> >> +	}
> >> >> +
> >> >> +	RTE_TAILQ_FOREACH_SAFE(group, &rte_pmu.event_group_list, next, tmp_group) {
> >> >> +		TAILQ_REMOVE(&rte_pmu.event_group_list, group, next);
> >> >> +		cleanup_events(group);
> >> >> +	}
> >> >> +
> >> >> +	pmu_arch_fini();
> >> >> +	free(rte_pmu.name);
> >> >> +	rte_pmu.name = NULL;
> >> >> +	rte_pmu.num_group_events = 0;
> >> >> +}
> >> >> diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h new file mode
> >> >> 100644 index 0000000000..6b664c3336
> >> >> --- /dev/null
> >> >> +++ b/lib/pmu/rte_pmu.h
> >> >> @@ -0,0 +1,212 @@
> >> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> >> + * Copyright(c) 2023 Marvell
> >> >> + */
> >> >> +
> >> >> +#ifndef _RTE_PMU_H_
> >> >> +#define _RTE_PMU_H_
> >> >> +
> >> >> +/**
> >> >> + * @file
> >> >> + *
> >> >> + * PMU event tracing operations
> >> >> + *
> >> >> + * This file defines generic API and types necessary to setup PMU
> >> >> +and
> >> >> + * read selected counters in runtime.
> >> >> + */
> >> >> +
> >> >> +#ifdef __cplusplus
> >> >> +extern "C" {
> >> >> +#endif
> >> >> +
> >> >> +#include <linux/perf_event.h>
> >> >> +
> >> >> +#include <rte_atomic.h>
> >> >> +#include <rte_branch_prediction.h> #include <rte_common.h>
> >> >> +#include <rte_compat.h> #include <rte_spinlock.h>
> >> >> +
> >> >> +/** Maximum number of events in a group */ #define
> >> >> +MAX_NUM_GROUP_EVENTS 8
> >> >
> >> >forgot RTE_ prefix.
> >> >In fact, do you really need number of events in group to be hard-coded?
> >> >Couldn't mmap_pages[] and fds[] be allocated dynamically by enable_group()?
> >> >
> >>
> >> 8 is reasonable number I think. X86/ARM have actually less that that (was that something like
> >4?).
> >> Moreover events are scheduled as a group so there must be enough hw
> >> counters available for that to succeed. So this number should cover current needs.
> >
> >If you think 8 will be enough to cover all possible future cases - I am ok either way.
> >Still need RTE_ prefix for it.
> >
> 
> Okay that can be added.
> 
> >> >> +
> >> >> +/**
> >> >> + * A structure describing a group of events.
> >> >> + */
> >> >> +struct rte_pmu_event_group {
> >> >> +	struct perf_event_mmap_page *mmap_pages[MAX_NUM_GROUP_EVENTS]; /**< array of user pages
> >*/
> >> >> +	int fds[MAX_NUM_GROUP_EVENTS]; /**< array of event descriptors */
> >> >> +	bool enabled; /**< true if group was enabled on particular lcore */
> >> >> +	TAILQ_ENTRY(rte_pmu_event_group) next; /**< list entry */ }
> >> >> +__rte_cache_aligned;
> >> >> +
> >> >
> >> >Even if we'd decide to keep rte_pmu_read() as static inline (still
> >> >not sure it is a good idea),
> >>
> >> We want to save as much cpu cycles as we possibly can and inlining
> >> does helps in that matter.
> >
> >Ok, so asking same question from different thread: how many cycles it will save?
> >What is the difference in terms of performance when you have this function inlined vs not inlined?
> >
> 
> On x86 setup which is not under load, no cpusets configured, etc *just* not inlining rte_pmu_read()
> decreases performance by roughly 24% (44 vs 58 cpu cycles). At least that is reported by
> trace_perf_autotest.

From my perspective 14 cycles is not that much...
Considering that the user will probably not call it very often, and by enabling measurements he
is probably already prepared to take some performance hit. 

> 
> >> >why these two struct below (rte_pmu_event and rte_pmu) have to be public?
> >> >I think both can be safely moved away from public headers.
> >> >
> >>
> >> struct rte_pmu_event can be hidden I guess.
> >> struct rte_pmu is used in this header hence cannot be moved elsewhere.
> >
> >Not sure why?
> >Is that because you use it inside rte_pmu_read()?
> >But that check I think can be safely moved into __rte_pmu_enable_group() or probably even into
> >rte_pmu_add_event().
> 
> No, we should not do that. Otherwise we'll need to call function. Even though check will happen
> early on still function prologue/epilogue will happen. This takes cycles.

Not necessarily. You can store this value in pmu_group itself,
and use this value to decide whether the pmu and group are initialized, etc.   
 
> >
> >> >
> >> >> +/**
> >> >> + * A structure describing an event.
> >> >> + */
> >> >> +struct rte_pmu_event {
> >> >> +	char *name; /**< name of an event */
> >> >> +	unsigned int index; /**< event index into fds/mmap_pages */
> >> >> +	TAILQ_ENTRY(rte_pmu_event) next; /**< list entry */ };
> >> >
> >> >> +
> >> >> +/**
> >> >> + * A PMU state container.
> >> >> + */
> >> >> +struct rte_pmu {
> >> >> +	char *name; /**< name of core PMU listed under /sys/bus/event_source/devices */
> >> >> +	rte_spinlock_t lock; /**< serialize access to event group list */
> >> >> +	TAILQ_HEAD(, rte_pmu_event_group) event_group_list; /**< list of event groups */
> >> >> +	unsigned int num_group_events; /**< number of events in a group */
> >> >> +	TAILQ_HEAD(, rte_pmu_event) event_list; /**< list of matching events */
> >> >> +	unsigned int initialized; /**< initialization counter */ };
> >> >> +
> >> >> +/** lcore event group */
> >> >> +RTE_DECLARE_PER_LCORE(struct rte_pmu_event_group, _event_group);
> >> >> +
> >> >> +/** PMU state container */
> >> >> +extern struct rte_pmu rte_pmu;
> >> >> +
> >> >> +/** Each architecture supporting PMU needs to provide its own
> >> >> +version */ #ifndef rte_pmu_pmc_read #define
> >> >> +rte_pmu_pmc_read(index) ({ 0; }) #endif
> >> >> +
> >> >> +/**
> >> >> + * @warning
> >> >> + * @b EXPERIMENTAL: this API may change without prior notice
> >> >> + *
> >> >> + * Read PMU counter.
> >> >> + *
> >> >> + * @warning This should be not called directly.
> >> >> + *
> >> >> + * @param pc
> >> >> + *   Pointer to the mmapped user page.
> >> >> + * @return
> >> >> + *   Counter value read from hardware.
> >> >> + */
> >> >> +static __rte_always_inline uint64_t __rte_pmu_read_userpage(struct
> >> >> +perf_event_mmap_page *pc) {
> >> >> +	uint64_t width, offset;
> >> >> +	uint32_t seq, index;
> >> >> +	int64_t pmc;
> >> >> +
> >> >> +	for (;;) {
> >> >> +		seq = pc->lock;
> >> >> +		rte_compiler_barrier();
> >> >> +		index = pc->index;
> >> >> +		offset = pc->offset;
> >> >> +		width = pc->pmc_width;
> >> >> +
> >> >> +		/* index set to 0 means that particular counter cannot be used */
> >> >> +		if (likely(pc->cap_user_rdpmc && index)) {
> >> >
> >> >In mmap_events() you return EPERM if cap_user_rdpmc is not enabled.
> >> >Do you need another check here? Or this capability can be disabled by
> >> >kernel at run-time?
> >> >
> >>
> >> That extra check in mmap_event() may be removed actually. Some archs
> >> allow disabling reading rdpmc (I think that on x86 one can do that) so this check needs to stay.
> >>
> >> >
> >> >> +			pmc = rte_pmu_pmc_read(index - 1);
> >> >> +			pmc <<= 64 - width;
> >> >> +			pmc >>= 64 - width;
> >> >> +			offset += pmc;
> >> >> +		}
> >> >> +
> >> >> +		rte_compiler_barrier();
> >> >> +
> >> >> +		if (likely(pc->lock == seq))
> >> >> +			return offset;
> >> >> +	}
> >> >> +
> >> >> +	return 0;
> >> >> +}
> >> >> +
> >> >> +/**
> >> >> + * @warning
> >> >> + * @b EXPERIMENTAL: this API may change without prior notice
> >> >> + *
> >> >> + * Enable group of events on the calling lcore.
> >> >> + *
> >> >> + * @warning This should be not called directly.
> >> >
> >> >__rte_internal ?
> >> >
> >>
> >> No this cannot be internal because that will make functions calling it
> >> internal as well hence apps won't be able to use that. This has
> >> already been brought up by one of the reviewers.
> >
> >Ok, then we probably can mark it with ' @internal' tag in formal comments?
> >
> 
> I added a warning not to call that directly. Since function is not internal (in DPDK parlance) per se
> I don’t think we should add more confusion that extra tag.

We are doing it in other places, why not add it here? 
 
> >>
> >> >> + *
> >> >> + * @return
> >> >> + *   0 in case of success, negative value otherwise.
> >> >> + */
> >> >> +__rte_experimental
> >> >> +int
> >> >> +__rte_pmu_enable_group(void);
> >> >> +
> >> >> +/**
> >> >> + * @warning
> >> >> + * @b EXPERIMENTAL: this API may change without prior notice
> >> >> + *
> >> >> + * Initialize PMU library.
> >> >> + *
> >> >> + * @warning This should be not called directly.
> >> >
> >> >Hmm.. then who should call it?
> >> >If it not supposed to be called directly, why to declare it here?
> >> >
> >>
> >> This is inlined and has one caller i.e rte_pmu_read().
> >
> >I thought we are talking here about rte_pmu_init().
> >I don't see where it is inlined and still not clear why it can't be called directly.
> >
> 
> No this cannot be called by init because groups are configured in runtime. That is why
> __rte_pmu_enable_group() is called once in rte_pmu_read().
> 
> *Other* code should not call that directly. And yes, that is not inlined - my mistake.

Once again: we are discussing comments for the rte_pmu_init() function.
Why can't it be called directly?
In test_pmu_read() you do call it directly.
 
> >> >> + *
> >> >> + * @return
> >> >> + *   0 in case of success, negative value otherwise.
> >> >> + */
> >> >
> >> >Probably worth to mention that this function is not MT safe.
> >> >Same for _fini_ and add_event.
> >> >Also worth to mention that all control-path functions
> >> >(init/fini/add_event) and data-path (pmu_read) can't be called concurrently.
> >> >
> >>
> >> Yes they are meant to be called from main thread.
> >
> >Ok, then please add that into formal API comments.
> >
> >> >> +__rte_experimental
> >> >> +int
> >> >> +rte_pmu_init(void);
> >> >> +

  reply	other threads:[~2023-02-28 11:58 UTC|newest]

Thread overview: 139+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-11  9:43 [PATCH 0/4] add support for self monitoring Tomasz Duszynski
2022-11-11  9:43 ` [PATCH 1/4] eal: add generic support for reading PMU events Tomasz Duszynski
2022-12-15  8:33   ` Mattias Rönnblom
2022-11-11  9:43 ` [PATCH 2/4] eal/arm: support reading ARM PMU events in runtime Tomasz Duszynski
2022-11-11  9:43 ` [PATCH 3/4] eal/x86: support reading Intel " Tomasz Duszynski
2022-11-11  9:43 ` [PATCH 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2022-11-21 12:11 ` [PATCH v2 0/4] add support for self monitoring Tomasz Duszynski
2022-11-21 12:11   ` [PATCH v2 1/4] eal: add generic support for reading PMU events Tomasz Duszynski
2022-11-21 12:11   ` [PATCH v2 2/4] eal/arm: support reading ARM PMU events in runtime Tomasz Duszynski
2022-11-21 12:11   ` [PATCH v2 3/4] eal/x86: support reading Intel " Tomasz Duszynski
2022-11-21 12:11   ` [PATCH v2 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2022-11-29  9:28   ` [PATCH v3 0/4] add support for self monitoring Tomasz Duszynski
2022-11-29  9:28     ` [PATCH v3 1/4] eal: add generic support for reading PMU events Tomasz Duszynski
2022-11-30  8:32       ` zhoumin
2022-12-13  8:05         ` [EXT] " Tomasz Duszynski
2022-11-29  9:28     ` [PATCH v3 2/4] eal/arm: support reading ARM PMU events in runtime Tomasz Duszynski
2022-11-29  9:28     ` [PATCH v3 3/4] eal/x86: support reading Intel " Tomasz Duszynski
2022-11-29  9:28     ` [PATCH v3 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2022-11-29 10:42     ` [PATCH v3 0/4] add support for self monitoring Morten Brørup
2022-12-13  8:23       ` Tomasz Duszynski
2022-12-13 10:43     ` [PATCH v4 " Tomasz Duszynski
2022-12-13 10:43       ` [PATCH v4 1/4] eal: add generic support for reading PMU events Tomasz Duszynski
2022-12-13 11:52         ` Morten Brørup
2022-12-14  9:38           ` Tomasz Duszynski
2022-12-14 10:41             ` Morten Brørup
2022-12-15  8:22               ` Morten Brørup
2022-12-16  7:33                 ` Morten Brørup
2023-01-05 21:14               ` Tomasz Duszynski
2023-01-05 22:07                 ` Morten Brørup
2023-01-08 15:41                   ` Tomasz Duszynski
2023-01-08 16:30                     ` Morten Brørup
2022-12-15  8:46         ` Mattias Rönnblom
2023-01-04 15:47           ` Tomasz Duszynski
2023-01-09  7:37         ` Ruifeng Wang
2023-01-09 15:40           ` Tomasz Duszynski
2022-12-13 10:43       ` [PATCH v4 2/4] eal/arm: support reading ARM PMU events in runtime Tomasz Duszynski
2022-12-13 10:43       ` [PATCH v4 3/4] eal/x86: support reading Intel " Tomasz Duszynski
2022-12-13 10:43       ` [PATCH v4 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-01-10 23:46       ` [PATCH v5 0/4] add support for self monitoring Tomasz Duszynski
2023-01-10 23:46         ` [PATCH v5 1/4] eal: add generic support for reading PMU events Tomasz Duszynski
2023-01-11  9:05           ` Morten Brørup
2023-01-11 16:20             ` Tomasz Duszynski
2023-01-11 16:54               ` Morten Brørup
2023-01-10 23:46         ` [PATCH v5 2/4] eal/arm: support reading ARM PMU events in runtime Tomasz Duszynski
2023-01-10 23:46         ` [PATCH v5 3/4] eal/x86: support reading Intel " Tomasz Duszynski
2023-01-10 23:46         ` [PATCH v5 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-01-11  0:32         ` [PATCH v5 0/4] add support for self monitoring Tyler Retzlaff
2023-01-11  9:31           ` Morten Brørup
2023-01-11 14:24             ` Tomasz Duszynski
2023-01-11 14:32               ` Bruce Richardson
2023-01-11  9:39           ` [EXT] " Tomasz Duszynski
2023-01-11 21:05             ` Tyler Retzlaff
2023-01-13  7:44               ` Tomasz Duszynski
2023-01-13 19:22                 ` Tyler Retzlaff
2023-01-14  9:53                   ` Morten Brørup
2023-01-19 23:39         ` [PATCH v6 " Tomasz Duszynski
2023-01-19 23:39           ` [PATCH v6 1/4] lib: add generic support for reading PMU events Tomasz Duszynski
2023-01-20  9:46             ` Morten Brørup
2023-01-26  9:40               ` Tomasz Duszynski
2023-01-26 12:29                 ` Morten Brørup
2023-01-26 12:59                   ` Bruce Richardson
2023-01-26 15:28                     ` [EXT] " Tomasz Duszynski
2023-02-02 14:27                       ` Morten Brørup
2023-01-26 15:17                   ` Tomasz Duszynski
2023-01-20 18:29             ` Tyler Retzlaff
2023-01-26  9:05               ` [EXT] " Tomasz Duszynski
2023-01-19 23:39           ` [PATCH v6 2/4] pmu: support reading ARM PMU events in runtime Tomasz Duszynski
2023-01-19 23:39           ` [PATCH v6 3/4] pmu: support reading Intel x86_64 " Tomasz Duszynski
2023-01-19 23:39           ` [PATCH v6 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-02-01 13:17           ` [PATCH v7 0/4] add support for self monitoring Tomasz Duszynski
2023-02-01 13:17             ` [PATCH v7 1/4] lib: add generic support for reading PMU events Tomasz Duszynski
2023-02-01 13:17             ` [PATCH v7 2/4] pmu: support reading ARM PMU events in runtime Tomasz Duszynski
2023-02-01 13:17             ` [PATCH v7 3/4] pmu: support reading Intel x86_64 " Tomasz Duszynski
2023-02-01 13:17             ` [PATCH v7 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-02-01 13:51             ` [PATCH v7 0/4] add support for self monitoring Morten Brørup
2023-02-02  7:54               ` Tomasz Duszynski
2023-02-02  9:43             ` [PATCH v8 " Tomasz Duszynski
2023-02-02  9:43               ` [PATCH v8 1/4] lib: add generic support for reading PMU events Tomasz Duszynski
2023-02-02 10:32                 ` Ruifeng Wang
2023-02-02  9:43               ` [PATCH v8 2/4] pmu: support reading ARM PMU events in runtime Tomasz Duszynski
2023-02-02  9:43               ` [PATCH v8 3/4] pmu: support reading Intel x86_64 " Tomasz Duszynski
2023-02-02  9:43               ` [PATCH v8 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-02-02 12:49               ` [PATCH v9 0/4] add support for self monitoring Tomasz Duszynski
2023-02-02 12:49                 ` [PATCH v9 1/4] lib: add generic support for reading PMU events Tomasz Duszynski
2023-02-06 11:02                   ` David Marchand
2023-02-09 11:09                     ` [EXT] " Tomasz Duszynski
2023-02-02 12:49                 ` [PATCH v9 2/4] pmu: support reading ARM PMU events in runtime Tomasz Duszynski
2023-02-02 12:49                 ` [PATCH v9 3/4] pmu: support reading Intel x86_64 " Tomasz Duszynski
2023-02-02 12:49                 ` [PATCH v9 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-02-13 11:31                 ` [PATCH v10 0/4] add support for self monitoring Tomasz Duszynski
2023-02-13 11:31                   ` [PATCH v10 1/4] lib: add generic support for reading PMU events Tomasz Duszynski
2023-02-16  7:39                     ` Ruifeng Wang
2023-02-16 14:44                       ` Tomasz Duszynski
2023-02-13 11:31                   ` [PATCH v10 2/4] pmu: support reading ARM PMU events in runtime Tomasz Duszynski
2023-02-16  7:41                     ` Ruifeng Wang
2023-02-13 11:31                   ` [PATCH v10 3/4] pmu: support reading Intel x86_64 " Tomasz Duszynski
2023-02-13 11:31                   ` [PATCH v10 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-02-16 17:54                   ` [PATCH v11 0/4] add support for self monitoring Tomasz Duszynski
2023-02-16 17:54                     ` [PATCH v11 1/4] lib: add generic support for reading PMU events Tomasz Duszynski
2023-02-16 23:50                       ` Konstantin Ananyev
2023-02-17  8:49                         ` [EXT] " Tomasz Duszynski
2023-02-17 10:14                           ` Konstantin Ananyev
2023-02-19 14:23                             ` Tomasz Duszynski
2023-02-20 14:31                               ` Konstantin Ananyev
2023-02-20 16:59                                 ` Tomasz Duszynski
2023-02-20 17:21                                   ` Konstantin Ananyev
2023-02-20 20:42                                     ` Tomasz Duszynski
2023-02-21  0:48                                       ` Konstantin Ananyev
2023-02-27  8:12                                         ` Tomasz Duszynski
2023-02-28 11:35                                           ` Konstantin Ananyev
2023-02-21 12:15                           ` Konstantin Ananyev
2023-02-21  2:17                       ` Konstantin Ananyev
2023-02-27  9:19                         ` [EXT] " Tomasz Duszynski
2023-02-27 20:53                           ` Konstantin Ananyev
2023-02-28  8:25                             ` Morten Brørup
2023-02-28 12:04                               ` Konstantin Ananyev
2023-02-28 13:15                                 ` Morten Brørup
2023-02-28 16:22                                 ` Morten Brørup
2023-03-05 16:30                                   ` Konstantin Ananyev
2023-02-28  9:57                             ` Tomasz Duszynski
2023-02-28 11:58                               ` Konstantin Ananyev [this message]
2023-02-16 17:55                     ` [PATCH v11 2/4] pmu: support reading ARM PMU events in runtime Tomasz Duszynski
2023-02-16 17:55                     ` [PATCH v11 3/4] pmu: support reading Intel x86_64 " Tomasz Duszynski
2023-02-16 17:55                     ` [PATCH v11 4/4] eal: add PMU support to tracing library Tomasz Duszynski
2023-02-16 18:03                     ` [PATCH v11 0/4] add support for self monitoring Ruifeng Wang
2023-05-04  8:02                     ` David Marchand
2023-07-31 12:33                       ` Thomas Monjalon
2023-08-07  8:11                         ` [EXT] " Tomasz Duszynski
2023-09-21  8:26                           ` David Marchand
2023-01-25 10:33         ` [PATCH 0/2] add platform bus Tomasz Duszynski
2023-01-25 10:33           ` [PATCH 1/2] lib: add helper to read strings from sysfs files Tomasz Duszynski
2023-01-25 10:39             ` Thomas Monjalon
2023-01-25 16:16               ` Tyler Retzlaff
2023-01-26  8:30                 ` [EXT] " Tomasz Duszynski
2023-01-26 17:21                   ` Tyler Retzlaff
2023-01-26  8:35               ` Tomasz Duszynski
2023-01-25 10:33           ` [PATCH 2/2] bus: add platform bus Tomasz Duszynski
2023-01-25 10:41           ` [PATCH 0/2] " Tomasz Duszynski
2023-02-16 20:56         ` [PATCH v5 0/4] add support for self monitoring Liang Ma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48ff47d1eb994c4fa78640658b752ec6@huawei.com \
    --to=konstantin.ananyev@huawei.com \
    --cc=dev@dpdk.org \
    --cc=konstantin.v.ananyev@yandex.ru \
    --cc=tduszynski@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).